isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +466 -43
- isa_model/core/cache/redis_cache.py +12 -3
- isa_model/core/config/config_manager.py +230 -3
- isa_model/core/config.py +90 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +21 -1
- isa_model/core/database/supabase_client.py +154 -19
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +27 -18
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_manager.py +35 -80
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +174 -18
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/services/intelligent_model_selector.py +399 -21
- isa_model/core/types.py +1 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -370
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +137 -10
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/openai_stt_service.py +22 -6
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +335 -24
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/ollama_llm_service.py +9 -2
- isa_model/inference/services/llm/openai_llm_service.py +33 -16
- isa_model/inference/services/llm/yyds_llm_service.py +8 -2
- isa_model/inference/services/vision/__init__.py +22 -1
- isa_model/inference/services/vision/helpers/image_utils.py +8 -5
- isa_model/inference/services/vision/isa_vision_service.py +65 -4
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +172 -22
- isa_model/serving/api/middleware/auth.py +8 -2
- isa_model/serving/api/middleware/security.py +23 -33
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +4 -1
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +138 -2
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +680 -18
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +68 -54
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
- isa_model-0.4.4.dist-info/RECORD +180 -0
- isa_model/core/security/secrets.py +0 -358
- isa_model/core/storage/hf_storage.py +0 -419
- isa_model/core/storage/minio_storage.py +0 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks/__init__.py +0 -27
- isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
- isa_model/eval/benchmarks.py +0 -701
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -24
- isa_model/eval/evaluators/audio_evaluator.py +0 -727
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/embedding_evaluator.py +0 -742
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/evaluators/vision_evaluator.py +0 -564
- isa_model/eval/example_evaluation.py +0 -395
- isa_model/eval/factory.py +0 -798
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/isa_benchmarks.py +0 -700
- isa_model/eval/isa_integration.py +0 -582
- isa_model/eval/metrics.py +0 -951
- isa_model/eval/tests/unit/test_basic.py +0 -396
- isa_model/serving/api/routes/evaluations.py +0 -579
- isa_model/training/__init__.py +0 -168
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -26
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/examples/intelligent_training_example.py +0 -281
- isa_model/training/factory.py +0 -424
- isa_model/training/intelligent/__init__.py +0 -25
- isa_model/training/intelligent/decision_engine.py +0 -643
- isa_model/training/intelligent/intelligent_factory.py +0 -888
- isa_model/training/intelligent/knowledge_base.py +0 -751
- isa_model/training/intelligent/resource_optimizer.py +0 -839
- isa_model/training/intelligent/task_classifier.py +0 -576
- isa_model/training/storage/__init__.py +0 -24
- isa_model/training/storage/core_integration.py +0 -439
- isa_model/training/storage/training_repository.py +0 -552
- isa_model/training/storage/training_storage.py +0 -628
- isa_model-0.4.0.dist-info/RECORD +0 -182
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,139 @@
|
|
1
|
+
"""
|
2
|
+
Database Connection Dependencies
|
3
|
+
|
4
|
+
Provides database connections and transaction management
|
5
|
+
with automatic tenant context handling.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import asyncio
|
9
|
+
import asyncpg
|
10
|
+
import logging
|
11
|
+
import os
|
12
|
+
from contextlib import asynccontextmanager
|
13
|
+
from typing import Optional
|
14
|
+
|
15
|
+
from ..middleware.tenant_context import get_tenant_context
|
16
|
+
|
17
|
+
logger = logging.getLogger(__name__)
|
18
|
+
|
19
|
+
# Global connection pool
|
20
|
+
_connection_pool: Optional[asyncpg.Pool] = None
|
21
|
+
|
22
|
+
async def initialize_database_pool():
    """Create the module-wide asyncpg pool on first use and return it.

    Later calls return the pool that already exists. The connection string
    is read from the ``DATABASE_URL`` environment variable.

    Returns:
        The shared ``asyncpg`` pool.

    Raises:
        RuntimeError: If ``DATABASE_URL`` is unset or empty.
        Exception: Whatever ``asyncpg.create_pool`` raises; it is logged and
            re-raised unchanged.
    """
    global _connection_pool

    # A pool created by an earlier call is reused as-is.
    if _connection_pool:
        return _connection_pool

    dsn = os.getenv("DATABASE_URL")
    if not dsn:
        raise RuntimeError("DATABASE_URL environment variable not set")

    # Pool sizing, per-command timeout and the session settings applied to
    # every connection the pool opens.
    pool_options = {
        "min_size": 5,
        "max_size": 20,
        "command_timeout": 60,
        "server_settings": {
            'search_path': 'dev',
            'timezone': 'UTC'
        },
    }

    try:
        _connection_pool = await asyncpg.create_pool(dsn, **pool_options)
        logger.info("Database connection pool initialized")
        return _connection_pool
    except Exception as e:
        logger.error(f"Failed to initialize database pool: {e}")
        raise
|
51
|
+
|
52
|
+
async def close_database_pool():
    """Close the shared connection pool, if one exists, and forget it.

    Does nothing when no pool has been created. After a successful close the
    module-level reference is reset to ``None``.
    """
    global _connection_pool

    # Nothing to shut down.
    if not _connection_pool:
        return

    await _connection_pool.close()
    _connection_pool = None
    logger.info("Database connection pool closed")
|
60
|
+
|
61
|
+
@asynccontextmanager
async def get_database_connection():
    """Yield a pooled connection with the tenant setting applied.

    Steps performed by this context manager:
    1. Creates the pool on first use and acquires a connection from it.
    2. If ``get_tenant_context()`` returns a context, stores its
       ``organization_id`` in the ``app.current_organization_id`` setting.
    3. Yields the connection to the caller.
    4. Resets the setting to an empty string and releases the connection.

    No transaction is opened here; use ``get_database_transaction`` for that.

    The setting is written at session scope (``is_local = false``). A
    transaction-local value would be dropped as soon as the ``set_config``
    statement's own implicit transaction finished, i.e. before any of the
    caller's queries ran.

    Raises:
        Exception: Any error raised while preparing the connection or inside
            the caller's ``async with`` body is logged and re-raised.
    """
    # Prefer the value returned by the initializer over re-reading the global.
    pool = _connection_pool or await initialize_database_pool()

    async with pool.acquire() as conn:
        try:
            # Set tenant context if available
            tenant_context = get_tenant_context()
            if tenant_context:
                await conn.execute(
                    "SELECT set_config('app.current_organization_id', $1, false)",
                    tenant_context.organization_id
                )

            yield conn

        except Exception as e:
            logger.error(f"Database operation error: {e}")
            raise
        finally:
            # The value is session-scoped, so it must be cleared before the
            # connection goes back to the pool for another request.
            try:
                await conn.execute(
                    "SELECT set_config('app.current_organization_id', '', false)"
                )
            except Exception as cleanup_error:
                # Best effort: a cleanup failure must not mask the original
                # error, but it should not be invisible either.
                logger.warning(
                    f"Failed to clear tenant context on connection: {cleanup_error}"
                )
|
98
|
+
|
99
|
+
@asynccontextmanager
async def get_database_transaction():
    """Yield a pooled connection with a transaction already open on it.

    The connection comes from ``get_database_connection``. The transaction
    is entered before the connection is handed to the caller and exited when
    the caller's ``async with`` body finishes.
    """
    # One statement, two nested contexts: connection first, then transaction.
    async with get_database_connection() as connection, connection.transaction():
        yield connection
|
107
|
+
|
108
|
+
async def execute_query(query: str, *args, fetch_type: str = "fetch"):
    """Run one statement on a connection borrowed for this call only.

    Args:
        query: SQL text.
        *args: Positional query parameters, passed through to the connection.
        fetch_type: Name of the connection method to call: 'fetch',
            'fetchrow', 'fetchval', or 'execute'.

    Returns:
        Whatever the selected connection method returns.

    Raises:
        ValueError: If ``fetch_type`` is not one of the four supported names.
    """
    supported_modes = ("fetch", "fetchrow", "fetchval", "execute")

    async with get_database_connection() as connection:
        # The mode is checked only after the connection has been acquired.
        if fetch_type not in supported_modes:
            raise ValueError(f"Invalid fetch_type: {fetch_type}")

        # Each supported mode is the name of the connection method to call.
        run = getattr(connection, fetch_type)
        return await run(query, *args)
|
128
|
+
|
129
|
+
# FastAPI dependency functions
|
130
|
+
|
131
|
+
async def get_db_connection():
    """FastAPI dependency: yield a connection from ``get_database_connection``.

    Written as an async generator so the connection context is exited after
    the consumer is done with the yielded value.
    """
    async with get_database_connection() as connection:
        yield connection
|
135
|
+
|
136
|
+
async def get_db_transaction():
    """FastAPI dependency: yield a connection from ``get_database_transaction``.

    Written as an async generator so the transaction context is exited after
    the consumer is done with the yielded value.
    """
    async with get_database_transaction() as connection:
        yield connection
|
@@ -0,0 +1,284 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
"""
|
5
|
+
Improved Error Handling for ISA Model API
|
6
|
+
Provides consistent error responses and better user experience
|
7
|
+
"""
|
8
|
+
|
9
|
+
import logging
|
10
|
+
import traceback
|
11
|
+
from typing import Dict, Any, Optional, Union
|
12
|
+
from enum import Enum
|
13
|
+
from fastapi import HTTPException, status
|
14
|
+
from fastapi.responses import JSONResponse
|
15
|
+
|
16
|
+
logger = logging.getLogger(__name__)
|
17
|
+
|
18
|
+
class ErrorCode(str, Enum):
    """Closed set of error identifiers returned by the ISA Model API.

    Every member is also a ``str`` whose value equals its name, so members
    compare equal to the plain string and serialize as that string.
    """

    # --- Problems with the request itself (input, limits, auth) ---
    INVALID_INPUT = "INVALID_INPUT"
    MISSING_PARAMETER = "MISSING_PARAMETER"
    INVALID_MODEL = "INVALID_MODEL"
    INVALID_PROVIDER = "INVALID_PROVIDER"
    INVALID_SERVICE_TYPE = "INVALID_SERVICE_TYPE"
    INVALID_TASK = "INVALID_TASK"
    UNSUPPORTED_FORMAT = "UNSUPPORTED_FORMAT"
    FILE_TOO_LARGE = "FILE_TOO_LARGE"
    RATE_LIMIT_EXCEEDED = "RATE_LIMIT_EXCEEDED"
    AUTHENTICATION_FAILED = "AUTHENTICATION_FAILED"
    AUTHORIZATION_FAILED = "AUTHORIZATION_FAILED"

    # --- Failures on the serving side ---
    SERVICE_UNAVAILABLE = "SERVICE_UNAVAILABLE"
    MODEL_LOAD_FAILED = "MODEL_LOAD_FAILED"
    INFERENCE_FAILED = "INFERENCE_FAILED"
    EXTERNAL_API_ERROR = "EXTERNAL_API_ERROR"
    DATABASE_ERROR = "DATABASE_ERROR"
    TIMEOUT_ERROR = "TIMEOUT_ERROR"
    INTERNAL_ERROR = "INTERNAL_ERROR"

    # --- Configuration and credential problems ---
    CONFIG_ERROR = "CONFIG_ERROR"
    API_KEY_MISSING = "API_KEY_MISSING"
    API_KEY_INVALID = "API_KEY_INVALID"
|
47
|
+
|
48
|
+
class ISAModelError(Exception):
    """Base exception carrying an error code, HTTP status and user-facing text.

    Attributes are plain instance fields: ``message``, ``error_code``,
    ``status_code``, ``details`` and ``user_message``.
    """

    # Text used when an error code has no entry in the table below.
    _DEFAULT_USER_MESSAGE = "出现了未知错误,请稍后再试。"

    # User-facing text for each known error code.
    _USER_MESSAGES = {
        ErrorCode.INVALID_INPUT: "请检查您的输入数据格式是否正确。",
        ErrorCode.MISSING_PARAMETER: "请提供必需的参数。",
        ErrorCode.INVALID_MODEL: "指定的模型不存在或不可用,请选择其他模型。",
        ErrorCode.INVALID_PROVIDER: "指定的提供商不支持,请选择其他提供商。",
        ErrorCode.INVALID_SERVICE_TYPE: "不支持的服务类型,请选择text、vision、audio、image或embedding。",
        ErrorCode.INVALID_TASK: "不支持的任务类型,请查看API文档了解支持的任务。",
        ErrorCode.UNSUPPORTED_FORMAT: "不支持的文件格式,请使用支持的格式。",
        ErrorCode.FILE_TOO_LARGE: "文件太大,请压缩后重试。",
        ErrorCode.RATE_LIMIT_EXCEEDED: "请求过于频繁,请稍后再试。",
        ErrorCode.AUTHENTICATION_FAILED: "身份验证失败,请检查您的凭据。",
        ErrorCode.AUTHORIZATION_FAILED: "您没有权限执行此操作。",
        ErrorCode.SERVICE_UNAVAILABLE: "服务暂时不可用,请稍后再试。",
        ErrorCode.MODEL_LOAD_FAILED: "模型加载失败,请稍后再试或选择其他模型。",
        ErrorCode.INFERENCE_FAILED: "推理过程出现错误,请重试。",
        ErrorCode.EXTERNAL_API_ERROR: "外部服务出现问题,请稍后再试。",
        ErrorCode.DATABASE_ERROR: "数据库连接问题,请稍后再试。",
        ErrorCode.TIMEOUT_ERROR: "请求超时,请稍后再试。",
        ErrorCode.INTERNAL_ERROR: "内部服务器错误,请联系技术支持。",
        ErrorCode.CONFIG_ERROR: "配置错误,请联系管理员。",
        ErrorCode.API_KEY_MISSING: "缺少API密钥,请在配置中提供。",
        ErrorCode.API_KEY_INVALID: "API密钥无效,请检查配置。"
    }

    def __init__(
        self,
        message: str,
        error_code: ErrorCode,
        status_code: int = 500,
        details: Optional[Dict[str, Any]] = None,
        user_message: Optional[str] = None
    ):
        """Store the error fields.

        Args:
            message: Technical description; also used as the exception text.
            error_code: Identifier used to look up the default user message.
            status_code: HTTP status associated with the error.
            details: Extra context; a falsy value is replaced by ``{}``.
            user_message: Text for end users; when falsy, the text mapped to
                ``error_code`` is used instead.
        """
        super().__init__(message)
        self.message = message
        self.error_code = error_code
        self.status_code = status_code
        self.details = details or {}
        # error_code is assigned above because the fallback lookup reads it.
        if user_message:
            self.user_message = user_message
        else:
            self.user_message = self._generate_user_message()

    def _generate_user_message(self) -> str:
        """Return the user-facing text mapped to ``self.error_code``."""
        return self._USER_MESSAGES.get(self.error_code, self._DEFAULT_USER_MESSAGE)

    def to_dict(self) -> Dict[str, Any]:
        """Return the error fields as a dictionary for an API response."""
        payload: Dict[str, Any] = {}
        payload["error_code"] = self.error_code.value
        payload["message"] = self.message
        payload["user_message"] = self.user_message
        payload["status_code"] = self.status_code
        payload["details"] = self.details
        return payload
|
103
|
+
|
104
|
+
def create_error_response(
    error: Union[Exception, ISAModelError, str],
    status_code: Optional[int] = None,
    error_code: Optional[ErrorCode] = None,
    details: Optional[Dict[str, Any]] = None,
    include_traceback: bool = False
) -> Dict[str, Any]:
    """Build the standard error payload for an exception or message.

    Args:
        error: An ``ISAModelError`` (used as-is), any other exception, or a
            plain message. Non-``ISAModelError`` inputs are wrapped in an
            ``ISAModelError`` built from the remaining arguments.
        status_code: Status for wrapped errors; defaults to 500. Ignored when
            ``error`` is already an ``ISAModelError``.
        error_code: Code for wrapped errors; defaults to
            ``ErrorCode.INTERNAL_ERROR``. Ignored for ``ISAModelError`` input.
        details: Extra context for wrapped errors. Ignored for
            ``ISAModelError`` input, which carries its own details.
        include_traceback: When true and ``error`` is an exception, add its
            formatted traceback under ``metadata["traceback"]``.

    Returns:
        A dict with ``success``, ``error``, ``error_code``, ``user_message``,
        ``details`` and ``metadata`` (``error_type``, ``status_code``).
    """
    if isinstance(error, ISAModelError):
        isa_error = error
        error_type = "ISAModelError"
    else:
        # Generic exceptions report their class name; anything else is
        # treated as a plain message.
        if isinstance(error, Exception):
            error_type = type(error).__name__
        else:
            error_type = "StringError"

        isa_error = ISAModelError(
            message=str(error),
            error_code=error_code or ErrorCode.INTERNAL_ERROR,
            status_code=status_code or 500,
            details=details
        )

    response = {
        "success": False,
        "error": isa_error.message,
        "error_code": isa_error.error_code.value,
        "user_message": isa_error.user_message,
        "details": isa_error.details,
        "metadata": {
            "error_type": error_type,
            "status_code": isa_error.status_code
        }
    }

    if include_traceback and isinstance(error, BaseException):
        # Format the traceback of the exception that was passed in, rather
        # than whatever exception happens to be handled at call time.
        response["metadata"]["traceback"] = "".join(
            traceback.format_exception(type(error), error, error.__traceback__)
        )

    return response
|
178
|
+
|
179
|
+
def handle_validation_error(exc: Exception) -> Dict[str, Any]:
    """Turn a validation exception into the standard 400 error payload.

    If ``exc`` exposes an ``errors()`` method (as Pydantic validation errors
    do), each entry is summarized under ``details["validation_errors"]`` with
    its field path, message and type. Otherwise ``details`` stays empty.
    """
    details = {}

    if hasattr(exc, 'errors'):
        # One summary per reported problem; the location tuple is flattened
        # into a single "a -> b -> c" path.
        details["validation_errors"] = [
            {
                "field": " -> ".join(str(part) for part in item.get('loc', [])),
                "message": item.get('msg', ''),
                "type": item.get('type', '')
            }
            for item in exc.errors()
        ]

    return create_error_response(
        error="请求数据格式不正确",
        status_code=400,
        error_code=ErrorCode.INVALID_INPUT,
        details=details
    )
|
202
|
+
|
203
|
+
def handle_service_error(
    service_name: str,
    error: Exception,
    fallback_available: bool = False
) -> Dict[str, Any]:
    """Build an error payload for a failing backend service.

    The error code and user-facing message are chosen by looking for
    keywords in the lower-cased error text, checked in this order:
    "connection", "timeout", "authentication" / "api key", "rate limit".
    Anything else is reported as ``SERVICE_UNAVAILABLE``.

    Args:
        service_name: Name of the service, included in details and messages.
        error: The exception raised by the service.
        fallback_available: Whether a fallback service took over. Selects the
            wording of some messages and the reported status (200 vs 503).

    Returns:
        The standard error payload, with ``user_message`` replaced by the
        service-specific message chosen here.
    """
    details = {
        "service": service_name,
        "fallback_available": fallback_available
    }

    # Lower-case the error text once; every keyword check below uses it.
    error_text = str(error).lower()

    # Determine error code based on service and error type
    if "connection" in error_text:
        error_code = ErrorCode.EXTERNAL_API_ERROR
        if fallback_available:
            user_message = f"{service_name}服务暂时不可用,已切换到备用服务。"
        else:
            user_message = f"{service_name}服务连接失败,请稍后再试。"
    elif "timeout" in error_text:
        error_code = ErrorCode.TIMEOUT_ERROR
        user_message = f"{service_name}服务响应超时,请稍后再试。"
    elif "authentication" in error_text or "api key" in error_text:
        error_code = ErrorCode.API_KEY_INVALID
        user_message = f"{service_name}服务认证失败,请检查API密钥配置。"
    elif "rate limit" in error_text:
        error_code = ErrorCode.RATE_LIMIT_EXCEEDED
        user_message = f"{service_name}服务请求频率过高,请稍后再试。"
    else:
        error_code = ErrorCode.SERVICE_UNAVAILABLE
        if fallback_available:
            user_message = f"{service_name}服务出现问题,已切换到备用服务。"
        else:
            user_message = f"{service_name}服务暂时不可用,请稍后再试。"

    response = create_error_response(
        error=str(error),
        status_code=503 if not fallback_available else 200,
        error_code=error_code,
        details=details
    )

    # The generic payload carries only the default text for the error code;
    # replace it with the service-specific message computed above, which was
    # previously built and then discarded.
    response["user_message"] = user_message
    return response
|
244
|
+
|
245
|
+
def create_http_exception(
    message: str,
    status_code: int = 500,
    error_code: Optional[ErrorCode] = None,
    details: Optional[Dict[str, Any]] = None
) -> HTTPException:
    """Wrap a message in an ``HTTPException`` whose detail is the standard payload.

    Args:
        message: Error text passed to ``create_error_response``.
        status_code: HTTP status used both in the payload and the exception.
        error_code: Optional error code forwarded to the payload builder.
        details: Optional extra context forwarded to the payload builder.

    Returns:
        An ``HTTPException`` ready to be raised by the caller.
    """
    payload = create_error_response(message, status_code, error_code, details)
    return HTTPException(status_code=status_code, detail=payload)
|
264
|
+
|
265
|
+
# Convenience functions for common errors
|
266
|
+
def invalid_input_error(message: str, details: Optional[Dict] = None) -> HTTPException:
    """Return a 400 ``HTTPException`` tagged ``ErrorCode.INVALID_INPUT``."""
    return create_http_exception(
        message,
        status_code=400,
        error_code=ErrorCode.INVALID_INPUT,
        details=details,
    )
|
268
|
+
|
269
|
+
def model_not_found_error(model_name: str) -> HTTPException:
    """Return a 404 ``HTTPException`` tagged ``ErrorCode.INVALID_MODEL``.

    The details carry the requested model name and a hint pointing at the
    model listing endpoint.
    """
    details = {"model": model_name, "suggestion": "请查看 /api/v1/models 获取可用模型列表"}
    message = f"模型 '{model_name}' 不存在或不可用"
    return create_http_exception(message, 404, ErrorCode.INVALID_MODEL, details)
|
276
|
+
|
277
|
+
def service_unavailable_error(service_name: str, fallback: bool = False) -> HTTPException:
    """Return an ``HTTPException`` tagged ``ErrorCode.SERVICE_UNAVAILABLE``.

    With ``fallback`` set, the status is 200 and the message says the service
    switched to fallback mode; otherwise the status is 503 and the message
    says the service is temporarily unavailable.
    """
    if fallback:
        status_code = 200
    else:
        status_code = 503

    message = f"{service_name}服务{'已切换到备用模式' if fallback else '暂时不可用'}"
    details = {"service": service_name, "fallback_mode": fallback}
    return create_http_exception(
        message,
        status_code,
        ErrorCode.SERVICE_UNAVAILABLE,
        details
    )
|
@@ -15,34 +15,29 @@ import logging
|
|
15
15
|
import os
|
16
16
|
from typing import Dict, Any, Optional
|
17
17
|
|
18
|
-
from .routes import health, unified, deployments, logs, analytics, settings, evaluations
|
18
|
+
from .routes import health, unified, deployments, logs, analytics, settings, inference_monitoring, webhooks, tenants # config, training, annotation, and evaluations temporarily disabled
|
19
19
|
from .middleware.request_logger import RequestLoggerMiddleware
|
20
20
|
from .middleware.security import setup_security_middleware, check_redis_health
|
21
|
+
from .middleware.tenant_context import TenantContextMiddleware
|
21
22
|
from .startup import run_startup_initialization
|
23
|
+
from ...core.logging import api_logger, setup_logger
|
22
24
|
|
23
|
-
logger = logging
|
25
|
+
logger = api_logger # Use Loki-configured logger instead of standard logging
|
24
26
|
|
25
27
|
def configure_logging():
|
26
|
-
"""Configure logging based on environment variables
|
28
|
+
"""Configure logging based on environment variables
|
29
|
+
|
30
|
+
Note: Loki integration is handled automatically by isa_model.core.logging.setup_logger
|
31
|
+
This function only sets log levels for existing loggers.
|
32
|
+
"""
|
27
33
|
log_level = os.getenv('LOG_LEVEL', 'INFO').upper()
|
28
34
|
verbose_logging = os.getenv('VERBOSE_LOGGING', 'false').lower() == 'true'
|
29
|
-
|
35
|
+
|
30
36
|
# Set log level
|
31
37
|
level = getattr(logging, log_level, logging.INFO)
|
32
|
-
|
33
|
-
#
|
34
|
-
|
35
|
-
log_format = '%(asctime)s - %(name)s - %(levelname)s - %(funcName)s:%(lineno)d - %(message)s'
|
36
|
-
else:
|
37
|
-
log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
38
|
-
|
39
|
-
# Configure root logger
|
40
|
-
logging.basicConfig(
|
41
|
-
level=level,
|
42
|
-
format=log_format,
|
43
|
-
datefmt='%Y-%m-%d %H:%M:%S',
|
44
|
-
force=True # Override existing configuration
|
45
|
-
)
|
38
|
+
|
39
|
+
# Note: Don't call logging.basicConfig() here as it conflicts with Loki handlers
|
40
|
+
# The Loki logger (api_logger) is already configured with proper handlers
|
46
41
|
|
47
42
|
# Set uvicorn logger level to match
|
48
43
|
uvicorn_logger = logging.getLogger("uvicorn")
|
@@ -52,6 +47,69 @@ def configure_logging():
|
|
52
47
|
app_logger = logging.getLogger("isa_model")
|
53
48
|
app_logger.setLevel(level)
|
54
49
|
|
50
|
+
# Suppress verbose third-party library logs
|
51
|
+
# HTTP libraries - only show WARNING and above
|
52
|
+
logging.getLogger("httpcore").setLevel(logging.WARNING)
|
53
|
+
logging.getLogger("httpx").setLevel(logging.WARNING)
|
54
|
+
logging.getLogger("httpcore.http11").setLevel(logging.WARNING)
|
55
|
+
logging.getLogger("httpcore.connection").setLevel(logging.WARNING)
|
56
|
+
|
57
|
+
# Database and ORM libraries
|
58
|
+
logging.getLogger("supabase").setLevel(logging.WARNING)
|
59
|
+
logging.getLogger("postgrest").setLevel(logging.WARNING)
|
60
|
+
|
61
|
+
# AI/ML libraries
|
62
|
+
logging.getLogger("openai").setLevel(logging.WARNING)
|
63
|
+
logging.getLogger("anthropic").setLevel(logging.WARNING)
|
64
|
+
logging.getLogger("google").setLevel(logging.WARNING)
|
65
|
+
logging.getLogger("google.cloud").setLevel(logging.WARNING)
|
66
|
+
logging.getLogger("google.generativeai").setLevel(logging.WARNING)
|
67
|
+
|
68
|
+
# Other verbose libraries
|
69
|
+
logging.getLogger("urllib3").setLevel(logging.WARNING)
|
70
|
+
logging.getLogger("requests").setLevel(logging.WARNING)
|
71
|
+
logging.getLogger("aiohttp").setLevel(logging.WARNING)
|
72
|
+
|
73
|
+
# Reduce startup debug logs
|
74
|
+
if not verbose_logging:
|
75
|
+
# Reduce startup initialization debug logs
|
76
|
+
startup_logger = logging.getLogger("isa_model.serving.api.startup")
|
77
|
+
startup_logger.setLevel(logging.WARNING)
|
78
|
+
|
79
|
+
# Reduce model registry debug logs
|
80
|
+
model_logger = logging.getLogger("isa_model.core.models.model_repo")
|
81
|
+
model_logger.setLevel(logging.WARNING)
|
82
|
+
|
83
|
+
# Reduce intelligent selector debug logs
|
84
|
+
selector_logger = logging.getLogger("isa_model.core.services.intelligent_model_selector")
|
85
|
+
selector_logger.setLevel(logging.WARNING)
|
86
|
+
|
87
|
+
# Training module removed - logger configuration no longer needed
|
88
|
+
|
89
|
+
# Reduce knowledge base logs
|
90
|
+
kb_logger = logging.getLogger("isa_model.core.knowledge_base")
|
91
|
+
kb_logger.setLevel(logging.WARNING)
|
92
|
+
|
93
|
+
# Reduce database migration logs
|
94
|
+
migration_logger = logging.getLogger("isa_model.core.database.migrations")
|
95
|
+
migration_logger.setLevel(logging.WARNING)
|
96
|
+
|
97
|
+
# Reduce AI factory logs
|
98
|
+
ai_factory_logger = logging.getLogger("isa_model.inference.ai_factory")
|
99
|
+
ai_factory_logger.setLevel(logging.WARNING)
|
100
|
+
|
101
|
+
# Reduce embedding service logs
|
102
|
+
embed_logger = logging.getLogger("isa_model.inference.services.embedding")
|
103
|
+
embed_logger.setLevel(logging.WARNING)
|
104
|
+
|
105
|
+
# Reduce config manager logs
|
106
|
+
config_logger = logging.getLogger("isa_model.core.config")
|
107
|
+
config_logger.setLevel(logging.WARNING)
|
108
|
+
|
109
|
+
# Reduce core integration logs
|
110
|
+
core_logger = logging.getLogger("isa_model.core")
|
111
|
+
core_logger.setLevel(logging.WARNING)
|
112
|
+
|
55
113
|
logger.info(f"Logging configured - Level: {log_level}, Verbose: {verbose_logging}")
|
56
114
|
|
57
115
|
def create_app(config: Dict[str, Any] = None) -> FastAPI:
|
@@ -79,6 +137,9 @@ def create_app(config: Dict[str, Any] = None) -> FastAPI:
|
|
79
137
|
# This includes CORS, rate limiting, security headers, request validation
|
80
138
|
setup_security_middleware(app)
|
81
139
|
|
140
|
+
# Add tenant context middleware (before request logger)
|
141
|
+
app.add_middleware(TenantContextMiddleware)
|
142
|
+
|
82
143
|
# Add custom middleware
|
83
144
|
app.add_middleware(RequestLoggerMiddleware)
|
84
145
|
|
@@ -112,8 +173,26 @@ def create_app(config: Dict[str, Any] = None) -> FastAPI:
|
|
112
173
|
# SETTINGS API - Configuration and API key management
|
113
174
|
app.include_router(settings.router, prefix="/api/v1/settings", tags=["settings"])
|
114
175
|
|
115
|
-
# EVALUATIONS API -
|
116
|
-
app.include_router(evaluations.router, prefix="/api/v1/evaluations", tags=["evaluations"])
|
176
|
+
# EVALUATIONS API - Temporarily disabled for staging optimization
|
177
|
+
# app.include_router(evaluations.router, prefix="/api/v1/evaluations", tags=["evaluations"])
|
178
|
+
|
179
|
+
# INFERENCE MONITORING API - InfluxDB-based inference monitoring and analytics
|
180
|
+
app.include_router(inference_monitoring.router, prefix="/api/v1/monitoring", tags=["monitoring"])
|
181
|
+
|
182
|
+
# TRAINING API - Disabled for staging optimization
|
183
|
+
# app.include_router(training.router, prefix="/api/v1/training", tags=["training"])
|
184
|
+
|
185
|
+
# WEBHOOKS API - Webhook management and notifications
|
186
|
+
app.include_router(webhooks.router, prefix="/api/v1/webhooks", tags=["webhooks"])
|
187
|
+
|
188
|
+
# TENANTS API - Multi-tenancy and organization management
|
189
|
+
app.include_router(tenants.router, prefix="/api/v1/tenants", tags=["tenants"])
|
190
|
+
|
191
|
+
# ANNOTATION API - Temporarily disabled for staging optimization
|
192
|
+
# app.include_router(annotation.router, prefix="/api/v1/annotations", tags=["annotations"])
|
193
|
+
|
194
|
+
# CONFIG API - Configuration management
|
195
|
+
# app.include_router(config.router, prefix="/api/v1/config", tags=["config"]) # Temporarily disabled
|
117
196
|
|
118
197
|
# Mount static files
|
119
198
|
static_path = os.path.join(os.path.dirname(__file__), "../static")
|
@@ -149,7 +228,45 @@ def create_app(config: Dict[str, Any] = None) -> FastAPI:
|
|
149
228
|
logger.info("✅ Application startup completed successfully")
|
150
229
|
except Exception as e:
|
151
230
|
logger.error(f"❌ Application startup failed: {e}")
|
152
|
-
|
231
|
+
logger.error("⚠️ Server will continue but may have reduced functionality")
|
232
|
+
# Store startup failure state for health checks
|
233
|
+
app.state.startup_failed = True
|
234
|
+
app.state.startup_error = str(e)
|
235
|
+
# Continue running to allow debugging and partial functionality
|
236
|
+
|
237
|
+
# Add shutdown event handler
|
238
|
+
@app.on_event("shutdown")
|
239
|
+
async def shutdown_event():
|
240
|
+
logger.info("🧹 Starting application shutdown cleanup...")
|
241
|
+
try:
|
242
|
+
# Close database connections
|
243
|
+
try:
|
244
|
+
from .dependencies.database import close_database_pool
|
245
|
+
await close_database_pool()
|
246
|
+
logger.info("✅ Database connections closed")
|
247
|
+
except Exception as e:
|
248
|
+
logger.error(f"❌ Error closing database connections: {e}")
|
249
|
+
|
250
|
+
# Clean up AI factory and services
|
251
|
+
try:
|
252
|
+
from ...inference.ai_factory import AIFactory
|
253
|
+
factory = AIFactory.get_instance()
|
254
|
+
await factory.cleanup()
|
255
|
+
logger.info("✅ AI Factory cleaned up")
|
256
|
+
except Exception as e:
|
257
|
+
logger.error(f"❌ Error cleaning up AI Factory: {e}")
|
258
|
+
|
259
|
+
# Clean up startup initializer resources
|
260
|
+
try:
|
261
|
+
from .startup import startup_initializer
|
262
|
+
await startup_initializer.cleanup()
|
263
|
+
logger.info("✅ Startup resources cleaned up")
|
264
|
+
except Exception as e:
|
265
|
+
logger.error(f"❌ Error cleaning up startup resources: {e}")
|
266
|
+
|
267
|
+
logger.info("✅ Application shutdown completed successfully")
|
268
|
+
except Exception as e:
|
269
|
+
logger.error(f"❌ Error during application shutdown: {e}")
|
153
270
|
|
154
271
|
return app
|
155
272
|
|
@@ -158,4 +275,37 @@ app = create_app()
|
|
158
275
|
|
159
276
|
if __name__ == "__main__":
|
160
277
|
import uvicorn
|
161
|
-
|
278
|
+
import os
|
279
|
+
import signal
|
280
|
+
|
281
|
+
port = int(os.getenv("PORT", 8082))
|
282
|
+
|
283
|
+
# Configure uvicorn for graceful shutdown
|
284
|
+
config = uvicorn.Config(
|
285
|
+
app,
|
286
|
+
host="0.0.0.0",
|
287
|
+
port=port,
|
288
|
+
log_level=os.getenv("LOG_LEVEL", "info").lower(),
|
289
|
+
access_log=True,
|
290
|
+
loop="asyncio",
|
291
|
+
# Graceful shutdown configuration
|
292
|
+
timeout_keep_alive=30, # Keep connections alive for 30 seconds
|
293
|
+
timeout_graceful_shutdown=30, # 30 second graceful shutdown timeout
|
294
|
+
)
|
295
|
+
|
296
|
+
server = uvicorn.Server(config)
|
297
|
+
|
298
|
+
# Setup signal handlers for graceful shutdown
|
299
|
+
def signal_handler(signum, frame):
|
300
|
+
logger.info(f"Received signal {signum}, initiating graceful shutdown...")
|
301
|
+
server.should_exit = True
|
302
|
+
|
303
|
+
signal.signal(signal.SIGINT, signal_handler)
|
304
|
+
signal.signal(signal.SIGTERM, signal_handler)
|
305
|
+
|
306
|
+
try:
|
307
|
+
server.run()
|
308
|
+
except KeyboardInterrupt:
|
309
|
+
logger.info("Keyboard interrupt received, shutting down...")
|
310
|
+
finally:
|
311
|
+
logger.info("Server shutdown complete")
|
@@ -35,10 +35,16 @@ class APIKeyManager:
|
|
35
35
|
# Load API keys first to check if auth should be enabled
|
36
36
|
self.load_api_keys()
|
37
37
|
|
38
|
-
# Determine auth state:
|
38
|
+
# Determine auth state: check explicit setting first, then auto-detect from keys
|
39
39
|
explicit_auth = AUTH_ENABLED
|
40
40
|
has_keys = len(self.api_keys) > 0
|
41
|
-
|
41
|
+
|
42
|
+
# If explicitly disabled (REQUIRE_API_KEYS=false), respect that setting
|
43
|
+
if os.getenv("REQUIRE_API_KEYS", "").lower() == "false":
|
44
|
+
self.auth_enabled = False
|
45
|
+
else:
|
46
|
+
# Otherwise, enable if explicitly set OR if API keys exist
|
47
|
+
self.auth_enabled = explicit_auth or has_keys
|
42
48
|
|
43
49
|
if self.auth_enabled:
|
44
50
|
logger.info(f"API Key authentication is ENABLED ({'explicit' if explicit_auth else 'auto-detected from keys'})")
|