isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +35 -80
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/types.py +1 -0
  26. isa_model/deployment/__init__.py +5 -48
  27. isa_model/deployment/core/__init__.py +2 -31
  28. isa_model/deployment/core/deployment_manager.py +1278 -370
  29. isa_model/deployment/modal/__init__.py +8 -0
  30. isa_model/deployment/modal/config.py +136 -0
  31. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  32. isa_model/deployment/modal/services/__init__.py +3 -0
  33. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  34. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  35. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  36. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  37. isa_model/deployment/modal/services/video/__init__.py +1 -0
  38. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  39. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  40. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  41. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  42. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  43. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  44. isa_model/deployment/storage/__init__.py +5 -0
  45. isa_model/deployment/storage/deployment_repository.py +824 -0
  46. isa_model/deployment/triton/__init__.py +10 -0
  47. isa_model/deployment/triton/config.py +196 -0
  48. isa_model/deployment/triton/configs/__init__.py +1 -0
  49. isa_model/deployment/triton/provider.py +512 -0
  50. isa_model/deployment/triton/scripts/__init__.py +1 -0
  51. isa_model/deployment/triton/templates/__init__.py +1 -0
  52. isa_model/inference/__init__.py +47 -1
  53. isa_model/inference/ai_factory.py +137 -10
  54. isa_model/inference/legacy_services/__init__.py +21 -0
  55. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  56. isa_model/inference/legacy_services/model_service.py +573 -0
  57. isa_model/inference/legacy_services/model_serving.py +717 -0
  58. isa_model/inference/legacy_services/model_training.py +561 -0
  59. isa_model/inference/models/__init__.py +21 -0
  60. isa_model/inference/models/inference_config.py +551 -0
  61. isa_model/inference/models/inference_record.py +675 -0
  62. isa_model/inference/models/performance_models.py +714 -0
  63. isa_model/inference/repositories/__init__.py +9 -0
  64. isa_model/inference/repositories/inference_repository.py +828 -0
  65. isa_model/inference/services/audio/base_stt_service.py +184 -11
  66. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  67. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  68. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  69. isa_model/inference/services/llm/__init__.py +10 -2
  70. isa_model/inference/services/llm/base_llm_service.py +335 -24
  71. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  72. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  73. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  74. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  75. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  76. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  77. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  78. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  79. isa_model/inference/services/vision/__init__.py +22 -1
  80. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  81. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  82. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  83. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  84. isa_model/serving/api/cache_manager.py +245 -0
  85. isa_model/serving/api/dependencies/__init__.py +1 -0
  86. isa_model/serving/api/dependencies/auth.py +194 -0
  87. isa_model/serving/api/dependencies/database.py +139 -0
  88. isa_model/serving/api/error_handlers.py +284 -0
  89. isa_model/serving/api/fastapi_server.py +172 -22
  90. isa_model/serving/api/middleware/auth.py +8 -2
  91. isa_model/serving/api/middleware/security.py +23 -33
  92. isa_model/serving/api/middleware/tenant_context.py +414 -0
  93. isa_model/serving/api/routes/analytics.py +4 -1
  94. isa_model/serving/api/routes/config.py +645 -0
  95. isa_model/serving/api/routes/deployment_billing.py +315 -0
  96. isa_model/serving/api/routes/deployments.py +138 -2
  97. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  98. isa_model/serving/api/routes/health.py +32 -12
  99. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  100. isa_model/serving/api/routes/local_deployments.py +448 -0
  101. isa_model/serving/api/routes/tenants.py +575 -0
  102. isa_model/serving/api/routes/unified.py +680 -18
  103. isa_model/serving/api/routes/webhooks.py +479 -0
  104. isa_model/serving/api/startup.py +68 -54
  105. isa_model/utils/gpu_utils.py +311 -0
  106. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
  107. isa_model-0.4.4.dist-info/RECORD +180 -0
  108. isa_model/core/security/secrets.py +0 -358
  109. isa_model/core/storage/hf_storage.py +0 -419
  110. isa_model/core/storage/minio_storage.py +0 -0
  111. isa_model/deployment/cloud/__init__.py +0 -9
  112. isa_model/deployment/cloud/modal/__init__.py +0 -10
  113. isa_model/deployment/core/deployment_config.py +0 -356
  114. isa_model/deployment/core/isa_deployment_service.py +0 -401
  115. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  116. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  117. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  118. isa_model/deployment/runtime/deployed_service.py +0 -338
  119. isa_model/deployment/services/__init__.py +0 -9
  120. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  121. isa_model/deployment/services/model_service.py +0 -332
  122. isa_model/deployment/services/service_monitor.py +0 -356
  123. isa_model/deployment/services/service_registry.py +0 -527
  124. isa_model/eval/__init__.py +0 -92
  125. isa_model/eval/benchmarks/__init__.py +0 -27
  126. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  127. isa_model/eval/benchmarks.py +0 -701
  128. isa_model/eval/config/__init__.py +0 -10
  129. isa_model/eval/config/evaluation_config.py +0 -108
  130. isa_model/eval/evaluators/__init__.py +0 -24
  131. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  132. isa_model/eval/evaluators/base_evaluator.py +0 -503
  133. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  134. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  135. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  136. isa_model/eval/example_evaluation.py +0 -395
  137. isa_model/eval/factory.py +0 -798
  138. isa_model/eval/infrastructure/__init__.py +0 -24
  139. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  140. isa_model/eval/isa_benchmarks.py +0 -700
  141. isa_model/eval/isa_integration.py +0 -582
  142. isa_model/eval/metrics.py +0 -951
  143. isa_model/eval/tests/unit/test_basic.py +0 -396
  144. isa_model/serving/api/routes/evaluations.py +0 -579
  145. isa_model/training/__init__.py +0 -168
  146. isa_model/training/annotation/annotation_schema.py +0 -47
  147. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  148. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  149. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  150. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  151. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  152. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  153. isa_model/training/annotation/views/annotation_controller.py +0 -158
  154. isa_model/training/cloud/__init__.py +0 -22
  155. isa_model/training/cloud/job_orchestrator.py +0 -402
  156. isa_model/training/cloud/runpod_trainer.py +0 -454
  157. isa_model/training/cloud/storage_manager.py +0 -482
  158. isa_model/training/core/__init__.py +0 -26
  159. isa_model/training/core/config.py +0 -181
  160. isa_model/training/core/dataset.py +0 -222
  161. isa_model/training/core/trainer.py +0 -720
  162. isa_model/training/core/utils.py +0 -213
  163. isa_model/training/examples/intelligent_training_example.py +0 -281
  164. isa_model/training/factory.py +0 -424
  165. isa_model/training/intelligent/__init__.py +0 -25
  166. isa_model/training/intelligent/decision_engine.py +0 -643
  167. isa_model/training/intelligent/intelligent_factory.py +0 -888
  168. isa_model/training/intelligent/knowledge_base.py +0 -751
  169. isa_model/training/intelligent/resource_optimizer.py +0 -839
  170. isa_model/training/intelligent/task_classifier.py +0 -576
  171. isa_model/training/storage/__init__.py +0 -24
  172. isa_model/training/storage/core_integration.py +0 -439
  173. isa_model/training/storage/training_repository.py +0 -552
  174. isa_model/training/storage/training_storage.py +0 -628
  175. isa_model-0.4.0.dist-info/RECORD +0 -182
  176. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  177. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  178. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  179. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  180. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  181. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  182. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  183. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  184. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  185. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  186. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  187. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  188. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
  189. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,139 @@
1
+ """
2
+ Database Connection Dependencies
3
+
4
+ Provides database connections and transaction management
5
+ with automatic tenant context handling.
6
+ """
7
+
8
+ import asyncio
9
+ import asyncpg
10
+ import logging
11
+ import os
12
+ from contextlib import asynccontextmanager
13
+ from typing import Optional
14
+
15
+ from ..middleware.tenant_context import get_tenant_context
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ # Global connection pool
20
+ _connection_pool: Optional[asyncpg.Pool] = None
21
+
22
async def initialize_database_pool():
    """Create the shared asyncpg pool on first use and return it.

    Later calls return the pool that already exists. The connection string
    is read from the ``DATABASE_URL`` environment variable.

    Raises:
        RuntimeError: If ``DATABASE_URL`` is unset or empty.
        Exception: Any failure from ``asyncpg.create_pool`` is logged and
            re-raised unchanged.
    """
    global _connection_pool

    if not _connection_pool:
        dsn = os.getenv("DATABASE_URL")
        if not dsn:
            raise RuntimeError("DATABASE_URL environment variable not set")

        # Session settings applied to every connection the pool opens.
        session_settings = {
            'search_path': 'dev',
            'timezone': 'UTC'
        }

        try:
            _connection_pool = await asyncpg.create_pool(
                dsn,
                min_size=5,
                max_size=20,
                command_timeout=60,
                server_settings=session_settings,
            )
            logger.info("Database connection pool initialized")
        except Exception as exc:
            logger.error(f"Failed to initialize database pool: {exc}")
            raise

    return _connection_pool
51
+
52
async def close_database_pool():
    """Close the shared connection pool, if one exists, and forget it."""
    global _connection_pool

    # Nothing to do when the pool was never created or is already closed.
    if not _connection_pool:
        return

    await _connection_pool.close()
    _connection_pool = None
    logger.info("Database connection pool closed")
60
+
61
@asynccontextmanager
async def get_database_connection():
    """
    Yield a pooled database connection with the tenant context applied.

    This context manager:
    1. Lazily creates the pool on first use
    2. Acquires a connection from the pool
    3. Sets ``app.current_organization_id`` when a tenant context exists
    4. Clears that setting and returns the connection to the pool on exit

    It does NOT open a transaction; use ``get_database_transaction`` for that.

    Raises:
        Exception: Any error raised while setting the tenant context or by
            the caller's body is logged and re-raised unchanged.
    """
    pool = _connection_pool or await initialize_database_pool()

    async with pool.acquire() as conn:
        try:
            tenant_context = get_tenant_context()
            if tenant_context:
                # is_local must be false: with is_local=true the value only
                # lives for the current transaction, and outside an explicit
                # transaction that is just this one statement, so the tenant
                # id would be gone before the caller runs any query.
                await conn.execute(
                    "SELECT set_config('app.current_organization_id', $1, false)",
                    tenant_context.organization_id
                )

            yield conn

        except Exception as e:
            logger.error(f"Database operation error: {e}")
            raise
        finally:
            # Clear the session-level tenant id so it cannot leak to the next
            # user of this pooled connection. Cleanup is best-effort, but only
            # ordinary errors are suppressed; cancellation must propagate.
            try:
                await conn.execute(
                    "SELECT set_config('app.current_organization_id', '', false)"
                )
            except Exception as cleanup_error:
                logger.warning(f"Failed to clear tenant context: {cleanup_error}")
98
+
99
@asynccontextmanager
async def get_database_transaction():
    """
    Yield a pooled connection with a transaction open for the whole body.

    The connection comes from ``get_database_connection``; the transaction is
    entered after the connection is acquired and exited before it is released.
    """
    async with get_database_connection() as connection:
        transaction = connection.transaction()
        async with transaction:
            yield connection
107
+
108
async def execute_query(query: str, *args, fetch_type: str = "fetch"):
    """
    Run a single query on a connection obtained from ``get_database_connection``.

    Args:
        query: SQL query
        *args: Positional query parameters
        fetch_type: Name of the connection method to call: 'fetch',
            'fetchrow', 'fetchval', or 'execute'

    Returns:
        Whatever the selected connection method returns.

    Raises:
        ValueError: If ``fetch_type`` is not one of the four supported names.
    """
    async with get_database_connection() as connection:
        # Validation stays inside the context manager so an invalid value is
        # reported through the same error path as a failed query.
        if fetch_type not in ("fetch", "fetchrow", "fetchval", "execute"):
            raise ValueError(f"Invalid fetch_type: {fetch_type}")

        runner = getattr(connection, fetch_type)
        return await runner(query, *args)
128
+
129
+ # FastAPI dependency functions
130
+
131
async def get_db_connection():
    """FastAPI dependency that yields a connection from ``get_database_connection``."""
    async with get_database_connection() as connection:
        yield connection
135
+
136
async def get_db_transaction():
    """FastAPI dependency that yields a connection from ``get_database_transaction``."""
    async with get_database_transaction() as connection:
        yield connection
@@ -0,0 +1,284 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Improved Error Handling for ISA Model API
6
+ Provides consistent error responses and better user experience
7
+ """
8
+
9
+ import logging
10
+ import traceback
11
+ from typing import Dict, Any, Optional, Union
12
+ from enum import Enum
13
+ from fastapi import HTTPException, status
14
+ from fastapi.responses import JSONResponse
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
class ErrorCode(str, Enum):
    """Closed set of error identifiers used in ISA Model API responses.

    Each member's value is its own name, and members are also ``str``
    instances because the enum mixes in ``str``.
    """

    # --- Input / request errors (4xx) ---
    INVALID_INPUT = "INVALID_INPUT"
    MISSING_PARAMETER = "MISSING_PARAMETER"
    INVALID_MODEL = "INVALID_MODEL"
    INVALID_PROVIDER = "INVALID_PROVIDER"
    INVALID_SERVICE_TYPE = "INVALID_SERVICE_TYPE"
    INVALID_TASK = "INVALID_TASK"
    UNSUPPORTED_FORMAT = "UNSUPPORTED_FORMAT"
    FILE_TOO_LARGE = "FILE_TOO_LARGE"
    RATE_LIMIT_EXCEEDED = "RATE_LIMIT_EXCEEDED"
    AUTHENTICATION_FAILED = "AUTHENTICATION_FAILED"
    AUTHORIZATION_FAILED = "AUTHORIZATION_FAILED"

    # --- Service errors (5xx) ---
    SERVICE_UNAVAILABLE = "SERVICE_UNAVAILABLE"
    MODEL_LOAD_FAILED = "MODEL_LOAD_FAILED"
    INFERENCE_FAILED = "INFERENCE_FAILED"
    EXTERNAL_API_ERROR = "EXTERNAL_API_ERROR"
    DATABASE_ERROR = "DATABASE_ERROR"
    TIMEOUT_ERROR = "TIMEOUT_ERROR"
    INTERNAL_ERROR = "INTERNAL_ERROR"

    # --- Configuration errors ---
    CONFIG_ERROR = "CONFIG_ERROR"
    API_KEY_MISSING = "API_KEY_MISSING"
    API_KEY_INVALID = "API_KEY_INVALID"
47
+
48
class ISAModelError(Exception):
    """Exception carrying an error code, HTTP status, details and a user-facing message."""

    def __init__(
        self,
        message: str,
        error_code: ErrorCode,
        status_code: int = 500,
        details: Optional[Dict[str, Any]] = None,
        user_message: Optional[str] = None
    ):
        """Store the error fields.

        Args:
            message: Technical error description; also used as the exception text.
            error_code: Code identifying the error category.
            status_code: HTTP status associated with the error.
            details: Extra context; a missing or empty value becomes ``{}``.
            user_message: User-facing text; when missing or empty, a default
                is looked up from ``error_code``.
        """
        super().__init__(message)
        self.message = message
        self.error_code = error_code
        self.status_code = status_code
        self.details = details if details else {}
        # The default lookup reads self.error_code, so it must run after the
        # assignment above.
        if user_message:
            self.user_message = user_message
        else:
            self.user_message = self._generate_user_message()

    def _generate_user_message(self) -> str:
        """Return the default user-facing message for ``self.error_code``."""
        friendly_text = {
            ErrorCode.INVALID_INPUT: "请检查您的输入数据格式是否正确。",
            ErrorCode.MISSING_PARAMETER: "请提供必需的参数。",
            ErrorCode.INVALID_MODEL: "指定的模型不存在或不可用,请选择其他模型。",
            ErrorCode.INVALID_PROVIDER: "指定的提供商不支持,请选择其他提供商。",
            ErrorCode.INVALID_SERVICE_TYPE: "不支持的服务类型,请选择text、vision、audio、image或embedding。",
            ErrorCode.INVALID_TASK: "不支持的任务类型,请查看API文档了解支持的任务。",
            ErrorCode.UNSUPPORTED_FORMAT: "不支持的文件格式,请使用支持的格式。",
            ErrorCode.FILE_TOO_LARGE: "文件太大,请压缩后重试。",
            ErrorCode.RATE_LIMIT_EXCEEDED: "请求过于频繁,请稍后再试。",
            ErrorCode.AUTHENTICATION_FAILED: "身份验证失败,请检查您的凭据。",
            ErrorCode.AUTHORIZATION_FAILED: "您没有权限执行此操作。",
            ErrorCode.SERVICE_UNAVAILABLE: "服务暂时不可用,请稍后再试。",
            ErrorCode.MODEL_LOAD_FAILED: "模型加载失败,请稍后再试或选择其他模型。",
            ErrorCode.INFERENCE_FAILED: "推理过程出现错误,请重试。",
            ErrorCode.EXTERNAL_API_ERROR: "外部服务出现问题,请稍后再试。",
            ErrorCode.DATABASE_ERROR: "数据库连接问题,请稍后再试。",
            ErrorCode.TIMEOUT_ERROR: "请求超时,请稍后再试。",
            ErrorCode.INTERNAL_ERROR: "内部服务器错误,请联系技术支持。",
            ErrorCode.CONFIG_ERROR: "配置错误,请联系管理员。",
            ErrorCode.API_KEY_MISSING: "缺少API密钥,请在配置中提供。",
            ErrorCode.API_KEY_INVALID: "API密钥无效,请检查配置。"
        }

        # Codes without an entry fall back to a generic "unknown error" text.
        fallback_text = "出现了未知错误,请稍后再试。"
        return friendly_text.get(self.error_code, fallback_text)

    def to_dict(self) -> Dict[str, Any]:
        """Return the error fields as a plain dictionary for an API response."""
        payload = {
            "error_code": self.error_code.value,
            "message": self.message,
            "user_message": self.user_message,
            "status_code": self.status_code,
            "details": self.details
        }
        return payload
103
+
104
def create_error_response(
    error: Union[Exception, ISAModelError, str],
    status_code: Optional[int] = None,
    error_code: Optional[ErrorCode] = None,
    details: Optional[Dict[str, Any]] = None,
    include_traceback: bool = False
) -> Dict[str, Any]:
    """Create standardized error response.

    Args:
        error: An ``ISAModelError`` (used as-is), any other exception, or a
            plain message string.
        status_code: Status for non-``ISAModelError`` inputs; defaults to 500.
            Ignored when ``error`` is an ``ISAModelError``.
        error_code: Code for non-``ISAModelError`` inputs; defaults to
            ``ErrorCode.INTERNAL_ERROR``. Ignored for ``ISAModelError``.
        details: Extra context for non-``ISAModelError`` inputs.
        include_traceback: When true and ``error`` is a generic exception,
            add its formatted traceback under ``metadata["traceback"]``.

    Returns:
        A dict with keys ``success``, ``error``, ``error_code``,
        ``user_message``, ``details`` and ``metadata``.
    """
    is_isa_error = isinstance(error, ISAModelError)
    is_generic_exception = isinstance(error, Exception) and not is_isa_error

    if is_isa_error:
        isa_error = error
        error_type = "ISAModelError"
    else:
        # Wrap generic exceptions and plain strings so all three input kinds
        # share one response-building path.
        isa_error = ISAModelError(
            message=str(error),
            error_code=error_code or ErrorCode.INTERNAL_ERROR,
            status_code=status_code or 500,
            details=details
        )
        error_type = type(error).__name__ if is_generic_exception else "StringError"

    response = {
        "success": False,
        "error": isa_error.message,
        "error_code": isa_error.error_code.value,
        "user_message": isa_error.user_message,
        "details": isa_error.details,
        "metadata": {
            "error_type": error_type,
            "status_code": isa_error.status_code
        }
    }

    if include_traceback and is_generic_exception:
        # Format the traceback of the exception that was passed in.
        # traceback.format_exc() would report whichever exception is being
        # handled right now, which is "NoneType: None" outside an except block.
        response["metadata"]["traceback"] = "".join(
            traceback.format_exception(type(error), error, error.__traceback__)
        )

    return response
178
+
179
def handle_validation_error(exc: Exception) -> Dict[str, Any]:
    """Build a 400 INVALID_INPUT response from a validation exception.

    When ``exc`` has an ``errors`` attribute it is called, and each returned
    entry is summarised under ``details["validation_errors"]``.
    """
    details = {}

    if hasattr(exc, 'errors'):
        # Each entry is read with .get(), so missing keys fall back to
        # an empty location path or an empty string.
        details["validation_errors"] = [
            {
                "field": " -> ".join(str(part) for part in item.get('loc', [])),
                "message": item.get('msg', ''),
                "type": item.get('type', '')
            }
            for item in exc.errors()
        ]

    return create_error_response(
        error="请求数据格式不正确",
        status_code=400,
        error_code=ErrorCode.INVALID_INPUT,
        details=details
    )
202
+
203
def handle_service_error(
    service_name: str,
    error: Exception,
    fallback_available: bool = False
) -> Dict[str, Any]:
    """Handle service-specific errors with context.

    The error code and user-facing message are chosen by keyword matching on
    the lower-cased error text: "connection", "timeout", "authentication" or
    "api key", then "rate limit"; anything else is SERVICE_UNAVAILABLE.

    Args:
        service_name: Name of the failing service, used in the user message.
        error: The exception raised by the service.
        fallback_available: Whether a fallback service took over. When true
            the response status is 200 instead of 503.

    Returns:
        The standardized error response dict, with ``user_message`` set to
        the service-specific message chosen here.
    """

    details = {
        "service": service_name,
        "fallback_available": fallback_available
    }

    error_text = str(error)
    lowered = error_text.lower()

    # Determine error code based on service and error type
    if "connection" in lowered:
        error_code = ErrorCode.EXTERNAL_API_ERROR
        if fallback_available:
            user_message = f"{service_name}服务暂时不可用,已切换到备用服务。"
        else:
            user_message = f"{service_name}服务连接失败,请稍后再试。"
    elif "timeout" in lowered:
        error_code = ErrorCode.TIMEOUT_ERROR
        user_message = f"{service_name}服务响应超时,请稍后再试。"
    elif "authentication" in lowered or "api key" in lowered:
        error_code = ErrorCode.API_KEY_INVALID
        user_message = f"{service_name}服务认证失败,请检查API密钥配置。"
    elif "rate limit" in lowered:
        error_code = ErrorCode.RATE_LIMIT_EXCEEDED
        user_message = f"{service_name}服务请求频率过高,请稍后再试。"
    else:
        error_code = ErrorCode.SERVICE_UNAVAILABLE
        if fallback_available:
            user_message = f"{service_name}服务出现问题,已切换到备用服务。"
        else:
            user_message = f"{service_name}服务暂时不可用,请稍后再试。"

    response = create_error_response(
        error=error_text,
        status_code=503 if not fallback_available else 200,
        error_code=error_code,
        details=details
    )

    # Use the service-specific message computed above; without this the
    # response would carry only the generic per-code default.
    response["user_message"] = user_message

    return response
244
+
245
def create_http_exception(
    message: str,
    status_code: int = 500,
    error_code: Optional[ErrorCode] = None,
    details: Optional[Dict[str, Any]] = None
) -> HTTPException:
    """Return an ``HTTPException`` whose detail is the standardized error dict.

    The detail payload is built by ``create_error_response`` from the same
    message, status code, error code and details.
    """
    detail_payload = create_error_response(
        error=message,
        status_code=status_code,
        error_code=error_code,
        details=details
    )
    return HTTPException(status_code=status_code, detail=detail_payload)
264
+
265
+ # Convenience functions for common errors
266
def invalid_input_error(message: str, details: Optional[Dict] = None) -> HTTPException:
    """Return a 400 ``HTTPException`` tagged with ``ErrorCode.INVALID_INPUT``."""
    return create_http_exception(
        message,
        status_code=400,
        error_code=ErrorCode.INVALID_INPUT,
        details=details
    )
268
+
269
def model_not_found_error(model_name: str) -> HTTPException:
    """Return a 404 ``HTTPException`` tagged ``ErrorCode.INVALID_MODEL`` for ``model_name``."""
    message = f"模型 '{model_name}' 不存在或不可用"
    # The details echo the model name and point at the model-listing endpoint.
    details = {"model": model_name, "suggestion": "请查看 /api/v1/models 获取可用模型列表"}
    return create_http_exception(message, 404, ErrorCode.INVALID_MODEL, details)
276
+
277
def service_unavailable_error(service_name: str, fallback: bool = False) -> HTTPException:
    """Return an ``HTTPException`` tagged ``ErrorCode.SERVICE_UNAVAILABLE``.

    The status code is 200 when ``fallback`` is true and 503 otherwise; the
    message text changes with ``fallback`` in the same way.
    """
    if fallback:
        status_code = 200
    else:
        status_code = 503

    state_text = '已切换到备用模式' if fallback else '暂时不可用'
    details = {"service": service_name, "fallback_mode": fallback}

    return create_http_exception(
        f"{service_name}服务{state_text}",
        status_code,
        ErrorCode.SERVICE_UNAVAILABLE,
        details
    )
@@ -15,34 +15,29 @@ import logging
15
15
  import os
16
16
  from typing import Dict, Any, Optional
17
17
 
18
- from .routes import health, unified, deployments, logs, analytics, settings, evaluations
18
+ from .routes import health, unified, deployments, logs, analytics, settings, inference_monitoring, webhooks, tenants # config, training, annotation, and evaluations temporarily disabled
19
19
  from .middleware.request_logger import RequestLoggerMiddleware
20
20
  from .middleware.security import setup_security_middleware, check_redis_health
21
+ from .middleware.tenant_context import TenantContextMiddleware
21
22
  from .startup import run_startup_initialization
23
+ from ...core.logging import api_logger, setup_logger
22
24
 
23
- logger = logging.getLogger(__name__)
25
+ logger = api_logger # Use Loki-configured logger instead of standard logging
24
26
 
25
27
  def configure_logging():
26
- """Configure logging based on environment variables"""
28
+ """Configure logging based on environment variables
29
+
30
+ Note: Loki integration is handled automatically by isa_model.core.logging.setup_logger
31
+ This function only sets log levels for existing loggers.
32
+ """
27
33
  log_level = os.getenv('LOG_LEVEL', 'INFO').upper()
28
34
  verbose_logging = os.getenv('VERBOSE_LOGGING', 'false').lower() == 'true'
29
-
35
+
30
36
  # Set log level
31
37
  level = getattr(logging, log_level, logging.INFO)
32
-
33
- # Configure format
34
- if verbose_logging:
35
- log_format = '%(asctime)s - %(name)s - %(levelname)s - %(funcName)s:%(lineno)d - %(message)s'
36
- else:
37
- log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
38
-
39
- # Configure root logger
40
- logging.basicConfig(
41
- level=level,
42
- format=log_format,
43
- datefmt='%Y-%m-%d %H:%M:%S',
44
- force=True # Override existing configuration
45
- )
38
+
39
+ # Note: Don't call logging.basicConfig() here as it conflicts with Loki handlers
40
+ # The Loki logger (api_logger) is already configured with proper handlers
46
41
 
47
42
  # Set uvicorn logger level to match
48
43
  uvicorn_logger = logging.getLogger("uvicorn")
@@ -52,6 +47,69 @@ def configure_logging():
52
47
  app_logger = logging.getLogger("isa_model")
53
48
  app_logger.setLevel(level)
54
49
 
50
+ # Suppress verbose third-party library logs
51
+ # HTTP libraries - only show WARNING and above
52
+ logging.getLogger("httpcore").setLevel(logging.WARNING)
53
+ logging.getLogger("httpx").setLevel(logging.WARNING)
54
+ logging.getLogger("httpcore.http11").setLevel(logging.WARNING)
55
+ logging.getLogger("httpcore.connection").setLevel(logging.WARNING)
56
+
57
+ # Database and ORM libraries
58
+ logging.getLogger("supabase").setLevel(logging.WARNING)
59
+ logging.getLogger("postgrest").setLevel(logging.WARNING)
60
+
61
+ # AI/ML libraries
62
+ logging.getLogger("openai").setLevel(logging.WARNING)
63
+ logging.getLogger("anthropic").setLevel(logging.WARNING)
64
+ logging.getLogger("google").setLevel(logging.WARNING)
65
+ logging.getLogger("google.cloud").setLevel(logging.WARNING)
66
+ logging.getLogger("google.generativeai").setLevel(logging.WARNING)
67
+
68
+ # Other verbose libraries
69
+ logging.getLogger("urllib3").setLevel(logging.WARNING)
70
+ logging.getLogger("requests").setLevel(logging.WARNING)
71
+ logging.getLogger("aiohttp").setLevel(logging.WARNING)
72
+
73
+ # Reduce startup debug logs
74
+ if not verbose_logging:
75
+ # Reduce startup initialization debug logs
76
+ startup_logger = logging.getLogger("isa_model.serving.api.startup")
77
+ startup_logger.setLevel(logging.WARNING)
78
+
79
+ # Reduce model registry debug logs
80
+ model_logger = logging.getLogger("isa_model.core.models.model_repo")
81
+ model_logger.setLevel(logging.WARNING)
82
+
83
+ # Reduce intelligent selector debug logs
84
+ selector_logger = logging.getLogger("isa_model.core.services.intelligent_model_selector")
85
+ selector_logger.setLevel(logging.WARNING)
86
+
87
+ # Training module removed - logger configuration no longer needed
88
+
89
+ # Reduce knowledge base logs
90
+ kb_logger = logging.getLogger("isa_model.core.knowledge_base")
91
+ kb_logger.setLevel(logging.WARNING)
92
+
93
+ # Reduce database migration logs
94
+ migration_logger = logging.getLogger("isa_model.core.database.migrations")
95
+ migration_logger.setLevel(logging.WARNING)
96
+
97
+ # Reduce AI factory logs
98
+ ai_factory_logger = logging.getLogger("isa_model.inference.ai_factory")
99
+ ai_factory_logger.setLevel(logging.WARNING)
100
+
101
+ # Reduce embedding service logs
102
+ embed_logger = logging.getLogger("isa_model.inference.services.embedding")
103
+ embed_logger.setLevel(logging.WARNING)
104
+
105
+ # Reduce config manager logs
106
+ config_logger = logging.getLogger("isa_model.core.config")
107
+ config_logger.setLevel(logging.WARNING)
108
+
109
+ # Reduce core integration logs
110
+ core_logger = logging.getLogger("isa_model.core")
111
+ core_logger.setLevel(logging.WARNING)
112
+
55
113
  logger.info(f"Logging configured - Level: {log_level}, Verbose: {verbose_logging}")
56
114
 
57
115
  def create_app(config: Dict[str, Any] = None) -> FastAPI:
@@ -79,6 +137,9 @@ def create_app(config: Dict[str, Any] = None) -> FastAPI:
79
137
  # This includes CORS, rate limiting, security headers, request validation
80
138
  setup_security_middleware(app)
81
139
 
140
+ # Add tenant context middleware (before request logger)
141
+ app.add_middleware(TenantContextMiddleware)
142
+
82
143
  # Add custom middleware
83
144
  app.add_middleware(RequestLoggerMiddleware)
84
145
 
@@ -112,8 +173,26 @@ def create_app(config: Dict[str, Any] = None) -> FastAPI:
112
173
  # SETTINGS API - Configuration and API key management
113
174
  app.include_router(settings.router, prefix="/api/v1/settings", tags=["settings"])
114
175
 
115
- # EVALUATIONS API - Model evaluation and benchmarking
116
- app.include_router(evaluations.router, prefix="/api/v1/evaluations", tags=["evaluations"])
176
+ # EVALUATIONS API - Temporarily disabled for staging optimization
177
+ # app.include_router(evaluations.router, prefix="/api/v1/evaluations", tags=["evaluations"])
178
+
179
+ # INFERENCE MONITORING API - InfluxDB-based inference monitoring and analytics
180
+ app.include_router(inference_monitoring.router, prefix="/api/v1/monitoring", tags=["monitoring"])
181
+
182
+ # TRAINING API - Disabled for staging optimization
183
+ # app.include_router(training.router, prefix="/api/v1/training", tags=["training"])
184
+
185
+ # WEBHOOKS API - Webhook management and notifications
186
+ app.include_router(webhooks.router, prefix="/api/v1/webhooks", tags=["webhooks"])
187
+
188
+ # TENANTS API - Multi-tenancy and organization management
189
+ app.include_router(tenants.router, prefix="/api/v1/tenants", tags=["tenants"])
190
+
191
+ # ANNOTATION API - Temporarily disabled for staging optimization
192
+ # app.include_router(annotation.router, prefix="/api/v1/annotations", tags=["annotations"])
193
+
194
+ # CONFIG API - Configuration management
195
+ # app.include_router(config.router, prefix="/api/v1/config", tags=["config"]) # Temporarily disabled
117
196
 
118
197
  # Mount static files
119
198
  static_path = os.path.join(os.path.dirname(__file__), "../static")
@@ -149,7 +228,45 @@ def create_app(config: Dict[str, Any] = None) -> FastAPI:
149
228
  logger.info("✅ Application startup completed successfully")
150
229
  except Exception as e:
151
230
  logger.error(f"❌ Application startup failed: {e}")
152
- # Don't raise - let the app start anyway
231
+ logger.error("⚠️ Server will continue but may have reduced functionality")
232
+ # Store startup failure state for health checks
233
+ app.state.startup_failed = True
234
+ app.state.startup_error = str(e)
235
+ # Continue running to allow debugging and partial functionality
236
+
237
+ # Add shutdown event handler
238
@app.on_event("shutdown")
async def shutdown_event():
    """Release application resources when the server shuts down.

    Runs three independent, best-effort cleanup steps in order: closing the
    database pool, cleaning up the ``AIFactory`` instance returned by
    ``AIFactory.get_instance()``, and cleaning up ``startup_initializer``.
    A failure in one step (including a failed import) is logged and does not
    stop the remaining steps. The final log line reports success only when
    every step succeeded; otherwise it reports how many steps failed.
    """

    async def _close_database():
        # Imported lazily so an import failure is handled like any other
        # cleanup failure instead of aborting the whole shutdown handler.
        from .dependencies.database import close_database_pool
        await close_database_pool()

    async def _cleanup_ai_factory():
        from ...inference.ai_factory import AIFactory
        factory = AIFactory.get_instance()
        await factory.cleanup()

    async def _cleanup_startup_resources():
        from .startup import startup_initializer
        await startup_initializer.cleanup()

    # (coroutine function, success message, failure message prefix)
    cleanup_steps = (
        (
            _close_database,
            "✅ Database connections closed",
            "❌ Error closing database connections",
        ),
        (
            _cleanup_ai_factory,
            "✅ AI Factory cleaned up",
            "❌ Error cleaning up AI Factory",
        ),
        (
            _cleanup_startup_resources,
            "✅ Startup resources cleaned up",
            "❌ Error cleaning up startup resources",
        ),
    )

    logger.info("🧹 Starting application shutdown cleanup...")

    failures = 0
    for run_step, success_message, failure_prefix in cleanup_steps:
        try:
            await run_step()
        except Exception as e:
            # Best effort: record the failure and keep going so later
            # resources still get a chance to be released.
            failures += 1
            logger.error(f"{failure_prefix}: {e}")
        else:
            logger.info(success_message)

    if failures:
        # Do not claim success when at least one step logged an error above.
        logger.warning(
            f"⚠️ Application shutdown completed with {failures} failed cleanup step(s)"
        )
    else:
        logger.info("✅ Application shutdown completed successfully")
153
270
 
154
271
  return app
155
272
 
@@ -158,4 +275,37 @@ app = create_app()
158
275
 
159
276
if __name__ == "__main__":
    # Script entry point: serve the module-level ``app`` with uvicorn.
    import uvicorn
    import os
    import signal

    # Listening port comes from the PORT environment variable (default 8082).
    port = int(os.getenv("PORT", 8082))

    # Server settings, including the graceful-shutdown timeouts.
    server_settings = {
        "host": "0.0.0.0",
        "port": port,
        "log_level": os.getenv("LOG_LEVEL", "info").lower(),
        "access_log": True,
        "loop": "asyncio",
        "timeout_keep_alive": 30,  # Keep connections alive for 30 seconds
        "timeout_graceful_shutdown": 30,  # 30 second graceful shutdown timeout
    }
    config = uvicorn.Config(app, **server_settings)

    server = uvicorn.Server(config)

    def signal_handler(signum, frame):
        """Log the received signal and ask the server to exit."""
        logger.info(f"Received signal {signum}, initiating graceful shutdown...")
        server.should_exit = True

    # NOTE(review): uvicorn also manages SIGINT/SIGTERM while serving; how
    # these handlers interact with it depends on the uvicorn version — confirm.
    for handled_signal in (signal.SIGINT, signal.SIGTERM):
        signal.signal(handled_signal, signal_handler)

    try:
        server.run()
    except KeyboardInterrupt:
        logger.info("Keyboard interrupt received, shutting down...")
    finally:
        logger.info("Server shutdown complete")
@@ -35,10 +35,16 @@ class APIKeyManager:
35
35
  # Load API keys first to check if auth should be enabled
36
36
  self.load_api_keys()
37
37
 
38
- # Determine auth state: enabled if explicitly set OR if API keys exist
38
+ # Determine auth state: check explicit setting first, then auto-detect from keys
39
39
  explicit_auth = AUTH_ENABLED
40
40
  has_keys = len(self.api_keys) > 0
41
- self.auth_enabled = explicit_auth or has_keys
41
+
42
+ # If explicitly disabled (REQUIRE_API_KEYS=false), respect that setting
43
+ if os.getenv("REQUIRE_API_KEYS", "").lower() == "false":
44
+ self.auth_enabled = False
45
+ else:
46
+ # Otherwise, enable if explicitly set OR if API keys exist
47
+ self.auth_enabled = explicit_auth or has_keys
42
48
 
43
49
  if self.auth_enabled:
44
50
  logger.info(f"API Key authentication is ENABLED ({'explicit' if explicit_auth else 'auto-detected from keys'})")