isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. isa_model/client.py +1166 -584
  2. isa_model/core/cache/redis_cache.py +410 -0
  3. isa_model/core/config/config_manager.py +282 -12
  4. isa_model/core/config.py +91 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/direct_db_client.py +114 -0
  7. isa_model/core/database/migration_manager.py +563 -0
  8. isa_model/core/database/migrations.py +297 -0
  9. isa_model/core/database/supabase_client.py +258 -0
  10. isa_model/core/dependencies.py +316 -0
  11. isa_model/core/discovery/__init__.py +19 -0
  12. isa_model/core/discovery/consul_discovery.py +190 -0
  13. isa_model/core/logging/__init__.py +54 -0
  14. isa_model/core/logging/influx_logger.py +523 -0
  15. isa_model/core/logging/loki_logger.py +160 -0
  16. isa_model/core/models/__init__.py +46 -0
  17. isa_model/core/models/config_models.py +625 -0
  18. isa_model/core/models/deployment_billing_tracker.py +430 -0
  19. isa_model/core/models/model_billing_tracker.py +60 -88
  20. isa_model/core/models/model_manager.py +66 -25
  21. isa_model/core/models/model_metadata.py +690 -0
  22. isa_model/core/models/model_repo.py +217 -55
  23. isa_model/core/models/model_statistics_tracker.py +234 -0
  24. isa_model/core/models/model_storage.py +0 -1
  25. isa_model/core/models/model_version_manager.py +959 -0
  26. isa_model/core/models/system_models.py +857 -0
  27. isa_model/core/pricing_manager.py +2 -249
  28. isa_model/core/repositories/__init__.py +9 -0
  29. isa_model/core/repositories/config_repository.py +912 -0
  30. isa_model/core/resilience/circuit_breaker.py +366 -0
  31. isa_model/core/security/secrets.py +358 -0
  32. isa_model/core/services/__init__.py +2 -4
  33. isa_model/core/services/intelligent_model_selector.py +479 -370
  34. isa_model/core/storage/hf_storage.py +2 -2
  35. isa_model/core/types.py +8 -0
  36. isa_model/deployment/__init__.py +5 -48
  37. isa_model/deployment/core/__init__.py +2 -31
  38. isa_model/deployment/core/deployment_manager.py +1278 -368
  39. isa_model/deployment/local/__init__.py +31 -0
  40. isa_model/deployment/local/config.py +248 -0
  41. isa_model/deployment/local/gpu_gateway.py +607 -0
  42. isa_model/deployment/local/health_checker.py +428 -0
  43. isa_model/deployment/local/provider.py +586 -0
  44. isa_model/deployment/local/tensorrt_service.py +621 -0
  45. isa_model/deployment/local/transformers_service.py +644 -0
  46. isa_model/deployment/local/vllm_service.py +527 -0
  47. isa_model/deployment/modal/__init__.py +8 -0
  48. isa_model/deployment/modal/config.py +136 -0
  49. isa_model/deployment/modal/deployer.py +894 -0
  50. isa_model/deployment/modal/services/__init__.py +3 -0
  51. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  52. isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
  53. isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
  54. isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
  55. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  56. isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
  57. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  58. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  59. isa_model/deployment/modal/services/video/__init__.py +1 -0
  60. isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
  61. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  62. isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
  63. isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
  64. isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
  65. isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
  66. isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
  67. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  68. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  69. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  70. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  71. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  72. isa_model/deployment/storage/__init__.py +5 -0
  73. isa_model/deployment/storage/deployment_repository.py +824 -0
  74. isa_model/deployment/triton/__init__.py +10 -0
  75. isa_model/deployment/triton/config.py +196 -0
  76. isa_model/deployment/triton/configs/__init__.py +1 -0
  77. isa_model/deployment/triton/provider.py +512 -0
  78. isa_model/deployment/triton/scripts/__init__.py +1 -0
  79. isa_model/deployment/triton/templates/__init__.py +1 -0
  80. isa_model/inference/__init__.py +47 -1
  81. isa_model/inference/ai_factory.py +179 -16
  82. isa_model/inference/legacy_services/__init__.py +21 -0
  83. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  84. isa_model/inference/legacy_services/model_service.py +573 -0
  85. isa_model/inference/legacy_services/model_serving.py +717 -0
  86. isa_model/inference/legacy_services/model_training.py +561 -0
  87. isa_model/inference/models/__init__.py +21 -0
  88. isa_model/inference/models/inference_config.py +551 -0
  89. isa_model/inference/models/inference_record.py +675 -0
  90. isa_model/inference/models/performance_models.py +714 -0
  91. isa_model/inference/repositories/__init__.py +9 -0
  92. isa_model/inference/repositories/inference_repository.py +828 -0
  93. isa_model/inference/services/audio/__init__.py +21 -0
  94. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  95. isa_model/inference/services/audio/base_stt_service.py +184 -11
  96. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  97. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  98. isa_model/inference/services/audio/openai_stt_service.py +53 -11
  99. isa_model/inference/services/base_service.py +17 -1
  100. isa_model/inference/services/custom_model_manager.py +277 -0
  101. isa_model/inference/services/embedding/__init__.py +13 -0
  102. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  103. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  104. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  105. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  106. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  107. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  108. isa_model/inference/services/img/__init__.py +2 -2
  109. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  110. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  111. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  112. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  113. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  114. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  115. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  116. isa_model/inference/services/llm/__init__.py +10 -2
  117. isa_model/inference/services/llm/base_llm_service.py +361 -26
  118. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  119. isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
  120. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  121. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  122. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  123. isa_model/inference/services/llm/local_llm_service.py +747 -0
  124. isa_model/inference/services/llm/ollama_llm_service.py +11 -3
  125. isa_model/inference/services/llm/openai_llm_service.py +670 -56
  126. isa_model/inference/services/llm/yyds_llm_service.py +10 -3
  127. isa_model/inference/services/vision/__init__.py +27 -6
  128. isa_model/inference/services/vision/base_vision_service.py +118 -185
  129. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  130. isa_model/inference/services/vision/helpers/image_utils.py +19 -10
  131. isa_model/inference/services/vision/isa_vision_service.py +634 -0
  132. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  133. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  134. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  135. isa_model/serving/api/cache_manager.py +245 -0
  136. isa_model/serving/api/dependencies/__init__.py +1 -0
  137. isa_model/serving/api/dependencies/auth.py +194 -0
  138. isa_model/serving/api/dependencies/database.py +139 -0
  139. isa_model/serving/api/error_handlers.py +284 -0
  140. isa_model/serving/api/fastapi_server.py +240 -18
  141. isa_model/serving/api/middleware/auth.py +317 -0
  142. isa_model/serving/api/middleware/security.py +268 -0
  143. isa_model/serving/api/middleware/tenant_context.py +414 -0
  144. isa_model/serving/api/routes/analytics.py +489 -0
  145. isa_model/serving/api/routes/config.py +645 -0
  146. isa_model/serving/api/routes/deployment_billing.py +315 -0
  147. isa_model/serving/api/routes/deployments.py +475 -0
  148. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  149. isa_model/serving/api/routes/health.py +32 -12
  150. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  151. isa_model/serving/api/routes/local_deployments.py +448 -0
  152. isa_model/serving/api/routes/logs.py +430 -0
  153. isa_model/serving/api/routes/settings.py +582 -0
  154. isa_model/serving/api/routes/tenants.py +575 -0
  155. isa_model/serving/api/routes/unified.py +992 -171
  156. isa_model/serving/api/routes/webhooks.py +479 -0
  157. isa_model/serving/api/startup.py +318 -0
  158. isa_model/serving/modal_proxy_server.py +249 -0
  159. isa_model/utils/gpu_utils.py +311 -0
  160. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
  161. isa_model-0.4.3.dist-info/RECORD +193 -0
  162. isa_model/deployment/cloud/__init__.py +0 -9
  163. isa_model/deployment/cloud/modal/__init__.py +0 -10
  164. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  165. isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
  166. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
  167. isa_model/deployment/cloud/modal/register_models.py +0 -321
  168. isa_model/deployment/core/deployment_config.py +0 -356
  169. isa_model/deployment/core/isa_deployment_service.py +0 -401
  170. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  171. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  172. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  173. isa_model/deployment/runtime/deployed_service.py +0 -338
  174. isa_model/deployment/services/__init__.py +0 -9
  175. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  176. isa_model/deployment/services/model_service.py +0 -332
  177. isa_model/deployment/services/service_monitor.py +0 -356
  178. isa_model/deployment/services/service_registry.py +0 -527
  179. isa_model/eval/__init__.py +0 -92
  180. isa_model/eval/benchmarks.py +0 -469
  181. isa_model/eval/config/__init__.py +0 -10
  182. isa_model/eval/config/evaluation_config.py +0 -108
  183. isa_model/eval/evaluators/__init__.py +0 -18
  184. isa_model/eval/evaluators/base_evaluator.py +0 -503
  185. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  186. isa_model/eval/factory.py +0 -531
  187. isa_model/eval/infrastructure/__init__.py +0 -24
  188. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  189. isa_model/eval/metrics.py +0 -798
  190. isa_model/inference/adapter/unified_api.py +0 -248
  191. isa_model/inference/services/helpers/stacked_config.py +0 -148
  192. isa_model/inference/services/img/flux_professional_service.py +0 -603
  193. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  194. isa_model/inference/services/others/table_transformer_service.py +0 -61
  195. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  196. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  197. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  198. isa_model/scripts/inference_tracker.py +0 -283
  199. isa_model/scripts/mlflow_manager.py +0 -379
  200. isa_model/scripts/model_registry.py +0 -465
  201. isa_model/scripts/register_models.py +0 -370
  202. isa_model/scripts/register_models_with_embeddings.py +0 -510
  203. isa_model/scripts/start_mlflow.py +0 -95
  204. isa_model/scripts/training_tracker.py +0 -257
  205. isa_model/training/__init__.py +0 -74
  206. isa_model/training/annotation/annotation_schema.py +0 -47
  207. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  208. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  209. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  210. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  211. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  212. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  213. isa_model/training/annotation/views/annotation_controller.py +0 -158
  214. isa_model/training/cloud/__init__.py +0 -22
  215. isa_model/training/cloud/job_orchestrator.py +0 -402
  216. isa_model/training/cloud/runpod_trainer.py +0 -454
  217. isa_model/training/cloud/storage_manager.py +0 -482
  218. isa_model/training/core/__init__.py +0 -23
  219. isa_model/training/core/config.py +0 -181
  220. isa_model/training/core/dataset.py +0 -222
  221. isa_model/training/core/trainer.py +0 -720
  222. isa_model/training/core/utils.py +0 -213
  223. isa_model/training/factory.py +0 -424
  224. isa_model-0.3.91.dist-info/RECORD +0 -138
  225. /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
  226. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  227. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  228. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,284 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Improved Error Handling for ISA Model API
6
+ Provides consistent error responses and better user experience
7
+ """
8
+
9
+ import logging
10
+ import traceback
11
+ from typing import Dict, Any, Optional, Union
12
+ from enum import Enum
13
+ from fastapi import HTTPException, status
14
+ from fastapi.responses import JSONResponse
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
class ErrorCode(str, Enum):
    """Machine-readable error identifiers used by the ISA Model API.

    Every member is also a ``str`` and its value equals its own name, so a
    member can be placed directly into a JSON payload.
    """

    # --- Problems with the incoming request (4xx) ---
    INVALID_INPUT = "INVALID_INPUT"
    MISSING_PARAMETER = "MISSING_PARAMETER"
    INVALID_MODEL = "INVALID_MODEL"
    INVALID_PROVIDER = "INVALID_PROVIDER"
    INVALID_SERVICE_TYPE = "INVALID_SERVICE_TYPE"
    INVALID_TASK = "INVALID_TASK"
    UNSUPPORTED_FORMAT = "UNSUPPORTED_FORMAT"
    FILE_TOO_LARGE = "FILE_TOO_LARGE"
    RATE_LIMIT_EXCEEDED = "RATE_LIMIT_EXCEEDED"
    AUTHENTICATION_FAILED = "AUTHENTICATION_FAILED"
    AUTHORIZATION_FAILED = "AUTHORIZATION_FAILED"

    # --- Failures on the serving side (5xx) ---
    SERVICE_UNAVAILABLE = "SERVICE_UNAVAILABLE"
    MODEL_LOAD_FAILED = "MODEL_LOAD_FAILED"
    INFERENCE_FAILED = "INFERENCE_FAILED"
    EXTERNAL_API_ERROR = "EXTERNAL_API_ERROR"
    DATABASE_ERROR = "DATABASE_ERROR"
    TIMEOUT_ERROR = "TIMEOUT_ERROR"
    INTERNAL_ERROR = "INTERNAL_ERROR"

    # --- Configuration problems ---
    CONFIG_ERROR = "CONFIG_ERROR"
    API_KEY_MISSING = "API_KEY_MISSING"
    API_KEY_INVALID = "API_KEY_INVALID"
47
+
48
+ class ISAModelError(Exception):
49
+ """Base exception for ISA Model errors"""
50
+
51
+ def __init__(
52
+ self,
53
+ message: str,
54
+ error_code: ErrorCode,
55
+ status_code: int = 500,
56
+ details: Optional[Dict[str, Any]] = None,
57
+ user_message: Optional[str] = None
58
+ ):
59
+ self.message = message
60
+ self.error_code = error_code
61
+ self.status_code = status_code
62
+ self.details = details or {}
63
+ self.user_message = user_message or self._generate_user_message()
64
+ super().__init__(self.message)
65
+
66
+ def _generate_user_message(self) -> str:
67
+ """Generate user-friendly error message"""
68
+ user_messages = {
69
+ ErrorCode.INVALID_INPUT: "่ฏทๆฃ€ๆŸฅๆ‚จ็š„่พ“ๅ…ฅๆ•ฐๆฎๆ ผๅผๆ˜ฏๅฆๆญฃ็กฎใ€‚",
70
+ ErrorCode.MISSING_PARAMETER: "่ฏทๆไพ›ๅฟ…้œ€็š„ๅ‚ๆ•ฐใ€‚",
71
+ ErrorCode.INVALID_MODEL: "ๆŒ‡ๅฎš็š„ๆจกๅž‹ไธๅญ˜ๅœจๆˆ–ไธๅฏ็”จ๏ผŒ่ฏท้€‰ๆ‹ฉๅ…ถไป–ๆจกๅž‹ใ€‚",
72
+ ErrorCode.INVALID_PROVIDER: "ๆŒ‡ๅฎš็š„ๆไพ›ๅ•†ไธๆ”ฏๆŒ๏ผŒ่ฏท้€‰ๆ‹ฉๅ…ถไป–ๆไพ›ๅ•†ใ€‚",
73
+ ErrorCode.INVALID_SERVICE_TYPE: "ไธๆ”ฏๆŒ็š„ๆœๅŠก็ฑปๅž‹๏ผŒ่ฏท้€‰ๆ‹ฉtextใ€visionใ€audioใ€imageๆˆ–embeddingใ€‚",
74
+ ErrorCode.INVALID_TASK: "ไธๆ”ฏๆŒ็š„ไปปๅŠก็ฑปๅž‹๏ผŒ่ฏทๆŸฅ็œ‹APIๆ–‡ๆกฃไบ†่งฃๆ”ฏๆŒ็š„ไปปๅŠกใ€‚",
75
+ ErrorCode.UNSUPPORTED_FORMAT: "ไธๆ”ฏๆŒ็š„ๆ–‡ไปถๆ ผๅผ๏ผŒ่ฏทไฝฟ็”จๆ”ฏๆŒ็š„ๆ ผๅผใ€‚",
76
+ ErrorCode.FILE_TOO_LARGE: "ๆ–‡ไปถๅคชๅคง๏ผŒ่ฏทๅŽ‹็ผฉๅŽ้‡่ฏ•ใ€‚",
77
+ ErrorCode.RATE_LIMIT_EXCEEDED: "่ฏทๆฑ‚่ฟ‡ไบŽ้ข‘็น๏ผŒ่ฏท็จๅŽๅ†่ฏ•ใ€‚",
78
+ ErrorCode.AUTHENTICATION_FAILED: "่บซไปฝ้ชŒ่ฏๅคฑ่ดฅ๏ผŒ่ฏทๆฃ€ๆŸฅๆ‚จ็š„ๅ‡ญๆฎใ€‚",
79
+ ErrorCode.AUTHORIZATION_FAILED: "ๆ‚จๆฒกๆœ‰ๆƒ้™ๆ‰ง่กŒๆญคๆ“ไฝœใ€‚",
80
+ ErrorCode.SERVICE_UNAVAILABLE: "ๆœๅŠกๆš‚ๆ—ถไธๅฏ็”จ๏ผŒ่ฏท็จๅŽๅ†่ฏ•ใ€‚",
81
+ ErrorCode.MODEL_LOAD_FAILED: "ๆจกๅž‹ๅŠ ่ฝฝๅคฑ่ดฅ๏ผŒ่ฏท็จๅŽๅ†่ฏ•ๆˆ–้€‰ๆ‹ฉๅ…ถไป–ๆจกๅž‹ใ€‚",
82
+ ErrorCode.INFERENCE_FAILED: "ๆŽจ็†่ฟ‡็จ‹ๅ‡บ็Žฐ้”™่ฏฏ๏ผŒ่ฏท้‡่ฏ•ใ€‚",
83
+ ErrorCode.EXTERNAL_API_ERROR: "ๅค–้ƒจๆœๅŠกๅ‡บ็Žฐ้—ฎ้ข˜๏ผŒ่ฏท็จๅŽๅ†่ฏ•ใ€‚",
84
+ ErrorCode.DATABASE_ERROR: "ๆ•ฐๆฎๅบ“่ฟžๆŽฅ้—ฎ้ข˜๏ผŒ่ฏท็จๅŽๅ†่ฏ•ใ€‚",
85
+ ErrorCode.TIMEOUT_ERROR: "่ฏทๆฑ‚่ถ…ๆ—ถ๏ผŒ่ฏท็จๅŽๅ†่ฏ•ใ€‚",
86
+ ErrorCode.INTERNAL_ERROR: "ๅ†…้ƒจๆœๅŠกๅ™จ้”™่ฏฏ๏ผŒ่ฏท่”็ณปๆŠ€ๆœฏๆ”ฏๆŒใ€‚",
87
+ ErrorCode.CONFIG_ERROR: "้…็ฝฎ้”™่ฏฏ๏ผŒ่ฏท่”็ณป็ฎก็†ๅ‘˜ใ€‚",
88
+ ErrorCode.API_KEY_MISSING: "็ผบๅฐ‘APIๅฏ†้’ฅ๏ผŒ่ฏทๅœจ้…็ฝฎไธญๆไพ›ใ€‚",
89
+ ErrorCode.API_KEY_INVALID: "APIๅฏ†้’ฅๆ— ๆ•ˆ๏ผŒ่ฏทๆฃ€ๆŸฅ้…็ฝฎใ€‚"
90
+ }
91
+
92
+ return user_messages.get(self.error_code, "ๅ‡บ็Žฐไบ†ๆœช็Ÿฅ้”™่ฏฏ๏ผŒ่ฏท็จๅŽๅ†่ฏ•ใ€‚")
93
+
94
+ def to_dict(self) -> Dict[str, Any]:
95
+ """Convert error to dictionary for API response"""
96
+ return {
97
+ "error_code": self.error_code.value,
98
+ "message": self.message,
99
+ "user_message": self.user_message,
100
+ "status_code": self.status_code,
101
+ "details": self.details
102
+ }
103
+
104
def create_error_response(
    error: Union[Exception, ISAModelError, str],
    status_code: Optional[int] = None,
    error_code: Optional[ErrorCode] = None,
    details: Optional[Dict[str, Any]] = None,
    include_traceback: bool = False
) -> Dict[str, Any]:
    """Create a standardized error response dictionary.

    Args:
        error: An ``ISAModelError`` (used as-is), any other exception, or a
            plain message string.
        status_code: Status code for non-``ISAModelError`` inputs; falls back
            to 500 when omitted. Ignored for ``ISAModelError`` inputs.
        error_code: Error code for non-``ISAModelError`` inputs; falls back to
            ``ErrorCode.INTERNAL_ERROR``. Ignored for ``ISAModelError`` inputs.
        details: Extra context for non-``ISAModelError`` inputs.
        include_traceback: When true and ``error`` is an exception, the
            formatted traceback of that exception is added under
            ``metadata["traceback"]``.

    Returns:
        A dict with ``success``, ``error``, ``error_code``, ``user_message``,
        ``details`` and ``metadata`` (``error_type``, ``status_code``).
    """
    if isinstance(error, ISAModelError):
        isa_error = error
        error_type = "ISAModelError"
    else:
        # Normalise generic exceptions and plain strings through ISAModelError
        # so user_message/details defaults are derived in one place.
        isa_error = ISAModelError(
            message=str(error),
            error_code=error_code or ErrorCode.INTERNAL_ERROR,
            status_code=status_code or 500,
            details=details
        )
        error_type = type(error).__name__ if isinstance(error, Exception) else "StringError"

    response = {
        "success": False,
        "error": isa_error.message,
        "error_code": isa_error.error_code.value,
        "user_message": isa_error.user_message,
        "details": isa_error.details,
        "metadata": {
            "error_type": error_type,
            "status_code": isa_error.status_code
        }
    }

    if include_traceback and isinstance(error, Exception):
        # Format the traceback of *this* exception. traceback.format_exc()
        # reports whatever exception is currently being handled, which is
        # "NoneType: None" when called outside an except block.
        response["metadata"]["traceback"] = "".join(
            traceback.format_exception(type(error), error, error.__traceback__)
        )

    return response
178
+
179
def handle_validation_error(exc: Exception) -> Dict[str, Any]:
    """Build a 400 / INVALID_INPUT error response for a validation failure.

    Args:
        exc: The validation exception. If it exposes an ``errors()`` method
            (as Pydantic validation errors do), each reported entry is
            summarised under ``details["validation_errors"]``.

    Returns:
        The standardized error response dictionary.
    """
    details: Dict[str, Any] = {}

    if hasattr(exc, 'errors'):
        # Each entry is read as a mapping with optional 'loc', 'msg', 'type'.
        details["validation_errors"] = [
            {
                "field": " -> ".join(str(loc) for loc in item.get('loc', [])),
                "message": item.get('msg', ''),
                "type": item.get('type', '')
            }
            for item in exc.errors()
        ]

    return create_error_response(
        error="请求数据格式不正确",
        status_code=400,
        error_code=ErrorCode.INVALID_INPUT,
        details=details
    )
202
+
203
+ def handle_service_error(
204
+ service_name: str,
205
+ error: Exception,
206
+ fallback_available: bool = False
207
+ ) -> Dict[str, Any]:
208
+ """Handle service-specific errors with context"""
209
+
210
+ details = {
211
+ "service": service_name,
212
+ "fallback_available": fallback_available
213
+ }
214
+
215
+ # Determine error code based on service and error type
216
+ if "connection" in str(error).lower():
217
+ error_code = ErrorCode.EXTERNAL_API_ERROR
218
+ if fallback_available:
219
+ user_message = f"{service_name}ๆœๅŠกๆš‚ๆ—ถไธๅฏ็”จ๏ผŒๅทฒๅˆ‡ๆขๅˆฐๅค‡็”จๆœๅŠกใ€‚"
220
+ else:
221
+ user_message = f"{service_name}ๆœๅŠก่ฟžๆŽฅๅคฑ่ดฅ๏ผŒ่ฏท็จๅŽๅ†่ฏ•ใ€‚"
222
+ elif "timeout" in str(error).lower():
223
+ error_code = ErrorCode.TIMEOUT_ERROR
224
+ user_message = f"{service_name}ๆœๅŠกๅ“ๅบ”่ถ…ๆ—ถ๏ผŒ่ฏท็จๅŽๅ†่ฏ•ใ€‚"
225
+ elif "authentication" in str(error).lower() or "api key" in str(error).lower():
226
+ error_code = ErrorCode.API_KEY_INVALID
227
+ user_message = f"{service_name}ๆœๅŠก่ฎค่ฏๅคฑ่ดฅ๏ผŒ่ฏทๆฃ€ๆŸฅAPIๅฏ†้’ฅ้…็ฝฎใ€‚"
228
+ elif "rate limit" in str(error).lower():
229
+ error_code = ErrorCode.RATE_LIMIT_EXCEEDED
230
+ user_message = f"{service_name}ๆœๅŠก่ฏทๆฑ‚้ข‘็އ่ฟ‡้ซ˜๏ผŒ่ฏท็จๅŽๅ†่ฏ•ใ€‚"
231
+ else:
232
+ error_code = ErrorCode.SERVICE_UNAVAILABLE
233
+ if fallback_available:
234
+ user_message = f"{service_name}ๆœๅŠกๅ‡บ็Žฐ้—ฎ้ข˜๏ผŒๅทฒๅˆ‡ๆขๅˆฐๅค‡็”จๆœๅŠกใ€‚"
235
+ else:
236
+ user_message = f"{service_name}ๆœๅŠกๆš‚ๆ—ถไธๅฏ็”จ๏ผŒ่ฏท็จๅŽๅ†่ฏ•ใ€‚"
237
+
238
+ return create_error_response(
239
+ error=str(error),
240
+ status_code=503 if not fallback_available else 200,
241
+ error_code=error_code,
242
+ details=details
243
+ )
244
+
245
def create_http_exception(
    message: str,
    status_code: int = 500,
    error_code: Optional[ErrorCode] = None,
    details: Optional[Dict[str, Any]] = None
) -> HTTPException:
    """Wrap a message in an HTTPException whose detail is the standard error payload.

    Args:
        message: Technical error message.
        status_code: HTTP status code for the exception and the payload.
        error_code: Optional machine-readable error code.
        details: Optional structured context.

    Returns:
        An ``HTTPException`` carrying the standardized response as ``detail``.
    """
    payload = create_error_response(message, status_code, error_code, details)
    return HTTPException(status_code=status_code, detail=payload)
264
+
265
+ # Convenience functions for common errors
266
def invalid_input_error(message: str, details: Optional[Dict] = None) -> HTTPException:
    """Return a 400 HTTPException tagged with ErrorCode.INVALID_INPUT."""
    return create_http_exception(
        message,
        status_code=400,
        error_code=ErrorCode.INVALID_INPUT,
        details=details,
    )
268
+
269
def model_not_found_error(model_name: str) -> HTTPException:
    """Return a 404 HTTPException (ErrorCode.INVALID_MODEL) for an unknown model.

    The details carry the requested model name and a suggestion pointing at
    the ``/api/v1/models`` listing.
    """
    return create_http_exception(
        f"模型 '{model_name}' 不存在或不可用",
        404,
        ErrorCode.INVALID_MODEL,
        {"model": model_name, "suggestion": "请查看 /api/v1/models 获取可用模型列表"}
    )
276
+
277
def service_unavailable_error(service_name: str, fallback: bool = False) -> HTTPException:
    """Return an HTTPException (ErrorCode.SERVICE_UNAVAILABLE) for a service outage.

    Args:
        service_name: Name of the affected service.
        fallback: Whether the service switched to fallback mode. When true
            the status code is 200, otherwise 503.
    """
    status_code = 200 if fallback else 503
    state = '已切换到备用模式' if fallback else '暂时不可用'
    return create_http_exception(
        f"{service_name}服务{state}",
        status_code,
        ErrorCode.SERVICE_UNAVAILABLE,
        {"service": service_name, "fallback_mode": fallback}
    )
@@ -4,17 +4,113 @@ FastAPI Server for ISA Model Serving
4
4
  Main FastAPI application that serves model inference endpoints
5
5
  """
6
6
 
7
- from fastapi import FastAPI, Request
7
+ from fastapi import FastAPI, Request, HTTPException, Depends
8
8
  from fastapi.middleware.cors import CORSMiddleware
9
+ from fastapi.middleware.trustedhost import TrustedHostMiddleware
9
10
  from fastapi.responses import JSONResponse
11
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
12
+ from fastapi.staticfiles import StaticFiles
10
13
  import time
11
14
  import logging
12
- from typing import Dict, Any
15
+ import os
16
+ from typing import Dict, Any, Optional
13
17
 
14
- from .routes import ui_analysis, vision, llm, health, unified
18
+ from .routes import health, unified, deployments, logs, analytics, settings, inference_monitoring, webhooks, tenants # config, training, annotation, and evaluations temporarily disabled
15
19
  from .middleware.request_logger import RequestLoggerMiddleware
20
+ from .middleware.security import setup_security_middleware, check_redis_health
21
+ from .middleware.tenant_context import TenantContextMiddleware
22
+ from .startup import run_startup_initialization
23
+ from ...core.logging import api_logger, setup_logger
16
24
 
17
- logger = logging.getLogger(__name__)
25
+ logger = api_logger # Use Loki-configured logger instead of standard logging
26
+
27
def configure_logging():
    """Configure log levels from environment variables.

    Reads ``LOG_LEVEL`` (default ``INFO``) and ``VERBOSE_LOGGING`` (default
    ``false``). Only levels of existing loggers are adjusted; handlers are
    left alone.

    Note: Loki integration is handled automatically by
    isa_model.core.logging.setup_logger, so logging.basicConfig() is
    deliberately not called here as it conflicts with the Loki handlers.
    """
    log_level = os.getenv('LOG_LEVEL', 'INFO').upper()
    verbose_logging = os.getenv('VERBOSE_LOGGING', 'false').lower() == 'true'

    # Resolve the level name; fall back to INFO for unknown names and for
    # names that resolve to a non-level attribute of the logging module
    # (e.g. BASIC_FORMAT), which would make setLevel() raise.
    level = getattr(logging, log_level, logging.INFO)
    if not isinstance(level, int):
        level = logging.INFO

    # uvicorn and the application package follow the configured level.
    for name in ("uvicorn", "isa_model"):
        logging.getLogger(name).setLevel(level)

    # Verbose third-party libraries only report WARNING and above.
    third_party_loggers = (
        # HTTP libraries
        "httpcore",
        "httpx",
        "httpcore.http11",
        "httpcore.connection",
        # Database and ORM libraries
        "supabase",
        "postgrest",
        # AI/ML libraries
        "openai",
        "anthropic",
        "google",
        "google.cloud",
        "google.generativeai",
        # Other verbose libraries
        "urllib3",
        "requests",
        "aiohttp",
    )
    for name in third_party_loggers:
        logging.getLogger(name).setLevel(logging.WARNING)

    if not verbose_logging:
        # Quieten chatty internal modules unless verbose logging is requested.
        internal_loggers = (
            "isa_model.serving.api.startup",
            "isa_model.core.models.model_repo",
            "isa_model.core.services.intelligent_model_selector",
            "isa_model.core.knowledge_base",
            "isa_model.core.database.migrations",
            "isa_model.inference.ai_factory",
            "isa_model.inference.services.embedding",
            "isa_model.core.config",
            "isa_model.core",
        )
        for name in internal_loggers:
            logging.getLogger(name).setLevel(logging.WARNING)

    logger.info(f"Logging configured - Level: {log_level}, Verbose: {verbose_logging}")
18
114
 
19
115
  def create_app(config: Dict[str, Any] = None) -> FastAPI:
20
116
  """
@@ -26,6 +122,9 @@ def create_app(config: Dict[str, Any] = None) -> FastAPI:
26
122
  Returns:
27
123
  Configured FastAPI application
28
124
  """
125
+ # Configure logging first
126
+ configure_logging()
127
+
29
128
  app = FastAPI(
30
129
  title="ISA Model Serving API",
31
130
  description="High-performance model inference API",
@@ -34,14 +133,12 @@ def create_app(config: Dict[str, Any] = None) -> FastAPI:
34
133
  redoc_url="/redoc"
35
134
  )
36
135
 
37
- # Configure CORS
38
- app.add_middleware(
39
- CORSMiddleware,
40
- allow_origins=["*"], # Configure appropriately for production
41
- allow_credentials=True,
42
- allow_methods=["*"],
43
- allow_headers=["*"],
44
- )
136
+ # Setup comprehensive security middleware
137
+ # This includes CORS, rate limiting, security headers, request validation
138
+ setup_security_middleware(app)
139
+
140
+ # Add tenant context middleware (before request logger)
141
+ app.add_middleware(TenantContextMiddleware)
45
142
 
46
143
  # Add custom middleware
47
144
  app.add_middleware(RequestLoggerMiddleware)
@@ -64,10 +161,52 @@ def create_app(config: Dict[str, Any] = None) -> FastAPI:
64
161
  # MAIN UNIFIED API - Single endpoint for all AI services
65
162
  app.include_router(unified.router, prefix="/api/v1", tags=["unified-api"])
66
163
 
67
- # Legacy specific endpoints (kept for backward compatibility)
68
- app.include_router(ui_analysis.router, prefix="/ui-analysis", tags=["ui-analysis"])
69
- app.include_router(vision.router, prefix="/vision", tags=["vision"])
70
- app.include_router(llm.router, prefix="/llm", tags=["llm"])
164
+ # DEPLOYMENTS API - Model deployment management
165
+ app.include_router(deployments.router, prefix="/api/v1/deployments", tags=["deployments"])
166
+
167
+ # LOGS API - Log management and streaming
168
+ app.include_router(logs.router, prefix="/api/v1/logs", tags=["logs"])
169
+
170
+ # ANALYTICS API - Usage analytics and reporting
171
+ app.include_router(analytics.router, prefix="/api/v1/analytics", tags=["analytics"])
172
+
173
+ # SETTINGS API - Configuration and API key management
174
+ app.include_router(settings.router, prefix="/api/v1/settings", tags=["settings"])
175
+
176
+ # EVALUATIONS API - Temporarily disabled for staging optimization
177
+ # app.include_router(evaluations.router, prefix="/api/v1/evaluations", tags=["evaluations"])
178
+
179
+ # INFERENCE MONITORING API - InfluxDB-based inference monitoring and analytics
180
+ app.include_router(inference_monitoring.router, prefix="/api/v1/monitoring", tags=["monitoring"])
181
+
182
+ # TRAINING API - Disabled for staging optimization
183
+ # app.include_router(training.router, prefix="/api/v1/training", tags=["training"])
184
+
185
+ # WEBHOOKS API - Webhook management and notifications
186
+ app.include_router(webhooks.router, prefix="/api/v1/webhooks", tags=["webhooks"])
187
+
188
+ # TENANTS API - Multi-tenancy and organization management
189
+ app.include_router(tenants.router, prefix="/api/v1/tenants", tags=["tenants"])
190
+
191
+ # ANNOTATION API - Temporarily disabled for staging optimization
192
+ # app.include_router(annotation.router, prefix="/api/v1/annotations", tags=["annotations"])
193
+
194
+ # CONFIG API - Configuration management
195
+ # app.include_router(config.router, prefix="/api/v1/config", tags=["config"]) # Temporarily disabled
196
+
197
+ # Mount static files
198
+ static_path = os.path.join(os.path.dirname(__file__), "../static")
199
+ if os.path.exists(static_path):
200
+ app.mount("/static", StaticFiles(directory=static_path), name="static")
201
+
202
+ # Serve management dashboard at /admin
203
+ @app.get("/admin")
204
+ async def admin_dashboard():
205
+ from fastapi.responses import FileResponse
206
+ index_path = os.path.join(static_path, "index.html")
207
+ if os.path.exists(index_path):
208
+ return FileResponse(index_path)
209
+ return {"error": "Management dashboard not found"}
71
210
 
72
211
  # Root endpoint
73
212
  @app.get("/")
@@ -76,9 +215,59 @@ def create_app(config: Dict[str, Any] = None) -> FastAPI:
76
215
  "service": "isa-model-serving",
77
216
  "version": "1.0.0",
78
217
  "status": "running",
79
- "timestamp": time.time()
218
+ "timestamp": time.time(),
219
+ "admin_url": "/admin"
80
220
  }
81
221
 
222
+ # Add startup event handler
223
+ @app.on_event("startup")
224
+ async def startup_event():
225
+ logger.info("🚀 Starting application startup initialization...")
226
+ try:
227
+ await run_startup_initialization()
228
+ logger.info("✅ Application startup completed successfully")
229
+ except Exception as e:
230
+ logger.error(f"❌ Application startup failed: {e}")
231
+ logger.error("⚠️ Server will continue but may have reduced functionality")
232
+ # Store startup failure state for health checks
233
+ app.state.startup_failed = True
234
+ app.state.startup_error = str(e)
235
+ # Continue running to allow debugging and partial functionality
236
+
237
+ # Add shutdown event handler
238
+ @app.on_event("shutdown")
239
+ async def shutdown_event():
240
+ logger.info("🧹 Starting application shutdown cleanup...")
241
+ try:
242
+ # Close database connections
243
+ try:
244
+ from .dependencies.database import close_database_pool
245
+ await close_database_pool()
246
+ logger.info("✅ Database connections closed")
247
+ except Exception as e:
248
+ logger.error(f"❌ Error closing database connections: {e}")
249
+
250
+ # Clean up AI factory and services
251
+ try:
252
+ from ...inference.ai_factory import AIFactory
253
+ factory = AIFactory.get_instance()
254
+ await factory.cleanup()
255
+ logger.info("✅ AI Factory cleaned up")
256
+ except Exception as e:
257
+ logger.error(f"❌ Error cleaning up AI Factory: {e}")
258
+
259
+ # Clean up startup initializer resources
260
+ try:
261
+ from .startup import startup_initializer
262
+ await startup_initializer.cleanup()
263
+ logger.info("✅ Startup resources cleaned up")
264
+ except Exception as e:
265
+ logger.error(f"❌ Error cleaning up startup resources: {e}")
266
+
267
+ logger.info("✅ Application shutdown completed successfully")
268
+ except Exception as e:
269
+ logger.error(f"❌ Error during application shutdown: {e}")
270
+
82
271
  return app
83
272
 
84
273
  # Create default app instance
@@ -86,4 +275,37 @@ app = create_app()
86
275
 
87
276
  if __name__ == "__main__":
88
277
  import uvicorn
89
- uvicorn.run(app, host="0.0.0.0", port=8000)
278
+ import os
279
+ import signal
280
+
281
+ port = int(os.getenv("PORT", 8082))
282
+
283
+ # Configure uvicorn for graceful shutdown
284
+ config = uvicorn.Config(
285
+ app,
286
+ host="0.0.0.0",
287
+ port=port,
288
+ log_level=os.getenv("LOG_LEVEL", "info").lower(),
289
+ access_log=True,
290
+ loop="asyncio",
291
+ # Graceful shutdown configuration
292
+ timeout_keep_alive=30, # Keep connections alive for 30 seconds
293
+ timeout_graceful_shutdown=30, # 30 second graceful shutdown timeout
294
+ )
295
+
296
+ server = uvicorn.Server(config)
297
+
298
+ # Setup signal handlers for graceful shutdown
299
+ def signal_handler(signum, frame):
300
+ logger.info(f"Received signal {signum}, initiating graceful shutdown...")
301
+ server.should_exit = True
302
+
303
+ signal.signal(signal.SIGINT, signal_handler)
304
+ signal.signal(signal.SIGTERM, signal_handler)
305
+
306
+ try:
307
+ server.run()
308
+ except KeyboardInterrupt:
309
+ logger.info("Keyboard interrupt received, shutting down...")
310
+ finally:
311
+ logger.info("Server shutdown complete")