isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228)
  1. isa_model/client.py +1166 -584
  2. isa_model/core/cache/redis_cache.py +410 -0
  3. isa_model/core/config/config_manager.py +282 -12
  4. isa_model/core/config.py +91 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/direct_db_client.py +114 -0
  7. isa_model/core/database/migration_manager.py +563 -0
  8. isa_model/core/database/migrations.py +297 -0
  9. isa_model/core/database/supabase_client.py +258 -0
  10. isa_model/core/dependencies.py +316 -0
  11. isa_model/core/discovery/__init__.py +19 -0
  12. isa_model/core/discovery/consul_discovery.py +190 -0
  13. isa_model/core/logging/__init__.py +54 -0
  14. isa_model/core/logging/influx_logger.py +523 -0
  15. isa_model/core/logging/loki_logger.py +160 -0
  16. isa_model/core/models/__init__.py +46 -0
  17. isa_model/core/models/config_models.py +625 -0
  18. isa_model/core/models/deployment_billing_tracker.py +430 -0
  19. isa_model/core/models/model_billing_tracker.py +60 -88
  20. isa_model/core/models/model_manager.py +66 -25
  21. isa_model/core/models/model_metadata.py +690 -0
  22. isa_model/core/models/model_repo.py +217 -55
  23. isa_model/core/models/model_statistics_tracker.py +234 -0
  24. isa_model/core/models/model_storage.py +0 -1
  25. isa_model/core/models/model_version_manager.py +959 -0
  26. isa_model/core/models/system_models.py +857 -0
  27. isa_model/core/pricing_manager.py +2 -249
  28. isa_model/core/repositories/__init__.py +9 -0
  29. isa_model/core/repositories/config_repository.py +912 -0
  30. isa_model/core/resilience/circuit_breaker.py +366 -0
  31. isa_model/core/security/secrets.py +358 -0
  32. isa_model/core/services/__init__.py +2 -4
  33. isa_model/core/services/intelligent_model_selector.py +479 -370
  34. isa_model/core/storage/hf_storage.py +2 -2
  35. isa_model/core/types.py +8 -0
  36. isa_model/deployment/__init__.py +5 -48
  37. isa_model/deployment/core/__init__.py +2 -31
  38. isa_model/deployment/core/deployment_manager.py +1278 -368
  39. isa_model/deployment/local/__init__.py +31 -0
  40. isa_model/deployment/local/config.py +248 -0
  41. isa_model/deployment/local/gpu_gateway.py +607 -0
  42. isa_model/deployment/local/health_checker.py +428 -0
  43. isa_model/deployment/local/provider.py +586 -0
  44. isa_model/deployment/local/tensorrt_service.py +621 -0
  45. isa_model/deployment/local/transformers_service.py +644 -0
  46. isa_model/deployment/local/vllm_service.py +527 -0
  47. isa_model/deployment/modal/__init__.py +8 -0
  48. isa_model/deployment/modal/config.py +136 -0
  49. isa_model/deployment/modal/deployer.py +894 -0
  50. isa_model/deployment/modal/services/__init__.py +3 -0
  51. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  52. isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
  53. isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
  54. isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
  55. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  56. isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
  57. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  58. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  59. isa_model/deployment/modal/services/video/__init__.py +1 -0
  60. isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
  61. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  62. isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
  63. isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
  64. isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
  65. isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
  66. isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
  67. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  68. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  69. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  70. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  71. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  72. isa_model/deployment/storage/__init__.py +5 -0
  73. isa_model/deployment/storage/deployment_repository.py +824 -0
  74. isa_model/deployment/triton/__init__.py +10 -0
  75. isa_model/deployment/triton/config.py +196 -0
  76. isa_model/deployment/triton/configs/__init__.py +1 -0
  77. isa_model/deployment/triton/provider.py +512 -0
  78. isa_model/deployment/triton/scripts/__init__.py +1 -0
  79. isa_model/deployment/triton/templates/__init__.py +1 -0
  80. isa_model/inference/__init__.py +47 -1
  81. isa_model/inference/ai_factory.py +179 -16
  82. isa_model/inference/legacy_services/__init__.py +21 -0
  83. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  84. isa_model/inference/legacy_services/model_service.py +573 -0
  85. isa_model/inference/legacy_services/model_serving.py +717 -0
  86. isa_model/inference/legacy_services/model_training.py +561 -0
  87. isa_model/inference/models/__init__.py +21 -0
  88. isa_model/inference/models/inference_config.py +551 -0
  89. isa_model/inference/models/inference_record.py +675 -0
  90. isa_model/inference/models/performance_models.py +714 -0
  91. isa_model/inference/repositories/__init__.py +9 -0
  92. isa_model/inference/repositories/inference_repository.py +828 -0
  93. isa_model/inference/services/audio/__init__.py +21 -0
  94. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  95. isa_model/inference/services/audio/base_stt_service.py +184 -11
  96. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  97. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  98. isa_model/inference/services/audio/openai_stt_service.py +53 -11
  99. isa_model/inference/services/base_service.py +17 -1
  100. isa_model/inference/services/custom_model_manager.py +277 -0
  101. isa_model/inference/services/embedding/__init__.py +13 -0
  102. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  103. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  104. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  105. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  106. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  107. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  108. isa_model/inference/services/img/__init__.py +2 -2
  109. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  110. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  111. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  112. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  113. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  114. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  115. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  116. isa_model/inference/services/llm/__init__.py +10 -2
  117. isa_model/inference/services/llm/base_llm_service.py +361 -26
  118. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  119. isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
  120. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  121. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  122. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  123. isa_model/inference/services/llm/local_llm_service.py +747 -0
  124. isa_model/inference/services/llm/ollama_llm_service.py +11 -3
  125. isa_model/inference/services/llm/openai_llm_service.py +670 -56
  126. isa_model/inference/services/llm/yyds_llm_service.py +10 -3
  127. isa_model/inference/services/vision/__init__.py +27 -6
  128. isa_model/inference/services/vision/base_vision_service.py +118 -185
  129. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  130. isa_model/inference/services/vision/helpers/image_utils.py +19 -10
  131. isa_model/inference/services/vision/isa_vision_service.py +634 -0
  132. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  133. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  134. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  135. isa_model/serving/api/cache_manager.py +245 -0
  136. isa_model/serving/api/dependencies/__init__.py +1 -0
  137. isa_model/serving/api/dependencies/auth.py +194 -0
  138. isa_model/serving/api/dependencies/database.py +139 -0
  139. isa_model/serving/api/error_handlers.py +284 -0
  140. isa_model/serving/api/fastapi_server.py +240 -18
  141. isa_model/serving/api/middleware/auth.py +317 -0
  142. isa_model/serving/api/middleware/security.py +268 -0
  143. isa_model/serving/api/middleware/tenant_context.py +414 -0
  144. isa_model/serving/api/routes/analytics.py +489 -0
  145. isa_model/serving/api/routes/config.py +645 -0
  146. isa_model/serving/api/routes/deployment_billing.py +315 -0
  147. isa_model/serving/api/routes/deployments.py +475 -0
  148. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  149. isa_model/serving/api/routes/health.py +32 -12
  150. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  151. isa_model/serving/api/routes/local_deployments.py +448 -0
  152. isa_model/serving/api/routes/logs.py +430 -0
  153. isa_model/serving/api/routes/settings.py +582 -0
  154. isa_model/serving/api/routes/tenants.py +575 -0
  155. isa_model/serving/api/routes/unified.py +992 -171
  156. isa_model/serving/api/routes/webhooks.py +479 -0
  157. isa_model/serving/api/startup.py +318 -0
  158. isa_model/serving/modal_proxy_server.py +249 -0
  159. isa_model/utils/gpu_utils.py +311 -0
  160. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
  161. isa_model-0.4.3.dist-info/RECORD +193 -0
  162. isa_model/deployment/cloud/__init__.py +0 -9
  163. isa_model/deployment/cloud/modal/__init__.py +0 -10
  164. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  165. isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
  166. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
  167. isa_model/deployment/cloud/modal/register_models.py +0 -321
  168. isa_model/deployment/core/deployment_config.py +0 -356
  169. isa_model/deployment/core/isa_deployment_service.py +0 -401
  170. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  171. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  172. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  173. isa_model/deployment/runtime/deployed_service.py +0 -338
  174. isa_model/deployment/services/__init__.py +0 -9
  175. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  176. isa_model/deployment/services/model_service.py +0 -332
  177. isa_model/deployment/services/service_monitor.py +0 -356
  178. isa_model/deployment/services/service_registry.py +0 -527
  179. isa_model/eval/__init__.py +0 -92
  180. isa_model/eval/benchmarks.py +0 -469
  181. isa_model/eval/config/__init__.py +0 -10
  182. isa_model/eval/config/evaluation_config.py +0 -108
  183. isa_model/eval/evaluators/__init__.py +0 -18
  184. isa_model/eval/evaluators/base_evaluator.py +0 -503
  185. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  186. isa_model/eval/factory.py +0 -531
  187. isa_model/eval/infrastructure/__init__.py +0 -24
  188. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  189. isa_model/eval/metrics.py +0 -798
  190. isa_model/inference/adapter/unified_api.py +0 -248
  191. isa_model/inference/services/helpers/stacked_config.py +0 -148
  192. isa_model/inference/services/img/flux_professional_service.py +0 -603
  193. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  194. isa_model/inference/services/others/table_transformer_service.py +0 -61
  195. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  196. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  197. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  198. isa_model/scripts/inference_tracker.py +0 -283
  199. isa_model/scripts/mlflow_manager.py +0 -379
  200. isa_model/scripts/model_registry.py +0 -465
  201. isa_model/scripts/register_models.py +0 -370
  202. isa_model/scripts/register_models_with_embeddings.py +0 -510
  203. isa_model/scripts/start_mlflow.py +0 -95
  204. isa_model/scripts/training_tracker.py +0 -257
  205. isa_model/training/__init__.py +0 -74
  206. isa_model/training/annotation/annotation_schema.py +0 -47
  207. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  208. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  209. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  210. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  211. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  212. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  213. isa_model/training/annotation/views/annotation_controller.py +0 -158
  214. isa_model/training/cloud/__init__.py +0 -22
  215. isa_model/training/cloud/job_orchestrator.py +0 -402
  216. isa_model/training/cloud/runpod_trainer.py +0 -454
  217. isa_model/training/cloud/storage_manager.py +0 -482
  218. isa_model/training/core/__init__.py +0 -23
  219. isa_model/training/core/config.py +0 -181
  220. isa_model/training/core/dataset.py +0 -222
  221. isa_model/training/core/trainer.py +0 -720
  222. isa_model/training/core/utils.py +0 -213
  223. isa_model/training/factory.py +0 -424
  224. isa_model-0.3.91.dist-info/RECORD +0 -138
  225. /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
  226. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  227. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  228. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
isa_model/inference/ai_factory.py

@@ -36,12 +36,13 @@ class AIFactory:
 
     def __init__(self):
         """Initialize the AI Factory."""
-        if not self._is_initialized:
+        # Check if this specific instance has been initialized (not class-level flag)
+        if not hasattr(self, 'model_manager'):
             # Use centralized managers
             self.model_manager = ModelManager()
             self.config_manager = ConfigManager()
             self._cached_services: Dict[str, BaseService] = {}
-
+
             logger.info("AI Factory initialized with centralized ModelManager and ConfigManager")
             AIFactory._is_initialized = True
 
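The move from the class-level `_is_initialized` flag to a per-instance `hasattr` check matters whenever more than one instance is ever constructed. The toy sketch below is not the package's code, only an illustration of the two patterns: with a class-level flag, a second instance skips its own attribute setup; with the instance-level check, every instance sets itself up.

# Illustrative only: mimics the pattern changed in AIFactory.__init__.
class FlagSingleton:
    _is_initialized = False

    def __init__(self):
        if not self._is_initialized:          # class-level flag
            self.manager = object()
            FlagSingleton._is_initialized = True

class AttrSingleton:
    def __init__(self):
        if not hasattr(self, 'manager'):      # per-instance check
            self.manager = object()

a, b = FlagSingleton(), FlagSingleton()
print(hasattr(b, 'manager'))   # False -- second instance was never set up
c, d = AttrSingleton(), AttrSingleton()
print(hasattr(d, 'manager'))   # True  -- every instance configures itself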
@@ -52,8 +53,8 @@ class AIFactory:
         Get a LLM service instance with automatic defaults
 
         Args:
-            model_name: Name of the model to use (defaults: OpenAI="gpt-4.1-mini", Ollama="llama3.2:3b", YYDS="claude-sonnet-4-20250514")
-            provider: Provider name (defaults to 'openai' for production, 'ollama' for dev)
+            model_name: Name of the model to use (defaults: OpenAI="gpt-4.1-mini", Ollama="llama3.2:3b", YYDS="claude-sonnet-4-20250514", Cerebras="gpt-oss-120b", ISA="isa-llm-service")
+            provider: Provider name (defaults to 'openai' for production, 'ollama' for dev, 'cerebras' for ultra-fast inference, 'isa' for custom models, 'huggingface' for HF models)
             config: Optional configuration dictionary
 
         Returns:
@@ -69,6 +70,15 @@ class AIFactory:
         elif provider == "yyds":
             final_model_name = model_name or "claude-sonnet-4-20250514"
             final_provider = provider
+        elif provider == "cerebras":
+            final_model_name = model_name or "gpt-oss-120b"
+            final_provider = provider
+        elif provider == "isa":
+            final_model_name = model_name or "isa-llm-service"
+            final_provider = provider
+        elif provider == "huggingface":
+            final_model_name = model_name or "xenobordom/dialogpt-isa-trained-1755493402"
+            final_provider = provider
         else:
             # Default provider selection - OpenAI with cheapest model
             final_provider = provider or "openai"
@@ -76,6 +86,12 @@ class AIFactory:
                 final_model_name = model_name or "gpt-4.1-mini"
             elif final_provider == "ollama":
                 final_model_name = model_name or "llama3.2:3b-instruct-fp16"
+            elif final_provider == "cerebras":
+                final_model_name = model_name or "gpt-oss-120b"
+            elif final_provider == "isa":
+                final_model_name = model_name or "isa-llm-service"
+            elif final_provider == "huggingface":
+                final_model_name = model_name or "xenobordom/dialogpt-isa-trained-1755493402"
             else:
                 final_model_name = model_name or "gpt-4.1-mini"
 
@@ -93,6 +109,18 @@ class AIFactory:
                 from isa_model.inference.services.llm.yyds_llm_service import YydsLLMService
                 return YydsLLMService(provider_name=final_provider, model_name=final_model_name,
                                       model_manager=self.model_manager, config_manager=self.config_manager)
+            elif final_provider == "cerebras":
+                from isa_model.inference.services.llm.cerebras_llm_service import CerebrasLLMService
+                return CerebrasLLMService(provider_name=final_provider, model_name=final_model_name,
+                                          model_manager=self.model_manager, config_manager=self.config_manager)
+            elif final_provider == "isa":
+                from isa_model.inference.services.llm.huggingface_llm_service import ISALLMService
+                return ISALLMService(provider_name=final_provider, model_name=final_model_name,
+                                     model_manager=self.model_manager, config_manager=self.config_manager)
+            elif final_provider == "huggingface":
+                from isa_model.inference.services.llm.huggingface_llm_service import ISALLMService
+                return ISALLMService(provider_name="isa", model_name=final_model_name,
+                                     model_manager=self.model_manager, config_manager=self.config_manager)
             else:
                 raise ValueError(f"Unsupported LLM provider: {final_provider}")
         except Exception as e:
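For orientation, a minimal usage sketch of the new LLM provider routing. The provider names, default models, and the ISALLMService wrapper come from the hunks above; the accessor name `get_llm` is an assumption (it is not visible in these hunks), and provider credentials are assumed to be configured via ConfigManager or the environment.

from isa_model.inference.ai_factory import AIFactory

factory = AIFactory()

# Cerebras route, defaulting to "gpt-oss-120b"
cerebras_llm = factory.get_llm(provider="cerebras")

# ISA-hosted LLM, defaulting to "isa-llm-service"
isa_llm = factory.get_llm(provider="isa")

# HuggingFace models are served through the same ISALLMService wrapper,
# but registered under provider_name="isa" internally.
hf_llm = factory.get_llm(provider="huggingface",
                         model_name="xenobordom/dialogpt-isa-trained-1755493402")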
@@ -110,6 +138,7 @@ class AIFactory:
 
         Args:
             model_name: Model name. Special names:
+                - "hybrid": Unified UI/Document analysis service (RECOMMENDED)
                 - "isa_vision_table": Table extraction service
                 - "isa_vision_ui": UI detection service
                 - "isa_vision_doc": Document analysis service
@@ -120,18 +149,27 @@ class AIFactory:
         Returns:
             Vision service instance
         """
-        # Handle special ISA vision services
-        if model_name in ["isa_vision_table", "isa_vision_ui", "isa_vision_doc"]:
+        # Handle special vision services
+        if model_name == "hybrid":
+            # Hybrid vision service has been deprecated, use OpenAI as fallback
+            logger.warning("HybridVisionService is deprecated, using OpenAI vision service as fallback")
+            final_provider = "openai"
+            final_model_name = "gpt-4.1-nano"
+
+        elif model_name in ["isa_vision_table", "isa_vision_ui", "isa_vision_doc"]:
             try:
-                from isa_model.deployment.services.simple_auto_deploy_vision_service import SimpleAutoDeployVisionService
+                from isa_model.deployment.modal.services.vision.simple_auto_deploy_vision_service import SimpleAutoDeployVisionService
                 logger.info(f"Creating auto-deploy service wrapper for {model_name}")
                 return SimpleAutoDeployVisionService(model_name, config)
             except Exception as e:
                 logger.error(f"Failed to create ISA vision service: {e}")
-                raise
+                # Fallback to ISA service
+                logger.warning(f"Auto-deploy service failed, using ISA vision service as fallback")
+                final_provider = "isa"
+                final_model_name = "isa-omniparser-ui-detection"
 
         # Set defaults for regular services
-        if provider == "openai":
+        elif provider == "openai":
             final_model_name = model_name or "gpt-4.1-mini"
             final_provider = provider
         elif provider == "ollama":
@@ -140,6 +178,9 @@ class AIFactory:
         elif provider == "replicate":
             final_model_name = model_name or "meta/llama-2-70b-chat"
             final_provider = provider
+        elif provider == "isa":
+            final_model_name = model_name or "isa-omniparser-ui-detection"
+            final_provider = provider
         else:
             # Default provider selection
             final_provider = provider or "openai"
@@ -147,6 +188,8 @@ class AIFactory:
                 final_model_name = model_name or "gpt-4.1-mini"
             elif final_provider == "ollama":
                 final_model_name = model_name or "llama3.2-vision:latest"
+            elif final_provider == "isa":
+                final_model_name = model_name or "isa-omniparser-ui-detection"
             else:
                 final_model_name = model_name or "gpt-4.1-mini"
 
@@ -160,6 +203,10 @@ class AIFactory:
                 from isa_model.inference.services.vision.replicate_vision_service import ReplicateVisionService
                 return ReplicateVisionService(provider_name=final_provider, model_name=final_model_name,
                                               model_manager=self.model_manager, config_manager=self.config_manager)
+            elif final_provider == "isa":
+                from isa_model.inference.services.vision.isa_vision_service import ISAVisionService
+                logger.info(f"Creating ISA Vision Service with model: {final_model_name}")
+                return ISAVisionService()
             else:
                 raise ValueError(f"Unsupported vision provider: {final_provider}")
         except Exception as e:
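A minimal sketch of the new vision routing. The "isa" provider, the "isa-omniparser-ui-detection" default, the ISAVisionService class, and the "hybrid" deprecation fallback all come from the hunks above; the accessor name `get_vision` is an assumption, since the method signature is not shown in this diff.

from isa_model.inference.ai_factory import AIFactory

factory = AIFactory()

# ISA route: returns an ISAVisionService, defaulting to "isa-omniparser-ui-detection"
ui_vision = factory.get_vision(provider="isa")

# "hybrid" is now deprecated and falls back to the OpenAI "gpt-4.1-nano" vision service
doc_vision = factory.get_vision(model_name="hybrid")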
@@ -293,6 +340,40 @@ class AIFactory:
             logger.error(f"Failed to create TTS service: {e}")
             raise
 
+    def get_realtime(self, model_name: Optional[str] = None, provider: Optional[str] = None,
+                     config: Optional[Dict[str, Any]] = None) -> BaseService:
+        """
+        Get realtime audio service with automatic defaults
+
+        Args:
+            model_name: Name of the model to use (defaults: OpenAI="gpt-4o-realtime-preview-2024-10-01")
+            provider: Provider name (defaults to 'openai')
+            config: Optional configuration dictionary
+
+        Returns:
+            Realtime service instance
+        """
+        # Set defaults based on provider
+        if provider == "openai":
+            final_model_name = model_name or "gpt-4o-realtime-preview-2024-10-01"
+            final_provider = provider
+        else:
+            # Default provider selection - only OpenAI supports realtime currently
+            final_provider = provider or "openai"
+            final_model_name = model_name or "gpt-4o-realtime-preview-2024-10-01"
+
+        # Create service using new centralized approach
+        try:
+            if final_provider == "openai":
+                from isa_model.inference.services.audio.openai_realtime_service import OpenAIRealtimeService
+                return OpenAIRealtimeService(provider_name=final_provider, model_name=final_model_name,
+                                             model_manager=self.model_manager, config_manager=self.config_manager)
+            else:
+                raise ValueError(f"Unsupported realtime provider: {final_provider}")
+        except Exception as e:
+            logger.error(f"Failed to create realtime service: {e}")
+            raise
+
     def get_embed(self, model_name: Optional[str] = None, provider: Optional[str] = None,
                   config: Optional[Dict[str, Any]] = None) -> BaseService:
         """
@@ -322,20 +403,61 @@ class AIFactory:
             final_model_name = model_name or "bge-m3"
 
         # Create service using new centralized approach
+        # Create cache key
+        cache_key = f"embed_{final_provider}_{final_model_name}"
+
+        # Check cache first
+        if cache_key in self._cached_services:
+            logger.debug(f"Using cached embedding service: {cache_key}")
+            return self._cached_services[cache_key]
+
         try:
             if final_provider == "openai":
-                from isa_model.inference.services.embedding.openai_embed_service import OpenAIEmbedService
-                return OpenAIEmbedService(provider_name=final_provider, model_name=final_model_name,
-                                          model_manager=self.model_manager, config_manager=self.config_manager)
+                # Use resilient embedding service for OpenAI (with fallback)
+                from isa_model.inference.services.embedding.resilient_embed_service import ResilientEmbedService
+                service = ResilientEmbedService(provider_name=final_provider, model_name=final_model_name,
+                                                model_manager=self.model_manager, config_manager=self.config_manager)
             elif final_provider == "ollama":
                 from isa_model.inference.services.embedding.ollama_embed_service import OllamaEmbedService
-                return OllamaEmbedService(provider_name=final_provider, model_name=final_model_name,
-                                          model_manager=self.model_manager, config_manager=self.config_manager)
+                service = OllamaEmbedService(provider_name=final_provider, model_name=final_model_name,
+                                             model_manager=self.model_manager, config_manager=self.config_manager)
+            elif final_provider == "isa":
+                from isa_model.inference.services.embedding.isa_embed_service import ISAEmbedService
+                service = ISAEmbedService()  # ISA service doesn't use model_manager/config_manager yet
             else:
                 raise ValueError(f"Unsupported embedding provider: {final_provider}")
+
+            # Cache the service
+            self._cached_services[cache_key] = service
+            logger.debug(f"Created and cached embedding service: {cache_key}")
+            return service
+
         except Exception as e:
             logger.error(f"Failed to create embedding service: {e}")
-            raise
+            # As a last resort, try the resilient service
+            try:
+                logger.info("Attempting to create resilient embedding service as fallback")
+                from isa_model.inference.services.embedding.resilient_embed_service import ResilientEmbedService
+                service = ResilientEmbedService(provider_name="openai", model_name="text-embedding-3-small",
+                                                model_manager=self.model_manager, config_manager=self.config_manager)
+                self._cached_services[cache_key] = service
+                logger.info("Successfully created fallback embedding service")
+                return service
+            except Exception as fallback_error:
+                logger.error(f"Even fallback embedding service failed: {fallback_error}")
+                # Create a more informative error
+                error_details = {
+                    "primary_error": str(e),
+                    "fallback_error": str(fallback_error),
+                    "provider": final_provider,
+                    "model": final_model_name,
+                    "suggestions": [
+                        "检查OpenAI API密钥配置",
+                        "确认网络连接正常",
+                        "尝试使用其他嵌入提供商如ollama"
+                    ]
+                }
+                raise ValueError(f"嵌入服务创建失败: {str(e)}。详细信息: {error_details}")
 
     def clear_cache(self):
         """Clear the service cache"""
@@ -427,4 +549,45 @@ class AIFactory:
         # Modal services auto-scale to zero, so explicit shutdown isn't required
         # This method is here for compatibility with AutoDeployVisionService
         logger.info(f"Modal service {model_name} will auto-scale to zero when idle")
-        pass
+        pass
+
+    async def cleanup(self):
+        """Clean up all cached services and resources"""
+        logger.info("🧹 Starting AIFactory cleanup...")
+
+        cleanup_tasks = []
+        for service_key, service in self._cached_services.items():
+            try:
+                if hasattr(service, 'close') and callable(service.close):
+                    cleanup_tasks.append(service.close())
+                    logger.debug(f"Scheduled cleanup for service: {service_key}")
+            except Exception as e:
+                logger.error(f"Error scheduling cleanup for service {service_key}: {e}")
+
+        # Wait for all cleanup tasks to complete
+        if cleanup_tasks:
+            import asyncio
+            try:
+                await asyncio.gather(*cleanup_tasks, return_exceptions=True)
+                logger.info(f"✅ Cleaned up {len(cleanup_tasks)} services")
+            except Exception as e:
+                logger.error(f"❌ Error during service cleanup: {e}")
+
+        # Clear the cached services
+        self._cached_services.clear()
+
+        # Clean up model manager if it has cleanup method
+        if hasattr(self.model_manager, 'cleanup') and callable(self.model_manager.cleanup):
+            try:
+                await self.model_manager.cleanup()
+                logger.info("✅ Model manager cleaned up")
+            except Exception as e:
+                logger.error(f"❌ Error cleaning up model manager: {e}")
+
+        logger.info("✅ AIFactory cleanup completed")
+
+    @classmethod
+    def reset_instance(cls):
+        """Reset the singleton instance (useful for testing)"""
+        cls._instance = None
+        cls._is_initialized = False
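A minimal teardown sketch using the two new lifecycle hooks. Both `cleanup()` and `reset_instance()` appear verbatim above; the `asyncio.run` wrapper around the coroutine is ours.

import asyncio
from isa_model.inference.ai_factory import AIFactory

async def shutdown():
    factory = AIFactory()
    # Awaits close() on every cached service, then clears the service cache
    # and releases the model manager's resources.
    await factory.cleanup()

asyncio.run(shutdown())

# In tests, drop the singleton state so the next AIFactory() starts fresh.
AIFactory.reset_instance()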
isa_model/inference/legacy_services/__init__.py (new file)

@@ -0,0 +1,21 @@
+"""
+Model Service Suite Package
+"""
+
+from .model_training import ModelTrainingService, TrainingConfig, TrainingResult
+from .model_evaluation import ModelEvaluationService, EvaluationResult
+from .model_serving import ModelServingService, ServingResult
+from .model_service import ModelService, ModelConfig, ModelResult
+
+__all__ = [
+    'ModelTrainingService',
+    'TrainingConfig',
+    'TrainingResult',
+    'ModelEvaluationService',
+    'EvaluationResult',
+    'ModelServingService',
+    'ServingResult',
+    'ModelService',
+    'ModelConfig',
+    'ModelResult'
+]
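This new package bundles the legacy model-lifecycle services behind one import path. A usage sketch follows; the names come from the `__all__` above, while constructor arguments are not shown in this diff and are therefore omitted.

# Import the re-exported legacy services from the new package location.
from isa_model.inference.legacy_services import (
    ModelTrainingService,
    TrainingConfig,
    ModelEvaluationService,
    ModelServingService,
)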