isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +35 -80
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/types.py +1 -0
  26. isa_model/deployment/__init__.py +5 -48
  27. isa_model/deployment/core/__init__.py +2 -31
  28. isa_model/deployment/core/deployment_manager.py +1278 -370
  29. isa_model/deployment/modal/__init__.py +8 -0
  30. isa_model/deployment/modal/config.py +136 -0
  31. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  32. isa_model/deployment/modal/services/__init__.py +3 -0
  33. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  34. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  35. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  36. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  37. isa_model/deployment/modal/services/video/__init__.py +1 -0
  38. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  39. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  40. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  41. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  42. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  43. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  44. isa_model/deployment/storage/__init__.py +5 -0
  45. isa_model/deployment/storage/deployment_repository.py +824 -0
  46. isa_model/deployment/triton/__init__.py +10 -0
  47. isa_model/deployment/triton/config.py +196 -0
  48. isa_model/deployment/triton/configs/__init__.py +1 -0
  49. isa_model/deployment/triton/provider.py +512 -0
  50. isa_model/deployment/triton/scripts/__init__.py +1 -0
  51. isa_model/deployment/triton/templates/__init__.py +1 -0
  52. isa_model/inference/__init__.py +47 -1
  53. isa_model/inference/ai_factory.py +137 -10
  54. isa_model/inference/legacy_services/__init__.py +21 -0
  55. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  56. isa_model/inference/legacy_services/model_service.py +573 -0
  57. isa_model/inference/legacy_services/model_serving.py +717 -0
  58. isa_model/inference/legacy_services/model_training.py +561 -0
  59. isa_model/inference/models/__init__.py +21 -0
  60. isa_model/inference/models/inference_config.py +551 -0
  61. isa_model/inference/models/inference_record.py +675 -0
  62. isa_model/inference/models/performance_models.py +714 -0
  63. isa_model/inference/repositories/__init__.py +9 -0
  64. isa_model/inference/repositories/inference_repository.py +828 -0
  65. isa_model/inference/services/audio/base_stt_service.py +184 -11
  66. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  67. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  68. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  69. isa_model/inference/services/llm/__init__.py +10 -2
  70. isa_model/inference/services/llm/base_llm_service.py +335 -24
  71. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  72. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  73. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  74. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  75. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  76. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  77. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  78. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  79. isa_model/inference/services/vision/__init__.py +22 -1
  80. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  81. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  82. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  83. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  84. isa_model/serving/api/cache_manager.py +245 -0
  85. isa_model/serving/api/dependencies/__init__.py +1 -0
  86. isa_model/serving/api/dependencies/auth.py +194 -0
  87. isa_model/serving/api/dependencies/database.py +139 -0
  88. isa_model/serving/api/error_handlers.py +284 -0
  89. isa_model/serving/api/fastapi_server.py +172 -22
  90. isa_model/serving/api/middleware/auth.py +8 -2
  91. isa_model/serving/api/middleware/security.py +23 -33
  92. isa_model/serving/api/middleware/tenant_context.py +414 -0
  93. isa_model/serving/api/routes/analytics.py +4 -1
  94. isa_model/serving/api/routes/config.py +645 -0
  95. isa_model/serving/api/routes/deployment_billing.py +315 -0
  96. isa_model/serving/api/routes/deployments.py +138 -2
  97. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  98. isa_model/serving/api/routes/health.py +32 -12
  99. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  100. isa_model/serving/api/routes/local_deployments.py +448 -0
  101. isa_model/serving/api/routes/tenants.py +575 -0
  102. isa_model/serving/api/routes/unified.py +680 -18
  103. isa_model/serving/api/routes/webhooks.py +479 -0
  104. isa_model/serving/api/startup.py +68 -54
  105. isa_model/utils/gpu_utils.py +311 -0
  106. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
  107. isa_model-0.4.4.dist-info/RECORD +180 -0
  108. isa_model/core/security/secrets.py +0 -358
  109. isa_model/core/storage/hf_storage.py +0 -419
  110. isa_model/core/storage/minio_storage.py +0 -0
  111. isa_model/deployment/cloud/__init__.py +0 -9
  112. isa_model/deployment/cloud/modal/__init__.py +0 -10
  113. isa_model/deployment/core/deployment_config.py +0 -356
  114. isa_model/deployment/core/isa_deployment_service.py +0 -401
  115. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  116. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  117. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  118. isa_model/deployment/runtime/deployed_service.py +0 -338
  119. isa_model/deployment/services/__init__.py +0 -9
  120. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  121. isa_model/deployment/services/model_service.py +0 -332
  122. isa_model/deployment/services/service_monitor.py +0 -356
  123. isa_model/deployment/services/service_registry.py +0 -527
  124. isa_model/eval/__init__.py +0 -92
  125. isa_model/eval/benchmarks/__init__.py +0 -27
  126. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  127. isa_model/eval/benchmarks.py +0 -701
  128. isa_model/eval/config/__init__.py +0 -10
  129. isa_model/eval/config/evaluation_config.py +0 -108
  130. isa_model/eval/evaluators/__init__.py +0 -24
  131. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  132. isa_model/eval/evaluators/base_evaluator.py +0 -503
  133. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  134. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  135. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  136. isa_model/eval/example_evaluation.py +0 -395
  137. isa_model/eval/factory.py +0 -798
  138. isa_model/eval/infrastructure/__init__.py +0 -24
  139. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  140. isa_model/eval/isa_benchmarks.py +0 -700
  141. isa_model/eval/isa_integration.py +0 -582
  142. isa_model/eval/metrics.py +0 -951
  143. isa_model/eval/tests/unit/test_basic.py +0 -396
  144. isa_model/serving/api/routes/evaluations.py +0 -579
  145. isa_model/training/__init__.py +0 -168
  146. isa_model/training/annotation/annotation_schema.py +0 -47
  147. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  148. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  149. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  150. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  151. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  152. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  153. isa_model/training/annotation/views/annotation_controller.py +0 -158
  154. isa_model/training/cloud/__init__.py +0 -22
  155. isa_model/training/cloud/job_orchestrator.py +0 -402
  156. isa_model/training/cloud/runpod_trainer.py +0 -454
  157. isa_model/training/cloud/storage_manager.py +0 -482
  158. isa_model/training/core/__init__.py +0 -26
  159. isa_model/training/core/config.py +0 -181
  160. isa_model/training/core/dataset.py +0 -222
  161. isa_model/training/core/trainer.py +0 -720
  162. isa_model/training/core/utils.py +0 -213
  163. isa_model/training/examples/intelligent_training_example.py +0 -281
  164. isa_model/training/factory.py +0 -424
  165. isa_model/training/intelligent/__init__.py +0 -25
  166. isa_model/training/intelligent/decision_engine.py +0 -643
  167. isa_model/training/intelligent/intelligent_factory.py +0 -888
  168. isa_model/training/intelligent/knowledge_base.py +0 -751
  169. isa_model/training/intelligent/resource_optimizer.py +0 -839
  170. isa_model/training/intelligent/task_classifier.py +0 -576
  171. isa_model/training/storage/__init__.py +0 -24
  172. isa_model/training/storage/core_integration.py +0 -439
  173. isa_model/training/storage/training_repository.py +0 -552
  174. isa_model/training/storage/training_storage.py +0 -628
  175. isa_model-0.4.0.dist-info/RECORD +0 -182
  176. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  177. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  178. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  179. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  180. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  181. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  182. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  183. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  184. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  185. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  186. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  187. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  188. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
  189. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
@@ -20,6 +20,8 @@ class OpenAILLMService(BaseLLMService):
20
20
 
21
21
  # Check if this is an O-series reasoning model
22
22
  self.is_reasoning_model = model_name.startswith("o4-") or model_name.startswith("o3-")
23
+ self.uses_completion_tokens = self.is_reasoning_model or model_name.startswith("gpt-5")
24
+ self.requires_default_temperature = self.is_reasoning_model or model_name.startswith("gpt-5")
23
25
  self.supports_deep_research = "deep-search" in model_name or "deep-research" in model_name
24
26
 
25
27
  # Get configuration from centralized config manager
@@ -36,7 +38,9 @@ class OpenAILLMService(BaseLLMService):
36
38
  self.client = AsyncOpenAI(
37
39
  api_key=provider_config["api_key"],
38
40
  base_url=provider_config.get("api_base_url", "https://api.openai.com/v1"),
39
- organization=provider_config.get("organization")
41
+ organization=provider_config.get("organization"),
42
+ timeout=10.0, # 10 second timeout for first token (much faster than 600s default)
43
+ max_retries=2 # Retry on timeout
40
44
  )
41
45
 
42
46
  logger.info(f"Initialized OpenAILLMService with model {self.model_name} and endpoint {self.client.base_url}")
@@ -70,6 +74,8 @@ class OpenAILLMService(BaseLLMService):
70
74
 
71
75
  # Copy OpenAI-specific attributes
72
76
  bound_service.is_reasoning_model = self.is_reasoning_model
77
+ bound_service.uses_completion_tokens = self.uses_completion_tokens
78
+ bound_service.requires_default_temperature = self.requires_default_temperature
73
79
  bound_service.supports_deep_research = self.supports_deep_research
74
80
 
75
81
  # Copy base class attributes
@@ -103,7 +109,7 @@ class OpenAILLMService(BaseLLMService):
103
109
 
104
110
  return bound_service
105
111
 
106
- async def astream(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
112
+ async def astream(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False, **extra_kwargs) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
107
113
  """
108
114
  True streaming method - yields tokens one by one as they arrive
109
115
 
@@ -121,19 +127,19 @@ class OpenAILLMService(BaseLLMService):
121
127
  if use_responses_api:
122
128
  logger.info(f"Using Responses API streaming for {self.model_name}")
123
129
  # Use Responses API streaming
124
- async for chunk in self._astream_responses_api(input_data, show_reasoning):
130
+ async for chunk in self._astream_responses_api(input_data, show_reasoning, **extra_kwargs):
125
131
  yield chunk
126
132
  else:
127
133
  logger.debug(f"Using Chat Completions API streaming for {self.model_name}")
128
134
  # Use Chat Completions API streaming
129
- async for chunk in self._astream_chat_completions_api(input_data):
135
+ async for chunk in self._astream_chat_completions_api(input_data, **extra_kwargs):
130
136
  yield chunk
131
137
 
132
138
  except Exception as e:
133
139
  logger.error(f"Error in astream: {e}")
134
140
  raise
135
141
 
136
- async def _astream_responses_api(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
142
+ async def _astream_responses_api(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False, **extra_kwargs) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
137
143
  """Stream using Responses API for reasoning models and deep research models"""
138
144
  try:
139
145
  # Use adapter manager to prepare messages
@@ -228,7 +234,7 @@ class OpenAILLMService(BaseLLMService):
228
234
  logger.error(f"Error in _astream_responses_api: {e}")
229
235
  raise
230
236
 
231
- async def _astream_chat_completions_api(self, input_data: Union[str, List[Dict[str, str]], Any]) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
237
+ async def _astream_chat_completions_api(self, input_data: Union[str, List[Dict[str, str]], Any], **extra_kwargs) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
232
238
  """Stream using Chat Completions API for standard models"""
233
239
  try:
234
240
  # Use adapter manager to prepare messages
@@ -242,13 +248,13 @@ class OpenAILLMService(BaseLLMService):
242
248
  "stream": True
243
249
  }
244
250
 
245
- # O4 models only support temperature=1 (default)
246
- if not self.is_reasoning_model:
251
+ # O4 and GPT-5 models only support temperature=1 (default)
252
+ if not self.requires_default_temperature:
247
253
  kwargs["temperature"] = provider_config.get("temperature", 0.7)
248
254
 
249
- # O4 models use max_completion_tokens instead of max_tokens
255
+ # O4 and GPT-5 models use max_completion_tokens instead of max_tokens
250
256
  max_tokens_value = provider_config.get("max_tokens", 1024)
251
- if self.is_reasoning_model:
257
+ if self.uses_completion_tokens:
252
258
  kwargs["max_completion_tokens"] = max_tokens_value
253
259
  else:
254
260
  kwargs["max_tokens"] = max_tokens_value
@@ -259,6 +265,11 @@ class OpenAILLMService(BaseLLMService):
259
265
  kwargs["tools"] = tool_schemas
260
266
  kwargs["tool_choice"] = "auto"
261
267
 
268
+ # Add response_format if specified (for JSON mode)
269
+ if 'response_format' in extra_kwargs:
270
+ kwargs['response_format'] = extra_kwargs['response_format']
271
+ logger.debug(f"Using response_format in streaming: {extra_kwargs['response_format']}")
272
+
262
273
  # Stream tokens and detect tool calls
263
274
  content_chunks = []
264
275
  tool_calls_accumulator = {} # Track complete tool calls by ID
@@ -360,13 +371,14 @@ class OpenAILLMService(BaseLLMService):
360
371
  logger.error(f"Error in _astream_chat_completions_api: {e}")
361
372
  raise
362
373
 
363
- async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False) -> Union[str, Any]:
374
+ async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False, **extra_kwargs) -> Union[str, Any]:
364
375
  """
365
376
  Unified invoke method for all input types
366
377
 
367
378
  Args:
368
379
  input_data: Input messages or text
369
380
  show_reasoning: If True and model supports it, show reasoning process using Responses API
381
+ **extra_kwargs: Additional parameters to pass to the API (e.g., response_format)
370
382
  """
371
383
  try:
372
384
  # Use adapter manager to prepare messages
@@ -385,13 +397,13 @@ class OpenAILLMService(BaseLLMService):
385
397
  "messages": messages
386
398
  }
387
399
 
388
- # O4 models only support temperature=1 (default)
389
- if not self.is_reasoning_model:
400
+ # O4 and GPT-5 models only support temperature=1 (default)
401
+ if not self.requires_default_temperature:
390
402
  kwargs["temperature"] = provider_config.get("temperature", 0.7)
391
403
 
392
- # O4 models use max_completion_tokens instead of max_tokens
404
+ # O4 and GPT-5 models use max_completion_tokens instead of max_tokens
393
405
  max_tokens_value = provider_config.get("max_tokens", 1024)
394
- if self.is_reasoning_model:
406
+ if self.uses_completion_tokens:
395
407
  kwargs["max_completion_tokens"] = max_tokens_value
396
408
  else:
397
409
  kwargs["max_tokens"] = max_tokens_value
@@ -403,11 +415,16 @@ class OpenAILLMService(BaseLLMService):
403
415
  if not use_responses_api: # Responses API handles tool choice differently
404
416
  kwargs["tool_choice"] = "auto"
405
417
 
418
+ # Add response_format if specified (for JSON mode)
419
+ if 'response_format' in extra_kwargs:
420
+ kwargs['response_format'] = extra_kwargs['response_format']
421
+ logger.debug(f"Using response_format: {extra_kwargs['response_format']}")
422
+
406
423
  # Handle streaming vs non-streaming
407
424
  if self.streaming:
408
425
  # TRUE STREAMING MODE - collect all chunks from the stream
409
426
  content_chunks = []
410
- async for token in self.astream(input_data, show_reasoning=show_reasoning):
427
+ async for token in self.astream(input_data, show_reasoning=show_reasoning, **extra_kwargs):
411
428
  if isinstance(token, str):
412
429
  content_chunks.append(token)
413
430
  elif isinstance(token, dict) and "result" in token:
@@ -63,16 +63,19 @@ class YydsLLMService(BaseLLMService):
63
63
 
64
64
  return bound_service
65
65
 
66
- async def astream(self, input_data: Union[str, List[Dict[str, str]], Any]) -> AsyncGenerator[str, None]:
66
+ async def astream(self, input_data: Union[str, List[Dict[str, str]], Any], **kwargs) -> AsyncGenerator[str, None]:
67
67
  """
68
68
  True streaming method - yields tokens one by one as they arrive
69
69
 
70
70
  Args:
71
71
  input_data: Same as ainvoke
72
+ **kwargs: Additional parameters (will filter out unsupported ones)
72
73
 
73
74
  Yields:
74
75
  Individual tokens as they arrive from the API
75
76
  """
77
+ # Remove parameters that yyds doesn't support
78
+ kwargs.pop('show_reasoning', None) # OpenAI-specific parameter
76
79
  try:
77
80
  # Use adapter manager to prepare messages
78
81
  messages = self._prepare_messages(input_data)
@@ -115,8 +118,11 @@ class YydsLLMService(BaseLLMService):
115
118
  logger.error(f"Error in astream: {e}")
116
119
  raise
117
120
 
118
- async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any]) -> Union[str, Any]:
121
+ async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any], **kwargs) -> Union[str, Any]:
119
122
  """Unified invoke method for all input types"""
123
+ # Remove parameters that yyds doesn't support
124
+ kwargs.pop('show_reasoning', None) # OpenAI-specific parameter
125
+ kwargs.pop('task', None) # Handled internally
120
126
  try:
121
127
  # Use adapter manager to prepare messages
122
128
  messages = self._prepare_messages(input_data)
@@ -31,6 +31,21 @@ except ImportError:
31
31
  OllamaVisionService = None
32
32
  OLLAMA_VISION_AVAILABLE = False
33
33
 
34
+ # Computer Vision specialized services
35
+ try:
36
+ from .vgg16_vision_service import VGG16VisionService
37
+ VGG16_VISION_AVAILABLE = True
38
+ except ImportError:
39
+ VGG16VisionService = None
40
+ VGG16_VISION_AVAILABLE = False
41
+
42
+ try:
43
+ from .blip_vision_service import BLIPVisionService
44
+ BLIP_VISION_AVAILABLE = True
45
+ except ImportError:
46
+ BLIPVisionService = None
47
+ BLIP_VISION_AVAILABLE = False
48
+
34
49
  __all__ = [
35
50
  "BaseVisionService",
36
51
  "OpenAIVisionService",
@@ -43,4 +58,10 @@ if ISA_VISION_AVAILABLE:
43
58
  __all__.append("ISAVisionService")
44
59
 
45
60
  if OLLAMA_VISION_AVAILABLE:
46
- __all__.append("OllamaVisionService")
61
+ __all__.append("OllamaVisionService")
62
+
63
+ if VGG16_VISION_AVAILABLE:
64
+ __all__.append("VGG16VisionService")
65
+
66
+ if BLIP_VISION_AVAILABLE:
67
+ __all__.append("BLIPVisionService")
@@ -59,25 +59,28 @@ def get_image_data(image: Union[str, BinaryIO]) -> bytes:
59
59
 
60
60
  def compress_image(image_data: Union[bytes, BytesIO], max_size: int = 1024) -> bytes:
61
61
  """压缩图片以减小大小
62
-
62
+
63
63
  Args:
64
64
  image_data: 图片数据,可以是 bytes 或 BytesIO
65
65
  max_size: 最大尺寸(像素)
66
-
66
+
67
67
  Returns:
68
68
  bytes: 压缩后的图片数据
69
69
  """
70
70
  try:
71
+ # Ensure max_size is int (type safety)
72
+ max_size = int(max_size)
73
+
71
74
  # 如果输入是 bytes,转换为 BytesIO
72
75
  if isinstance(image_data, bytes):
73
76
  image_data = BytesIO(image_data)
74
-
77
+
75
78
  img = Image.open(image_data)
76
-
79
+
77
80
  # 转换为 RGB 模式(如果需要)
78
81
  if img.mode in ('RGBA', 'P'):
79
82
  img = img.convert('RGB')
80
-
83
+
81
84
  # 计算新尺寸,保持宽高比
82
85
  ratio = max_size / max(img.size)
83
86
  if ratio < 1:
@@ -9,6 +9,7 @@ import logging
9
9
  import base64
10
10
  import io
11
11
  import time
12
+ import asyncio
12
13
  from typing import Dict, Any, List, Union, Optional, BinaryIO
13
14
  from PIL import Image
14
15
 
@@ -36,7 +37,7 @@ class ISAVisionService(BaseVisionService):
36
37
  def __init__(self,
37
38
  modal_app_id: str = "ap-VlHUQoiPUdy9cgrHSfG7Fk",
38
39
  modal_app_name: str = "isa-vision-ui-optimized",
39
- timeout: int = 30):
40
+ timeout: int = 60):
40
41
  """
41
42
  初始化ISA Vision服务
42
43
 
@@ -77,6 +78,31 @@ class ISAVisionService(BaseVisionService):
77
78
  self.request_count = 0
78
79
  self.total_cost = 0.0
79
80
 
81
+ # 性能优化 - 预热连接(延迟初始化)
82
+ self._connection_warmed = False
83
+
84
+ # 简单缓存机制(可选)
85
+ self._result_cache = {}
86
+ self._cache_max_size = 100
87
+
88
+ async def _warm_connection(self):
89
+ """预热Modal连接,减少首次调用延迟"""
90
+ if self._connection_warmed or not self.modal_app:
91
+ return
92
+
93
+ try:
94
+ logger.info("Warming up Modal connection...")
95
+ # 尝试获取服务状态来预热连接
96
+ if hasattr(self.modal_app, 'list_functions'):
97
+ await asyncio.wait_for(
98
+ asyncio.to_thread(self.modal_app.list_functions),
99
+ timeout=10
100
+ )
101
+ self._connection_warmed = True
102
+ logger.info("✅ Modal connection warmed up")
103
+ except Exception as e:
104
+ logger.warning(f"Failed to warm up connection: {e}")
105
+
80
106
  async def analyze_image(
81
107
  self,
82
108
  image: Union[str, BinaryIO],
@@ -154,6 +180,9 @@ class ISAVisionService(BaseVisionService):
154
180
  'error': 'Modal app or service not available'
155
181
  }
156
182
 
183
+ # 预热连接以减少延迟
184
+ await self._warm_connection()
185
+
157
186
  # 准备图像数据
158
187
  image_b64 = await self._prepare_image_base64(image)
159
188
 
@@ -208,11 +237,22 @@ class ISAVisionService(BaseVisionService):
208
237
 
209
238
  # 创建实例并调用优化方法(快速模式,无字幕)
210
239
  instance = OptimizedUIDetectionService()
211
- result = instance.detect_ui_elements_fast.remote(image_b64, enable_captions=False)
240
+ # 使用超时控制Modal调用
241
+ result = await asyncio.wait_for(
242
+ instance.detect_ui_elements_fast.remote(image_b64, enable_captions=False),
243
+ timeout=self.timeout
244
+ )
212
245
 
213
246
  logger.info("✅ Modal SDK call successful")
214
247
  return result
215
248
 
249
+ except asyncio.TimeoutError:
250
+ logger.error(f"Modal SDK call timed out after {self.timeout} seconds")
251
+ return {
252
+ 'success': False,
253
+ 'error': f'Modal service timeout after {self.timeout} seconds',
254
+ 'timeout': True
255
+ }
216
256
  except Exception as e:
217
257
  logger.error(f"Modal SDK call failed: {e}")
218
258
  return {
@@ -316,11 +356,22 @@ class ISAVisionService(BaseVisionService):
316
356
 
317
357
  # 创建实例并调用方法
318
358
  instance = SuryaOCRService()
319
- result = instance.extract_text.remote(image_b64, languages)
359
+ # 使用超时控制OCR调用
360
+ result = await asyncio.wait_for(
361
+ instance.extract_text.remote(image_b64, languages),
362
+ timeout=self.timeout
363
+ )
320
364
 
321
365
  logger.info("✅ OCR service call successful")
322
366
  return result
323
367
 
368
+ except asyncio.TimeoutError:
369
+ logger.error(f"OCR service call timed out after {self.timeout} seconds")
370
+ return {
371
+ 'success': False,
372
+ 'error': f'OCR service timeout after {self.timeout} seconds',
373
+ 'timeout': True
374
+ }
324
375
  except Exception as e:
325
376
  logger.error(f"OCR service call failed: {e}")
326
377
  return {
@@ -499,7 +550,7 @@ class ISAVisionService(BaseVisionService):
499
550
  """准备base64编码的图像"""
500
551
  if isinstance(image, str):
501
552
  # Check if it's already base64 encoded
502
- if image.startswith('data:image') or len(image) > 1000:
553
+ if image.startswith('data:image') or (not image.startswith('http') and len(image) > 1000):
503
554
  # Likely already base64
504
555
  if image.startswith('data:image'):
505
556
  # Extract base64 part
@@ -507,6 +558,16 @@ class ISAVisionService(BaseVisionService):
507
558
  else:
508
559
  # Assume it's pure base64
509
560
  return image
561
+ elif image.startswith('http://') or image.startswith('https://'):
562
+ # URL - download the image
563
+ import aiohttp
564
+ async with aiohttp.ClientSession() as session:
565
+ async with session.get(image) as response:
566
+ if response.status == 200:
567
+ image_data = await response.read()
568
+ return base64.b64encode(image_data).decode('utf-8')
569
+ else:
570
+ raise ValueError(f"Failed to download image from URL: {response.status}")
510
571
  else:
511
572
  # File path
512
573
  with open(image, 'rb') as f:
@@ -92,12 +92,21 @@ class OpenAIVisionService(BaseVisionService, VisionPromptMixin):
92
92
  }
93
93
  ]
94
94
 
95
- response = await self._client.chat.completions.create( # type: ignore
96
- model=self.model_name,
97
- messages=messages, # type: ignore
98
- max_tokens=max_tokens,
99
- temperature=self.temperature
100
- )
95
+ # Use max_completion_tokens for newer models like gpt-4o-mini
96
+ completion_params = {
97
+ "model": self.model_name,
98
+ "messages": messages, # type: ignore
99
+ "temperature": self.temperature
100
+ }
101
+
102
+ # Check if model uses new parameter name
103
+ # All newer models (gpt-4o, gpt-4.1, o1, etc.) use max_completion_tokens
104
+ if any(prefix in self.model_name for prefix in ["gpt-4o", "gpt-4.1", "o1"]):
105
+ completion_params["max_completion_tokens"] = max_tokens
106
+ else:
107
+ completion_params["max_tokens"] = max_tokens
108
+
109
+ response = await self._client.chat.completions.create(**completion_params) # type: ignore
101
110
 
102
111
  # Track usage for billing
103
112
  if response.usage:
@@ -162,7 +171,7 @@ class OpenAIVisionService(BaseVisionService, VisionPromptMixin):
162
171
  图像描述 - 使用专门提示词
163
172
  """
164
173
  prompt = self.get_task_prompt("describe", detail_level=detail_level)
165
- return await self.analyze_image(image, prompt)
174
+ return await self.analyze_image(image, prompt, max_tokens=1000)
166
175
 
167
176
  async def extract_text(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
168
177
  """
@@ -170,7 +179,7 @@ class OpenAIVisionService(BaseVisionService, VisionPromptMixin):
170
179
  """
171
180
  prompt = self.get_task_prompt("extract_text")
172
181
 
173
- return await self.analyze_image(image, prompt)
182
+ return await self.analyze_image(image, prompt, max_tokens=1000)
174
183
 
175
184
  async def detect_objects(
176
185
  self,
@@ -182,7 +191,7 @@ class OpenAIVisionService(BaseVisionService, VisionPromptMixin):
182
191
  """
183
192
  prompt = self.get_task_prompt("detect_objects", confidence_threshold=confidence_threshold)
184
193
 
185
- return await self.analyze_image(image, prompt)
194
+ return await self.analyze_image(image, prompt, max_tokens=1000)
186
195
 
187
196
  async def detect_ui_elements(
188
197
  self,
@@ -195,7 +204,7 @@ class OpenAIVisionService(BaseVisionService, VisionPromptMixin):
195
204
  """
196
205
  prompt = self.get_task_prompt("detect_ui_elements", element_types=element_types, confidence_threshold=confidence_threshold)
197
206
 
198
- return await self.analyze_image(image, prompt)
207
+ return await self.analyze_image(image, prompt, max_tokens=1000)
199
208
 
200
209
  async def detect_document_elements(
201
210
  self,