isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +466 -43
- isa_model/core/cache/redis_cache.py +12 -3
- isa_model/core/config/config_manager.py +230 -3
- isa_model/core/config.py +90 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +21 -1
- isa_model/core/database/supabase_client.py +154 -19
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +27 -18
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_manager.py +35 -80
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +174 -18
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/services/intelligent_model_selector.py +399 -21
- isa_model/core/types.py +1 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -370
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +137 -10
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/openai_stt_service.py +22 -6
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +335 -24
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/ollama_llm_service.py +9 -2
- isa_model/inference/services/llm/openai_llm_service.py +33 -16
- isa_model/inference/services/llm/yyds_llm_service.py +8 -2
- isa_model/inference/services/vision/__init__.py +22 -1
- isa_model/inference/services/vision/helpers/image_utils.py +8 -5
- isa_model/inference/services/vision/isa_vision_service.py +65 -4
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +172 -22
- isa_model/serving/api/middleware/auth.py +8 -2
- isa_model/serving/api/middleware/security.py +23 -33
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +4 -1
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +138 -2
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +680 -18
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +68 -54
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
- isa_model-0.4.4.dist-info/RECORD +180 -0
- isa_model/core/security/secrets.py +0 -358
- isa_model/core/storage/hf_storage.py +0 -419
- isa_model/core/storage/minio_storage.py +0 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks/__init__.py +0 -27
- isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
- isa_model/eval/benchmarks.py +0 -701
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -24
- isa_model/eval/evaluators/audio_evaluator.py +0 -727
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/embedding_evaluator.py +0 -742
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/evaluators/vision_evaluator.py +0 -564
- isa_model/eval/example_evaluation.py +0 -395
- isa_model/eval/factory.py +0 -798
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/isa_benchmarks.py +0 -700
- isa_model/eval/isa_integration.py +0 -582
- isa_model/eval/metrics.py +0 -951
- isa_model/eval/tests/unit/test_basic.py +0 -396
- isa_model/serving/api/routes/evaluations.py +0 -579
- isa_model/training/__init__.py +0 -168
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -26
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/examples/intelligent_training_example.py +0 -281
- isa_model/training/factory.py +0 -424
- isa_model/training/intelligent/__init__.py +0 -25
- isa_model/training/intelligent/decision_engine.py +0 -643
- isa_model/training/intelligent/intelligent_factory.py +0 -888
- isa_model/training/intelligent/knowledge_base.py +0 -751
- isa_model/training/intelligent/resource_optimizer.py +0 -839
- isa_model/training/intelligent/task_classifier.py +0 -576
- isa_model/training/storage/__init__.py +0 -24
- isa_model/training/storage/core_integration.py +0 -439
- isa_model/training/storage/training_repository.py +0 -552
- isa_model/training/storage/training_storage.py +0 -628
- isa_model-0.4.0.dist-info/RECORD +0 -182
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
isa_model/inference/services/llm/openai_llm_service.py:

```diff
@@ -20,6 +20,8 @@ class OpenAILLMService(BaseLLMService):
 
         # Check if this is an O-series reasoning model
         self.is_reasoning_model = model_name.startswith("o4-") or model_name.startswith("o3-")
+        self.uses_completion_tokens = self.is_reasoning_model or model_name.startswith("gpt-5")
+        self.requires_default_temperature = self.is_reasoning_model or model_name.startswith("gpt-5")
         self.supports_deep_research = "deep-search" in model_name or "deep-research" in model_name
 
         # Get configuration from centralized config manager
@@ -36,7 +38,9 @@ class OpenAILLMService(BaseLLMService):
         self.client = AsyncOpenAI(
             api_key=provider_config["api_key"],
             base_url=provider_config.get("api_base_url", "https://api.openai.com/v1"),
-            organization=provider_config.get("organization")
+            organization=provider_config.get("organization"),
+            timeout=10.0,  # 10 second timeout for first token (much faster than 600s default)
+            max_retries=2  # Retry on timeout
         )
 
         logger.info(f"Initialized OpenAILLMService with model {self.model_name} and endpoint {self.client.base_url}")
@@ -70,6 +74,8 @@ class OpenAILLMService(BaseLLMService):
 
         # Copy OpenAI-specific attributes
         bound_service.is_reasoning_model = self.is_reasoning_model
+        bound_service.uses_completion_tokens = self.uses_completion_tokens
+        bound_service.requires_default_temperature = self.requires_default_temperature
         bound_service.supports_deep_research = self.supports_deep_research
 
         # Copy base class attributes
@@ -103,7 +109,7 @@ class OpenAILLMService(BaseLLMService):
 
         return bound_service
 
-    async def astream(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
+    async def astream(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False, **extra_kwargs) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
         """
         True streaming method - yields tokens one by one as they arrive
 
@@ -121,19 +127,19 @@ class OpenAILLMService(BaseLLMService):
             if use_responses_api:
                 logger.info(f"Using Responses API streaming for {self.model_name}")
                 # Use Responses API streaming
-                async for chunk in self._astream_responses_api(input_data, show_reasoning):
+                async for chunk in self._astream_responses_api(input_data, show_reasoning, **extra_kwargs):
                     yield chunk
             else:
                 logger.debug(f"Using Chat Completions API streaming for {self.model_name}")
                 # Use Chat Completions API streaming
-                async for chunk in self._astream_chat_completions_api(input_data):
+                async for chunk in self._astream_chat_completions_api(input_data, **extra_kwargs):
                     yield chunk
 
         except Exception as e:
             logger.error(f"Error in astream: {e}")
             raise
 
-    async def _astream_responses_api(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
+    async def _astream_responses_api(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False, **extra_kwargs) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
         """Stream using Responses API for reasoning models and deep research models"""
         try:
             # Use adapter manager to prepare messages
@@ -228,7 +234,7 @@ class OpenAILLMService(BaseLLMService):
             logger.error(f"Error in _astream_responses_api: {e}")
             raise
 
-    async def _astream_chat_completions_api(self, input_data: Union[str, List[Dict[str, str]], Any]) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
+    async def _astream_chat_completions_api(self, input_data: Union[str, List[Dict[str, str]], Any], **extra_kwargs) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
         """Stream using Chat Completions API for standard models"""
         try:
             # Use adapter manager to prepare messages
@@ -242,13 +248,13 @@ class OpenAILLMService(BaseLLMService):
                 "stream": True
             }
 
-            # O4 models only support temperature=1 (default)
-            if not self.is_reasoning_model:
+            # O4 and GPT-5 models only support temperature=1 (default)
+            if not self.requires_default_temperature:
                 kwargs["temperature"] = provider_config.get("temperature", 0.7)
 
-            # O4 models use max_completion_tokens instead of max_tokens
+            # O4 and GPT-5 models use max_completion_tokens instead of max_tokens
             max_tokens_value = provider_config.get("max_tokens", 1024)
-            if self.is_reasoning_model:
+            if self.uses_completion_tokens:
                 kwargs["max_completion_tokens"] = max_tokens_value
             else:
                 kwargs["max_tokens"] = max_tokens_value
@@ -259,6 +265,11 @@ class OpenAILLMService(BaseLLMService):
                 kwargs["tools"] = tool_schemas
                 kwargs["tool_choice"] = "auto"
 
+            # Add response_format if specified (for JSON mode)
+            if 'response_format' in extra_kwargs:
+                kwargs['response_format'] = extra_kwargs['response_format']
+                logger.debug(f"Using response_format in streaming: {extra_kwargs['response_format']}")
+
             # Stream tokens and detect tool calls
             content_chunks = []
             tool_calls_accumulator = {}  # Track complete tool calls by ID
@@ -360,13 +371,14 @@ class OpenAILLMService(BaseLLMService):
             logger.error(f"Error in _astream_chat_completions_api: {e}")
             raise
 
-    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False) -> Union[str, Any]:
+    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False, **extra_kwargs) -> Union[str, Any]:
         """
         Unified invoke method for all input types
 
         Args:
             input_data: Input messages or text
             show_reasoning: If True and model supports it, show reasoning process using Responses API
+            **extra_kwargs: Additional parameters to pass to the API (e.g., response_format)
         """
         try:
             # Use adapter manager to prepare messages
@@ -385,13 +397,13 @@ class OpenAILLMService(BaseLLMService):
                 "messages": messages
             }
 
-            # O4 models only support temperature=1 (default)
-            if not self.is_reasoning_model:
+            # O4 and GPT-5 models only support temperature=1 (default)
+            if not self.requires_default_temperature:
                 kwargs["temperature"] = provider_config.get("temperature", 0.7)
 
-            # O4 models use max_completion_tokens instead of max_tokens
+            # O4 and GPT-5 models use max_completion_tokens instead of max_tokens
             max_tokens_value = provider_config.get("max_tokens", 1024)
-            if self.is_reasoning_model:
+            if self.uses_completion_tokens:
                 kwargs["max_completion_tokens"] = max_tokens_value
             else:
                 kwargs["max_tokens"] = max_tokens_value
@@ -403,11 +415,16 @@ class OpenAILLMService(BaseLLMService):
             if not use_responses_api:  # Responses API handles tool choice differently
                 kwargs["tool_choice"] = "auto"
 
+            # Add response_format if specified (for JSON mode)
+            if 'response_format' in extra_kwargs:
+                kwargs['response_format'] = extra_kwargs['response_format']
+                logger.debug(f"Using response_format: {extra_kwargs['response_format']}")
+
             # Handle streaming vs non-streaming
             if self.streaming:
                 # TRUE STREAMING MODE - collect all chunks from the stream
                 content_chunks = []
-                async for token in self.astream(input_data, show_reasoning=show_reasoning):
+                async for token in self.astream(input_data, show_reasoning=show_reasoning, **extra_kwargs):
                     if isinstance(token, str):
                         content_chunks.append(token)
                     elif isinstance(token, dict) and "result" in token:
```
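Taken together, these hunks thread a new `**extra_kwargs` parameter from `ainvoke` through `astream` down to request construction, so callers can enable JSON mode via `response_format`, and they introduce two model flags that pick the right temperature and token-limit parameters for GPT-5 and o-series models. The sketch below is a minimal, self-contained illustration of that plumbing; `DemoLLMService` is a hypothetical stand-in for `OpenAILLMService` and builds the kwargs dict without calling the real API.

```python
import asyncio
from typing import Any, Dict

class DemoLLMService:
    """Hypothetical stand-in mirroring the 0.4.4 flag and kwargs logic above."""

    def __init__(self, model_name: str) -> None:
        self.model_name = model_name
        self.is_reasoning_model = model_name.startswith(("o3-", "o4-"))
        # Both new flags share the same definition in the diff above
        self.uses_completion_tokens = self.is_reasoning_model or model_name.startswith("gpt-5")
        self.requires_default_temperature = self.uses_completion_tokens

    async def ainvoke(self, prompt: str, **extra_kwargs: Any) -> Dict[str, Any]:
        kwargs: Dict[str, Any] = {
            "model": self.model_name,
            "messages": [{"role": "user", "content": prompt}],
        }
        if not self.requires_default_temperature:
            kwargs["temperature"] = 0.7  # GPT-5/o-series only accept the default
        token_key = "max_completion_tokens" if self.uses_completion_tokens else "max_tokens"
        kwargs[token_key] = 1024
        if "response_format" in extra_kwargs:  # JSON-mode passthrough
            kwargs["response_format"] = extra_kwargs["response_format"]
        return kwargs  # the real service hands these to the AsyncOpenAI client

print(asyncio.run(DemoLLMService("gpt-5-mini").ainvoke(
    "Return JSON", response_format={"type": "json_object"})))
```

For "gpt-5-mini" this emits `max_completion_tokens` and no `temperature`, matching the branches in the diff.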
isa_model/inference/services/llm/yyds_llm_service.py:

```diff
@@ -63,16 +63,19 @@ class YydsLLMService(BaseLLMService):
 
         return bound_service
 
-    async def astream(self, input_data: Union[str, List[Dict[str, str]], Any]) -> AsyncGenerator[str, None]:
+    async def astream(self, input_data: Union[str, List[Dict[str, str]], Any], **kwargs) -> AsyncGenerator[str, None]:
         """
         True streaming method - yields tokens one by one as they arrive
 
         Args:
             input_data: Same as ainvoke
+            **kwargs: Additional parameters (will filter out unsupported ones)
 
         Yields:
             Individual tokens as they arrive from the API
         """
+        # Remove parameters that yyds doesn't support
+        kwargs.pop('show_reasoning', None)  # OpenAI-specific parameter
         try:
             # Use adapter manager to prepare messages
             messages = self._prepare_messages(input_data)
@@ -115,8 +118,11 @@ class YydsLLMService(BaseLLMService):
             logger.error(f"Error in astream: {e}")
             raise
 
-    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any]) -> Union[str, Any]:
+    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any], **kwargs) -> Union[str, Any]:
         """Unified invoke method for all input types"""
+        # Remove parameters that yyds doesn't support
+        kwargs.pop('show_reasoning', None)  # OpenAI-specific parameter
+        kwargs.pop('task', None)  # Handled internally
         try:
             # Use adapter manager to prepare messages
             messages = self._prepare_messages(input_data)
```
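The Yyds service accepts the same broadened signature but drops the options its backend cannot forward, mirroring the two keys popped above. A tiny self-contained sketch of that filtering pattern:

```python
from typing import Any, Dict

def filter_unsupported(**kwargs: Any) -> Dict[str, Any]:
    # Strip provider-specific options before forwarding the rest downstream
    for unsupported in ("show_reasoning", "task"):
        kwargs.pop(unsupported, None)
    return kwargs

print(filter_unsupported(show_reasoning=True, temperature=0.2))  # {'temperature': 0.2}
```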
isa_model/inference/services/vision/__init__.py:

```diff
@@ -31,6 +31,21 @@ except ImportError:
     OllamaVisionService = None
     OLLAMA_VISION_AVAILABLE = False
 
+# Computer Vision specialized services
+try:
+    from .vgg16_vision_service import VGG16VisionService
+    VGG16_VISION_AVAILABLE = True
+except ImportError:
+    VGG16VisionService = None
+    VGG16_VISION_AVAILABLE = False
+
+try:
+    from .blip_vision_service import BLIPVisionService
+    BLIP_VISION_AVAILABLE = True
+except ImportError:
+    BLIPVisionService = None
+    BLIP_VISION_AVAILABLE = False
+
 __all__ = [
     "BaseVisionService",
     "OpenAIVisionService",
@@ -43,4 +58,10 @@ if ISA_VISION_AVAILABLE:
     __all__.append("ISAVisionService")
 
 if OLLAMA_VISION_AVAILABLE:
-    __all__.append("OllamaVisionService")
+    __all__.append("OllamaVisionService")
+
+if VGG16_VISION_AVAILABLE:
+    __all__.append("VGG16VisionService")
+
+if BLIP_VISION_AVAILABLE:
+    __all__.append("BLIPVisionService")
```
isa_model/inference/services/vision/helpers/image_utils.py (docstring and comments translated from Chinese; the remaining `-`/`+` pairs are whitespace-only changes):

```diff
@@ -59,25 +59,28 @@ def get_image_data(image: Union[str, BinaryIO]) -> bytes:
 
 def compress_image(image_data: Union[bytes, BytesIO], max_size: int = 1024) -> bytes:
     """Compress an image to reduce its size
-
+
     Args:
         image_data: image data, either bytes or BytesIO
         max_size: maximum dimension (pixels)
-
+
     Returns:
         bytes: the compressed image data
     """
     try:
+        # Ensure max_size is int (type safety)
+        max_size = int(max_size)
+
         # If the input is bytes, convert it to BytesIO
         if isinstance(image_data, bytes):
            image_data = BytesIO(image_data)
-
+
         img = Image.open(image_data)
-
+
         # Convert to RGB mode (if needed)
         if img.mode in ('RGBA', 'P'):
             img = img.convert('RGB')
-
+
         # Compute the new size, preserving the aspect ratio
         ratio = max_size / max(img.size)
         if ratio < 1:
```
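A quick usage sketch for `compress_image` after this change, assuming the module path from the file list above and that the function returns encoded bytes as its docstring states. The new `int()` coercion also means a string value such as `"512"` no longer breaks the `ratio` computation.

```python
from io import BytesIO

from PIL import Image
from isa_model.inference.services.vision.helpers.image_utils import compress_image

# Build a throwaway 2048x1024 test image in memory
buf = BytesIO()
Image.new("RGB", (2048, 1024), "white").save(buf, format="JPEG")

small = compress_image(buf.getvalue(), max_size=512)  # longest side scaled to 512px
print(f"{len(buf.getvalue())} -> {len(small)} bytes")
```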
isa_model/inference/services/vision/isa_vision_service.py (comments and docstrings translated from Chinese; the old default on the removed `timeout` line was elided by the diff viewer and is left truncated):

```diff
@@ -9,6 +9,7 @@ import logging
 import base64
 import io
 import time
+import asyncio
 from typing import Dict, Any, List, Union, Optional, BinaryIO
 from PIL import Image
 
@@ -36,7 +37,7 @@ class ISAVisionService(BaseVisionService):
     def __init__(self,
                  modal_app_id: str = "ap-VlHUQoiPUdy9cgrHSfG7Fk",
                  modal_app_name: str = "isa-vision-ui-optimized",
-                 timeout: int =
+                 timeout: int = 60):
         """
         Initialize the ISA Vision service
 
@@ -77,6 +78,31 @@ class ISAVisionService(BaseVisionService):
         self.request_count = 0
         self.total_cost = 0.0
 
+        # Performance optimization: warm up the connection (lazy initialization)
+        self._connection_warmed = False
+
+        # Simple result cache (optional)
+        self._result_cache = {}
+        self._cache_max_size = 100
+
+    async def _warm_connection(self):
+        """Warm up the Modal connection to reduce first-call latency"""
+        if self._connection_warmed or not self.modal_app:
+            return
+
+        try:
+            logger.info("Warming up Modal connection...")
+            # Fetch the service status to warm up the connection
+            if hasattr(self.modal_app, 'list_functions'):
+                await asyncio.wait_for(
+                    asyncio.to_thread(self.modal_app.list_functions),
+                    timeout=10
+                )
+            self._connection_warmed = True
+            logger.info("✅ Modal connection warmed up")
+        except Exception as e:
+            logger.warning(f"Failed to warm up connection: {e}")
+
     async def analyze_image(
         self,
         image: Union[str, BinaryIO],
@@ -154,6 +180,9 @@ class ISAVisionService(BaseVisionService):
                 'error': 'Modal app or service not available'
             }
 
+        # Warm up the connection to reduce latency
+        await self._warm_connection()
+
         # Prepare the image data
         image_b64 = await self._prepare_image_base64(image)
 
@@ -208,11 +237,22 @@ class ISAVisionService(BaseVisionService):
 
             # Create an instance and call the optimized method (fast mode, no captions)
             instance = OptimizedUIDetectionService()
-
+            # Use a timeout to bound the Modal call
+            result = await asyncio.wait_for(
+                instance.detect_ui_elements_fast.remote(image_b64, enable_captions=False),
+                timeout=self.timeout
+            )
 
             logger.info("✅ Modal SDK call successful")
             return result
 
+        except asyncio.TimeoutError:
+            logger.error(f"Modal SDK call timed out after {self.timeout} seconds")
+            return {
+                'success': False,
+                'error': f'Modal service timeout after {self.timeout} seconds',
+                'timeout': True
+            }
         except Exception as e:
             logger.error(f"Modal SDK call failed: {e}")
             return {
@@ -316,11 +356,22 @@ class ISAVisionService(BaseVisionService):
 
             # Create an instance and call the method
             instance = SuryaOCRService()
-
+            # Use a timeout to bound the OCR call
+            result = await asyncio.wait_for(
+                instance.extract_text.remote(image_b64, languages),
+                timeout=self.timeout
+            )
 
             logger.info("✅ OCR service call successful")
             return result
 
+        except asyncio.TimeoutError:
+            logger.error(f"OCR service call timed out after {self.timeout} seconds")
+            return {
+                'success': False,
+                'error': f'OCR service timeout after {self.timeout} seconds',
+                'timeout': True
+            }
         except Exception as e:
             logger.error(f"OCR service call failed: {e}")
             return {
@@ -499,7 +550,7 @@ class ISAVisionService(BaseVisionService):
         """Prepare a base64-encoded image"""
         if isinstance(image, str):
             # Check if it's already base64 encoded
-            if image.startswith('data:image') or len(image) > 1000:
+            if image.startswith('data:image') or (not image.startswith('http') and len(image) > 1000):
                 # Likely already base64
                 if image.startswith('data:image'):
                     # Extract base64 part
@@ -507,6 +558,16 @@ class ISAVisionService(BaseVisionService):
             else:
                 # Assume it's pure base64
                 return image
+        elif image.startswith('http://') or image.startswith('https://'):
+            # URL - download the image
+            import aiohttp
+            async with aiohttp.ClientSession() as session:
+                async with session.get(image) as response:
+                    if response.status == 200:
+                        image_data = await response.read()
+                        return base64.b64encode(image_data).decode('utf-8')
+                    else:
+                        raise ValueError(f"Failed to download image from URL: {response.status}")
         else:
             # File path
             with open(image, 'rb') as f:
```
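The recurring pattern in these hunks, wrapping a remote call in `asyncio.wait_for` and translating `TimeoutError` into the service's error-dict shape, is easy to lift out. A self-contained sketch (the remote call here is simulated with `asyncio.sleep`):

```python
import asyncio
from typing import Any, Awaitable, Dict

async def call_with_timeout(awaitable: Awaitable[Any], timeout: float) -> Dict[str, Any]:
    """Run an awaitable under a deadline, returning the error-dict shape used above."""
    try:
        result = await asyncio.wait_for(awaitable, timeout=timeout)
        return {"success": True, "result": result}
    except asyncio.TimeoutError:
        return {"success": False, "error": f"timeout after {timeout} seconds", "timeout": True}

async def fake_remote_call() -> str:
    await asyncio.sleep(2)  # stands in for instance.detect_ui_elements_fast.remote(...)
    return "ok"

print(asyncio.run(call_with_timeout(fake_remote_call(), timeout=0.5)))
# {'success': False, 'error': 'timeout after 0.5 seconds', 'timeout': True}
```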
isa_model/inference/services/vision/openai_vision_service.py (one docstring line translated from Chinese; the removed lines of the old `chat.completions.create(...)` call were elided by the diff viewer apart from a `temperature` fragment and are left as shown):

```diff
@@ -92,12 +92,21 @@ class OpenAIVisionService(BaseVisionService, VisionPromptMixin):
             }
         ]
 
-
-
-
-
-            temperature
-
+        # Use max_completion_tokens for newer models like gpt-4o-mini
+        completion_params = {
+            "model": self.model_name,
+            "messages": messages,  # type: ignore
+            "temperature": self.temperature
+        }
+
+        # Check if model uses new parameter name
+        # All newer models (gpt-4o, gpt-4.1, o1, etc.) use max_completion_tokens
+        if any(prefix in self.model_name for prefix in ["gpt-4o", "gpt-4.1", "o1"]):
+            completion_params["max_completion_tokens"] = max_tokens
+        else:
+            completion_params["max_tokens"] = max_tokens
+
+        response = await self._client.chat.completions.create(**completion_params)  # type: ignore
 
         # Track usage for billing
         if response.usage:
@@ -162,7 +171,7 @@ class OpenAIVisionService(BaseVisionService, VisionPromptMixin):
         Image description - uses a dedicated prompt
         """
         prompt = self.get_task_prompt("describe", detail_level=detail_level)
-        return await self.analyze_image(image, prompt)
+        return await self.analyze_image(image, prompt, max_tokens=1000)
 
     async def extract_text(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
         """
@@ -170,7 +179,7 @@ class OpenAIVisionService(BaseVisionService, VisionPromptMixin):
         """
         prompt = self.get_task_prompt("extract_text")
 
-        return await self.analyze_image(image, prompt)
+        return await self.analyze_image(image, prompt, max_tokens=1000)
 
     async def detect_objects(
         self,
@@ -182,7 +191,7 @@ class OpenAIVisionService(BaseVisionService, VisionPromptMixin):
         """
         prompt = self.get_task_prompt("detect_objects", confidence_threshold=confidence_threshold)
 
-        return await self.analyze_image(image, prompt)
+        return await self.analyze_image(image, prompt, max_tokens=1000)
 
     async def detect_ui_elements(
         self,
@@ -195,7 +204,7 @@ class OpenAIVisionService(BaseVisionService, VisionPromptMixin):
         """
         prompt = self.get_task_prompt("detect_ui_elements", element_types=element_types, confidence_threshold=confidence_threshold)
 
-        return await self.analyze_image(image, prompt)
+        return await self.analyze_image(image, prompt, max_tokens=1000)
 
     async def detect_document_elements(
         self,
```