PyPI - isa-model - Versions diffs - 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl - Mend

isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (189)

isa_model/client.py +466 -43
isa_model/core/cache/redis_cache.py +12 -3
isa_model/core/config/config_manager.py +230 -3
isa_model/core/config.py +90 -0
isa_model/core/database/direct_db_client.py +114 -0
isa_model/core/database/migration_manager.py +563 -0
isa_model/core/database/migrations.py +21 -1
isa_model/core/database/supabase_client.py +154 -19
isa_model/core/dependencies.py +316 -0
isa_model/core/discovery/__init__.py +19 -0
isa_model/core/discovery/consul_discovery.py +190 -0
isa_model/core/logging/__init__.py +54 -0
isa_model/core/logging/influx_logger.py +523 -0
isa_model/core/logging/loki_logger.py +160 -0
isa_model/core/models/__init__.py +27 -18
isa_model/core/models/config_models.py +625 -0
isa_model/core/models/deployment_billing_tracker.py +430 -0
isa_model/core/models/model_manager.py +35 -80
isa_model/core/models/model_metadata.py +690 -0
isa_model/core/models/model_repo.py +174 -18
isa_model/core/models/system_models.py +857 -0
isa_model/core/repositories/__init__.py +9 -0
isa_model/core/repositories/config_repository.py +912 -0
isa_model/core/services/intelligent_model_selector.py +399 -21
isa_model/core/types.py +1 -0
isa_model/deployment/__init__.py +5 -48
isa_model/deployment/core/__init__.py +2 -31
isa_model/deployment/core/deployment_manager.py +1278 -370
isa_model/deployment/modal/__init__.py +8 -0
isa_model/deployment/modal/config.py +136 -0
isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
isa_model/deployment/modal/services/__init__.py +3 -0
isa_model/deployment/modal/services/audio/__init__.py +1 -0
isa_model/deployment/modal/services/embedding/__init__.py +1 -0
isa_model/deployment/modal/services/llm/__init__.py +1 -0
isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
isa_model/deployment/modal/services/video/__init__.py +1 -0
isa_model/deployment/modal/services/vision/__init__.py +1 -0
isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
isa_model/deployment/storage/__init__.py +5 -0
isa_model/deployment/storage/deployment_repository.py +824 -0
isa_model/deployment/triton/__init__.py +10 -0
isa_model/deployment/triton/config.py +196 -0
isa_model/deployment/triton/configs/__init__.py +1 -0
isa_model/deployment/triton/provider.py +512 -0
isa_model/deployment/triton/scripts/__init__.py +1 -0
isa_model/deployment/triton/templates/__init__.py +1 -0
isa_model/inference/__init__.py +47 -1
isa_model/inference/ai_factory.py +137 -10
isa_model/inference/legacy_services/__init__.py +21 -0
isa_model/inference/legacy_services/model_evaluation.py +637 -0
isa_model/inference/legacy_services/model_service.py +573 -0
isa_model/inference/legacy_services/model_serving.py +717 -0
isa_model/inference/legacy_services/model_training.py +561 -0
isa_model/inference/models/__init__.py +21 -0
isa_model/inference/models/inference_config.py +551 -0
isa_model/inference/models/inference_record.py +675 -0
isa_model/inference/models/performance_models.py +714 -0
isa_model/inference/repositories/__init__.py +9 -0
isa_model/inference/repositories/inference_repository.py +828 -0
isa_model/inference/services/audio/base_stt_service.py +184 -11
isa_model/inference/services/audio/openai_stt_service.py +22 -6
isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
isa_model/inference/services/llm/__init__.py +10 -2
isa_model/inference/services/llm/base_llm_service.py +335 -24
isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
isa_model/inference/services/llm/ollama_llm_service.py +9 -2
isa_model/inference/services/llm/openai_llm_service.py +33 -16
isa_model/inference/services/llm/yyds_llm_service.py +8 -2
isa_model/inference/services/vision/__init__.py +22 -1
isa_model/inference/services/vision/helpers/image_utils.py +8 -5
isa_model/inference/services/vision/isa_vision_service.py +65 -4
isa_model/inference/services/vision/openai_vision_service.py +19 -10
isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
isa_model/serving/api/cache_manager.py +245 -0
isa_model/serving/api/dependencies/__init__.py +1 -0
isa_model/serving/api/dependencies/auth.py +194 -0
isa_model/serving/api/dependencies/database.py +139 -0
isa_model/serving/api/error_handlers.py +284 -0
isa_model/serving/api/fastapi_server.py +172 -22
isa_model/serving/api/middleware/auth.py +8 -2
isa_model/serving/api/middleware/security.py +23 -33
isa_model/serving/api/middleware/tenant_context.py +414 -0
isa_model/serving/api/routes/analytics.py +4 -1
isa_model/serving/api/routes/config.py +645 -0
isa_model/serving/api/routes/deployment_billing.py +315 -0
isa_model/serving/api/routes/deployments.py +138 -2
isa_model/serving/api/routes/gpu_gateway.py +440 -0
isa_model/serving/api/routes/health.py +32 -12
isa_model/serving/api/routes/inference_monitoring.py +486 -0
isa_model/serving/api/routes/local_deployments.py +448 -0
isa_model/serving/api/routes/tenants.py +575 -0
isa_model/serving/api/routes/unified.py +680 -18
isa_model/serving/api/routes/webhooks.py +479 -0
isa_model/serving/api/startup.py +68 -54
isa_model/utils/gpu_utils.py +311 -0
{isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
isa_model-0.4.4.dist-info/RECORD +180 -0
isa_model/core/security/secrets.py +0 -358
isa_model/core/storage/hf_storage.py +0 -419
isa_model/core/storage/minio_storage.py +0 -0
isa_model/deployment/cloud/__init__.py +0 -9
isa_model/deployment/cloud/modal/__init__.py +0 -10
isa_model/deployment/core/deployment_config.py +0 -356
isa_model/deployment/core/isa_deployment_service.py +0 -401
isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
isa_model/deployment/runtime/deployed_service.py +0 -338
isa_model/deployment/services/__init__.py +0 -9
isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
isa_model/deployment/services/model_service.py +0 -332
isa_model/deployment/services/service_monitor.py +0 -356
isa_model/deployment/services/service_registry.py +0 -527
isa_model/eval/__init__.py +0 -92
isa_model/eval/benchmarks/__init__.py +0 -27
isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
isa_model/eval/benchmarks.py +0 -701
isa_model/eval/config/__init__.py +0 -10
isa_model/eval/config/evaluation_config.py +0 -108
isa_model/eval/evaluators/__init__.py +0 -24
isa_model/eval/evaluators/audio_evaluator.py +0 -727
isa_model/eval/evaluators/base_evaluator.py +0 -503
isa_model/eval/evaluators/embedding_evaluator.py +0 -742
isa_model/eval/evaluators/llm_evaluator.py +0 -472
isa_model/eval/evaluators/vision_evaluator.py +0 -564
isa_model/eval/example_evaluation.py +0 -395
isa_model/eval/factory.py +0 -798
isa_model/eval/infrastructure/__init__.py +0 -24
isa_model/eval/infrastructure/experiment_tracker.py +0 -466
isa_model/eval/isa_benchmarks.py +0 -700
isa_model/eval/isa_integration.py +0 -582
isa_model/eval/metrics.py +0 -951
isa_model/eval/tests/unit/test_basic.py +0 -396
isa_model/serving/api/routes/evaluations.py +0 -579
isa_model/training/__init__.py +0 -168
isa_model/training/annotation/annotation_schema.py +0 -47
isa_model/training/annotation/processors/annotation_processor.py +0 -126
isa_model/training/annotation/storage/dataset_manager.py +0 -131
isa_model/training/annotation/storage/dataset_schema.py +0 -44
isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
isa_model/training/annotation/tests/test_minio copy.py +0 -113
isa_model/training/annotation/tests/test_minio_upload.py +0 -43
isa_model/training/annotation/views/annotation_controller.py +0 -158
isa_model/training/cloud/__init__.py +0 -22
isa_model/training/cloud/job_orchestrator.py +0 -402
isa_model/training/cloud/runpod_trainer.py +0 -454
isa_model/training/cloud/storage_manager.py +0 -482
isa_model/training/core/__init__.py +0 -26
isa_model/training/core/config.py +0 -181
isa_model/training/core/dataset.py +0 -222
isa_model/training/core/trainer.py +0 -720
isa_model/training/core/utils.py +0 -213
isa_model/training/examples/intelligent_training_example.py +0 -281
isa_model/training/factory.py +0 -424
isa_model/training/intelligent/__init__.py +0 -25
isa_model/training/intelligent/decision_engine.py +0 -643
isa_model/training/intelligent/intelligent_factory.py +0 -888
isa_model/training/intelligent/knowledge_base.py +0 -751
isa_model/training/intelligent/resource_optimizer.py +0 -839
isa_model/training/intelligent/task_classifier.py +0 -576
isa_model/training/storage/__init__.py +0 -24
isa_model/training/storage/core_integration.py +0 -439
isa_model/training/storage/training_repository.py +0 -552
isa_model/training/storage/training_storage.py +0 -628
isa_model-0.4.0.dist-info/RECORD +0 -182
/isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
/isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
/isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
/isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
/isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
/isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
/isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
/isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
/isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
/isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
/isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
/isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
{isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
{isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0

isa_model/inference/services/llm/openai_llm_service.py CHANGED Viewed

@@ -20,6 +20,8 @@ class OpenAILLMService(BaseLLMService):
         # Check if this is an O-series reasoning model
         self.is_reasoning_model = model_name.startswith("o4-") or model_name.startswith("o3-")
+        self.uses_completion_tokens = self.is_reasoning_model or model_name.startswith("gpt-5")
+        self.requires_default_temperature = self.is_reasoning_model or model_name.startswith("gpt-5")
         self.supports_deep_research = "deep-search" in model_name or "deep-research" in model_name
         # Get configuration from centralized config manager
@@ -36,7 +38,9 @@ class OpenAILLMService(BaseLLMService):
             self.client = AsyncOpenAI(
                 api_key=provider_config["api_key"],
                 base_url=provider_config.get("api_base_url", "https://api.openai.com/v1"),
-                organization=provider_config.get("organization")
+                organization=provider_config.get("organization"),
+                timeout=10.0,  # 10 second timeout for first token (much faster than 600s default)
+                max_retries=2  # Retry on timeout
             )
             logger.info(f"Initialized OpenAILLMService with model {self.model_name} and endpoint {self.client.base_url}")
@@ -70,6 +74,8 @@ class OpenAILLMService(BaseLLMService):
         # Copy OpenAI-specific attributes
         bound_service.is_reasoning_model = self.is_reasoning_model
+        bound_service.uses_completion_tokens = self.uses_completion_tokens
+        bound_service.requires_default_temperature = self.requires_default_temperature
         bound_service.supports_deep_research = self.supports_deep_research
         # Copy base class attributes
@@ -103,7 +109,7 @@ class OpenAILLMService(BaseLLMService):
         return bound_service
-    async def astream(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
+    async def astream(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False, **extra_kwargs) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
         """
         True streaming method - yields tokens one by one as they arrive
@@ -121,19 +127,19 @@ class OpenAILLMService(BaseLLMService):
             if use_responses_api:
                 logger.info(f"Using Responses API streaming for {self.model_name}")
                 # Use Responses API streaming
-                async for chunk in self._astream_responses_api(input_data, show_reasoning):
+                async for chunk in self._astream_responses_api(input_data, show_reasoning, **extra_kwargs):
                     yield chunk
             else:
                 logger.debug(f"Using Chat Completions API streaming for {self.model_name}")
                 # Use Chat Completions API streaming
-                async for chunk in self._astream_chat_completions_api(input_data):
+                async for chunk in self._astream_chat_completions_api(input_data, **extra_kwargs):
                     yield chunk
         except Exception as e:
             logger.error(f"Error in astream: {e}")
             raise
-    async def _astream_responses_api(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
+    async def _astream_responses_api(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False, **extra_kwargs) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
         """Stream using Responses API for reasoning models and deep research models"""
         try:
             # Use adapter manager to prepare messages
@@ -228,7 +234,7 @@ class OpenAILLMService(BaseLLMService):
             logger.error(f"Error in _astream_responses_api: {e}")
             raise
-    async def _astream_chat_completions_api(self, input_data: Union[str, List[Dict[str, str]], Any]) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
+    async def _astream_chat_completions_api(self, input_data: Union[str, List[Dict[str, str]], Any], **extra_kwargs) -> AsyncGenerator[Union[str, Dict[str, Any]], None]:
         """Stream using Chat Completions API for standard models"""
         try:
             # Use adapter manager to prepare messages
@@ -242,13 +248,13 @@ class OpenAILLMService(BaseLLMService):
                 "stream": True
             }
-            # O4 models only support temperature=1 (default)
-            if not self.is_reasoning_model:
+            # O4 and GPT-5 models only support temperature=1 (default)
+            if not self.requires_default_temperature:
                 kwargs["temperature"] = provider_config.get("temperature", 0.7)
-            # O4 models use max_completion_tokens instead of max_tokens
+            # O4 and GPT-5 models use max_completion_tokens instead of max_tokens
             max_tokens_value = provider_config.get("max_tokens", 1024)
-            if self.is_reasoning_model:
+            if self.uses_completion_tokens:
                 kwargs["max_completion_tokens"] = max_tokens_value
             else:
                 kwargs["max_tokens"] = max_tokens_value
@@ -259,6 +265,11 @@ class OpenAILLMService(BaseLLMService):
                 kwargs["tools"] = tool_schemas
                 kwargs["tool_choice"] = "auto"
+            # Add response_format if specified (for JSON mode)
+            if 'response_format' in extra_kwargs:
+                kwargs['response_format'] = extra_kwargs['response_format']
+                logger.debug(f"Using response_format in streaming: {extra_kwargs['response_format']}")
             # Stream tokens and detect tool calls
             content_chunks = []
             tool_calls_accumulator = {}  # Track complete tool calls by ID
@@ -360,13 +371,14 @@ class OpenAILLMService(BaseLLMService):
             logger.error(f"Error in _astream_chat_completions_api: {e}")
             raise
-    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False) -> Union[str, Any]:
+    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any], show_reasoning: bool = False, **extra_kwargs) -> Union[str, Any]:
         """
         Unified invoke method for all input types
         Args:
             input_data: Input messages or text
             show_reasoning: If True and model supports it, show reasoning process using Responses API
+            **extra_kwargs: Additional parameters to pass to the API (e.g., response_format)
         """
         try:
             # Use adapter manager to prepare messages
@@ -385,13 +397,13 @@ class OpenAILLMService(BaseLLMService):
                 "messages": messages
             }
-            # O4 models only support temperature=1 (default)
-            if not self.is_reasoning_model:
+            # O4 and GPT-5 models only support temperature=1 (default)
+            if not self.requires_default_temperature:
                 kwargs["temperature"] = provider_config.get("temperature", 0.7)
-            # O4 models use max_completion_tokens instead of max_tokens
+            # O4 and GPT-5 models use max_completion_tokens instead of max_tokens
             max_tokens_value = provider_config.get("max_tokens", 1024)
-            if self.is_reasoning_model:
+            if self.uses_completion_tokens:
                 kwargs["max_completion_tokens"] = max_tokens_value
             else:
                 kwargs["max_tokens"] = max_tokens_value
@@ -403,11 +415,16 @@ class OpenAILLMService(BaseLLMService):
                 if not use_responses_api:  # Responses API handles tool choice differently
                     kwargs["tool_choice"] = "auto"
+            # Add response_format if specified (for JSON mode)
+            if 'response_format' in extra_kwargs:
+                kwargs['response_format'] = extra_kwargs['response_format']
+                logger.debug(f"Using response_format: {extra_kwargs['response_format']}")
             # Handle streaming vs non-streaming
             if self.streaming:
                 # TRUE STREAMING MODE - collect all chunks from the stream
                 content_chunks = []
-                async for token in self.astream(input_data, show_reasoning=show_reasoning):
+                async for token in self.astream(input_data, show_reasoning=show_reasoning, **extra_kwargs):
                     if isinstance(token, str):
                         content_chunks.append(token)
                     elif isinstance(token, dict) and "result" in token:

isa_model/inference/services/llm/yyds_llm_service.py CHANGED Viewed

@@ -63,16 +63,19 @@ class YydsLLMService(BaseLLMService):
         return bound_service
-    async def astream(self, input_data: Union[str, List[Dict[str, str]], Any]) -> AsyncGenerator[str, None]:
+    async def astream(self, input_data: Union[str, List[Dict[str, str]], Any], **kwargs) -> AsyncGenerator[str, None]:
         """
         True streaming method - yields tokens one by one as they arrive
         Args:
             input_data: Same as ainvoke
+            **kwargs: Additional parameters (will filter out unsupported ones)
         Yields:
             Individual tokens as they arrive from the API
         """
+        # Remove parameters that yyds doesn't support
+        kwargs.pop('show_reasoning', None)  # OpenAI-specific parameter
         try:
             # Use adapter manager to prepare messages
             messages = self._prepare_messages(input_data)
@@ -115,8 +118,11 @@ class YydsLLMService(BaseLLMService):
             logger.error(f"Error in astream: {e}")
             raise
-    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any]) -> Union[str, Any]:
+    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any], **kwargs) -> Union[str, Any]:
         """Unified invoke method for all input types"""
+        # Remove parameters that yyds doesn't support
+        kwargs.pop('show_reasoning', None)  # OpenAI-specific parameter
+        kwargs.pop('task', None)  # Handled internally
         try:
             # Use adapter manager to prepare messages
             messages = self._prepare_messages(input_data)

isa_model/inference/services/vision/__init__.py CHANGED Viewed

@@ -31,6 +31,21 @@ except ImportError:
     OllamaVisionService = None
     OLLAMA_VISION_AVAILABLE = False
+# Computer Vision specialized services
+try:
+    from .vgg16_vision_service import VGG16VisionService
+    VGG16_VISION_AVAILABLE = True
+except ImportError:
+    VGG16VisionService = None
+    VGG16_VISION_AVAILABLE = False
+try:
+    from .blip_vision_service import BLIPVisionService
+    BLIP_VISION_AVAILABLE = True
+except ImportError:
+    BLIPVisionService = None
+    BLIP_VISION_AVAILABLE = False
 __all__ = [
     "BaseVisionService",
     "OpenAIVisionService",
@@ -43,4 +58,10 @@ if ISA_VISION_AVAILABLE:
     __all__.append("ISAVisionService")
 if OLLAMA_VISION_AVAILABLE:
-    __all__.append("OllamaVisionService")
+    __all__.append("OllamaVisionService")
+if VGG16_VISION_AVAILABLE:
+    __all__.append("VGG16VisionService")
+if BLIP_VISION_AVAILABLE:
+    __all__.append("BLIPVisionService")

isa_model/inference/services/vision/helpers/image_utils.py CHANGED Viewed

@@ -59,25 +59,28 @@ def get_image_data(image: Union[str, BinaryIO]) -> bytes:
 def compress_image(image_data: Union[bytes, BytesIO], max_size: int = 1024) -> bytes:
     """压缩图片以减小大小
     Args:
         image_data: 图片数据，可以是 bytes 或 BytesIO
         max_size: 最大尺寸（像素）
     Returns:
         bytes: 压缩后的图片数据
     """
     try:
+        # Ensure max_size is int (type safety)
+        max_size = int(max_size)
         # 如果输入是 bytes，转换为 BytesIO
         if isinstance(image_data, bytes):
             image_data = BytesIO(image_data)
         img = Image.open(image_data)
         # 转换为 RGB 模式（如果需要）
         if img.mode in ('RGBA', 'P'):
             img = img.convert('RGB')
         # 计算新尺寸，保持宽高比
         ratio = max_size / max(img.size)
         if ratio < 1:

isa_model/inference/services/vision/isa_vision_service.py CHANGED Viewed

@@ -9,6 +9,7 @@ import logging
 import base64
 import io
 import time
+import asyncio
 from typing import Dict, Any, List, Union, Optional, BinaryIO
 from PIL import Image
@@ -36,7 +37,7 @@ class ISAVisionService(BaseVisionService):
     def __init__(self,
                  modal_app_id: str = "ap-VlHUQoiPUdy9cgrHSfG7Fk",
                  modal_app_name: str = "isa-vision-ui-optimized",
-                 timeout: int = 30):
+                 timeout: int = 60):
         """
         初始化ISA Vision服务
@@ -77,6 +78,31 @@ class ISAVisionService(BaseVisionService):
         self.request_count = 0
         self.total_cost = 0.0
+        # 性能优化 - 预热连接（延迟初始化）
+        self._connection_warmed = False
+        # 简单缓存机制（可选）
+        self._result_cache = {}
+        self._cache_max_size = 100
+    async def _warm_connection(self):
+        """预热Modal连接，减少首次调用延迟"""
+        if self._connection_warmed or not self.modal_app:
+            return
+        try:
+            logger.info("Warming up Modal connection...")
+            # 尝试获取服务状态来预热连接
+            if hasattr(self.modal_app, 'list_functions'):
+                await asyncio.wait_for(
+                    asyncio.to_thread(self.modal_app.list_functions),
+                    timeout=10
+                )
+            self._connection_warmed = True
+            logger.info("✅ Modal connection warmed up")
+        except Exception as e:
+            logger.warning(f"Failed to warm up connection: {e}")
     async def analyze_image(
         self,
         image: Union[str, BinaryIO],
@@ -154,6 +180,9 @@ class ISAVisionService(BaseVisionService):
                     'error': 'Modal app or service not available'
                 }
+            # 预热连接以减少延迟
+            await self._warm_connection()
             # 准备图像数据
             image_b64 = await self._prepare_image_base64(image)
@@ -208,11 +237,22 @@ class ISAVisionService(BaseVisionService):
             # 创建实例并调用优化方法（快速模式，无字幕）
             instance = OptimizedUIDetectionService()
-            result = instance.detect_ui_elements_fast.remote(image_b64, enable_captions=False)
+            # 使用超时控制Modal调用
+            result = await asyncio.wait_for(
+                instance.detect_ui_elements_fast.remote(image_b64, enable_captions=False),
+                timeout=self.timeout
+            )
             logger.info("✅ Modal SDK call successful")
             return result
+        except asyncio.TimeoutError:
+            logger.error(f"Modal SDK call timed out after {self.timeout} seconds")
+            return {
+                'success': False,
+                'error': f'Modal service timeout after {self.timeout} seconds',
+                'timeout': True
+            }
         except Exception as e:
             logger.error(f"Modal SDK call failed: {e}")
             return {
@@ -316,11 +356,22 @@ class ISAVisionService(BaseVisionService):
             # 创建实例并调用方法
             instance = SuryaOCRService()
-            result = instance.extract_text.remote(image_b64, languages)
+            # 使用超时控制OCR调用
+            result = await asyncio.wait_for(
+                instance.extract_text.remote(image_b64, languages),
+                timeout=self.timeout
+            )
             logger.info("✅ OCR service call successful")
             return result
+        except asyncio.TimeoutError:
+            logger.error(f"OCR service call timed out after {self.timeout} seconds")
+            return {
+                'success': False,
+                'error': f'OCR service timeout after {self.timeout} seconds',
+                'timeout': True
+            }
         except Exception as e:
             logger.error(f"OCR service call failed: {e}")
             return {
@@ -499,7 +550,7 @@ class ISAVisionService(BaseVisionService):
         """准备base64编码的图像"""
         if isinstance(image, str):
             # Check if it's already base64 encoded
-            if image.startswith('data:image') or len(image) > 1000:
+            if image.startswith('data:image') or (not image.startswith('http') and len(image) > 1000):
                 # Likely already base64
                 if image.startswith('data:image'):
                     # Extract base64 part
@@ -507,6 +558,16 @@ class ISAVisionService(BaseVisionService):
                 else:
                     # Assume it's pure base64
                     return image
+            elif image.startswith('http://') or image.startswith('https://'):
+                # URL - download the image
+                import aiohttp
+                async with aiohttp.ClientSession() as session:
+                    async with session.get(image) as response:
+                        if response.status == 200:
+                            image_data = await response.read()
+                            return base64.b64encode(image_data).decode('utf-8')
+                        else:
+                            raise ValueError(f"Failed to download image from URL: {response.status}")
             else:
                 # File path
                 with open(image, 'rb') as f:

isa_model/inference/services/vision/openai_vision_service.py CHANGED Viewed

@@ -92,12 +92,21 @@ class OpenAIVisionService(BaseVisionService, VisionPromptMixin):
                 }
             ]
-            response = await self._client.chat.completions.create(  # type: ignore
-                model=self.model_name,
-                messages=messages,  # type: ignore
-                max_tokens=max_tokens,
-                temperature=self.temperature
-            )
+            # Use max_completion_tokens for newer models like gpt-4o-mini
+            completion_params = {
+                "model": self.model_name,
+                "messages": messages,  # type: ignore
+                "temperature": self.temperature
+            }
+            # Check if model uses new parameter name
+            # All newer models (gpt-4o, gpt-4.1, o1, etc.) use max_completion_tokens
+            if any(prefix in self.model_name for prefix in ["gpt-4o", "gpt-4.1", "o1"]):
+                completion_params["max_completion_tokens"] = max_tokens
+            else:
+                completion_params["max_tokens"] = max_tokens
+            response = await self._client.chat.completions.create(**completion_params)  # type: ignore
             # Track usage for billing
             if response.usage:
@@ -162,7 +171,7 @@ class OpenAIVisionService(BaseVisionService, VisionPromptMixin):
         图像描述 - 使用专门提示词
         """
         prompt = self.get_task_prompt("describe", detail_level=detail_level)
-        return await self.analyze_image(image, prompt)
+        return await self.analyze_image(image, prompt, max_tokens=1000)
     async def extract_text(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
         """
@@ -170,7 +179,7 @@ class OpenAIVisionService(BaseVisionService, VisionPromptMixin):
         """
         prompt = self.get_task_prompt("extract_text")
-        return await self.analyze_image(image, prompt)
+        return await self.analyze_image(image, prompt, max_tokens=1000)
     async def detect_objects(
         self,
@@ -182,7 +191,7 @@ class OpenAIVisionService(BaseVisionService, VisionPromptMixin):
         """
         prompt = self.get_task_prompt("detect_objects", confidence_threshold=confidence_threshold)
-        return await self.analyze_image(image, prompt)
+        return await self.analyze_image(image, prompt, max_tokens=1000)
     async def detect_ui_elements(
         self,
@@ -195,7 +204,7 @@ class OpenAIVisionService(BaseVisionService, VisionPromptMixin):
         """
         prompt = self.get_task_prompt("detect_ui_elements", element_types=element_types, confidence_threshold=confidence_threshold)
-        return await self.analyze_image(image, prompt)
+        return await self.analyze_image(image, prompt, max_tokens=1000)
     async def detect_document_elements(
         self,

isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl