isa-model 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/__init__.py +30 -1
- isa_model/client.py +937 -0
- isa_model/core/config/__init__.py +16 -0
- isa_model/core/config/config_manager.py +514 -0
- isa_model/core/config.py +426 -0
- isa_model/core/models/model_billing_tracker.py +476 -0
- isa_model/core/models/model_manager.py +399 -0
- isa_model/core/{storage/supabase_storage.py → models/model_repo.py} +72 -73
- isa_model/core/pricing_manager.py +426 -0
- isa_model/core/services/__init__.py +19 -0
- isa_model/core/services/intelligent_model_selector.py +547 -0
- isa_model/core/types.py +291 -0
- isa_model/deployment/__init__.py +2 -0
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +157 -3
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +104 -3
- isa_model/deployment/cloud/modal/register_models.py +321 -0
- isa_model/deployment/runtime/deployed_service.py +338 -0
- isa_model/deployment/services/__init__.py +9 -0
- isa_model/deployment/services/auto_deploy_vision_service.py +538 -0
- isa_model/deployment/services/model_service.py +332 -0
- isa_model/deployment/services/service_monitor.py +356 -0
- isa_model/deployment/services/service_registry.py +527 -0
- isa_model/deployment/services/simple_auto_deploy_vision_service.py +275 -0
- isa_model/eval/__init__.py +80 -44
- isa_model/eval/config/__init__.py +10 -0
- isa_model/eval/config/evaluation_config.py +108 -0
- isa_model/eval/evaluators/__init__.py +18 -0
- isa_model/eval/evaluators/base_evaluator.py +503 -0
- isa_model/eval/evaluators/llm_evaluator.py +472 -0
- isa_model/eval/factory.py +417 -709
- isa_model/eval/infrastructure/__init__.py +24 -0
- isa_model/eval/infrastructure/experiment_tracker.py +466 -0
- isa_model/eval/metrics.py +191 -21
- isa_model/inference/ai_factory.py +257 -601
- isa_model/inference/services/audio/base_stt_service.py +65 -1
- isa_model/inference/services/audio/base_tts_service.py +75 -1
- isa_model/inference/services/audio/openai_stt_service.py +189 -151
- isa_model/inference/services/audio/openai_tts_service.py +12 -10
- isa_model/inference/services/audio/replicate_tts_service.py +61 -56
- isa_model/inference/services/base_service.py +55 -17
- isa_model/inference/services/embedding/base_embed_service.py +65 -1
- isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
- isa_model/inference/services/embedding/openai_embed_service.py +8 -10
- isa_model/inference/services/helpers/stacked_config.py +148 -0
- isa_model/inference/services/img/__init__.py +18 -0
- isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -1
- isa_model/inference/services/{stacked → img}/flux_professional_service.py +25 -1
- isa_model/inference/services/{stacked → img/helpers}/base_stacked_service.py +40 -35
- isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +44 -31
- isa_model/inference/services/llm/__init__.py +3 -3
- isa_model/inference/services/llm/base_llm_service.py +492 -40
- isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
- isa_model/inference/services/llm/ollama_llm_service.py +51 -17
- isa_model/inference/services/llm/openai_llm_service.py +70 -19
- isa_model/inference/services/llm/yyds_llm_service.py +24 -23
- isa_model/inference/services/vision/__init__.py +38 -4
- isa_model/inference/services/vision/base_vision_service.py +218 -117
- isa_model/inference/services/vision/{isA_vision_service.py → disabled/isA_vision_service.py} +98 -0
- isa_model/inference/services/{stacked → vision}/doc_analysis_service.py +1 -1
- isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
- isa_model/inference/services/vision/helpers/image_utils.py +272 -3
- isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
- isa_model/inference/services/vision/openai_vision_service.py +104 -307
- isa_model/inference/services/vision/replicate_vision_service.py +140 -325
- isa_model/inference/services/{stacked → vision}/ui_analysis_service.py +2 -498
- isa_model/scripts/register_models.py +370 -0
- isa_model/scripts/register_models_with_embeddings.py +510 -0
- isa_model/serving/api/fastapi_server.py +6 -1
- isa_model/serving/api/routes/unified.py +274 -0
- {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/METADATA +4 -1
- {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/RECORD +78 -53
- isa_model/config/__init__.py +0 -9
- isa_model/config/config_manager.py +0 -213
- isa_model/core/model_manager.py +0 -213
- isa_model/core/model_registry.py +0 -375
- isa_model/core/vision_models_init.py +0 -116
- isa_model/inference/billing_tracker.py +0 -406
- isa_model/inference/services/llm/triton_llm_service.py +0 -481
- isa_model/inference/services/stacked/__init__.py +0 -26
- isa_model/inference/services/stacked/config.py +0 -426
- isa_model/inference/services/vision/ollama_vision_service.py +0 -194
- /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
- /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
- /isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +0 -0
- {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/WHEEL +0 -0
- {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/top_level.txt +0 -0
isa_model/inference/services/vision/openai_vision_service.py
@@ -1,32 +1,31 @@
 from typing import Dict, Any, Union, List, Optional, BinaryIO
-import base64
-import aiohttp
 from openai import AsyncOpenAI
 from tenacity import retry, stop_after_attempt, wait_exponential
 from isa_model.inference.services.vision.base_vision_service import BaseVisionService
-from isa_model.inference.
-from isa_model.inference.
+from isa_model.inference.services.vision.helpers.image_utils import prepare_image_base64
+from isa_model.inference.services.vision.helpers.vision_prompts import VisionPromptMixin
+from isa_model.core.types import ServiceType
 import logging

 logger = logging.getLogger(__name__)

-class OpenAIVisionService(BaseVisionService):
-    """OpenAI Vision service using
+class OpenAIVisionService(BaseVisionService, VisionPromptMixin):
+    """OpenAI Vision service using centralized config management"""

-    def __init__(self,
-        super().__init__(
+    def __init__(self, provider_name: str, model_name: str = "gpt-4o-mini", **kwargs):
+        super().__init__(provider_name, model_name, **kwargs)

-        # Get
-        provider_config =
+        # Get configuration from centralized config manager
+        provider_config = self.get_provider_config()

-        # Initialize AsyncOpenAI client with
+        # Initialize AsyncOpenAI client with centralized configuration
         try:
             if not provider_config.get("api_key"):
                 raise ValueError("OpenAI API key not found in provider configuration")

             self._client = AsyncOpenAI(
                 api_key=provider_config["api_key"],
-                base_url=provider_config.get("
+                base_url=provider_config.get("api_base_url", "https://api.openai.com/v1"),
                 organization=provider_config.get("organization")
             )

@@ -44,57 +43,7 @@ class OpenAIVisionService(BaseVisionService):
         """Get the underlying OpenAI client"""
         return self._client

-    async def _download_image(self, image_url: str) -> bytes:
-        """Download image from URL"""
-        async with aiohttp.ClientSession() as session:
-            async with session.get(image_url) as response:
-                if response.status == 200:
-                    return await response.read()
-                else:
-                    raise ValueError(f"Failed to download image from {image_url}: {response.status}")

-    def _encode_image(self, image_path_or_data: Union[str, bytes, BinaryIO]) -> str:
-        """Encode image to base64"""
-        if isinstance(image_path_or_data, str):
-            # If it's a file path
-            with open(image_path_or_data, "rb") as image_file:
-                return base64.b64encode(image_file.read()).decode("utf-8")
-        elif hasattr(image_path_or_data, 'read'):
-            # If it's a file-like object (BinaryIO)
-            data = image_path_or_data.read()  # type: ignore
-            if isinstance(data, bytes):
-                return base64.b64encode(data).decode("utf-8")
-            else:
-                raise ValueError("File-like object did not return bytes")
-        else:
-            # If it's bytes data
-            return base64.b64encode(image_path_or_data).decode("utf-8")  # type: ignore
-
-    async def invoke(
-        self,
-        image: Union[str, BinaryIO],
-        prompt: Optional[str] = None,
-        task: Optional[str] = None,
-        **kwargs
-    ) -> Dict[str, Any]:
-        """
-        Unified invoke method for all vision operations
-        """
-        task = task or "analyze"
-
-        if task == "analyze":
-            return await self.analyze_image(image, prompt, kwargs.get("max_tokens", 1000))
-        elif task == "describe":
-            return await self.describe_image(image, kwargs.get("detail_level", "medium"))
-        elif task == "extract_text":
-            return await self.extract_text(image)
-        elif task == "detect_objects":
-            return await self.detect_objects(image, kwargs.get("confidence_threshold", 0.5))
-        elif task == "classify":
-            return await self.classify_image(image, kwargs.get("categories"))
-        else:
-            # Default to analyze_image for unknown tasks
-            return await self.analyze_image(image, prompt, kwargs.get("max_tokens", 1000))

     @retry(
         stop=stop_after_attempt(3),
@@ -119,22 +68,8 @@ class OpenAIVisionService(BaseVisionService):
             Dict containing analysis results
         """
         try:
-            #
-
-                if image.startswith(('http://', 'https://')):
-                    # Download image from URL
-                    image_bytes = await self._download_image(image)
-                    base64_image = self._encode_image(image_bytes)
-                else:
-                    # File path
-                    base64_image = self._encode_image(image)
-            else:
-                # BinaryIO or bytes data
-                if hasattr(image, 'read'):
-                    image_data = image.read()
-                else:
-                    image_data = image
-                base64_image = self._encode_image(image_data)
+            # Use unified image processing from image_utils
+            base64_image = prepare_image_base64(image)

             # Use default prompt if none provided
             if prompt is None:
@@ -166,7 +101,7 @@ class OpenAIVisionService(BaseVisionService):

             # Track usage for billing
             if response.usage:
-                self._track_usage(
+                await self._track_usage(
                     service_type=ServiceType.VISION,
                     operation="image_analysis",
                     input_tokens=response.usage.prompt_tokens,
@@ -176,14 +111,36 @@ class OpenAIVisionService(BaseVisionService):

             content = response.choices[0].message.content or ""

+            # Try to parse the response as JSON (for structured tasks)
+            try:
+                import json
+                # Check whether the response is in JSON format
+                if content.strip().startswith('{') and content.strip().endswith('}'):
+                    parsed_json = json.loads(content)
+                    return {
+                        "text": content,
+                        "parsed_data": parsed_json,
+                        "confidence": 1.0,
+                        "metadata": {
+                            "model": self.model_name,
+                            "prompt": prompt[:100],
+                            "tokens_used": response.usage.total_tokens if response.usage else 0,
+                            "response_format": "json"
+                        }
+                    }
+            except json.JSONDecodeError:
+                pass
+
+            # Standard text response
             return {
                 "text": content,
                 "confidence": 1.0,  # OpenAI doesn't provide confidence scores
-                "detected_objects": [],  #
+                "detected_objects": [],  # Populated by specific detection methods
                 "metadata": {
                     "model": self.model_name,
-                    "prompt": prompt,
-                    "tokens_used": response.usage.total_tokens if response.usage else 0
+                    "prompt": prompt[:100],
+                    "tokens_used": response.usage.total_tokens if response.usage else 0,
+                    "response_format": "text"
                 }
             }

@@ -191,264 +148,104 @@ class OpenAIVisionService(BaseVisionService):
             logger.error(f"Error in image analysis: {e}")
             raise

-
-
-
-        prompt: Optional[str] = None,
-        max_tokens: int = 1000
-    ) -> List[Dict[str, Any]]:
-        """Analyze multiple images"""
-        results = []
-        for image in images:
-            result = await self.analyze_image(image, prompt, max_tokens)
-            results.append(result)
-        return results
+    # ==================== Prompt-based task implementations ====================
+    # OpenAI can cover most vision tasks simply by changing the prompt
+    # The shared VisionPromptMixin supplies the standard task prompts

+    # Override the remaining methods to use the task-specific prompts
     async def describe_image(
         self,
         image: Union[str, BinaryIO],
         detail_level: str = "medium"
     ) -> Dict[str, Any]:
-        """
-
-
-
-
-        }
-
-        prompt = detail_prompts.get(detail_level, detail_prompts["medium"])
-        result = await self.analyze_image(image, prompt, 1500)
-
-        return {
-            "description": result["text"],
-            "objects": [],  # Would need object detection API
-            "scene": result["text"],  # Use same description
-            "colors": [],  # Would need color analysis
-            "detail_level": detail_level,
-            "metadata": result["metadata"]
-        }
+        """
+        Image description - uses a dedicated prompt
+        """
+        prompt = self.get_task_prompt("describe", detail_level=detail_level)
+        return await self.analyze_image(image, prompt)

     async def extract_text(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
-        """
-
-
+        """
+        Text extraction (OCR) - uses a dedicated prompt
+        """
+        prompt = self.get_task_prompt("extract_text")

-        return
-            "text": result["text"],
-            "confidence": 1.0,
-            "bounding_boxes": [],  # OpenAI vision doesn't provide bounding boxes
-            "language": "unknown",  # Would need language detection
-            "metadata": result["metadata"]
-        }
+        return await self.analyze_image(image, prompt)

     async def detect_objects(
         self,
         image: Union[str, BinaryIO],
         confidence_threshold: float = 0.5
     ) -> Dict[str, Any]:
-        """
-
-
-
-        3. Approximate size as percentages of image dimensions (width%, height%)
-        4. Brief description
-
-        Format each object as: "ObjectName: x=X%, y=Y%, width=W%, height=H% - Description"
-
-        Example: "Car: x=25%, y=40%, width=15%, height=12% - Red sedan in the center"
-        """
-        result = await self.analyze_image(image, prompt, 1500)
-
-        # Parse the response to extract object information with coordinates
-        objects = []
-        bounding_boxes = []
-        lines = result["text"].split('\n')
-
-        for line in lines:
-            line = line.strip()
-            if line and ':' in line and ('x=' in line or 'width=' in line):
-                try:
-                    # Extract object name and details
-                    parts = line.split(':', 1)
-                    if len(parts) == 2:
-                        object_name = parts[0].strip()
-                        details = parts[1].strip()
-
-                        # Extract coordinates using regex-like parsing
-                        coords = {}
-                        for param in ['x', 'y', 'width', 'height']:
-                            param_pattern = f"{param}="
-                            if param_pattern in details:
-                                start_idx = details.find(param_pattern) + len(param_pattern)
-                                end_idx = details.find('%', start_idx)
-                                if end_idx > start_idx:
-                                    try:
-                                        value = float(details[start_idx:end_idx])
-                                        coords[param] = value
-                                    except ValueError:
-                                        continue
-
-                        # Extract description (after the coordinates)
-                        desc_start = details.find(' - ')
-                        description = details[desc_start + 3:] if desc_start != -1 else details
-
-                        objects.append({
-                            "label": object_name,
-                            "confidence": 1.0,
-                            "coordinates": coords,
-                            "description": description
-                        })
-
-                        # Add bounding box if we have coordinates
-                        if all(k in coords for k in ['x', 'y', 'width', 'height']):
-                            bounding_boxes.append({
-                                "label": object_name,
-                                "x_percent": coords['x'],
-                                "y_percent": coords['y'],
-                                "width_percent": coords['width'],
-                                "height_percent": coords['height']
-                            })
-
-                except Exception:
-                    # Fallback for objects that don't match expected format
-                    objects.append({
-                        "label": line,
-                        "confidence": 1.0,
-                        "coordinates": {},
-                        "description": line
-                    })
+        """
+        Object detection - uses a dedicated prompt
+        """
+        prompt = self.get_task_prompt("detect_objects", confidence_threshold=confidence_threshold)

-        return
-            "objects": objects,
-            "count": len(objects),
-            "bounding_boxes": bounding_boxes,
-            "metadata": result["metadata"]
-        }
+        return await self.analyze_image(image, prompt)

-    async def
+    async def detect_ui_elements(
         self,
         image: Union[str, BinaryIO],
-
+        element_types: Optional[List[str]] = None,
+        confidence_threshold: float = 0.5
     ) -> Dict[str, Any]:
-        """
-
-
-
-        FOUND: YES/NO
-        CENTER: [x, y]
-        DESCRIPTION: [Brief description]
-
-        If found, provide the pixel coordinates of the center point.
-        If not found, explain why.
-
-        Example:
-        FOUND: YES
-        CENTER: [640, 360]
-        DESCRIPTION: Blue login button in the center-left area
-        """
-
-        result = await self.analyze_image(image, prompt, 300)
-        response_text = result["text"]
-
-        # Parse the structured response
-        found = False
-        center_coords = None
-        description = ""
+        """
+        UI element detection - uses a dedicated prompt
+        """
+        prompt = self.get_task_prompt("detect_ui_elements", element_types=element_types, confidence_threshold=confidence_threshold)

-
-
-
-
-
-
-
-
-
-
-
-
-                        x_str, y_str = coords_text.split(',')
-                        x = int(float(x_str.strip()))
-                        y = int(float(y_str.strip()))
-                        center_coords = [x, y]
-                    except (ValueError, IndexError):
-                        pass
-            elif line.startswith('DESCRIPTION:'):
-                description = line.replace('DESCRIPTION:', '').strip()
+        return await self.analyze_image(image, prompt)
+
+    async def detect_document_elements(
+        self,
+        image: Union[str, BinaryIO],
+        element_types: Optional[List[str]] = None,
+        confidence_threshold: float = 0.5
+    ) -> Dict[str, Any]:
+        """
+        Document element detection - uses a dedicated prompt
+        """
+        prompt = self.get_task_prompt("detect_document_elements", element_types=element_types, confidence_threshold=confidence_threshold)

-        return
-            "found": found,
-            "center_coordinates": center_coords,
-            "confidence": 1.0 if found else 0.0,
-            "description": description,
-            "metadata": result["metadata"]
-        }
+        return await self.analyze_image(image, prompt)

     async def classify_image(
         self,
         image: Union[str, BinaryIO],
         categories: Optional[List[str]] = None
     ) -> Dict[str, Any]:
-        """
-
-
-
-        else:
-            prompt = "What category best describes this image? Provide a single category name."
-
-        result = await self.analyze_image(image, prompt, 100)
-        category = result["text"].strip()
+        """
+        Image classification - uses a dedicated prompt
+        """
+        prompt = self.get_task_prompt("classify", categories=categories)

-        return
-            "category": category,
-            "confidence": 1.0,
-            "all_predictions": [{"category": category, "confidence": 1.0}],
-            "metadata": result["metadata"]
-        }
+        return await self.analyze_image(image, prompt)

-    async def
-        self,
-
-
+    async def get_object_coordinates(
+        self,
+        image: Union[str, BinaryIO],
+        object_name: str
     ) -> Dict[str, Any]:
-        """
-
-
-
-
-        # Use LLM to compare the descriptions
-        comparison_prompt = f"Compare these two image descriptions and provide a similarity analysis:\n\nImage 1: {result1['text']}\n\nImage 2: {result2['text']}\n\nProvide: 1) A similarity score from 0.0 to 1.0, 2) Key differences, 3) Common elements."
-
-        comparison_result = await self._client.chat.completions.create(
-            model=self.model_name,
-            messages=[{"role": "user", "content": comparison_prompt}],
-            max_tokens=500,
-            temperature=0.3
-        )
-
-        comparison_text = comparison_result.choices[0].message.content or ""
+        """
+        Get object coordinates - uses a dedicated prompt
+        """
+        prompt = self.get_task_prompt("get_coordinates", object_name=object_name)

-        return
-            "similarity_score": 0.5,  # Would need better parsing to extract actual score
-            "differences": comparison_text,
-            "common_elements": comparison_text,
-            "metadata": {
-                "model": self.model_name,
-                "comparison_method": "description_based"
-            }
-        }
-
-    def get_supported_formats(self) -> List[str]:
-        """Get list of supported image formats"""
-        return ['jpg', 'jpeg', 'png', 'gif', 'webp']
+        return await self.analyze_image(image, prompt)

-    def
-
-
-
-
-
-
+    async def extract_table_data(
+        self,
+        image: Union[str, BinaryIO],
+        table_format: str = "json",
+        preserve_formatting: bool = True
+    ) -> Dict[str, Any]:
+        """
+        Structured table data extraction - uses the dedicated table-extraction prompt
+        """
+        prompt = self.get_task_prompt("extract_table_data", table_format=table_format, preserve_formatting=preserve_formatting)
+
+        return await self.analyze_image(image, prompt)

     async def close(self):
         """Clean up resources"""
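
For orientation, a minimal usage sketch of the refactored vision service, based only on the method signatures visible in this diff. Direct instantiation, the "openai" provider name, and the image paths are assumptions for illustration; in practice the package resolves the API key and provider settings through its centralized config manager.

# Illustrative sketch only - not part of the diff above.
import asyncio

from isa_model.inference.services.vision.openai_vision_service import OpenAIVisionService

async def main() -> None:
    # provider_name and model_name follow the __init__ signature shown in the diff
    service = OpenAIVisionService(provider_name="openai", model_name="gpt-4o-mini")
    try:
        # Each task method builds its prompt via VisionPromptMixin and delegates to analyze_image()
        description = await service.describe_image("photo.jpg", detail_level="high")
        print(description["text"])

        # Structured tasks may include "parsed_data" when the model replies with JSON
        table = await service.extract_table_data("invoice.png", table_format="json")
        print(table.get("parsed_data", table["text"]))
    finally:
        await service.close()

asyncio.run(main())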