isa-model 0.3.5__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their public registry.
Files changed (87)
  1. isa_model/__init__.py +30 -1
  2. isa_model/client.py +770 -0
  3. isa_model/core/config/__init__.py +16 -0
  4. isa_model/core/config/config_manager.py +514 -0
  5. isa_model/core/config.py +426 -0
  6. isa_model/core/models/model_billing_tracker.py +476 -0
  7. isa_model/core/models/model_manager.py +399 -0
  8. isa_model/core/{storage/supabase_storage.py → models/model_repo.py} +72 -73
  9. isa_model/core/pricing_manager.py +426 -0
  10. isa_model/core/services/__init__.py +19 -0
  11. isa_model/core/services/intelligent_model_selector.py +547 -0
  12. isa_model/core/types.py +291 -0
  13. isa_model/deployment/__init__.py +2 -0
  14. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +157 -3
  15. isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
  16. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +104 -3
  17. isa_model/deployment/cloud/modal/register_models.py +321 -0
  18. isa_model/deployment/runtime/deployed_service.py +338 -0
  19. isa_model/deployment/services/__init__.py +9 -0
  20. isa_model/deployment/services/auto_deploy_vision_service.py +537 -0
  21. isa_model/deployment/services/model_service.py +332 -0
  22. isa_model/deployment/services/service_monitor.py +356 -0
  23. isa_model/deployment/services/service_registry.py +527 -0
  24. isa_model/eval/__init__.py +80 -44
  25. isa_model/eval/config/__init__.py +10 -0
  26. isa_model/eval/config/evaluation_config.py +108 -0
  27. isa_model/eval/evaluators/__init__.py +18 -0
  28. isa_model/eval/evaluators/base_evaluator.py +503 -0
  29. isa_model/eval/evaluators/llm_evaluator.py +472 -0
  30. isa_model/eval/factory.py +417 -709
  31. isa_model/eval/infrastructure/__init__.py +24 -0
  32. isa_model/eval/infrastructure/experiment_tracker.py +466 -0
  33. isa_model/eval/metrics.py +191 -21
  34. isa_model/inference/ai_factory.py +181 -605
  35. isa_model/inference/services/audio/base_stt_service.py +65 -1
  36. isa_model/inference/services/audio/base_tts_service.py +75 -1
  37. isa_model/inference/services/audio/openai_stt_service.py +189 -151
  38. isa_model/inference/services/audio/openai_tts_service.py +12 -10
  39. isa_model/inference/services/audio/replicate_tts_service.py +61 -56
  40. isa_model/inference/services/base_service.py +55 -17
  41. isa_model/inference/services/embedding/base_embed_service.py +65 -1
  42. isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
  43. isa_model/inference/services/embedding/openai_embed_service.py +8 -10
  44. isa_model/inference/services/helpers/stacked_config.py +148 -0
  45. isa_model/inference/services/img/__init__.py +18 -0
  46. isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -1
  47. isa_model/inference/services/{stacked → img}/flux_professional_service.py +25 -1
  48. isa_model/inference/services/{stacked → img/helpers}/base_stacked_service.py +40 -35
  49. isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +44 -31
  50. isa_model/inference/services/llm/__init__.py +3 -3
  51. isa_model/inference/services/llm/base_llm_service.py +492 -40
  52. isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
  53. isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
  54. isa_model/inference/services/llm/ollama_llm_service.py +51 -17
  55. isa_model/inference/services/llm/openai_llm_service.py +70 -19
  56. isa_model/inference/services/llm/yyds_llm_service.py +24 -23
  57. isa_model/inference/services/vision/__init__.py +38 -4
  58. isa_model/inference/services/vision/base_vision_service.py +218 -117
  59. isa_model/inference/services/vision/{isA_vision_service.py → disabled/isA_vision_service.py} +98 -0
  60. isa_model/inference/services/{stacked → vision}/doc_analysis_service.py +1 -1
  61. isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
  62. isa_model/inference/services/vision/helpers/image_utils.py +272 -3
  63. isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
  64. isa_model/inference/services/vision/openai_vision_service.py +104 -307
  65. isa_model/inference/services/vision/replicate_vision_service.py +140 -325
  66. isa_model/inference/services/{stacked → vision}/ui_analysis_service.py +2 -498
  67. isa_model/scripts/register_models.py +370 -0
  68. isa_model/scripts/register_models_with_embeddings.py +510 -0
  69. isa_model/serving/api/fastapi_server.py +6 -1
  70. isa_model/serving/api/routes/unified.py +202 -0
  71. {isa_model-0.3.5.dist-info → isa_model-0.3.6.dist-info}/METADATA +4 -1
  72. {isa_model-0.3.5.dist-info → isa_model-0.3.6.dist-info}/RECORD +77 -53
  73. isa_model/config/__init__.py +0 -9
  74. isa_model/config/config_manager.py +0 -213
  75. isa_model/core/model_manager.py +0 -213
  76. isa_model/core/model_registry.py +0 -375
  77. isa_model/core/vision_models_init.py +0 -116
  78. isa_model/inference/billing_tracker.py +0 -406
  79. isa_model/inference/services/llm/triton_llm_service.py +0 -481
  80. isa_model/inference/services/stacked/__init__.py +0 -26
  81. isa_model/inference/services/stacked/config.py +0 -426
  82. isa_model/inference/services/vision/ollama_vision_service.py +0 -194
  83. /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
  84. /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
  85. /isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +0 -0
  86. {isa_model-0.3.5.dist-info → isa_model-0.3.6.dist-info}/WHEEL +0 -0
  87. {isa_model-0.3.5.dist-info → isa_model-0.3.6.dist-info}/top_level.txt +0 -0
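Several helper modules were relocated in 0.3.6 (items 83-85 above). A minimal sketch of how downstream imports would change, assuming the new helpers packages are importable and no compatibility shims are re-exported from the old locations:

    # 0.3.5 locations (illustrative)
    # isa_model/inference/services/llm/llm_adapter.py
    # isa_model/inference/services/vision/helpers/text_splitter.py
    # isa_model/core/model_storage.py

    # 0.3.6 locations, per items 83-85
    from isa_model.inference.services.llm.helpers import llm_adapter
    from isa_model.inference.services.embedding.helpers import text_splitter
    from isa_model.core.models import model_storage

The hunks that follow are from isa_model/inference/services/vision/replicate_vision_service.py (item 65).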
@@ -5,13 +5,14 @@ import replicate
  import re
  import ast
  from isa_model.inference.services.vision.base_vision_service import BaseVisionService
- from isa_model.inference.providers.base_provider import BaseProvider
- from isa_model.inference.billing_tracker import ServiceType
+ from isa_model.core.types import ServiceType
+ from isa_model.inference.services.vision.helpers.image_utils import prepare_image_data_url
+ from isa_model.inference.services.vision.helpers.vision_prompts import VisionPromptMixin
  import logging

  logger = logging.getLogger(__name__)

- class ReplicateVisionService(BaseVisionService):
+ class ReplicateVisionService(BaseVisionService, VisionPromptMixin):
  """Enhanced Replicate Vision service supporting multiple specialized models"""

  # Supported model configurations
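For code that imported ServiceType from its 0.3.5 location, the change above implies an import update along these lines (a minimal sketch; it assumes nothing is re-exported from isa_model.inference.billing_tracker, which item 78 shows was removed):

    # 0.3.5
    # from isa_model.inference.billing_tracker import ServiceType
    # 0.3.6
    from isa_model.core.types import ServiceType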
@@ -19,17 +20,18 @@ class ReplicateVisionService(BaseVisionService):
  "cogvlm": "cjwbw/cogvlm:a5092d718ea77a073e6d8f6969d5c0fb87d0ac7e4cdb7175427331e1798a34ed",
  "florence-2": "microsoft/florence-2-large:fcdb54e52322b9e6dce7a35e5d8ad173dce30b46ef49a236c1a71bc6b78b5bed",
  "omniparser": "microsoft/omniparser-v2:49cf3d41b8d3aca1360514e83be4c97131ce8f0d99abfc365526d8384caa88df",
- "yolov8": "adirik/yolov8:3b21ba0e5da47bb2c69a96f72894a31b7c1e77b3e8a7b6ba43b7eb93b7b2c4f4"
+ "yolov8": "adirik/yolov8:3b21ba0e5da47bb2c69a96f72894a31b7c1e77b3e8a7b6ba43b7eb93b7b2c4f4",
+ "qwen-vl-chat": "lucataco/qwen-vl-chat:50881b153b4d5f72b3db697e2bbad23bb1277ab741c5b52d80cd6ee17ea660e9"
  }

- def __init__(self, provider: 'BaseProvider', model_name: str = "cogvlm"):
+ def __init__(self, provider_name: str, model_name: str = "cogvlm", **kwargs):
  # Resolve model name to full model path
  self.model_key = model_name
  resolved_model = self.MODELS.get(model_name, model_name)
- super().__init__(provider, resolved_model)
+ super().__init__(provider_name, resolved_model, **kwargs)

- # Get full configuration from provider
- provider_config = provider.get_full_config()
+ # Get configuration from centralized config manager
+ provider_config = self.get_provider_config()

  # Initialize Replicate client
  try:
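A minimal construction sketch under the new signature. It assumes the Replicate API token is supplied through the centralized config manager rather than a BaseProvider instance, and that "replicate" is the expected provider_name string (neither is shown in this hunk):

    from isa_model.inference.services.vision.replicate_vision_service import ReplicateVisionService

    # 0.3.5: ReplicateVisionService(provider, "cogvlm") with a BaseProvider object
    # 0.3.6: the provider is identified by name; extra settings pass through **kwargs
    service = ReplicateVisionService(provider_name="replicate", model_name="qwen-vl-chat")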
@@ -52,72 +54,15 @@ class ReplicateVisionService(BaseVisionService):

  def _prepare_image(self, image: Union[str, BinaryIO]) -> str:
  """Prepare image for Replicate API - convert to URL or base64"""
- if isinstance(image, str):
- if image.startswith(('http://', 'https://')):
- # Already a URL
- return image
- else:
- # Local file path - need to convert to base64 data URL
- with open(image, "rb") as f:
- image_data = f.read()
- image_b64 = base64.b64encode(image_data).decode()
- # Determine file extension for MIME type
- ext = os.path.splitext(image)[1].lower()
- mime_type = {
- '.jpg': 'image/jpeg',
- '.jpeg': 'image/jpeg',
- '.png': 'image/png',
- '.gif': 'image/gif',
- '.webp': 'image/webp'
- }.get(ext, 'image/jpeg')
- return f"data:{mime_type};base64,{image_b64}"
+ if isinstance(image, str) and image.startswith(('http://', 'https://')):
+ # Already a URL
+ return image
  else:
- # BinaryIO or bytes data - convert to base64 data URL
- if hasattr(image, 'read'):
- image_data = image.read()
- if isinstance(image_data, bytes):
- image_b64 = base64.b64encode(image_data).decode()
- else:
- raise ValueError("File-like object did not return bytes")
- else:
- # Assume it's bytes
- image_b64 = base64.b64encode(image).decode() # type: ignore
- return f"data:image/jpeg;base64,{image_b64}"
+ # Use unified image processing from image_utils
+ return prepare_image_data_url(image)

- async def invoke(
- self,
- image: Union[str, BinaryIO],
- prompt: Optional[str] = None,
- task: Optional[str] = None,
- **kwargs
- ) -> Dict[str, Any]:
- """
- Unified invoke method for all vision operations
- """
- task = task or "analyze"
-
- if task == "analyze":
- return await self.analyze_image(image, prompt, kwargs.get("max_tokens", 1000))
- elif task == "element_detection":
- if self.model_key == "omniparser":
- return await self.run_omniparser(image, **kwargs)
- elif self.model_key == "florence-2":
- return await self.run_florence2(image, **kwargs)
- elif self.model_key == "yolov8":
- return await self.run_yolo(image, **kwargs)
- else:
- return await self.detect_objects(image, kwargs.get("confidence_threshold", 0.5))
- elif task == "describe":
- return await self.describe_image(image, kwargs.get("detail_level", "medium"))
- elif task == "extract_text":
- return await self.extract_text(image)
- elif task == "detect_objects":
- return await self.detect_objects(image, kwargs.get("confidence_threshold", 0.5))
- elif task == "classify":
- return await self.classify_image(image, kwargs.get("categories"))
- else:
- # Default to analyze_image for unknown tasks
- return await self.analyze_image(image, prompt, kwargs.get("max_tokens", 1000))
+ # Replicate uses the base class invoke method; no override is needed.
+ # The corresponding standard methods are implemented directly instead.

  async def analyze_image(
  self,
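The inline base64 handling removed above now lives in helpers/image_utils.py. A rough sketch of what prepare_image_data_url presumably does, reconstructed from the deleted code (the actual helper is not shown in this diff and may differ):

    import base64
    import os
    from typing import BinaryIO, Union

    def prepare_image_data_url(image: Union[str, bytes, BinaryIO]) -> str:
        """Return a base64 data URL for a local path, raw bytes, or file-like object."""
        if isinstance(image, str):
            # Local file path: read and infer the MIME type from the extension
            with open(image, "rb") as f:
                data = f.read()
            ext = os.path.splitext(image)[1].lower()
            mime = {".jpg": "image/jpeg", ".jpeg": "image/jpeg", ".png": "image/png",
                    ".gif": "image/gif", ".webp": "image/webp"}.get(ext, "image/jpeg")
        else:
            # File-like object or raw bytes; default MIME type as in the old code
            data = image.read() if hasattr(image, "read") else image
            mime = "image/jpeg"
        return f"data:{mime};base64,{base64.b64encode(data).decode()}"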
@@ -129,28 +74,39 @@ class ReplicateVisionService(BaseVisionService):
  Analyze image and provide description or answer questions
  """
  try:
- # Prepare image for API
+ # Prepare image for API using unified processing
  image_input = self._prepare_image(image)

  # Use default prompt if none provided
  if prompt is None:
  prompt = "Describe this image in detail."

- # Run CogVLM model
- output = replicate.run(
- self.model_name,
- input={
- "vqa": True, # Visual Question Answering mode
- "image": image_input,
- "query": prompt
- }
- )
+ # Choose input format based on model type
+ if self.model_key == "qwen-vl-chat":
+ # Qwen-VL-Chat uses simple image + prompt format
+ output = replicate.run(
+ self.model_name,
+ input={
+ "image": image_input,
+ "prompt": prompt
+ }
+ )
+ else:
+ # CogVLM and other models use VQA format
+ output = replicate.run(
+ self.model_name,
+ input={
+ "vqa": True, # Visual Question Answering mode
+ "image": image_input,
+ "query": prompt
+ }
+ )

  # CogVLM returns a string response
  response_text = str(output) if output else ""

  # Track usage for billing
- self._track_usage(
+ await self._track_usage(
  service_type=ServiceType.VISION,
  operation="image_analysis",
  input_tokens=len(prompt.split()) if prompt else 0,
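The branch added above sends a different input payload per model. A usage sketch with the Replicate client, using the version hashes from the MODELS table earlier in this diff (the image URL is a placeholder):

    import replicate

    image_url = "https://example.com/sample.png"  # placeholder

    # qwen-vl-chat: plain image + prompt
    qwen_out = replicate.run(
        "lucataco/qwen-vl-chat:50881b153b4d5f72b3db697e2bbad23bb1277ab741c5b52d80cd6ee17ea660e9",
        input={"image": image_url, "prompt": "Describe this image in detail."},
    )

    # cogvlm: VQA-style input
    cogvlm_out = replicate.run(
        "cjwbw/cogvlm:a5092d718ea77a073e6d8f6969d5c0fb87d0ac7e4cdb7175427331e1798a34ed",
        input={"vqa": True, "image": image_url, "query": "Describe this image in detail."},
    )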
@@ -173,272 +129,131 @@ class ReplicateVisionService(BaseVisionService):
  logger.error(f"Error in image analysis: {e}")
  raise

- async def analyze_images(
- self,
- images: List[Union[str, BinaryIO]],
- prompt: Optional[str] = None,
- max_tokens: int = 1000
- ) -> List[Dict[str, Any]]:
- """Analyze multiple images"""
- results = []
- for image in images:
- result = await self.analyze_image(image, prompt, max_tokens)
- results.append(result)
- return results
+ # ==================== STANDARD INTERFACE: DETECTION & EXTRACTION ====================

- async def describe_image(
- self,
+ async def detect_ui_elements(
+ self,
  image: Union[str, BinaryIO],
- detail_level: str = "medium"
+ element_types: Optional[List[str]] = None,
+ confidence_threshold: float = 0.5
  ) -> Dict[str, Any]:
- """Generate detailed description of image"""
- detail_prompts = {
- "low": "Briefly describe what you see in this image.",
- "medium": "Describe what you see in this image in detail, including objects, colors, and scene.",
- "high": "Provide a comprehensive and detailed description of this image, including all visible objects, their positions, colors, textures, lighting, composition, and any text or symbols present."
- }
-
- prompt = detail_prompts.get(detail_level, detail_prompts["medium"])
- result = await self.analyze_image(image, prompt, 1500)
-
- return {
- "description": result["text"],
- "objects": [], # Would need object detection API
- "scene": result["text"], # Use same description
- "colors": [], # Would need color analysis
- "detail_level": detail_level,
- "metadata": result["metadata"]
- }
+ """
+ UI element detection - implemented with specialized models
+ """
+ if self.model_key == "omniparser":
+ return await self.run_omniparser(image, box_threshold=confidence_threshold)
+ elif self.model_key == "florence-2":
+ return await self.run_florence2(image, task="<OPEN_VOCABULARY_DETECTION>")
+ else:
+ # Fall back to generic object detection
+ return await self.detect_objects(image, confidence_threshold)

- async def extract_text(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
- """Extract text from image (OCR)"""
- prompt = "Extract all text visible in this image. Provide only the text content, maintaining the original structure and formatting as much as possible."
- result = await self.analyze_image(image, prompt, 1000)
-
- return {
- "text": result["text"],
- "confidence": 1.0,
- "bounding_boxes": [], # CogVLM doesn't provide bounding boxes
- "language": "unknown", # Would need language detection
- "metadata": result["metadata"]
- }
+ async def detect_document_elements(
+ self,
+ image: Union[str, BinaryIO],
+ element_types: Optional[List[str]] = None,
+ confidence_threshold: float = 0.5
+ ) -> Dict[str, Any]:
+ """
+ Document structure element detection - implemented with specialized models
+ """
+ if self.model_key == "florence-2":
+ # Florence-2 can detect document structure
+ return await self.run_florence2(image, task="<DETAILED_CAPTION>")
+ else:
+ raise NotImplementedError(f"Document detection not supported for model {self.model_key}")

  async def detect_objects(
  self,
  image: Union[str, BinaryIO],
  confidence_threshold: float = 0.5
  ) -> Dict[str, Any]:
- """Detect objects in image"""
- prompt = """Analyze this image and identify all distinct objects, UI elements, or regions. For each element you identify, provide its location and size as percentages.
-
- Look carefully at the image and identify distinct visual elements like:
- - Text regions, buttons, input fields, images
- - Distinct objects, shapes, or regions
- - Interactive elements like buttons or form controls
-
- For each element, respond in this EXACT format:
- ElementName: x=X%, y=Y%, width=W%, height=H% - Description
-
- Where:
- - x% = horizontal position from left edge (0-100%)
- - y% = vertical position from top edge (0-100%)
- - width% = element width as percentage of image width (0-100%)
- - height% = element height as percentage of image height (0-100%)
-
- Be precise about the actual visual boundaries of each element.
-
- Example: "Submit Button: x=25%, y=60%, width=15%, height=5% - Blue rectangular button with white text"
- """
- result = await self.analyze_image(image, prompt, 1500)
-
- # Parse the response to extract object information with coordinates
- objects = []
- bounding_boxes = []
- lines = result["text"].split('\n')
-
- for line in lines:
- line = line.strip()
- if line and ':' in line and ('x=' in line or 'width=' in line):
- try:
- # Extract object name and details
- parts = line.split(':', 1)
- if len(parts) == 2:
- object_name = parts[0].strip()
- details = parts[1].strip()
-
- # Extract coordinates using regex-like parsing
- coords = {}
- for param in ['x', 'y', 'width', 'height']:
- param_pattern = f"{param}="
- if param_pattern in details:
- start_idx = details.find(param_pattern) + len(param_pattern)
- end_idx = details.find('%', start_idx)
- if end_idx > start_idx:
- try:
- value = float(details[start_idx:end_idx])
- coords[param] = value
- except ValueError:
- continue
-
- # Extract description (after the coordinates)
- desc_start = details.find(' - ')
- description = details[desc_start + 3:] if desc_start != -1 else details
-
- objects.append({
- "label": object_name,
- "confidence": 1.0,
- "coordinates": coords,
- "description": description
- })
-
- # Add bounding box if we have coordinates
- if all(k in coords for k in ['x', 'y', 'width', 'height']):
- bounding_boxes.append({
- "label": object_name,
- "x_percent": coords['x'],
- "y_percent": coords['y'],
- "width_percent": coords['width'],
- "height_percent": coords['height']
- })
-
- except Exception:
- # Fallback for objects that don't match expected format
- objects.append({
- "label": line,
- "confidence": 1.0,
- "coordinates": {},
- "description": line
- })
-
- return {
- "objects": objects,
- "count": len(objects),
- "bounding_boxes": bounding_boxes,
- "metadata": result["metadata"]
- }
+ """
+ Generic object detection - implements the standard interface
+ """
+ if self.model_key == "yolov8":
+ return await self.run_yolo(image, confidence=confidence_threshold)
+ elif self.model_key == "florence-2":
+ return await self.run_florence2(image, task="<OD>")
+ elif self.model_key == "qwen-vl-chat":
+ # Qwen-VL-Chat can do object detection through prompting
+ prompt = self.get_task_prompt("detect_objects", confidence_threshold=confidence_threshold)
+ return await self.analyze_image(image, prompt)
+ else:
+ raise NotImplementedError(f"Object detection not supported for model {self.model_key}")

- async def get_object_coordinates(
- self,
+ # ==================== QWEN-VL-CHAT PROMPT-BASED IMPLEMENTATIONS ====================
+ # Like OpenAI, qwen-vl-chat implements all vision capabilities via prompting
+
+ async def describe_image(
+ self,
  image: Union[str, BinaryIO],
- object_name: str
+ detail_level: str = "medium"
  ) -> Dict[str, Any]:
- """Get coordinates of a specific object in the image"""
- prompt = f"""Locate the {object_name} in this image and return its center coordinates as [x, y] pixels.
-
- Look carefully at the image to find the exact element described. Be very precise about the location.
-
- Respond in this exact format:
- FOUND: YES/NO
- CENTER: [x, y]
- DESCRIPTION: [Brief description]
-
- If found, provide the pixel coordinates of the center point.
- If not found, explain why.
-
- Example:
- FOUND: YES
- CENTER: [640, 360]
- DESCRIPTION: Blue login button in the center-left area
- """
-
- result = await self.analyze_image(image, prompt, 300)
- response_text = result["text"]
-
- # Parse the structured response
- found = False
- center_coords = None
- description = ""
-
- lines = response_text.split('\n')
- for line in lines:
- line = line.strip()
- if line.startswith('FOUND:'):
- found = 'YES' in line.upper()
- elif line.startswith('CENTER:') and found:
- # Extract center coordinates [x, y]
- coords_text = line.replace('CENTER:', '').strip()
- try:
- # Remove brackets and split
- coords_text = coords_text.replace('[', '').replace(']', '')
- if ',' in coords_text:
- x_str, y_str = coords_text.split(',')
- x = int(float(x_str.strip()))
- y = int(float(y_str.strip()))
- center_coords = [x, y]
- except (ValueError, IndexError):
- pass
- elif line.startswith('DESCRIPTION:'):
- description = line.replace('DESCRIPTION:', '').strip()
-
- return {
- "found": found,
- "center_coordinates": center_coords,
- "confidence": 1.0 if found else 0.0,
- "description": description,
- "metadata": result["metadata"]
- }
+ """
+ Image description - implemented via prompting for qwen-vl-chat
+ """
+ if self.model_key == "qwen-vl-chat":
+ prompt = self.get_task_prompt("describe", detail_level=detail_level)
+ return await self.analyze_image(image, prompt)
+ else:
+ raise NotImplementedError(f"describe_image not supported for model {self.model_key}")
+
+ async def extract_text(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
+ """
+ Text extraction (OCR) - implemented via prompting for qwen-vl-chat
+ """
+ if self.model_key == "qwen-vl-chat":
+ prompt = self.get_task_prompt("extract_text")
+ return await self.analyze_image(image, prompt)
+ else:
+ raise NotImplementedError(f"extract_text not supported for model {self.model_key}")

  async def classify_image(
  self,
  image: Union[str, BinaryIO],
  categories: Optional[List[str]] = None
  ) -> Dict[str, Any]:
- """Classify image into categories"""
- if categories:
- category_list = ", ".join(categories)
- prompt = f"Classify this image into one of these categories: {category_list}. Respond with only the most appropriate category name."
+ """
+ Image classification - implemented via prompting for qwen-vl-chat
+ """
+ if self.model_key == "qwen-vl-chat":
+ prompt = self.get_task_prompt("classify", categories=categories)
+ return await self.analyze_image(image, prompt)
  else:
- prompt = "What category best describes this image? Provide a single category name."
-
- result = await self.analyze_image(image, prompt, 100)
- category = result["text"].strip()
-
- return {
- "category": category,
- "confidence": 1.0,
- "all_predictions": [{"category": category, "confidence": 1.0}],
- "metadata": result["metadata"]
- }
+ raise NotImplementedError(f"classify_image not supported for model {self.model_key}")

- async def compare_images(
- self,
- image1: Union[str, BinaryIO],
- image2: Union[str, BinaryIO]
+ async def extract_table_data(
+ self,
+ image: Union[str, BinaryIO],
+ table_format: str = "json",
+ preserve_formatting: bool = True
  ) -> Dict[str, Any]:
- """Compare two images for similarity"""
- # For now, analyze both images separately and compare descriptions
- result1 = await self.analyze_image(image1, "Describe this image in detail.")
- result2 = await self.analyze_image(image2, "Describe this image in detail.")
-
- # Use another CogVLM call to compare the descriptions
- comparison_prompt = f"Compare these two image descriptions and provide a similarity analysis:\n\nImage 1: {result1['text']}\n\nImage 2: {result2['text']}\n\nProvide: 1) A similarity score from 0.0 to 1.0, 2) Key differences, 3) Common elements."
-
- # Create a simple text prompt for comparison
- comparison_result = await self.analyze_image(image1, comparison_prompt)
-
- comparison_text = comparison_result["text"]
-
- return {
- "similarity_score": 0.5, # Would need better parsing to extract actual score
- "differences": comparison_text,
- "common_elements": comparison_text,
- "metadata": {
- "model": self.model_name,
- "comparison_method": "description_based"
- }
- }
+ """
+ Table data extraction - implemented via prompting for qwen-vl-chat
+ """
+ if self.model_key == "qwen-vl-chat":
+ prompt = self.get_task_prompt("extract_table_data", table_format=table_format, preserve_formatting=preserve_formatting)
+ return await self.analyze_image(image, prompt)
+ else:
+ raise NotImplementedError(f"extract_table_data not supported for model {self.model_key}")

- def get_supported_formats(self) -> List[str]:
- """Get list of supported image formats"""
- return ['jpg', 'jpeg', 'png', 'gif', 'webp']
+ async def get_object_coordinates(
+ self,
+ image: Union[str, BinaryIO],
+ object_name: str
+ ) -> Dict[str, Any]:
+ """
+ Get object coordinates - implemented via prompting for qwen-vl-chat
+ """
+ if self.model_key == "qwen-vl-chat":
+ prompt = self.get_task_prompt("get_coordinates", object_name=object_name)
+ return await self.analyze_image(image, prompt)
+ else:
+ raise NotImplementedError(f"get_object_coordinates not supported for model {self.model_key}")

- def get_max_image_size(self) -> Dict[str, int]:
- """Get maximum supported image dimensions"""
- return {
- "width": 2048,
- "height": 2048,
- "file_size_mb": 10
- }
+ # ==================== REPLICATE MODEL-SPECIFIC METHODS ====================
+ # The methods below are Replicate-specific specialized model implementations, not part of the standard interface

  # ==================== MODEL-SPECIFIC METHODS ====================
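The qwen-vl-chat branches above all follow the same pattern: look up a task prompt via VisionPromptMixin, then delegate to analyze_image. A hypothetical end-to-end call, assuming a Replicate API token is configured and that analyze_image returns a dict with a "text" key as the earlier hunks suggest:

    import asyncio
    from isa_model.inference.services.vision.replicate_vision_service import ReplicateVisionService

    async def main() -> None:
        service = ReplicateVisionService(provider_name="replicate", model_name="qwen-vl-chat")
        # describe_image -> get_task_prompt("describe", ...) -> analyze_image(...)
        described = await service.describe_image("https://example.com/sample.png", detail_level="high")
        ocr = await service.extract_text("https://example.com/sample.png")
        print(described["text"], ocr["text"])

    asyncio.run(main())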