isa_model-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. isa_model/__init__.py +5 -0
  2. isa_model/core/model_manager.py +143 -0
  3. isa_model/core/model_registry.py +115 -0
  4. isa_model/core/model_router.py +226 -0
  5. isa_model/core/model_storage.py +133 -0
  6. isa_model/core/model_version.py +0 -0
  7. isa_model/core/resource_manager.py +202 -0
  8. isa_model/core/storage/hf_storage.py +0 -0
  9. isa_model/core/storage/local_storage.py +0 -0
  10. isa_model/core/storage/minio_storage.py +0 -0
  11. isa_model/deployment/mlflow_gateway/__init__.py +8 -0
  12. isa_model/deployment/mlflow_gateway/start_gateway.py +65 -0
  13. isa_model/deployment/unified_multimodal_client.py +341 -0
  14. isa_model/inference/__init__.py +11 -0
  15. isa_model/inference/adapter/triton_adapter.py +453 -0
  16. isa_model/inference/adapter/unified_api.py +248 -0
  17. isa_model/inference/ai_factory.py +354 -0
  18. isa_model/inference/backends/Pytorch/bge_embed_backend.py +188 -0
  19. isa_model/inference/backends/Pytorch/gemma_backend.py +167 -0
  20. isa_model/inference/backends/Pytorch/llama_backend.py +166 -0
  21. isa_model/inference/backends/Pytorch/whisper_backend.py +194 -0
  22. isa_model/inference/backends/__init__.py +53 -0
  23. isa_model/inference/backends/base_backend_client.py +26 -0
  24. isa_model/inference/backends/container_services.py +104 -0
  25. isa_model/inference/backends/local_services.py +72 -0
  26. isa_model/inference/backends/openai_client.py +130 -0
  27. isa_model/inference/backends/replicate_client.py +197 -0
  28. isa_model/inference/backends/third_party_services.py +239 -0
  29. isa_model/inference/backends/triton_client.py +97 -0
  30. isa_model/inference/base.py +46 -0
  31. isa_model/inference/client_sdk/__init__.py +0 -0
  32. isa_model/inference/client_sdk/client.py +134 -0
  33. isa_model/inference/client_sdk/client_data_std.py +34 -0
  34. isa_model/inference/client_sdk/client_sdk_schema.py +16 -0
  35. isa_model/inference/client_sdk/exceptions.py +0 -0
  36. isa_model/inference/engine/triton/model_repository/bge/1/model.py +174 -0
  37. isa_model/inference/engine/triton/model_repository/gemma/1/model.py +250 -0
  38. isa_model/inference/engine/triton/model_repository/llama/1/model.py +76 -0
  39. isa_model/inference/engine/triton/model_repository/whisper/1/model.py +195 -0
  40. isa_model/inference/providers/__init__.py +19 -0
  41. isa_model/inference/providers/base_provider.py +30 -0
  42. isa_model/inference/providers/model_cache_manager.py +341 -0
  43. isa_model/inference/providers/ollama_provider.py +73 -0
  44. isa_model/inference/providers/openai_provider.py +87 -0
  45. isa_model/inference/providers/replicate_provider.py +94 -0
  46. isa_model/inference/providers/triton_provider.py +439 -0
  47. isa_model/inference/providers/vllm_provider.py +0 -0
  48. isa_model/inference/providers/yyds_provider.py +83 -0
  49. isa_model/inference/services/__init__.py +14 -0
  50. isa_model/inference/services/audio/fish_speech/handler.py +215 -0
  51. isa_model/inference/services/audio/runpod_tts_fish_service.py +212 -0
  52. isa_model/inference/services/audio/triton_speech_service.py +138 -0
  53. isa_model/inference/services/audio/whisper_service.py +186 -0
  54. isa_model/inference/services/audio/yyds_audio_service.py +71 -0
  55. isa_model/inference/services/base_service.py +106 -0
  56. isa_model/inference/services/base_tts_service.py +66 -0
  57. isa_model/inference/services/embedding/bge_service.py +183 -0
  58. isa_model/inference/services/embedding/ollama_embed_service.py +85 -0
  59. isa_model/inference/services/embedding/ollama_rerank_service.py +118 -0
  60. isa_model/inference/services/embedding/onnx_rerank_service.py +73 -0
  61. isa_model/inference/services/llm/__init__.py +16 -0
  62. isa_model/inference/services/llm/gemma_service.py +143 -0
  63. isa_model/inference/services/llm/llama_service.py +143 -0
  64. isa_model/inference/services/llm/ollama_llm_service.py +108 -0
  65. isa_model/inference/services/llm/openai_llm_service.py +129 -0
  66. isa_model/inference/services/llm/replicate_llm_service.py +179 -0
  67. isa_model/inference/services/llm/triton_llm_service.py +230 -0
  68. isa_model/inference/services/others/table_transformer_service.py +61 -0
  69. isa_model/inference/services/vision/__init__.py +12 -0
  70. isa_model/inference/services/vision/helpers/image_utils.py +58 -0
  71. isa_model/inference/services/vision/helpers/text_splitter.py +46 -0
  72. isa_model/inference/services/vision/ollama_vision_service.py +60 -0
  73. isa_model/inference/services/vision/replicate_vision_service.py +241 -0
  74. isa_model/inference/services/vision/triton_vision_service.py +199 -0
  75. isa_model/inference/services/vision/yyds_vision_service.py +80 -0
  76. isa_model/inference/utils/conversion/bge_rerank_convert.py +73 -0
  77. isa_model/inference/utils/conversion/onnx_converter.py +0 -0
  78. isa_model/inference/utils/conversion/torch_converter.py +0 -0
  79. isa_model/scripts/inference_tracker.py +283 -0
  80. isa_model/scripts/mlflow_manager.py +379 -0
  81. isa_model/scripts/model_registry.py +465 -0
  82. isa_model/scripts/start_mlflow.py +95 -0
  83. isa_model/scripts/training_tracker.py +257 -0
  84. isa_model/training/engine/llama_factory/__init__.py +39 -0
  85. isa_model/training/engine/llama_factory/config.py +115 -0
  86. isa_model/training/engine/llama_factory/data_adapter.py +284 -0
  87. isa_model/training/engine/llama_factory/examples/__init__.py +6 -0
  88. isa_model/training/engine/llama_factory/examples/finetune_with_tracking.py +185 -0
  89. isa_model/training/engine/llama_factory/examples/rlhf_with_tracking.py +163 -0
  90. isa_model/training/engine/llama_factory/factory.py +331 -0
  91. isa_model/training/engine/llama_factory/rl.py +254 -0
  92. isa_model/training/engine/llama_factory/trainer.py +171 -0
  93. isa_model/training/image_model/configs/create_config.py +37 -0
  94. isa_model/training/image_model/configs/create_flux_config.py +26 -0
  95. isa_model/training/image_model/configs/create_lora_config.py +21 -0
  96. isa_model/training/image_model/prepare_massed_compute.py +97 -0
  97. isa_model/training/image_model/prepare_upload.py +17 -0
  98. isa_model/training/image_model/raw_data/create_captions.py +16 -0
  99. isa_model/training/image_model/raw_data/create_lora_captions.py +20 -0
  100. isa_model/training/image_model/raw_data/pre_processing.py +200 -0
  101. isa_model/training/image_model/train/train.py +42 -0
  102. isa_model/training/image_model/train/train_flux.py +41 -0
  103. isa_model/training/image_model/train/train_lora.py +57 -0
  104. isa_model/training/image_model/train_main.py +25 -0
  105. isa_model/training/llm_model/annotation/annotation_schema.py +47 -0
  106. isa_model/training/llm_model/annotation/processors/annotation_processor.py +126 -0
  107. isa_model/training/llm_model/annotation/storage/dataset_manager.py +131 -0
  108. isa_model/training/llm_model/annotation/storage/dataset_schema.py +44 -0
  109. isa_model/training/llm_model/annotation/tests/test_annotation_flow.py +109 -0
  110. isa_model/training/llm_model/annotation/tests/test_minio copy.py +113 -0
  111. isa_model/training/llm_model/annotation/tests/test_minio_upload.py +43 -0
  112. isa_model/training/llm_model/annotation/views/annotation_controller.py +158 -0
  113. isa_model-0.1.0.dist-info/METADATA +116 -0
  114. isa_model-0.1.0.dist-info/RECORD +117 -0
  115. isa_model-0.1.0.dist-info/WHEEL +5 -0
  116. isa_model-0.1.0.dist-info/licenses/LICENSE +21 -0
  117. isa_model-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,46 @@
+ from typing import Dict, List, Optional
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from uuid import uuid4
+
+ class TextChunkHelper:
+     """Text splitting and chunking helper"""
+
+     def __init__(self,
+                  chunk_size: int = 512,
+                  chunk_overlap: int = 50,
+                  min_chunk_size: int = 50):
+         self.text_splitter = RecursiveCharacterTextSplitter(
+             chunk_size=chunk_size,
+             chunk_overlap=chunk_overlap,
+             length_function=len,
+             separators=["\n\n", "\n", ". ", ", ", " "]
+         )
+         self.min_chunk_size = min_chunk_size
+
+     def create_chunks(self, text: str, metadata: Optional[Dict] = None) -> List[Dict]:
+         """Create text chunks with metadata"""
+         if not text or not isinstance(text, str):
+             raise ValueError("Text must be a non-empty string")
+
+         chunks = self.text_splitter.split_text(text)
+         valid_chunks = [
+             chunk for chunk in chunks
+             if len(chunk) >= self.min_chunk_size
+         ]
+
+         results = []
+         for i, chunk in enumerate(valid_chunks):
+             chunk_data = {
+                 "chunk_id": f"chunk_{uuid4().hex[:8]}",
+                 "content": chunk,
+                 "token_count": len(chunk),
+                 "metadata": {
+                     **(metadata or {}),
+                     "position": i,
+                     "start_idx": text.find(chunk),
+                     "end_idx": text.find(chunk) + len(chunk)
+                 }
+             }
+             results.append(chunk_data)
+
+         return results
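For orientation, a minimal usage sketch of the helper above. It assumes langchain is installed; the sample text and metadata are invented for illustration, and note that "token_count" is simply the chunk's character length, not a real tokenizer count:

    from isa_model.inference.services.vision.helpers.text_splitter import TextChunkHelper

    helper = TextChunkHelper(chunk_size=128, chunk_overlap=16, min_chunk_size=20)
    chunks = helper.create_chunks(
        "Long-form document text goes here. " * 40,
        metadata={"source": "example.txt"},  # hypothetical metadata
    )
    # Each chunk carries an id, content, a length-based token estimate, and position metadata
    print(chunks[0]["chunk_id"], chunks[0]["metadata"]["position"], chunks[0]["token_count"])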
@@ -0,0 +1,60 @@
+ import os
+ import json
+ import base64
+ import ollama
+ from typing import Dict, Any, Union
+ from tenacity import retry, stop_after_attempt, wait_exponential
+ from ...base_service import BaseService
+ from ...base_provider import BaseProvider
+ from app.config.config_manager import config_manager
+
+ logger = config_manager.get_logger(__name__)
+
+ class OllamaVisionService(BaseService):
+     """Vision model service wrapper for Ollama using base64-encoded images"""
+
+     def __init__(self, provider: 'BaseProvider', model_name: str = 'gemma3:4b'):
+         super().__init__(provider, model_name)
+         self.max_tokens = self.config.get('max_tokens', 1000)
+         self.temperature = self.config.get('temperature', 0.7)
+
+     @retry(
+         stop=stop_after_attempt(3),
+         wait=wait_exponential(multiplier=1, min=4, max=10),
+         reraise=True
+     )
+     async def analyze_image(self, image_data: Union[bytes, str], query: str) -> str:
+         """Analyze an image and return the result.
+
+         Args:
+             image_data: Image data, either raw bytes or a file path string
+             query: Query text
+
+         Returns:
+             str: Analysis result
+         """
+         try:
+             # If a file path was given, read the file contents
+             if isinstance(image_data, str):
+                 with open(image_data, 'rb') as f:
+                     image_data = f.read()
+
+             # Encode as base64
+             image_base64 = base64.b64encode(image_data).decode('utf-8')
+
+             # Call the ollama library directly
+             response = ollama.chat(
+                 model=self.model_name,
+                 messages=[{
+                     'role': 'user',
+                     'content': query,
+                     'images': [image_base64]
+                 }]
+             )
+
+             return response['message']['content']
+
+         except Exception as e:
+             logger.error(f"Error in image analysis: {e}")
+             raise
+
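A hedged usage sketch of the service above, assuming a local Ollama daemon with gemma3:4b pulled. The OllamaProvider default constructor is an assumption, since that file's diff is not shown here:

    import asyncio
    from isa_model.inference.providers.ollama_provider import OllamaProvider
    from isa_model.inference.services.vision.ollama_vision_service import OllamaVisionService

    async def main():
        provider = OllamaProvider()  # assumed default constructor
        service = OllamaVisionService(provider, model_name="gemma3:4b")
        # image_data may be a file path or raw bytes; the query is free-form text
        print(await service.analyze_image("photo.jpg", "What is in this picture?"))

    asyncio.run(main())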
@@ -0,0 +1,241 @@
+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*-
+
+ """
+ Replicate Vision service
+ Interacts with the Replicate API; supports image generation and image analysis
+ """
+
+ import os
+ import time
+ import uuid
+ import logging
+ from typing import Dict, Any, List, Optional, Union
+ import asyncio
+ import aiohttp
+ import replicate
+ import requests
+ from PIL import Image
+ from io import BytesIO
+
+ from isa_model.inference.services.base_service import BaseService
+ from isa_model.inference.providers.base_provider import BaseProvider
+ from isa_model.inference.base import ModelType
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ class ReplicateVisionService(BaseService):
+     """
+     Replicate Vision service for image generation and analysis
+     """
+
+     def __init__(self, provider: BaseProvider, model_name: str):
+         """
+         Initialize the Replicate Vision service.
+
+         Args:
+             provider: Replicate provider instance
+             model_name: Replicate model ID (format: 'username/model_name:version')
+         """
+         super().__init__(provider, model_name)
+         self.api_token = provider.config.get("api_token", os.environ.get("REPLICATE_API_TOKEN"))
+         self.client = replicate.Client(api_token=self.api_token)
+         self.model_type = ModelType.VISION
+
+         # Optional default settings
+         self.guidance_scale = provider.config.get("guidance_scale", 7.5)
+         self.num_inference_steps = provider.config.get("num_inference_steps", 30)
+
+         # Directory for generated images
+         self.output_dir = "generated_images"
+         os.makedirs(self.output_dir, exist_ok=True)
+
+     async def load(self) -> None:
+         """
+         Load the model (for Replicate this just validates the API token).
+         """
+         if not self.api_token:
+             raise ValueError("Missing Replicate API token; set the REPLICATE_API_TOKEN environment variable")
+
+         # Validate the token
+         try:
+             self.client.api_token = self.api_token
+             logger.info(f"Replicate Vision service initialized with model: {self.model_name}")
+         except Exception as e:
+             logger.error(f"Replicate initialization failed: {e}")
+             raise
+
+     async def generate_image(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
+         """
+         Generate an image with a Replicate model.
+
+         Args:
+             input_data: Dictionary of generation parameters
+
+         Returns:
+             Result dictionary containing the generated image URLs
+         """
+         try:
+             # Apply default parameters
+             if "guidance_scale" not in input_data and self.guidance_scale:
+                 input_data["guidance_scale"] = self.guidance_scale
+
+             if "num_inference_steps" not in input_data and self.num_inference_steps:
+                 input_data["num_inference_steps"] = self.num_inference_steps
+
+             # Run the model (synchronous API call)
+             logger.info(f"Generating image with model {self.model_name}")
+
+             # Run it asynchronously via an executor
+             loop = asyncio.get_event_loop()
+             output = await loop.run_in_executor(
+                 None,
+                 lambda: replicate.run(self.model_name, input=input_data)
+             )
+
+             # Normalize the result to a standard format,
+             # handling Replicate object outputs
+             if hasattr(output, 'url'):
+                 urls = [output.url]
+             elif isinstance(output, list) and all(hasattr(item, 'url') for item in output if item is not None):
+                 urls = [item.url for item in output if item is not None]
+             else:
+                 # Fall back to outputs that are already URL strings
+                 urls = output if isinstance(output, list) else [output]
+
+             result = {
+                 "urls": urls,
+                 "metadata": {
+                     "model": self.model_name,
+                     "input": input_data
+                 }
+             }
+
+             logger.info(f"Image generation finished: {result['urls']}")
+             return result
+
+         except Exception as e:
+             logger.error(f"Image generation failed: {e}")
+             raise
+
+     async def analyze_image(self, image_path: str, prompt: str) -> Dict[str, Any]:
+         """
+         Analyze an image (for vision-analysis models).
+
+         Args:
+             image_path: Image path or URL
+             prompt: Analysis prompt
+
+         Returns:
+             Analysis result dictionary
+         """
+         try:
+             # Build the input payload
+             input_data = {
+                 "image": self._get_image_url(image_path),
+                 "prompt": prompt
+             }
+
+             # Run the model
+             logger.info(f"Analyzing image with model {self.model_name}")
+
+             # Run it asynchronously via an executor
+             loop = asyncio.get_event_loop()
+             output = await loop.run_in_executor(
+                 None,
+                 lambda: replicate.run(self.model_name, input=input_data)
+             )
+
+             result = {
+                 "text": output,
+                 "metadata": {
+                     "model": self.model_name,
+                     "input": input_data
+                 }
+             }
+
+             logger.info("Image analysis finished")
+             return result
+
+         except Exception as e:
+             logger.error(f"Image analysis failed: {e}")
+             raise
+
+     async def generate_and_save(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
+         """
+         Generate images and save them locally.
+
+         Args:
+             input_data: Dictionary of generation parameters
+
+         Returns:
+             Result dictionary containing the image URLs and saved paths
+         """
+         # Generate the images first
+         result = await self.generate_image(input_data)
+
+         # Then download and save them
+         saved_paths = []
+         for i, url in enumerate(result["urls"]):
+             # Build a unique file name
+             timestamp = int(time.time())
+             file_name = f"{self.output_dir}/{timestamp}_{uuid.uuid4().hex[:8]}_{i+1}.png"
+
+             # Download the image asynchronously
+             try:
+                 await self._download_image(url, file_name)
+                 saved_paths.append(file_name)
+                 logger.info(f"Image saved to: {file_name}")
+             except Exception as e:
+                 logger.error(f"Failed to save image: {e}")
+
+         # Attach the saved paths to the result
+         result["saved_paths"] = saved_paths
+         return result
+
+     async def _download_image(self, url: str, save_path: str) -> None:
+         """
+         Download an image asynchronously and save it.
+
+         Args:
+             url: Image URL
+             save_path: Destination path
+         """
+         try:
+             async with aiohttp.ClientSession() as session:
+                 async with session.get(url) as response:
+                     if response.status == 200:
+                         content = await response.read()
+                         img = Image.open(BytesIO(content))
+                         img.save(save_path)
+                     else:
+                         logger.error(f"Image download failed: HTTP {response.status}")
+                         raise Exception(f"Image download failed: HTTP {response.status}")
+         except Exception as e:
+             logger.error(f"Error while downloading image: {e}")
+             raise
+
+     def _get_image_url(self, image_path: str) -> str:
+         """
+         Resolve an image URL (a local path would need to be uploaded to temporary storage).
+
+         Args:
+             image_path: Image path or URL
+
+         Returns:
+             Image URL
+         """
+         # If it is already a URL, return it unchanged
+         if image_path.startswith(("http://", "https://")):
+             return image_path
+
+         # Otherwise this is a local file that would need uploading.
+         # NOTE: upload logic could be implemented here; for simplicity only URLs are supported.
+         raise NotImplementedError("Only image URLs are currently supported; local file upload is not implemented")
+
+     async def unload(self) -> None:
+         """Unload the model (a no-op for the Replicate API)."""
+         logger.info(f"Unloading Replicate Vision service: {self.model_name}")
+         # Nothing to clean up
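A sketch of the generate-and-save flow above, assuming REPLICATE_API_TOKEN is set in the environment. The ReplicateProvider default constructor is assumed, and "owner/model:version" is a placeholder model pin, not a real model ID:

    import asyncio
    from isa_model.inference.providers.replicate_provider import ReplicateProvider
    from isa_model.inference.services.vision.replicate_vision_service import ReplicateVisionService

    async def main():
        provider = ReplicateProvider()  # assumed default constructor; reads REPLICATE_API_TOKEN
        service = ReplicateVisionService(provider, "owner/model:version")  # placeholder model pin
        await service.load()
        result = await service.generate_and_save({"prompt": "a lighthouse at dawn"})
        print(result["urls"], result["saved_paths"])  # remote URLs plus local PNG paths

    asyncio.run(main())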
@@ -0,0 +1,199 @@
+ import json
+ import logging
+ import asyncio
+ import base64
+ import io
+ from PIL import Image
+ import numpy as np
+ from typing import Dict, List, Any, AsyncGenerator, Optional, Union
+
+ from isa_model.inference.services.base_service import BaseService
+ from isa_model.inference.providers.triton_provider import TritonProvider
+
+ logger = logging.getLogger(__name__)
+
+
+ class TritonVisionService(BaseService):
+     """
+     Vision service that uses Triton Inference Server to run inference.
+     """
+
+     def __init__(self, provider: TritonProvider, model_name: str):
+         """
+         Initialize the Triton Vision service.
+
+         Args:
+             provider: The Triton provider
+             model_name: Name of the model in Triton (e.g., "Gemma3-4B")
+         """
+         super().__init__(provider, model_name)
+         self.client = None
+         self.token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
+         self.last_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
+
+     async def _initialize_client(self):
+         """Initialize the Triton client"""
+         if self.client is None:
+             self.client = self.provider.create_client()
+
+             # Check if model is ready
+             if not self.provider.is_model_ready(self.model_name):
+                 logger.error(f"Model {self.model_name} is not ready on Triton server")
+                 raise RuntimeError(f"Model {self.model_name} is not ready on Triton server")
+
+             logger.info(f"Initialized Triton client for vision model: {self.model_name}")
+
+     async def process_image(self,
+                             image: Union[str, Image.Image, bytes],
+                             prompt: Optional[str] = None,
+                             params: Optional[Dict[str, Any]] = None) -> str:
+         """
+         Process an image and generate a description.
+
+         Args:
+             image: Input image (PIL Image, base64 string, or bytes)
+             prompt: Optional text prompt to guide the model
+             params: Generation parameters
+
+         Returns:
+             Generated text description
+         """
+         await self._initialize_client()
+
+         try:
+             import tritonclient.http as httpclient
+
+             # Process the image to get numpy array
+             image_array = self._prepare_image_input(image)
+
+             # Create input tensors for the image
+             image_input = httpclient.InferInput("IMAGE", image_array.shape, "UINT8")
+             image_input.set_data_from_numpy(image_array)
+             inputs = [image_input]
+
+             # Add text prompt if provided
+             if prompt:
+                 text_data = np.array([prompt], dtype=np.object_)
+                 text_input = httpclient.InferInput("TEXT", text_data.shape, "BYTES")
+                 text_input.set_data_from_numpy(text_data)
+                 inputs.append(text_input)
+
+             # Add parameters if provided
+             if params:
+                 default_params = {
+                     "max_new_tokens": 512,
+                     "temperature": 0.7,
+                     "top_p": 0.9,
+                     "do_sample": True
+                 }
+                 generation_params = {**default_params, **params}
+
+                 param_json = json.dumps(generation_params)
+                 param_data = np.array([param_json], dtype=np.object_)
+                 param_input = httpclient.InferInput("PARAMETERS", param_data.shape, "BYTES")
+                 param_input.set_data_from_numpy(param_data)
+                 inputs.append(param_input)
+
+             # Create output tensor
+             outputs = [httpclient.InferRequestedOutput("TEXT")]
+
+             # Send the request
+             response = await asyncio.to_thread(
+                 self.client.infer,
+                 self.model_name,
+                 inputs,
+                 outputs=outputs
+             )
+
+             # Process the response
+             output = response.as_numpy("TEXT")
+             response_text = output[0].decode('utf-8')
+
+             # Update token usage (estimated since we don't have actual token counts)
+             prompt_tokens = len(prompt) // 4 if prompt else 100  # Rough estimate
+             completion_tokens = len(response_text) // 4  # Rough estimate
+             total_tokens = prompt_tokens + completion_tokens
+
+             self.last_token_usage = {
+                 "prompt_tokens": prompt_tokens,
+                 "completion_tokens": completion_tokens,
+                 "total_tokens": total_tokens
+             }
+
+             # Update total token usage
+             self.token_usage["prompt_tokens"] += prompt_tokens
+             self.token_usage["completion_tokens"] += completion_tokens
+             self.token_usage["total_tokens"] += total_tokens
+
+             return response_text
+
+         except Exception as e:
+             logger.error(f"Error during Triton vision inference: {str(e)}")
+             raise
+
+     def get_token_usage(self) -> Dict[str, int]:
+         """
+         Get total token usage statistics.
+
+         Returns:
+             Dictionary with token usage statistics
+         """
+         return self.token_usage
+
+     def get_last_token_usage(self) -> Dict[str, int]:
+         """
+         Get token usage from last request.
+
+         Returns:
+             Dictionary with token usage statistics from last request
+         """
+         return self.last_token_usage
+
+     def _prepare_image_input(self, image: Union[str, Image.Image, bytes]) -> np.ndarray:
+         """
+         Process different types of image inputs into a numpy array.
+
+         Args:
+             image: Image input (PIL Image, base64 string, or bytes)
+
+         Returns:
+             Numpy array of the image
+         """
+         # Convert to PIL image first
+         pil_image = self._to_pil_image(image)
+
+         # Convert PIL image to numpy array
+         return np.array(pil_image)
+
+     def _to_pil_image(self, image: Union[str, Image.Image, bytes]) -> Image.Image:
+         """
+         Convert different image inputs to PIL Image.
+
+         Args:
+             image: Image input (PIL Image, base64 string, or bytes)
+
+         Returns:
+             PIL Image
+         """
+         if isinstance(image, Image.Image):
+             return image
+
+         elif isinstance(image, str):
+             # Check if it's a base64 string
+             if image.startswith("data:image"):
+                 # Extract the base64 part
+                 image = image.split(",")[1]
+
+             try:
+                 # Try to decode as base64
+                 image_bytes = base64.b64decode(image)
+                 return Image.open(io.BytesIO(image_bytes))
+             except Exception:
+                 # Try to open as a file path
+                 return Image.open(image)
+
+         elif isinstance(image, bytes):
+             return Image.open(io.BytesIO(image))
+
+         else:
+             raise ValueError(f"Unsupported image type: {type(image)}")
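A hedged sketch of driving the Triton service above. It assumes a reachable Triton server with the model loaded; the TritonProvider default constructor is a guess, since its diff is not shown in this section:

    import asyncio
    from PIL import Image
    from isa_model.inference.providers.triton_provider import TritonProvider
    from isa_model.inference.services.vision.triton_vision_service import TritonVisionService

    async def main():
        provider = TritonProvider()  # assumed default constructor pointing at a local server
        service = TritonVisionService(provider, "Gemma3-4B")
        text = await service.process_image(
            Image.open("photo.jpg"),         # also accepts bytes or a base64 string
            prompt="Describe this image.",
            params={"max_new_tokens": 256},  # merged over the service defaults
        )
        print(text, service.get_last_token_usage())  # usage numbers are length-based estimates

    asyncio.run(main())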
@@ -0,0 +1,80 @@
+ from typing import Dict, Any, Union
+ from openai import AsyncOpenAI
+ from tenacity import retry, stop_after_attempt, wait_exponential
+ from ...base_service import BaseService
+ from ...base_provider import BaseProvider
+ from .helpers.image_utils import compress_image, encode_image_to_base64
+ from app.config.config_manager import config_manager
+
+ logger = config_manager.get_logger(__name__)
+
+ class YYDSVisionService(BaseService):
+     """Vision model service wrapper for YYDS"""
+
+     def __init__(self, provider: 'BaseProvider', model_name: str):
+         super().__init__(provider, model_name)
+         # Initialize the AsyncOpenAI client
+         self._client = AsyncOpenAI(
+             api_key=self.config.get('api_key'),
+             base_url=self.config.get('base_url')
+         )
+         self.max_tokens = self.config.get('max_tokens', 1000)
+         self.temperature = self.config.get('temperature', 0.7)
+
+     @property
+     def client(self) -> AsyncOpenAI:
+         """Return the underlying OpenAI client"""
+         return self._client
+
+     @retry(
+         stop=stop_after_attempt(3),
+         wait=wait_exponential(multiplier=1, min=4, max=10),
+         reraise=True
+     )
+     async def analyze_image(self, image_data: Union[bytes, str], query: str) -> str:
+         """Analyze an image and return the result.
+
+         Args:
+             image_data: Image data, either raw bytes or an already-encoded base64 string
+             query: Query text
+
+         Returns:
+             str: Analysis result
+         """
+         try:
+             # Prepare the image data
+             if isinstance(image_data, bytes):
+                 # Compress and encode the image
+                 compressed_image = compress_image(image_data)
+                 image_b64 = encode_image_to_base64(compressed_image)
+             else:
+                 image_b64 = image_data
+
+             # Strip a data-URL base64 prefix if present
+             if 'base64,' in image_b64:
+                 image_b64 = image_b64.split('base64,')[1]
+
+             # Issue the request through the AsyncOpenAI client
+             response = await self._client.chat.completions.create(
+                 model=self.model_name,
+                 messages=[{
+                     "role": "user",
+                     "content": [
+                         {"type": "text", "text": query},
+                         {
+                             "type": "image_url",
+                             "image_url": {
+                                 "url": f"data:image/jpeg;base64,{image_b64}"
+                             }
+                         }
+                     ]
+                 }],
+                 max_tokens=self.max_tokens,
+                 temperature=self.temperature
+             )
+
+             return response.choices[0].message.content
+
+         except Exception as e:
+             logger.error(f"Error in image analysis: {e}")
+             raise
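Finally, a sketch of the YYDS service, assuming a provider whose config supplies api_key and base_url (the real wiring presumably goes through the package's ai_factory module, whose API is not shown in this diff). The stub provider and model name below are purely illustrative:

    import asyncio
    from isa_model.inference.services.vision.yyds_vision_service import YYDSVisionService

    class StubProvider:  # illustrative stand-in; not part of the package
        config = {"api_key": "sk-...", "base_url": "https://api.example.com/v1"}

    async def main():
        service = YYDSVisionService(StubProvider(), model_name="gpt-4o")  # model name assumed
        with open("photo.jpg", "rb") as f:
            print(await service.analyze_image(f.read(), "What is in this picture?"))

    asyncio.run(main())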