isa-model 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/__init__.py +1 -1
- isa_model/core/model_registry.py +273 -46
- isa_model/deployment/gpu_fp16_ds8/models/deepseek_r1/1/model.py +120 -0
- isa_model/deployment/gpu_fp16_ds8/scripts/download_model.py +18 -0
- isa_model/deployment/gpu_int8_ds8/app/server.py +66 -0
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +43 -0
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +35 -0
- isa_model/eval/__init__.py +56 -0
- isa_model/eval/benchmarks.py +469 -0
- isa_model/eval/factory.py +582 -0
- isa_model/eval/metrics.py +628 -0
- isa_model/inference/ai_factory.py +98 -93
- isa_model/inference/providers/openai_provider.py +21 -7
- isa_model/inference/providers/replicate_provider.py +18 -5
- isa_model/inference/providers/triton_provider.py +1 -1
- isa_model/inference/services/audio/base_stt_service.py +91 -0
- isa_model/inference/services/audio/base_tts_service.py +136 -0
- isa_model/inference/services/audio/{yyds_audio_service.py → openai_tts_service.py} +4 -4
- isa_model/inference/services/embedding/ollama_embed_service.py +48 -36
- isa_model/inference/services/llm/__init__.py +0 -4
- isa_model/inference/services/llm/base_llm_service.py +134 -0
- isa_model/inference/services/llm/ollama_llm_service.py +1 -10
- isa_model/inference/services/llm/openai_llm_service.py +70 -61
- isa_model/inference/services/vision/__init__.py +1 -1
- isa_model/inference/services/vision/ollama_vision_service.py +4 -4
- isa_model/inference/services/vision/{yyds_vision_service.py → openai_vision_service.py} +5 -5
- isa_model/inference/services/vision/replicate_image_gen_service.py +185 -0
- isa_model/training/__init__.py +44 -0
- isa_model/training/factory.py +393 -0
- isa_model-0.2.0.dist-info/METADATA +327 -0
- {isa_model-0.1.0.dist-info → isa_model-0.2.0.dist-info}/RECORD +35 -60
- isa_model/deployment/mlflow_gateway/__init__.py +0 -8
- isa_model/deployment/mlflow_gateway/start_gateway.py +0 -65
- isa_model/deployment/unified_multimodal_client.py +0 -341
- isa_model/inference/adapter/triton_adapter.py +0 -453
- isa_model/inference/backends/Pytorch/bge_embed_backend.py +0 -188
- isa_model/inference/backends/Pytorch/gemma_backend.py +0 -167
- isa_model/inference/backends/Pytorch/llama_backend.py +0 -166
- isa_model/inference/backends/Pytorch/whisper_backend.py +0 -194
- isa_model/inference/backends/__init__.py +0 -53
- isa_model/inference/backends/base_backend_client.py +0 -26
- isa_model/inference/backends/container_services.py +0 -104
- isa_model/inference/backends/local_services.py +0 -72
- isa_model/inference/backends/openai_client.py +0 -130
- isa_model/inference/backends/replicate_client.py +0 -197
- isa_model/inference/backends/third_party_services.py +0 -239
- isa_model/inference/backends/triton_client.py +0 -97
- isa_model/inference/client_sdk/client.py +0 -134
- isa_model/inference/client_sdk/client_data_std.py +0 -34
- isa_model/inference/client_sdk/client_sdk_schema.py +0 -16
- isa_model/inference/client_sdk/exceptions.py +0 -0
- isa_model/inference/engine/triton/model_repository/bge/1/model.py +0 -174
- isa_model/inference/engine/triton/model_repository/gemma/1/model.py +0 -250
- isa_model/inference/engine/triton/model_repository/llama/1/model.py +0 -76
- isa_model/inference/engine/triton/model_repository/whisper/1/model.py +0 -195
- isa_model/inference/providers/vllm_provider.py +0 -0
- isa_model/inference/providers/yyds_provider.py +0 -83
- isa_model/inference/services/audio/fish_speech/handler.py +0 -215
- isa_model/inference/services/audio/runpod_tts_fish_service.py +0 -212
- isa_model/inference/services/audio/triton_speech_service.py +0 -138
- isa_model/inference/services/audio/whisper_service.py +0 -186
- isa_model/inference/services/base_tts_service.py +0 -66
- isa_model/inference/services/embedding/bge_service.py +0 -183
- isa_model/inference/services/embedding/ollama_rerank_service.py +0 -118
- isa_model/inference/services/embedding/onnx_rerank_service.py +0 -73
- isa_model/inference/services/llm/gemma_service.py +0 -143
- isa_model/inference/services/llm/llama_service.py +0 -143
- isa_model/inference/services/llm/replicate_llm_service.py +0 -179
- isa_model/inference/services/llm/triton_llm_service.py +0 -230
- isa_model/inference/services/vision/replicate_vision_service.py +0 -241
- isa_model/inference/services/vision/triton_vision_service.py +0 -199
- isa_model-0.1.0.dist-info/METADATA +0 -116
- /isa_model/inference/{client_sdk/__init__.py → services/embedding/openai_embed_service.py} +0 -0
- {isa_model-0.1.0.dist-info → isa_model-0.2.0.dist-info}/WHEEL +0 -0
- {isa_model-0.1.0.dist-info → isa_model-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {isa_model-0.1.0.dist-info → isa_model-0.2.0.dist-info}/top_level.txt +0 -0
isa_model/inference/services/vision/replicate_vision_service.py (removed)

@@ -1,241 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-"""
-Replicate Vision service
-Interacts with the Replicate API; supports image generation and image analysis
-"""
-
-import os
-import time
-import uuid
-import logging
-from typing import Dict, Any, List, Optional, Union
-import asyncio
-import aiohttp
-import replicate
-import requests
-from PIL import Image
-from io import BytesIO
-
-from isa_model.inference.services.base_service import BaseService
-from isa_model.inference.providers.base_provider import BaseProvider
-from isa_model.inference.base import ModelType
-
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
-class ReplicateVisionService(BaseService):
-    """
-    Replicate Vision service for image generation and analysis
-    """
-
-    def __init__(self, provider: BaseProvider, model_name: str):
-        """
-        Initialize the Replicate Vision service
-
-        Args:
-            provider: Replicate provider instance
-            model_name: Replicate model ID (format: 'username/model_name:version')
-        """
-        super().__init__(provider, model_name)
-        self.api_token = provider.config.get("api_token", os.environ.get("REPLICATE_API_TOKEN"))
-        self.client = replicate.Client(api_token=self.api_token)
-        self.model_type = ModelType.VISION
-
-        # Optional default configuration
-        self.guidance_scale = provider.config.get("guidance_scale", 7.5)
-        self.num_inference_steps = provider.config.get("num_inference_steps", 30)
-
-        # Directory where generated images are stored
-        self.output_dir = "generated_images"
-        os.makedirs(self.output_dir, exist_ok=True)
-
-    async def load(self) -> None:
-        """
-        Load the model (for Replicate this only validates the API token)
-        """
-        if not self.api_token:
-            raise ValueError("Missing Replicate API token; set the REPLICATE_API_TOKEN environment variable")
-
-        # Validate the token
-        try:
-            self.client.api_token = self.api_token
-            logger.info(f"Replicate Vision service initialized with model: {self.model_name}")
-        except Exception as e:
-            logger.error(f"Replicate initialization failed: {e}")
-            raise
-
-    async def generate_image(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
-        """
-        Generate an image with the Replicate model
-
-        Args:
-            input_data: Dictionary of generation parameters
-
-        Returns:
-            Result dictionary containing the generated image URLs
-        """
-        try:
-            # Apply default parameters
-            if "guidance_scale" not in input_data and self.guidance_scale:
-                input_data["guidance_scale"] = self.guidance_scale
-
-            if "num_inference_steps" not in input_data and self.num_inference_steps:
-                input_data["num_inference_steps"] = self.num_inference_steps
-
-            # Run the model (a synchronous API call)
-            logger.info(f"Generating image with model {self.model_name}")
-
-            # Off-load to an executor so the call stays asynchronous
-            loop = asyncio.get_event_loop()
-            output = await loop.run_in_executor(
-                None,
-                lambda: replicate.run(self.model_name, input=input_data)
-            )
-
-            # Normalize the result into a standard shape
-            # Handle Replicate object outputs
-            if hasattr(output, 'url'):
-                urls = [output.url]
-            elif isinstance(output, list) and all(hasattr(item, 'url') for item in output if item is not None):
-                urls = [item.url for item in output if item is not None]
-            else:
-                # Fall back to outputs that are already URL strings
-                urls = output if isinstance(output, list) else [output]
-
-            result = {
-                "urls": urls,
-                "metadata": {
-                    "model": self.model_name,
-                    "input": input_data
-                }
-            }
-
-            logger.info(f"Image generation finished: {result['urls']}")
-            return result
-
-        except Exception as e:
-            logger.error(f"Image generation failed: {e}")
-            raise
-
-    async def analyze_image(self, image_path: str, prompt: str) -> Dict[str, Any]:
-        """
-        Analyze an image (for vision-analysis models)
-
-        Args:
-            image_path: Image path or URL
-            prompt: Analysis prompt
-
-        Returns:
-            Analysis result dictionary
-        """
-        try:
-            # Build the input payload
-            input_data = {
-                "image": self._get_image_url(image_path),
-                "prompt": prompt
-            }
-
-            # Run the model
-            logger.info(f"Analyzing image with model {self.model_name}")
-
-            # Off-load to an executor so the call stays asynchronous
-            loop = asyncio.get_event_loop()
-            output = await loop.run_in_executor(
-                None,
-                lambda: replicate.run(self.model_name, input=input_data)
-            )
-
-            result = {
-                "text": output,
-                "metadata": {
-                    "model": self.model_name,
-                    "input": input_data
-                }
-            }
-
-            logger.info("Image analysis finished")
-            return result
-
-        except Exception as e:
-            logger.error(f"Image analysis failed: {e}")
-            raise
-
-    async def generate_and_save(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
-        """
-        Generate images and save them locally
-
-        Args:
-            input_data: Dictionary of generation parameters
-
-        Returns:
-            Result dictionary with the image URLs and saved file paths
-        """
-        # Generate the images first
-        result = await self.generate_image(input_data)
-
-        # Then download and save them
-        saved_paths = []
-        for i, url in enumerate(result["urls"]):
-            # Build a unique file name
-            timestamp = int(time.time())
-            file_name = f"{self.output_dir}/{timestamp}_{uuid.uuid4().hex[:8]}_{i+1}.png"
-
-            # Download the image asynchronously
-            try:
-                await self._download_image(url, file_name)
-                saved_paths.append(file_name)
-                logger.info(f"Image saved to: {file_name}")
-            except Exception as e:
-                logger.error(f"Failed to save image: {e}")
-
-        # Attach the saved paths to the result
-        result["saved_paths"] = saved_paths
-        return result
-
-    async def _download_image(self, url: str, save_path: str) -> None:
-        """
-        Download an image asynchronously and save it
-
-        Args:
-            url: Image URL
-            save_path: Destination path
-        """
-        try:
-            async with aiohttp.ClientSession() as session:
-                async with session.get(url) as response:
-                    if response.status == 200:
-                        content = await response.read()
-                        img = Image.open(BytesIO(content))
-                        img.save(save_path)
-                    else:
-                        logger.error(f"Image download failed: HTTP {response.status}")
-                        raise Exception(f"Image download failed: HTTP {response.status}")
-        except Exception as e:
-            logger.error(f"Error while downloading image: {e}")
-            raise
-
-    def _get_image_url(self, image_path: str) -> str:
-        """
-        Get an image URL (a local path would need to be uploaded to temporary storage)
-
-        Args:
-            image_path: Image path or URL
-
-        Returns:
-            Image URL
-        """
-        # If it is already a URL, return it as-is
-        if image_path.startswith(("http://", "https://")):
-            return image_path
-
-        # Otherwise this is a local file that would need uploading
-        # Note: upload logic could be implemented here; for simplicity only URLs are supported
-        raise NotImplementedError("Only image URLs are supported; uploading local files is not implemented")
-
-    async def unload(self) -> None:
-        """Unload the model (a no-op for the Replicate API)"""
-        logger.info(f"Unloading Replicate Vision service: {self.model_name}")
-        # No resources to clean up
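The notable pattern in the removed service is wrapping the synchronous `replicate.run` call in `run_in_executor` so callers can stay async. For reference, a minimal usage sketch against the removed 0.1.0 class; the `ReplicateProvider` class name and its constructor signature are assumptions based on the `replicate_provider.py` module listed above, not confirmed by this diff:

```python
import asyncio

from isa_model.inference.providers.replicate_provider import ReplicateProvider  # class name assumed
from isa_model.inference.services.vision.replicate_vision_service import ReplicateVisionService

async def main():
    # The provider carries the API token and default generation settings
    provider = ReplicateProvider(config={"api_token": "your_replicate_token"})  # constructor assumed
    service = ReplicateVisionService(
        provider,
        "stability-ai/sdxl:c221b2b8ef527988fb59bf24a8b97c4561f1c671f73bd389f866bfb27c061316",
    )
    await service.load()  # only validates the API token

    # generate_and_save() downloads each returned URL into ./generated_images
    result = await service.generate_and_save({"prompt": "A beautiful sunset over mountains"})
    print(result["urls"], result["saved_paths"])

asyncio.run(main())
```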
isa_model/inference/services/vision/triton_vision_service.py (removed)

@@ -1,199 +0,0 @@
-import json
-import logging
-import asyncio
-import base64
-import io
-from PIL import Image
-import numpy as np
-from typing import Dict, List, Any, AsyncGenerator, Optional, Union
-
-from isa_model.inference.services.base_service import BaseService
-from isa_model.inference.providers.triton_provider import TritonProvider
-
-logger = logging.getLogger(__name__)
-
-
-class TritonVisionService(BaseService):
-    """
-    Vision service that uses Triton Inference Server to run inference.
-    """
-
-    def __init__(self, provider: TritonProvider, model_name: str):
-        """
-        Initialize the Triton Vision service.
-
-        Args:
-            provider: The Triton provider
-            model_name: Name of the model in Triton (e.g., "Gemma3-4B")
-        """
-        super().__init__(provider, model_name)
-        self.client = None
-        self.token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
-        self.last_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
-
-    async def _initialize_client(self):
-        """Initialize the Triton client"""
-        if self.client is None:
-            self.client = self.provider.create_client()
-
-            # Check if model is ready
-            if not self.provider.is_model_ready(self.model_name):
-                logger.error(f"Model {self.model_name} is not ready on Triton server")
-                raise RuntimeError(f"Model {self.model_name} is not ready on Triton server")
-
-            logger.info(f"Initialized Triton client for vision model: {self.model_name}")
-
-    async def process_image(self,
-                            image: Union[str, Image.Image, bytes],
-                            prompt: Optional[str] = None,
-                            params: Optional[Dict[str, Any]] = None) -> str:
-        """
-        Process an image and generate a description.
-
-        Args:
-            image: Input image (PIL Image, base64 string, or bytes)
-            prompt: Optional text prompt to guide the model
-            params: Generation parameters
-
-        Returns:
-            Generated text description
-        """
-        await self._initialize_client()
-
-        try:
-            import tritonclient.http as httpclient
-
-            # Process the image to get numpy array
-            image_array = self._prepare_image_input(image)
-
-            # Create input tensors for the image
-            image_input = httpclient.InferInput("IMAGE", image_array.shape, "UINT8")
-            image_input.set_data_from_numpy(image_array)
-            inputs = [image_input]
-
-            # Add text prompt if provided
-            if prompt:
-                text_data = np.array([prompt], dtype=np.object_)
-                text_input = httpclient.InferInput("TEXT", text_data.shape, "BYTES")
-                text_input.set_data_from_numpy(text_data)
-                inputs.append(text_input)
-
-            # Add parameters if provided
-            if params:
-                default_params = {
-                    "max_new_tokens": 512,
-                    "temperature": 0.7,
-                    "top_p": 0.9,
-                    "do_sample": True
-                }
-                generation_params = {**default_params, **params}
-
-                param_json = json.dumps(generation_params)
-                param_data = np.array([param_json], dtype=np.object_)
-                param_input = httpclient.InferInput("PARAMETERS", param_data.shape, "BYTES")
-                param_input.set_data_from_numpy(param_data)
-                inputs.append(param_input)
-
-            # Create output tensor
-            outputs = [httpclient.InferRequestedOutput("TEXT")]
-
-            # Send the request
-            response = await asyncio.to_thread(
-                self.client.infer,
-                self.model_name,
-                inputs,
-                outputs=outputs
-            )
-
-            # Process the response
-            output = response.as_numpy("TEXT")
-            response_text = output[0].decode('utf-8')
-
-            # Update token usage (estimated since we don't have actual token counts)
-            prompt_tokens = len(prompt) // 4 if prompt else 100  # Rough estimate
-            completion_tokens = len(response_text) // 4  # Rough estimate
-            total_tokens = prompt_tokens + completion_tokens
-
-            self.last_token_usage = {
-                "prompt_tokens": prompt_tokens,
-                "completion_tokens": completion_tokens,
-                "total_tokens": total_tokens
-            }
-
-            # Update total token usage
-            self.token_usage["prompt_tokens"] += prompt_tokens
-            self.token_usage["completion_tokens"] += completion_tokens
-            self.token_usage["total_tokens"] += total_tokens
-
-            return response_text
-
-        except Exception as e:
-            logger.error(f"Error during Triton vision inference: {str(e)}")
-            raise
-
-    def get_token_usage(self) -> Dict[str, int]:
-        """
-        Get total token usage statistics.
-
-        Returns:
-            Dictionary with token usage statistics
-        """
-        return self.token_usage
-
-    def get_last_token_usage(self) -> Dict[str, int]:
-        """
-        Get token usage from last request.
-
-        Returns:
-            Dictionary with token usage statistics from last request
-        """
-        return self.last_token_usage
-
-    def _prepare_image_input(self, image: Union[str, Image.Image, bytes]) -> np.ndarray:
-        """
-        Process different types of image inputs into a numpy array.
-
-        Args:
-            image: Image input (PIL Image, base64 string, or bytes)
-
-        Returns:
-            Numpy array of the image
-        """
-        # Convert to PIL image first
-        pil_image = self._to_pil_image(image)
-
-        # Convert PIL image to numpy array
-        return np.array(pil_image)
-
-    def _to_pil_image(self, image: Union[str, Image.Image, bytes]) -> Image.Image:
-        """
-        Convert different image inputs to PIL Image.
-
-        Args:
-            image: Image input (PIL Image, base64 string, or bytes)
-
-        Returns:
-            PIL Image
-        """
-        if isinstance(image, Image.Image):
-            return image
-
-        elif isinstance(image, str):
-            # Check if it's a base64 string
-            if image.startswith("data:image"):
-                # Extract the base64 part
-                image = image.split(",")[1]
-
-            try:
-                # Try to decode as base64
-                image_bytes = base64.b64decode(image)
-                return Image.open(io.BytesIO(image_bytes))
-            except Exception:
-                # Try to open as a file path
-                return Image.open(image)
-
-        elif isinstance(image, bytes):
-            return Image.open(io.BytesIO(image))
-
-        else:
-            raise ValueError(f"Unsupported image type: {type(image)}")
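The reusable piece of the removed TritonVisionService is its input normalization: `_to_pil_image` accepts a PIL image, raw bytes, a file path, or a base64 string (optionally carrying a `data:image/...;base64,` prefix). Below is a self-contained sketch of the same dispatch logic, runnable without a Triton server; the function name `to_numpy_image` is illustrative, not part of the package:

```python
import base64
import io
from typing import Union

import numpy as np
from PIL import Image

def to_numpy_image(image: Union[str, Image.Image, bytes]) -> np.ndarray:
    """Normalize PIL / bytes / base64 / file-path inputs into a numpy array."""
    if isinstance(image, Image.Image):
        pil = image
    elif isinstance(image, bytes):
        pil = Image.open(io.BytesIO(image))
    elif isinstance(image, str):
        if image.startswith("data:image"):
            image = image.split(",")[1]  # strip the data-URI header
        try:
            pil = Image.open(io.BytesIO(base64.b64decode(image)))
        except Exception:
            pil = Image.open(image)  # fall back to treating it as a path
    else:
        raise ValueError(f"Unsupported image type: {type(image)}")
    return np.array(pil)

# Example: a 2x2 red PNG round-tripped through base64
buf = io.BytesIO()
Image.new("RGB", (2, 2), "red").save(buf, format="PNG")
arr = to_numpy_image(base64.b64encode(buf.getvalue()).decode())
print(arr.shape)  # (2, 2, 3)
```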
isa_model-0.1.0.dist-info/METADATA (removed)

@@ -1,116 +0,0 @@
-Metadata-Version: 2.4
-Name: isa-model
-Version: 0.1.0
-Summary: Unified AI model serving framework
-Author-email: isA_Model Contributors <your.email@example.com>
-License: MIT
-Classifier: Development Status :: 3 - Alpha
-Classifier: Intended Audience :: Developers
-Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python :: 3
-Classifier: License :: OSI Approved :: MIT License
-Requires-Python: >=3.8
-Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: fastapi>=0.95.0
-Requires-Dist: numpy>=1.20.0
-Requires-Dist: httpx>=0.23.0
-Requires-Dist: pydantic>=2.0.0
-Requires-Dist: uvicorn>=0.22.0
-Requires-Dist: requests>=2.28.0
-Requires-Dist: aiohttp>=3.8.0
-Requires-Dist: transformers>=4.30.0
-Requires-Dist: langchain-core>=0.1.0
-Requires-Dist: tritonclient[grpc,http]>=2.30.0
-Requires-Dist: huggingface-hub>=0.16.0
-Requires-Dist: kubernetes>=25.3.0
-Requires-Dist: mlflow>=2.4.0
-Requires-Dist: torch>=2.0.0
-Dynamic: license-file
-
-# isA_Model - AI Service Factory
-
-isA_Model is a lightweight AI service factory for managing and calling different AI models and service providers through a unified interface.
-
-## Features
-
-- Support for multiple AI providers (Ollama, OpenAI, Replicate, Triton)
-- Unified API interface
-- Flexible factory pattern
-- Async support
-- Singleton pattern with efficient caching
-
-## Installation
-
-```bash
-pip install -r requirements.txt
-```
-
-## Quick Start
-
-Using the AI factory is straightforward:
-
-```python
-from isa_model.inference.ai_factory import AIFactory
-from isa_model.inference.base import ModelType
-
-# Get the factory instance
-factory = AIFactory()
-
-# LLM example - using Ollama
-llm = factory.get_llm(model_name="llama3.1", provider="ollama")
-response = await llm.generate("Hello, please introduce yourself.")
-print(response)
-
-# Image generation example - using Replicate
-vision_service = factory.get_vision_model(
-    model_name="stability-ai/sdxl:c221b2b8ef527988fb59bf24a8b97c4561f1c671f73bd389f866bfb27c061316",
-    provider="replicate",
-    config={"api_token": "your_replicate_token"}
-)
-result = await vision_service.generate_image({
-    "prompt": "A beautiful sunset over mountains",
-    "num_inference_steps": 25
-})
-print(result["urls"])
-```
-
-## Factory Architecture
-
-isA_Model uses a three-layer architecture:
-
-1. **Client layer** - application code
-2. **Service layer** - model service implementations (LLM, vision, embedding, etc.)
-3. **Provider layer** - underlying API integrations (Ollama, OpenAI, Replicate, etc.)
-
-### Key Components
-
-- `AIFactory` - central factory class providing access to models and services
-- `BaseService` - base class for all services
-- `BaseProvider` - base class for all providers
-- Concrete service implementations such as `ReplicateVisionService` and `OllamaLLMService`
-
-## Supported Model Types
-
-- **LLM** - large language models
-- **VISION** - image generation and analysis
-- **EMBEDDING** - text embeddings
-- **AUDIO** - speech recognition
-- **RERANK** - reranking
-
-## Examples
-
-See the `test_*.py` files for more usage examples.
-
-## Environment Variables
-
-Add API keys and other configuration to a `.env.local` file:
-
-```
-OPENAI_API_KEY=your_openai_key
-REPLICATE_API_TOKEN=your_replicate_token
-```
-
-## License
-
-MIT
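The removed 0.1.0 README above describes `AIFactory` as a singleton with efficient caching over providers and services. A minimal sketch of that pattern under those stated properties; the `ServiceFactory` class below is illustrative, not the package's actual implementation:

```python
from typing import Callable, Dict, Tuple

class ServiceFactory:
    """Singleton factory that caches one service instance per (provider, model) pair."""
    _instance = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._cache: Dict[Tuple[str, str], object] = {}
        return cls._instance

    def get_service(self, provider: str, model_name: str, builder: Callable[[], object]):
        key = (provider, model_name)
        if key not in self._cache:
            self._cache[key] = builder()  # build once, reuse on later calls
        return self._cache[key]

factory_a = ServiceFactory()
factory_b = ServiceFactory()
assert factory_a is factory_b  # every caller shares one factory

svc = factory_a.get_service("ollama", "llama3.1", builder=lambda: object())
assert svc is factory_b.get_service("ollama", "llama3.1", builder=lambda: object())
```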