isa-model 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/__init__.py +1 -1
- isa_model/core/model_registry.py +273 -46
- isa_model/deployment/gpu_fp16_ds8/models/deepseek_r1/1/model.py +120 -0
- isa_model/deployment/gpu_fp16_ds8/scripts/download_model.py +18 -0
- isa_model/deployment/gpu_int8_ds8/app/server.py +66 -0
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +43 -0
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +35 -0
- isa_model/eval/__init__.py +56 -0
- isa_model/eval/benchmarks.py +469 -0
- isa_model/eval/factory.py +582 -0
- isa_model/eval/metrics.py +628 -0
- isa_model/inference/ai_factory.py +98 -93
- isa_model/inference/providers/openai_provider.py +21 -7
- isa_model/inference/providers/replicate_provider.py +18 -5
- isa_model/inference/providers/triton_provider.py +1 -1
- isa_model/inference/services/audio/base_stt_service.py +91 -0
- isa_model/inference/services/audio/base_tts_service.py +136 -0
- isa_model/inference/services/audio/{yyds_audio_service.py → openai_tts_service.py} +4 -4
- isa_model/inference/services/embedding/ollama_embed_service.py +48 -36
- isa_model/inference/services/llm/__init__.py +0 -4
- isa_model/inference/services/llm/base_llm_service.py +134 -0
- isa_model/inference/services/llm/ollama_llm_service.py +1 -10
- isa_model/inference/services/llm/openai_llm_service.py +70 -61
- isa_model/inference/services/vision/__init__.py +1 -1
- isa_model/inference/services/vision/ollama_vision_service.py +4 -4
- isa_model/inference/services/vision/{yyds_vision_service.py → openai_vision_service.py} +5 -5
- isa_model/inference/services/vision/replicate_image_gen_service.py +185 -0
- isa_model/training/__init__.py +44 -0
- isa_model/training/factory.py +393 -0
- isa_model-0.1.1.dist-info/METADATA +327 -0
- {isa_model-0.1.0.dist-info → isa_model-0.1.1.dist-info}/RECORD +35 -60
- isa_model/deployment/mlflow_gateway/__init__.py +0 -8
- isa_model/deployment/mlflow_gateway/start_gateway.py +0 -65
- isa_model/deployment/unified_multimodal_client.py +0 -341
- isa_model/inference/adapter/triton_adapter.py +0 -453
- isa_model/inference/backends/Pytorch/bge_embed_backend.py +0 -188
- isa_model/inference/backends/Pytorch/gemma_backend.py +0 -167
- isa_model/inference/backends/Pytorch/llama_backend.py +0 -166
- isa_model/inference/backends/Pytorch/whisper_backend.py +0 -194
- isa_model/inference/backends/__init__.py +0 -53
- isa_model/inference/backends/base_backend_client.py +0 -26
- isa_model/inference/backends/container_services.py +0 -104
- isa_model/inference/backends/local_services.py +0 -72
- isa_model/inference/backends/openai_client.py +0 -130
- isa_model/inference/backends/replicate_client.py +0 -197
- isa_model/inference/backends/third_party_services.py +0 -239
- isa_model/inference/backends/triton_client.py +0 -97
- isa_model/inference/client_sdk/client.py +0 -134
- isa_model/inference/client_sdk/client_data_std.py +0 -34
- isa_model/inference/client_sdk/client_sdk_schema.py +0 -16
- isa_model/inference/client_sdk/exceptions.py +0 -0
- isa_model/inference/engine/triton/model_repository/bge/1/model.py +0 -174
- isa_model/inference/engine/triton/model_repository/gemma/1/model.py +0 -250
- isa_model/inference/engine/triton/model_repository/llama/1/model.py +0 -76
- isa_model/inference/engine/triton/model_repository/whisper/1/model.py +0 -195
- isa_model/inference/providers/vllm_provider.py +0 -0
- isa_model/inference/providers/yyds_provider.py +0 -83
- isa_model/inference/services/audio/fish_speech/handler.py +0 -215
- isa_model/inference/services/audio/runpod_tts_fish_service.py +0 -212
- isa_model/inference/services/audio/triton_speech_service.py +0 -138
- isa_model/inference/services/audio/whisper_service.py +0 -186
- isa_model/inference/services/base_tts_service.py +0 -66
- isa_model/inference/services/embedding/bge_service.py +0 -183
- isa_model/inference/services/embedding/ollama_rerank_service.py +0 -118
- isa_model/inference/services/embedding/onnx_rerank_service.py +0 -73
- isa_model/inference/services/llm/gemma_service.py +0 -143
- isa_model/inference/services/llm/llama_service.py +0 -143
- isa_model/inference/services/llm/replicate_llm_service.py +0 -179
- isa_model/inference/services/llm/triton_llm_service.py +0 -230
- isa_model/inference/services/vision/replicate_vision_service.py +0 -241
- isa_model/inference/services/vision/triton_vision_service.py +0 -199
- isa_model-0.1.0.dist-info/METADATA +0 -116
- /isa_model/inference/{client_sdk/__init__.py → services/embedding/openai_embed_service.py} +0 -0
- {isa_model-0.1.0.dist-info → isa_model-0.1.1.dist-info}/WHEEL +0 -0
- {isa_model-0.1.0.dist-info → isa_model-0.1.1.dist-info}/licenses/LICENSE +0 -0
- {isa_model-0.1.0.dist-info → isa_model-0.1.1.dist-info}/top_level.txt +0 -0
isa_model/inference/backends/third_party_services.py
@@ -1,239 +0,0 @@
-"""
-Third-party Services Backend - External API services with wrappers.
-Examples: OpenAI, Anthropic, Cohere, Google AI, Azure OpenAI
-"""
-
-import aiohttp
-import json
-from typing import Dict, Any, List, Optional
-from .base_backend_client import BaseBackendClient
-
-
-class OpenAIClient(BaseBackendClient):
-    """Wrapper for OpenAI API"""
-
-    def __init__(self, api_key: str, base_url: str = "https://api.openai.com/v1"):
-        self.api_key = api_key
-        self.base_url = base_url.rstrip('/')
-        self.headers = {
-            "Authorization": f"Bearer {api_key}",
-            "Content-Type": "application/json"
-        }
-
-    async def generate_completion(self, model: str, prompt: str, **kwargs) -> Dict[str, Any]:
-        """Generate completion using OpenAI API"""
-        async with aiohttp.ClientSession() as session:
-            payload = {
-                "model": model,
-                "prompt": prompt,
-                "max_tokens": kwargs.get("max_tokens", 100),
-                "temperature": kwargs.get("temperature", 0.7),
-                **kwargs
-            }
-            async with session.post(
-                f"{self.base_url}/completions",
-                json=payload,
-                headers=self.headers
-            ) as response:
-                return await response.json()
-
-    async def generate_chat_completion(self, model: str, messages: List[Dict[str, str]], **kwargs) -> Dict[str, Any]:
-        """Generate chat completion using OpenAI API"""
-        async with aiohttp.ClientSession() as session:
-            payload = {
-                "model": model,
-                "messages": messages,
-                "max_tokens": kwargs.get("max_tokens", 100),
-                "temperature": kwargs.get("temperature", 0.7),
-                **kwargs
-            }
-            async with session.post(
-                f"{self.base_url}/chat/completions",
-                json=payload,
-                headers=self.headers
-            ) as response:
-                return await response.json()
-
-    async def generate_embeddings(self, model: str, input_text: str, **kwargs) -> Dict[str, Any]:
-        """Generate embeddings using OpenAI API"""
-        async with aiohttp.ClientSession() as session:
-            payload = {
-                "model": model,
-                "input": input_text,
-                **kwargs
-            }
-            async with session.post(
-                f"{self.base_url}/embeddings",
-                json=payload,
-                headers=self.headers
-            ) as response:
-                return await response.json()
-
-    async def health_check(self) -> bool:
-        """Check if OpenAI API is accessible"""
-        try:
-            async with aiohttp.ClientSession() as session:
-                async with session.get(f"{self.base_url}/models", headers=self.headers) as response:
-                    return response.status == 200
-        except Exception:
-            return False
-
-
-class AnthropicClient(BaseBackendClient):
-    """Wrapper for Anthropic Claude API"""
-
-    def __init__(self, api_key: str, base_url: str = "https://api.anthropic.com/v1"):
-        self.api_key = api_key
-        self.base_url = base_url.rstrip('/')
-        self.headers = {
-            "x-api-key": api_key,
-            "Content-Type": "application/json",
-            "anthropic-version": "2023-06-01"
-        }
-
-    async def generate_chat_completion(self, model: str, messages: List[Dict[str, str]], **kwargs) -> Dict[str, Any]:
-        """Generate chat completion using Anthropic API"""
-        async with aiohttp.ClientSession() as session:
-            payload = {
-                "model": model,
-                "messages": messages,
-                "max_tokens": kwargs.get("max_tokens", 100),
-                **kwargs
-            }
-            async with session.post(
-                f"{self.base_url}/messages",
-                json=payload,
-                headers=self.headers
-            ) as response:
-                return await response.json()
-
-    async def health_check(self) -> bool:
-        """Check if Anthropic API is accessible"""
-        try:
-            # Anthropic doesn't have a models endpoint, so we'll just check the base URL
-            async with aiohttp.ClientSession() as session:
-                async with session.get(self.base_url, headers=self.headers) as response:
-                    return response.status in [200, 404]  # 404 is also acceptable for base URL
-        except Exception:
-            return False
-
-
-class CohereClient(BaseBackendClient):
-    """Wrapper for Cohere API"""
-
-    def __init__(self, api_key: str, base_url: str = "https://api.cohere.ai/v1"):
-        self.api_key = api_key
-        self.base_url = base_url.rstrip('/')
-        self.headers = {
-            "Authorization": f"Bearer {api_key}",
-            "Content-Type": "application/json"
-        }
-
-    async def generate_completion(self, model: str, prompt: str, **kwargs) -> Dict[str, Any]:
-        """Generate completion using Cohere API"""
-        async with aiohttp.ClientSession() as session:
-            payload = {
-                "model": model,
-                "prompt": prompt,
-                "max_tokens": kwargs.get("max_tokens", 100),
-                "temperature": kwargs.get("temperature", 0.7),
-                **kwargs
-            }
-            async with session.post(
-                f"{self.base_url}/generate",
-                json=payload,
-                headers=self.headers
-            ) as response:
-                return await response.json()
-
-    async def generate_embeddings(self, model: str, texts: List[str], **kwargs) -> Dict[str, Any]:
-        """Generate embeddings using Cohere API"""
-        async with aiohttp.ClientSession() as session:
-            payload = {
-                "model": model,
-                "texts": texts,
-                **kwargs
-            }
-            async with session.post(
-                f"{self.base_url}/embed",
-                json=payload,
-                headers=self.headers
-            ) as response:
-                return await response.json()
-
-    async def health_check(self) -> bool:
-        """Check if Cohere API is accessible"""
-        try:
-            async with aiohttp.ClientSession() as session:
-                async with session.get(f"{self.base_url}/check-api-key", headers=self.headers) as response:
-                    return response.status == 200
-        except Exception:
-            return False
-
-
-class AzureOpenAIClient(BaseBackendClient):
-    """Wrapper for Azure OpenAI API"""
-
-    def __init__(self, api_key: str, endpoint: str, api_version: str = "2023-12-01-preview"):
-        self.api_key = api_key
-        self.endpoint = endpoint.rstrip('/')
-        self.api_version = api_version
-        self.headers = {
-            "api-key": api_key,
-            "Content-Type": "application/json"
-        }
-
-    async def generate_chat_completion(self, deployment_name: str, messages: List[Dict[str, str]], **kwargs) -> Dict[str, Any]:
-        """Generate chat completion using Azure OpenAI API"""
-        async with aiohttp.ClientSession() as session:
-            payload = {
-                "messages": messages,
-                "max_tokens": kwargs.get("max_tokens", 100),
-                "temperature": kwargs.get("temperature", 0.7),
-                **kwargs
-            }
-            url = f"{self.endpoint}/openai/deployments/{deployment_name}/chat/completions?api-version={self.api_version}"
-            async with session.post(url, json=payload, headers=self.headers) as response:
-                return await response.json()
-
-    async def health_check(self) -> bool:
-        """Check if Azure OpenAI API is accessible"""
-        try:
-            async with aiohttp.ClientSession() as session:
-                url = f"{self.endpoint}/openai/models?api-version={self.api_version}"
-                async with session.get(url, headers=self.headers) as response:
-                    return response.status == 200
-        except Exception:
-            return False
-
-
-class GoogleAIClient(BaseBackendClient):
-    """Wrapper for Google AI (Gemini) API"""
-
-    def __init__(self, api_key: str, base_url: str = "https://generativelanguage.googleapis.com/v1"):
-        self.api_key = api_key
-        self.base_url = base_url.rstrip('/')
-
-    async def generate_completion(self, model: str, prompt: str, **kwargs) -> Dict[str, Any]:
-        """Generate completion using Google AI API"""
-        async with aiohttp.ClientSession() as session:
-            payload = {
-                "contents": [{"parts": [{"text": prompt}]}],
-                "generationConfig": {
-                    "maxOutputTokens": kwargs.get("max_tokens", 100),
-                    "temperature": kwargs.get("temperature", 0.7),
-                }
-            }
-            url = f"{self.base_url}/models/{model}:generateContent?key={self.api_key}"
-            async with session.post(url, json=payload) as response:
-                return await response.json()
-
-    async def health_check(self) -> bool:
-        """Check if Google AI API is accessible"""
-        try:
-            async with aiohttp.ClientSession() as session:
-                url = f"{self.base_url}/models?key={self.api_key}"
-                async with session.get(url) as response:
-                    return response.status == 200
-        except Exception:
-            return False
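As orientation for this deletion, here is a minimal, illustrative sketch of how the OpenAIClient wrapper above could have been driven under 0.1.0. The import path follows the removed module's location, the model name is an example, and the response parsing assumes the standard OpenAI chat completions payload; none of this code appears in either release.

# Illustrative usage sketch only (not part of the package).
import asyncio
import os

from isa_model.inference.backends.third_party_services import OpenAIClient

async def main() -> None:
    client = OpenAIClient(api_key=os.environ["OPENAI_API_KEY"])
    # Verify the API is reachable before issuing a request
    if not await client.health_check():
        raise RuntimeError("OpenAI API is not reachable")
    result = await client.generate_chat_completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Say hello in one sentence."}],
        max_tokens=50,
    )
    # Standard OpenAI chat completions response shape
    print(result["choices"][0]["message"]["content"])

asyncio.run(main())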
isa_model/inference/backends/triton_client.py
@@ -1,97 +0,0 @@
-
-import aiohttp
-import json
-from typing import Dict, Any, List, Optional, AsyncGenerator
-from .base_backend_client import BaseBackendClient
-
-
-class TritonBackendClient(BaseBackendClient):
-    """Pure connection client for Triton Inference Server"""
-
-    def __init__(self, url: str = "localhost:8000", protocol: str = "http"):
-        self.base_url = f"http://{url}" if not url.startswith("http") else url
-        self.protocol = protocol
-        self._session = None
-
-    async def _get_session(self):
-        """Get or create HTTP session"""
-        if self._session is None:
-            self._session = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=120))
-        return self._session
-
-    async def post(self, endpoint: str, payload: Dict[str, Any]) -> Dict[str, Any]:
-        """Make POST request to Triton server"""
-        session = await self._get_session()
-        async with session.post(f"{self.base_url}{endpoint}", json=payload) as response:
-            response.raise_for_status()
-            return await response.json()
-
-    async def get(self, endpoint: str) -> Dict[str, Any]:
-        """Make GET request to Triton server"""
-        session = await self._get_session()
-        async with session.get(f"{self.base_url}{endpoint}") as response:
-            response.raise_for_status()
-            return await response.json()
-
-    async def model_ready(self, model_name: str) -> bool:
-        """Check if model is ready"""
-        try:
-            await self.get(f"/v2/models/{model_name}/ready")
-            return True
-        except Exception:
-            return False
-
-    async def model_metadata(self, model_name: str) -> Dict[str, Any]:
-        """Get model metadata"""
-        return await self.get(f"/v2/models/{model_name}")
-
-    async def server_ready(self) -> bool:
-        """Check if server is ready"""
-        try:
-            await self.get("/v2/health/ready")
-            return True
-        except Exception:
-            return False
-
-    async def health_check(self) -> bool:
-        """Check server health"""
-        return await self.server_ready()
-
-    async def close(self):
-        """Close the HTTP session"""
-        if self._session:
-            await self._session.close()
-            self._session = None
-
-
-# Keep old class name for backward compatibility
-class TritonClient(TritonBackendClient):
-    """Backward compatibility alias"""
-
-    def __init__(self, backend_connector_config: Dict = None, config: Dict = None):
-        if backend_connector_config:
-            url = backend_connector_config.get("url", "localhost:8000")
-        else:
-            url = "localhost:8000"
-        super().__init__(url)
-
-    async def infer(self,
-                    model_runtime_config: Dict,
-                    unified_request_payload: Dict,
-                    task_type: str,
-                    request_id: str) -> Dict:
-        """Legacy method for backward compatibility"""
-        # This is a placeholder for the old interface
-        # New code should use the direct HTTP methods
-        raise NotImplementedError("Use direct HTTP methods instead")
-
-    async def stream(self,
-                     model_runtime_config: Dict,
-                     unified_request_payload: Dict,
-                     task_type: str,
-                     request_id: str) -> AsyncGenerator[Dict, None]:
-        """Legacy method for backward compatibility"""
-        # This is a placeholder for the old interface
-        # New code should use the direct HTTP methods
-        raise NotImplementedError("Use direct HTTP methods instead")
-        yield  # Make it a generator
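Again purely as orientation, a minimal sketch of readiness and metadata checks against a local Triton server using the TritonBackendClient removed above; the model name "llama" is only an example, and the import path mirrors the module's 0.1.0 location.

# Illustrative usage sketch only (not part of the package).
import asyncio

from isa_model.inference.backends.triton_client import TritonBackendClient

async def check() -> None:
    client = TritonBackendClient(url="localhost:8000")
    try:
        # Triton v2 HTTP API: server readiness, then per-model readiness and metadata
        if await client.server_ready() and await client.model_ready("llama"):
            metadata = await client.model_metadata("llama")
            print(metadata.get("name"), metadata.get("platform"))
    finally:
        await client.close()

asyncio.run(check())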
isa_model/inference/client_sdk/client.py
@@ -1,134 +0,0 @@
-# Universal Inference Client
-"""
-Provides developers with a high-level, unified, easy-to-use Python client library for
-interacting with the Universal Inference Platform (the overall system being built). The client
-encapsulates all of the complexity of talking to the platform's backend orchestrator_adapter
-service, so users can call AI models (language, vision, speech, etc.) through task-oriented
-methods without caring which inference engine (PyTorch, vLLM, Triton, Ollama) hosts a given
-model, or whether it is a locally deployed model or an external API service.
-"""
-
-
-import httpx
-from typing import Dict, List, Union, Optional, AsyncGenerator
-from .client_sdk_schema import *
-
-class UniversalInferenceClient:
-    def __init__(self):
-
-        self.adapter_url = "http://adapter.isa_model.com/api/v1"
-        self.adapter_key = "isa_model_adapter"
-        self.client = httpx.AsyncClient(
-            base_url=self.adapter_url,
-            headers={"Authorization": f"Bearer {self.adapter_key}"}
-        )
-
-    async def _make_request(self,
-                            method: str,
-                            url: str,
-                            params: Optional[Dict] = None,
-                            data: Optional[Dict] = None,
-                            headers: Optional[Dict] = None,
-                            **kwargs) -> httpx.Response:
-        """
-        Make a request to the adapter service
-        """
-        headers = headers or {}
-        headers["Authorization"] = f"Bearer {self.adapter_key}"
-
-        async with self.client as client:
-            response = await client.request(
-                method,
-                url,
-                params=params,
-                data=data,
-                headers=headers,
-                **kwargs
-            )
-            response.raise_for_status()
-            return response.json()
-
-    async def invoke(self,
-                     model_id: str,
-                     raw_task_payload: Dict,
-                     stream: bool = False,
-                     **kwargs) -> Union[Dict, AsyncGenerator[Dict, None]]:
-        pass
-
-    async def chat(self,
-                   model_id: str,
-                   messages: List[Dict[str, str]],
-                   stream: bool = False,
-                   temperature: float = 0.7,
-                   max_tokens: int = 1000) -> Union[UnifiedChatResponse, AsyncGenerator[UnifiedChatResponse, None]]:
-        pass
-
-    async def generate_text(self,
-                            model_id: str,
-                            prompt: str,
-                            stream: bool = False,
-                            temperature: float = 0.7,
-                            max_tokens: int = 1000) -> Union[UnifiedTextResponse, AsyncGenerator[UnifiedTextChunk, None]]:
-        pass
-
-    async def embed(self,
-                    model_id: str,
-                    inputs: Union[str, List[str]],
-                    input_type: str = "document",
-                    **kwargs) -> UnifiedEmbeddingResponse:
-        pass
-
-    async def rerank(self,
-                     model_id: str,
-                     query: str,
-                     documents: List[Union[str, Dict]],
-                     top_k: Optional[int] = None,
-                     **kwargs) -> UnifiedRerankResponse:
-        pass
-
-    async def transcribe_audio(self,
-                               model_id: str,
-                               audio_data: bytes,
-                               language: Optional[str] = None,
-                               **kwargs) -> UnifiedAudioTranscriptionResponse:
-        pass
-
-    async def generate_speech(self,
-                              model_id: str,
-                              text: str,
-                              voice_id: Optional[str] = None,
-                              **kwargs) -> UnifiedSpeechGenerationResponse:
-        pass
-
-    async def analyze_image(self,
-                            model_id: str,
-                            image_data: bytes,
-                            query: str,
-                            **kwargs) -> UnifiedImageAnalysisResponse:
-        pass
-
-    async def generate_image(self,
-                             model_id: str,
-                             prompt: str,
-                             **kwargs) -> UnifiedImageGenerationResponse:
-        pass
-
-    async def generate_video(self,
-                             model_id: str,
-                             prompt: str,
-                             **kwargs) -> UnifiedVideoGenerationResponse:
-        pass
-
-    async def generate_audio(self,
-                             model_id: str,
-                             text: str,
-                             voice_id: Optional[str] = None,
-                             **kwargs) -> UnifiedAudioGenerationResponse:
-        pass
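The methods above were unimplemented stubs in 0.1.0, so the following is only the call pattern implied by their signatures, not working behavior; the model IDs are placeholders and the import path mirrors the module's former location.

# Illustrative call-pattern sketch only (the 0.1.0 methods returned nothing).
import asyncio

from isa_model.inference.client_sdk.client import UniversalInferenceClient

async def main() -> None:
    client = UniversalInferenceClient()
    # Task-oriented calls: the caller names a model_id and a task, not an engine
    reply = await client.chat(
        model_id="example-chat-model",  # placeholder id
        messages=[{"role": "user", "content": "Summarize this release."}],
        temperature=0.2,
        max_tokens=256,
    )
    vectors = await client.embed(
        model_id="example-embedding-model",  # placeholder id
        inputs=["first document", "second document"],
    )
    print(reply, vectors)

asyncio.run(main())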
isa_model/inference/client_sdk/client_data_std.py
@@ -1,34 +0,0 @@
-# Inside UniversalInferenceClient.chat method
-processed_messages = []
-for user_msg in messages:  # messages is what the end-user provided to the SDK
-    if isinstance(user_msg, dict) and "role" in user_msg and "content" in user_msg:
-        processed_messages.append(user_msg)
-    elif isinstance(user_msg, BaseMessage):  # If user passes LangChain messages directly
-        # Serialize BaseMessage to our standard dict format
-        msg_dict = {"role": "", "content": user_msg.content}
-        if isinstance(user_msg, HumanMessage):
-            msg_dict["role"] = "user"
-        elif isinstance(user_msg, AIMessage):
-            msg_dict["role"] = "assistant"
-            if hasattr(user_msg, "tool_calls") and user_msg.tool_calls:
-                # Serialize tool_calls to a list of dicts
-                msg_dict["tool_calls"] = [
-                    {"id": tc.get("id"), "type": "function", "function": {"name": tc.get("name"), "arguments": json.dumps(tc.get("args", {}))}}
-                    for tc in user_msg.tool_calls  # Assuming user_msg.tool_calls are already dicts or serializable
-                ]
-        elif isinstance(user_msg, SystemMessage):
-            msg_dict["role"] = "system"
-        elif isinstance(user_msg, ToolMessage):
-            msg_dict["role"] = "tool"
-            msg_dict["tool_call_id"] = user_msg.tool_call_id
-        else:
-            # Handle other BaseMessage types or raise error
-            pass
-        processed_messages.append(msg_dict)
-        # ... (add more flexible input handling if needed, e.g., a list of tuples)
-    else:
-        raise ValueError("Unsupported message format in chat method input.")
-
-# task_specific_payload for orchestrator would be:
-# task_payload = {"messages": processed_messages}
-# Then call self._invoke_orchestrator(model_id, task_payload, ...)
isa_model/inference/client_sdk/client_sdk_schema.py
@@ -1,16 +0,0 @@
-
-"""
-Define here (using Pydantic or Protobuf) a standard ChatMessageSchema with fields such as
-role: str, content: Optional[str], tool_calls: Optional[List[ToolCallSchema]], and
-tool_call_id: Optional[str]. In UnifiedRestInvokeRequest (or the gRPC UnifiedRequest), the
-task_specific_payload field for a chat task holds a "messages" key whose value is a
-List[ChatMessageSchema].
-"""
-
-from pydantic import BaseModel
-
-class UnifiedChatResponse(BaseModel):
-    pass
-
-class UnifiedTextResponse(BaseModel):
-    pass
File without changes