isa-model 0.0.2__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff compares the contents of two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- isa_model/__init__.py +1 -1
- isa_model/core/model_manager.py +69 -4
- isa_model/core/model_registry.py +273 -46
- isa_model/core/storage/hf_storage.py +419 -0
- isa_model/deployment/__init__.py +52 -0
- isa_model/deployment/core/__init__.py +34 -0
- isa_model/deployment/core/deployment_config.py +356 -0
- isa_model/deployment/core/deployment_manager.py +549 -0
- isa_model/deployment/core/isa_deployment_service.py +401 -0
- isa_model/eval/factory.py +381 -140
- isa_model/inference/ai_factory.py +427 -236
- isa_model/inference/billing_tracker.py +406 -0
- isa_model/inference/providers/base_provider.py +51 -4
- isa_model/inference/providers/ml_provider.py +50 -0
- isa_model/inference/providers/ollama_provider.py +37 -18
- isa_model/inference/providers/openai_provider.py +65 -36
- isa_model/inference/providers/replicate_provider.py +42 -30
- isa_model/inference/services/audio/base_stt_service.py +21 -2
- isa_model/inference/services/audio/openai_realtime_service.py +353 -0
- isa_model/inference/services/audio/openai_stt_service.py +252 -0
- isa_model/inference/services/audio/openai_tts_service.py +149 -9
- isa_model/inference/services/audio/replicate_tts_service.py +239 -0
- isa_model/inference/services/base_service.py +36 -1
- isa_model/inference/services/embedding/base_embed_service.py +112 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +28 -2
- isa_model/inference/services/embedding/openai_embed_service.py +223 -0
- isa_model/inference/services/llm/__init__.py +2 -0
- isa_model/inference/services/llm/base_llm_service.py +158 -86
- isa_model/inference/services/llm/llm_adapter.py +414 -0
- isa_model/inference/services/llm/ollama_llm_service.py +252 -63
- isa_model/inference/services/llm/openai_llm_service.py +231 -93
- isa_model/inference/services/llm/triton_llm_service.py +481 -0
- isa_model/inference/services/ml/base_ml_service.py +78 -0
- isa_model/inference/services/ml/sklearn_ml_service.py +140 -0
- isa_model/inference/services/vision/__init__.py +3 -3
- isa_model/inference/services/vision/base_image_gen_service.py +161 -0
- isa_model/inference/services/vision/base_vision_service.py +177 -0
- isa_model/inference/services/vision/helpers/image_utils.py +4 -3
- isa_model/inference/services/vision/ollama_vision_service.py +151 -17
- isa_model/inference/services/vision/openai_vision_service.py +275 -41
- isa_model/inference/services/vision/replicate_image_gen_service.py +278 -118
- isa_model/training/__init__.py +62 -32
- isa_model/training/cloud/__init__.py +22 -0
- isa_model/training/cloud/job_orchestrator.py +402 -0
- isa_model/training/cloud/runpod_trainer.py +454 -0
- isa_model/training/cloud/storage_manager.py +482 -0
- isa_model/training/core/__init__.py +23 -0
- isa_model/training/core/config.py +181 -0
- isa_model/training/core/dataset.py +222 -0
- isa_model/training/core/trainer.py +720 -0
- isa_model/training/core/utils.py +213 -0
- isa_model/training/factory.py +229 -198
- isa_model-0.3.1.dist-info/METADATA +465 -0
- isa_model-0.3.1.dist-info/RECORD +91 -0
- isa_model/core/model_router.py +0 -226
- isa_model/core/model_version.py +0 -0
- isa_model/core/resource_manager.py +0 -202
- isa_model/deployment/gpu_fp16_ds8/models/deepseek_r1/1/model.py +0 -120
- isa_model/deployment/gpu_fp16_ds8/scripts/download_model.py +0 -18
- isa_model/training/engine/llama_factory/__init__.py +0 -39
- isa_model/training/engine/llama_factory/config.py +0 -115
- isa_model/training/engine/llama_factory/data_adapter.py +0 -284
- isa_model/training/engine/llama_factory/examples/__init__.py +0 -6
- isa_model/training/engine/llama_factory/examples/finetune_with_tracking.py +0 -185
- isa_model/training/engine/llama_factory/examples/rlhf_with_tracking.py +0 -163
- isa_model/training/engine/llama_factory/factory.py +0 -331
- isa_model/training/engine/llama_factory/rl.py +0 -254
- isa_model/training/engine/llama_factory/trainer.py +0 -171
- isa_model/training/image_model/configs/create_config.py +0 -37
- isa_model/training/image_model/configs/create_flux_config.py +0 -26
- isa_model/training/image_model/configs/create_lora_config.py +0 -21
- isa_model/training/image_model/prepare_massed_compute.py +0 -97
- isa_model/training/image_model/prepare_upload.py +0 -17
- isa_model/training/image_model/raw_data/create_captions.py +0 -16
- isa_model/training/image_model/raw_data/create_lora_captions.py +0 -20
- isa_model/training/image_model/raw_data/pre_processing.py +0 -200
- isa_model/training/image_model/train/train.py +0 -42
- isa_model/training/image_model/train/train_flux.py +0 -41
- isa_model/training/image_model/train/train_lora.py +0 -57
- isa_model/training/image_model/train_main.py +0 -25
- isa_model-0.0.2.dist-info/METADATA +0 -327
- isa_model-0.0.2.dist-info/RECORD +0 -92
- isa_model-0.0.2.dist-info/licenses/LICENSE +0 -21
- /isa_model/training/{llm_model/annotation → annotation}/annotation_schema.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/processors/annotation_processor.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_manager.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_schema.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/tests/test_annotation_flow.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio copy.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio_upload.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/views/annotation_controller.py +0 -0
- {isa_model-0.0.2.dist-info → isa_model-0.3.1.dist-info}/WHEEL +0 -0
- {isa_model-0.0.2.dist-info → isa_model-0.3.1.dist-info}/top_level.txt +0 -0
```diff
--- a/isa_model/inference/services/llm/openai_llm_service.py
+++ b/isa_model/inference/services/llm/openai_llm_service.py
@@ -1,138 +1,276 @@
 import logging
 import os
-
+import json
+from typing import Dict, Any, List, Union, AsyncGenerator, Optional, Callable
 
-# Use the official OpenAI
+# Use the official OpenAI library
 from openai import AsyncOpenAI
-from dotenv import load_dotenv
 
-from isa_model.inference.services.
+from isa_model.inference.services.llm.base_llm_service import BaseLLMService
 from isa_model.inference.providers.base_provider import BaseProvider
-
-# Load environment variables from the .env.local file
-load_dotenv(dotenv_path='.env.local')
+from isa_model.inference.billing_tracker import ServiceType
 
 logger = logging.getLogger(__name__)
 
 class OpenAILLMService(BaseLLMService):
-    """OpenAI LLM service implementation"""
+    """OpenAI LLM service implementation with unified invoke interface"""
 
-    def __init__(self, provider: 'BaseProvider', model_name: str = "gpt-
+    def __init__(self, provider: 'BaseProvider', model_name: str = "gpt-4.1-nano"):
         super().__init__(provider, model_name)
 
-        #
+        # Get full configuration from provider (including sensitive data)
+        provider_config = provider.get_full_config()
+
+        # Initialize AsyncOpenAI client with provider configuration
         try:
-
-
+            if not provider_config.get("api_key"):
+                raise ValueError("OpenAI API key not found in provider configuration")
 
             self.client = AsyncOpenAI(
-                api_key=api_key,
-                base_url=base_url
+                api_key=provider_config["api_key"],
+                base_url=provider_config.get("base_url", "https://api.openai.com/v1"),
+                organization=provider_config.get("organization")
             )
-
-            logger.
-
+
+            logger.info(f"Initialized OpenAILLMService with model {self.model_name} and endpoint {self.client.base_url}")
+
+        except Exception as e:
+            logger.error(f"Failed to initialize OpenAI client: {e}")
+            raise ValueError(f"Failed to initialize OpenAI client. Check your API key configuration: {e}") from e
 
         self.last_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
-
+        self.total_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0, "requests_count": 0}
+
+        # Tool binding attributes
+        self._bound_tools: List[Dict[str, Any]] = []
+        self._tool_binding_kwargs: Dict[str, Any] = {}
+        self._tool_functions: Dict[str, Callable] = {}
 
-
-    """
-
-
-
-
-
-
+    def _create_bound_copy(self) -> 'OpenAILLMService':
+        """Create a copy of this service for tool binding"""
+        bound_service = OpenAILLMService(self.provider, self.model_name)
+        bound_service._bound_tools = self._bound_tools.copy()
+        bound_service._tool_binding_kwargs = self._tool_binding_kwargs.copy()
+        bound_service._tool_functions = self._tool_functions.copy()
+        return bound_service
+
+    def bind_tools(self, tools: List[Any], **kwargs) -> 'OpenAILLMService':
+        """
+        Bind tools to this LLM service for function calling
+
+        Args:
+            tools: List of tools (functions, dicts, or LangChain tools)
+            **kwargs: Additional arguments for tool binding
+
+        Returns:
+            New LLM service instance with tools bound
+        """
+        # Create a copy of this service
+        bound_service = self._create_bound_copy()
+
+        # Use the adapter manager to handle tools
+        bound_service._bound_tools = tools
+
+        return bound_service
 
-    async def
-    """
+    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any]) -> Union[str, Any]:
+        """Unified invoke method for all input types"""
         try:
-
-
+            # Use adapter manager to prepare messages
+            messages = self._prepare_messages(input_data)
 
-
-
-
-
-
-
+            # Prepare request kwargs
+            kwargs = {
+                "model": self.model_name,
+                "messages": messages,
+                "temperature": self.config.get("temperature", 0.7),
+                "max_tokens": self.config.get("max_tokens", 1024)
+            }
 
-            if
-
-
-
-
-            }
+            # Add tools if bound using adapter manager
+            tool_schemas = await self._prepare_tools_for_request()
+            if tool_schemas:
+                kwargs["tools"] = tool_schemas
+                kwargs["tool_choice"] = "auto"
 
-
+            # Handle streaming vs non-streaming
+            if self.streaming:
+                # Streaming mode - collect all chunks
+                content_chunks = []
+                async for chunk in await self._stream_response(kwargs):
+                    content_chunks.append(chunk)
+                content = "".join(content_chunks)
+
+                # Create a mock usage object for tracking
+                class MockUsage:
+                    def __init__(self):
+                        self.prompt_tokens = len(str(messages)) // 4  # Rough estimate
+                        self.completion_tokens = len(content) // 4  # Rough estimate
+                        self.total_tokens = self.prompt_tokens + self.completion_tokens
+
+                usage = MockUsage()
+                self._update_token_usage(usage)
+                self._track_billing(usage)
+
+                return self._format_response(content, input_data)
+            else:
+                # Non-streaming mode
+                response = await self.client.chat.completions.create(**kwargs)
+                message = response.choices[0].message
+
+                # Update usage tracking
+                if response.usage:
+                    self._update_token_usage(response.usage)
+                    self._track_billing(response.usage)
+
+                # Handle tool calls if present
+                if message.tool_calls:
+                    final_content = await self._handle_tool_calls(message, messages)
+                    return self._format_response(final_content, input_data)
+
+                # Return appropriate format based on input type
+                return self._format_response(message.content or "", input_data)
 
         except Exception as e:
-            logger.error(f"Error in
+            logger.error(f"Error in ainvoke: {e}")
             raise
 
-
-    """
-
-        return await self.achat(messages)
+    def _prepare_messages(self, input_data: Union[str, List[Dict[str, str]], Any]) -> List[Dict[str, str]]:
+        """Convert the message format via the adapter manager (overrides the base-class method for compatibility)"""
+        return self.adapter_manager.convert_messages(input_data)
 
-
-    """
-
-            temperature = self.config.get("temperature", 0.7)
-            max_tokens = self.config.get("max_tokens", 1024)
-
-            response = await self.client.chat.completions.create(
-                model=self.model_name,
-                messages=messages,
-                temperature=temperature,
-                max_tokens=max_tokens,
-                n=n
-            )
-
-            if response.usage:
-                self.last_token_usage = {
-                    "prompt_tokens": response.usage.prompt_tokens,
-                    "completion_tokens": response.usage.completion_tokens,
-                    "total_tokens": response.usage.total_tokens
-                }
-
-            return [choice.message.content or "" for choice in response.choices]
-        except Exception as e:
-            logger.error(f"Error in generate: {e}")
-            raise
+    def _format_response(self, content: str, original_input: Any) -> Union[str, Any]:
+        """Format the response via the adapter manager (overrides the base-class method for compatibility)"""
+        return self.adapter_manager.format_response(content, original_input)
 
-    async def
-    """
+    async def _stream_response(self, kwargs: Dict[str, Any]) -> AsyncGenerator[str, None]:
+        """Handle streaming responses"""
+        kwargs["stream"] = True
+
+        async def stream_generator():
+            try:
+                stream = await self.client.chat.completions.create(**kwargs)
+                async for chunk in stream:
+                    content = chunk.choices[0].delta.content
+                    if content:
+                        yield content
+            except Exception as e:
+                logger.error(f"Error in streaming: {e}")
+                raise
+
+        return stream_generator()
+
+    async def _handle_tool_calls(self, assistant_message, original_messages: List[Dict[str, str]]) -> str:
+        """Handle tool calls from the assistant using adapter manager"""
+        # Add assistant message with tool calls to conversation
+        messages = original_messages + [{
+            "role": "assistant",
+            "content": assistant_message.content or "",
+            "tool_calls": [
+                {
+                    "id": tc.id,
+                    "type": tc.type,
+                    "function": {
+                        "name": tc.function.name,
+                        "arguments": tc.function.arguments
+                    }
+                } for tc in assistant_message.tool_calls
+            ]
+        }]
+
+        # Execute each tool call using adapter manager
+        for tool_call in assistant_message.tool_calls:
+            function_name = tool_call.function.name
+            arguments = json.loads(tool_call.function.arguments)
+
+            try:
+                # Use adapter manager to execute tool
+                result = await self._execute_tool_call(function_name, arguments)
+
+                # Add tool result to messages
+                messages.append({
+                    "role": "tool",
+                    "content": str(result),
+                    "tool_call_id": tool_call.id
+                })
+
+            except Exception as e:
+                logger.error(f"Error executing tool {function_name}: {e}")
+                messages.append({
+                    "role": "tool",
+                    "content": f"Error executing {function_name}: {str(e)}",
+                    "tool_call_id": tool_call.id
+                })
+
+        # Get final response from the model
         try:
-
-
+            kwargs = {
+                "model": self.model_name,
+                "messages": messages,
+                "temperature": self.config.get("temperature", 0.7),
+                "max_tokens": self.config.get("max_tokens", 1024)
+            }
 
-
-
-                messages=messages,
-                temperature=temperature,
-                max_tokens=max_tokens,
-                stream=True
-            )
+            response = await self.client.chat.completions.create(**kwargs)
+            return response.choices[0].message.content or ""
 
-            async for chunk in stream:
-                content = chunk.choices[0].delta.content
-                if content:
-                    yield content
-
         except Exception as e:
-            logger.error(f"Error
+            logger.error(f"Error getting final response after tool calls: {e}")
             raise
 
-    def
+    def _update_token_usage(self, usage):
+        """Update token usage statistics"""
+        self.last_token_usage = {
+            "prompt_tokens": usage.prompt_tokens,
+            "completion_tokens": usage.completion_tokens,
+            "total_tokens": usage.total_tokens
+        }
+
+        # Update total usage
+        self.total_token_usage["prompt_tokens"] += self.last_token_usage["prompt_tokens"]
+        self.total_token_usage["completion_tokens"] += self.last_token_usage["completion_tokens"]
+        self.total_token_usage["total_tokens"] += self.last_token_usage["total_tokens"]
+        self.total_token_usage["requests_count"] += 1
+
+    def _track_billing(self, usage):
+        """Track billing information"""
+        self._track_usage(
+            service_type=ServiceType.LLM,
+            operation="chat",
+            input_tokens=usage.prompt_tokens,
+            output_tokens=usage.completion_tokens,
+            metadata={
+                "temperature": self.config.get("temperature", 0.7),
+                "max_tokens": self.config.get("max_tokens", 1024)
+            }
+        )
+
+    def get_token_usage(self) -> Dict[str, Any]:
         """Get total token usage statistics"""
-        return self.
+        return self.total_token_usage
 
     def get_last_token_usage(self) -> Dict[str, int]:
         """Get token usage from last request"""
         return self.last_token_usage
+
+    def get_model_info(self) -> Dict[str, Any]:
+        """Get information about the current model"""
+        return {
+            "name": self.model_name,
+            "max_tokens": self.config.get("max_tokens", 1024),
+            "supports_streaming": True,
+            "supports_functions": True,
+            "provider": "openai"
+        }
+
+    def _has_bound_tools(self) -> bool:
+        """Check if this service has bound tools"""
+        return bool(self._bound_tools)
+
+    def _get_bound_tools(self) -> List[Dict[str, Any]]:
+        """Get the bound tools schema"""
+        return self._bound_tools
 
     async def close(self):
         """Close the backend client"""
-        await self.client.
+        await self.client.close()
```