isa-model 0.2.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. isa_model/__init__.py +1 -1
  2. isa_model/core/model_manager.py +69 -4
  3. isa_model/core/storage/hf_storage.py +419 -0
  4. isa_model/deployment/__init__.py +52 -0
  5. isa_model/deployment/core/__init__.py +34 -0
  6. isa_model/deployment/core/deployment_config.py +356 -0
  7. isa_model/deployment/core/deployment_manager.py +549 -0
  8. isa_model/deployment/core/isa_deployment_service.py +401 -0
  9. isa_model/eval/factory.py +381 -140
  10. isa_model/inference/ai_factory.py +427 -236
  11. isa_model/inference/billing_tracker.py +406 -0
  12. isa_model/inference/providers/base_provider.py +51 -4
  13. isa_model/inference/providers/ml_provider.py +50 -0
  14. isa_model/inference/providers/ollama_provider.py +37 -18
  15. isa_model/inference/providers/openai_provider.py +65 -36
  16. isa_model/inference/providers/replicate_provider.py +42 -30
  17. isa_model/inference/services/audio/base_stt_service.py +21 -2
  18. isa_model/inference/services/audio/openai_realtime_service.py +353 -0
  19. isa_model/inference/services/audio/openai_stt_service.py +252 -0
  20. isa_model/inference/services/audio/openai_tts_service.py +149 -9
  21. isa_model/inference/services/audio/replicate_tts_service.py +239 -0
  22. isa_model/inference/services/base_service.py +36 -1
  23. isa_model/inference/services/embedding/base_embed_service.py +112 -0
  24. isa_model/inference/services/embedding/ollama_embed_service.py +28 -2
  25. isa_model/inference/services/embedding/openai_embed_service.py +223 -0
  26. isa_model/inference/services/llm/__init__.py +2 -0
  27. isa_model/inference/services/llm/base_llm_service.py +158 -86
  28. isa_model/inference/services/llm/llm_adapter.py +414 -0
  29. isa_model/inference/services/llm/ollama_llm_service.py +252 -63
  30. isa_model/inference/services/llm/openai_llm_service.py +231 -93
  31. isa_model/inference/services/llm/triton_llm_service.py +481 -0
  32. isa_model/inference/services/ml/base_ml_service.py +78 -0
  33. isa_model/inference/services/ml/sklearn_ml_service.py +140 -0
  34. isa_model/inference/services/vision/__init__.py +3 -3
  35. isa_model/inference/services/vision/base_image_gen_service.py +161 -0
  36. isa_model/inference/services/vision/base_vision_service.py +177 -0
  37. isa_model/inference/services/vision/helpers/image_utils.py +4 -3
  38. isa_model/inference/services/vision/ollama_vision_service.py +151 -17
  39. isa_model/inference/services/vision/openai_vision_service.py +275 -41
  40. isa_model/inference/services/vision/replicate_image_gen_service.py +278 -118
  41. isa_model/training/__init__.py +62 -32
  42. isa_model/training/cloud/__init__.py +22 -0
  43. isa_model/training/cloud/job_orchestrator.py +402 -0
  44. isa_model/training/cloud/runpod_trainer.py +454 -0
  45. isa_model/training/cloud/storage_manager.py +482 -0
  46. isa_model/training/core/__init__.py +23 -0
  47. isa_model/training/core/config.py +181 -0
  48. isa_model/training/core/dataset.py +222 -0
  49. isa_model/training/core/trainer.py +720 -0
  50. isa_model/training/core/utils.py +213 -0
  51. isa_model/training/factory.py +229 -198
  52. isa_model-0.3.1.dist-info/METADATA +465 -0
  53. isa_model-0.3.1.dist-info/RECORD +91 -0
  54. isa_model/core/model_router.py +0 -226
  55. isa_model/core/model_version.py +0 -0
  56. isa_model/core/resource_manager.py +0 -202
  57. isa_model/deployment/gpu_fp16_ds8/models/deepseek_r1/1/model.py +0 -120
  58. isa_model/deployment/gpu_fp16_ds8/scripts/download_model.py +0 -18
  59. isa_model/training/engine/llama_factory/__init__.py +0 -39
  60. isa_model/training/engine/llama_factory/config.py +0 -115
  61. isa_model/training/engine/llama_factory/data_adapter.py +0 -284
  62. isa_model/training/engine/llama_factory/examples/__init__.py +0 -6
  63. isa_model/training/engine/llama_factory/examples/finetune_with_tracking.py +0 -185
  64. isa_model/training/engine/llama_factory/examples/rlhf_with_tracking.py +0 -163
  65. isa_model/training/engine/llama_factory/factory.py +0 -331
  66. isa_model/training/engine/llama_factory/rl.py +0 -254
  67. isa_model/training/engine/llama_factory/trainer.py +0 -171
  68. isa_model/training/image_model/configs/create_config.py +0 -37
  69. isa_model/training/image_model/configs/create_flux_config.py +0 -26
  70. isa_model/training/image_model/configs/create_lora_config.py +0 -21
  71. isa_model/training/image_model/prepare_massed_compute.py +0 -97
  72. isa_model/training/image_model/prepare_upload.py +0 -17
  73. isa_model/training/image_model/raw_data/create_captions.py +0 -16
  74. isa_model/training/image_model/raw_data/create_lora_captions.py +0 -20
  75. isa_model/training/image_model/raw_data/pre_processing.py +0 -200
  76. isa_model/training/image_model/train/train.py +0 -42
  77. isa_model/training/image_model/train/train_flux.py +0 -41
  78. isa_model/training/image_model/train/train_lora.py +0 -57
  79. isa_model/training/image_model/train_main.py +0 -25
  80. isa_model-0.2.0.dist-info/METADATA +0 -327
  81. isa_model-0.2.0.dist-info/RECORD +0 -92
  82. isa_model-0.2.0.dist-info/licenses/LICENSE +0 -21
  83. /isa_model/training/{llm_model/annotation → annotation}/annotation_schema.py +0 -0
  84. /isa_model/training/{llm_model/annotation → annotation}/processors/annotation_processor.py +0 -0
  85. /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_manager.py +0 -0
  86. /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_schema.py +0 -0
  87. /isa_model/training/{llm_model/annotation → annotation}/tests/test_annotation_flow.py +0 -0
  88. /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio copy.py +0 -0
  89. /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio_upload.py +0 -0
  90. /isa_model/training/{llm_model/annotation → annotation}/views/annotation_controller.py +0 -0
  91. {isa_model-0.2.0.dist-info → isa_model-0.3.1.dist-info}/WHEEL +0 -0
  92. {isa_model-0.2.0.dist-info → isa_model-0.3.1.dist-info}/top_level.txt +0 -0
isa_model/inference/services/llm/ollama_llm_service.py
@@ -1,99 +1,288 @@
  import logging
- from typing import Dict, Any, List, Union, AsyncGenerator, Optional
- from isa_model.inference.services.base_service import BaseLLMService
+ import httpx
+ import json
+ from typing import Dict, Any, List, Union, AsyncGenerator, Optional, Callable
+ from isa_model.inference.services.llm.base_llm_service import BaseLLMService
  from isa_model.inference.providers.base_provider import BaseProvider

  logger = logging.getLogger(__name__)

  class OllamaLLMService(BaseLLMService):
-     """Ollama LLM service using backend client"""
+     """Ollama LLM service with unified invoke interface"""

-     def __init__(self, provider: 'BaseProvider', model_name: str = "llama3.1"):
+     def __init__(self, provider: 'BaseProvider', model_name: str = "llama3.2:3b-instruct-fp16"):
          super().__init__(provider, model_name)
+
+         # Create HTTP client for Ollama API
+         base_url = self.config.get("base_url", "http://localhost:11434")
+         timeout = self.config.get("timeout", 60)
+
+         self.client = httpx.AsyncClient(
+             base_url=base_url,
+             timeout=timeout
+         )

          self.last_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
-         logger.info(f"Initialized OllamaLLMService with model {model_name}")
+         self.total_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0, "requests_count": 0}
+
+         # Tool binding attributes
+         self._bound_tools: List[Any] = []
+         self._tool_binding_kwargs: Dict[str, Any] = {}
+         self._tool_functions: Dict[str, Callable] = {}
+
+         logger.info(f"Initialized OllamaLLMService with model {model_name} at {base_url}")

-     async def ainvoke(self, prompt: Union[str, List[Dict[str, str]], Any]):
-         """Universal invocation method"""
-         if isinstance(prompt, str):
-             return await self.acompletion(prompt)
-         elif isinstance(prompt, list):
-             return await self.achat(prompt)
-         else:
-             raise ValueError("Prompt must be string or list of messages")
+     def _ensure_client(self):
+         """Ensure the HTTP client is available and not closed"""
+         if not hasattr(self, 'client') or not self.client or self.client.is_closed:
+             base_url = self.config.get("base_url", "http://localhost:11434")
+             timeout = self.config.get("timeout", 60)
+             self.client = httpx.AsyncClient(base_url=base_url, timeout=timeout)
+
+     def _create_bound_copy(self) -> 'OllamaLLMService':
+         """Create a copy of this service for tool binding"""
+         bound_service = OllamaLLMService(self.provider, self.model_name)
+         bound_service._bound_tools = self._bound_tools.copy()
+         bound_service._tool_binding_kwargs = self._tool_binding_kwargs.copy()
+         bound_service._tool_functions = self._tool_functions.copy()
+         return bound_service

-     async def achat(self, messages: List[Dict[str, str]]):
-         """Chat completion method"""
+     def bind_tools(self, tools: List[Union[Dict[str, Any], Callable]], **kwargs) -> 'OllamaLLMService':
+         """Bind tools to this LLM service for function calling"""
+         bound_service = self._create_bound_copy()
+         # Use the base class's adapter manager method
+         bound_service._bound_tools = tools
+         bound_service._tool_binding_kwargs = kwargs
+
+         return bound_service
+
+     async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any]) -> Union[str, Any]:
+         """
+         Universal async invocation method that handles different input types
+
+         Args:
+             input_data: Can be:
+                 - str: Simple text prompt
+                 - list: Message history like [{"role": "user", "content": "hello"}]
+                 - Any: LangChain message objects or other formats
+
+         Returns:
+             Model response (string for simple cases, object for complex cases)
+         """
          try:
+             # Ensure client is available
+             self._ensure_client()
+
+             # Convert input to messages format
+             messages = self._prepare_messages(input_data)
+
+             # Prepare request parameters
              payload = {
                  "model": self.model_name,
                  "messages": messages,
-                 "stream": False
+                 "stream": self.streaming,
+                 "options": {
+                     "temperature": self.config.get("temperature", 0.7),
+                     "top_p": self.config.get("top_p", 0.9),
+                     "num_predict": self.config.get("max_tokens", 2048)
+                 }
              }
-             response = await self.backend.post("/api/chat", payload)

-             # Update token usage if available
-             if "eval_count" in response:
-                 self.last_token_usage = {
-                     "prompt_tokens": response.get("prompt_eval_count", 0),
-                     "completion_tokens": response.get("eval_count", 0),
-                     "total_tokens": response.get("prompt_eval_count", 0) + response.get("eval_count", 0)
-                 }
+             # Add tools if bound using adapter manager
+             tool_schemas = await self._prepare_tools_for_request()
+             if tool_schemas:
+                 payload["tools"] = tool_schemas

-             return response["message"]["content"]
+             # Handle streaming
+             if self.streaming:
+                 return self._stream_response(payload)

-         except Exception as e:
-             logger.error(f"Error in chat completion: {e}")
-             raise
-
-     async def acompletion(self, prompt: str):
-         """Text completion method"""
-         try:
-             payload = {
-                 "model": self.model_name,
-                 "prompt": prompt,
-                 "stream": False
-             }
-             response = await self.backend.post("/api/generate", payload)
+             # Regular request
+             response = await self.client.post("/api/chat", json=payload)
+             response.raise_for_status()
+             result = response.json()

              # Update token usage if available
-             if "eval_count" in response:
-                 self.last_token_usage = {
-                     "prompt_tokens": response.get("prompt_eval_count", 0),
-                     "completion_tokens": response.get("eval_count", 0),
-                     "total_tokens": response.get("prompt_eval_count", 0) + response.get("eval_count", 0)
-                 }
+             if "eval_count" in result:
+                 self._update_token_usage(result)
+
+             # Handle tool calls if present
+             message = result["message"]
+             if "tool_calls" in message and message["tool_calls"]:
+                 return await self._handle_tool_calls(message, messages)

-             return response["response"]
+             # Return appropriate format based on input type
+             return self._format_response(message["content"], input_data)

+         except httpx.RequestError as e:
+             logger.error(f"HTTP request error in ainvoke: {e}")
+             raise
          except Exception as e:
-             logger.error(f"Error in text completion: {e}")
+             logger.error(f"Error in ainvoke: {e}")
              raise

-     async def agenerate(self, messages: List[Dict[str, str]], n: int = 1) -> List[str]:
-         """Generate multiple completions"""
-         results = []
-         for _ in range(n):
-             result = await self.achat(messages)
-             results.append(result)
-         return results
+     def _prepare_messages(self, input_data: Union[str, List[Dict[str, str]], Any]) -> List[Dict[str, str]]:
+         """Convert various input formats to Ollama messages format (same as OpenAI)"""
+         if isinstance(input_data, str):
+             # Simple string prompt
+             return [{"role": "user", "content": input_data}]
+
+         elif isinstance(input_data, list):
+             if not input_data:
+                 raise ValueError("Empty message list provided")
+
+             # Check if it's LangChain messages or standard messages
+             first_msg = input_data[0]
+             if hasattr(first_msg, 'content') and hasattr(first_msg, 'type'):
+                 # LangChain message objects - use base class method
+                 return self._convert_langchain_to_openai(input_data)
+             elif isinstance(first_msg, dict):
+                 # Standard message dictionaries
+                 return input_data
+             else:
+                 # List of strings or other formats
+                 messages = []
+                 for i, msg in enumerate(input_data):
+                     if isinstance(msg, str):
+                         role = "user" if i % 2 == 0 else "assistant"
+                         messages.append({"role": role, "content": msg})
+                     elif isinstance(msg, dict):
+                         messages.append(msg)
+                     else:
+                         messages.append({"role": "user", "content": str(msg)})
+                 return messages
+
+         else:
+             # Handle single LangChain message objects or other objects
+             if hasattr(input_data, 'content') and hasattr(input_data, 'type'):
+                 return self._convert_langchain_to_openai([input_data])
+             else:
+                 return [{"role": "user", "content": str(input_data)}]

-     async def astream_chat(self, messages: List[Dict[str, str]]) -> AsyncGenerator[str, None]:
-         """Stream chat responses"""
-         # Note: This would require modifying the backend to support streaming
-         # For now, return the full response
-         response = await self.achat(messages)
-         yield response
+     def _format_response(self, content: str, original_input: Any) -> Union[str, Any]:
+         """Format response based on original input type"""
+         # For LangGraph compatibility, return AIMessage object if needed
+         if hasattr(original_input, 'type') or (isinstance(original_input, list) and
+                                                original_input and hasattr(original_input[0], 'type')):
+             try:
+                 from langchain_core.messages import AIMessage
+                 return AIMessage(content=content)
+             except ImportError:
+                 # Fallback to simple object
+                 class SimpleAIMessage:
+                     def __init__(self, content):
+                         self.content = content
+                         self.type = "ai"
+                 return SimpleAIMessage(content)
+
+         # Default to string
+         return content

-     def get_token_usage(self):
+     async def _stream_response(self, payload: Dict[str, Any]) -> AsyncGenerator[str, None]:
+         """Handle streaming responses"""
+         async def stream_generator():
+             try:
+                 async with self.client.stream("POST", "/api/chat", json=payload) as response:
+                     response.raise_for_status()
+                     async for line in response.aiter_lines():
+                         if line.strip():
+                             try:
+                                 chunk = json.loads(line)
+                                 if "message" in chunk and "content" in chunk["message"]:
+                                     content = chunk["message"]["content"]
+                                     if content:
+                                         yield content
+                             except json.JSONDecodeError:
+                                 continue
+             except Exception as e:
+                 logger.error(f"Error in streaming: {e}")
+                 raise
+
+         return stream_generator()
+
+     async def _handle_tool_calls(self, assistant_message: Dict[str, Any], original_messages: List[Dict[str, str]]) -> str:
+         """Handle tool calls from the assistant using adapter manager"""
+         tool_calls = assistant_message.get("tool_calls", [])
+
+         # Add assistant message with tool calls to conversation
+         messages = original_messages + [assistant_message]
+
+         # Execute each tool call using adapter manager
+         for tool_call in tool_calls:
+             function_name = tool_call["function"]["name"]
+
+             try:
+                 # Parse arguments if they're a string
+                 arguments = tool_call["function"]["arguments"]
+                 if isinstance(arguments, str):
+                     arguments = json.loads(arguments)
+
+                 # Use adapter manager to execute tool
+                 result = await self._execute_tool_call(function_name, arguments)
+
+                 # Add tool result to messages
+                 messages.append({
+                     "role": "tool",
+                     "content": str(result),
+                     "tool_call_id": tool_call.get("id", function_name)
+                 })
+
+             except Exception as e:
+                 logger.error(f"Error executing tool {function_name}: {e}")
+                 messages.append({
+                     "role": "tool",
+                     "content": f"Error executing {function_name}: {str(e)}",
+                     "tool_call_id": tool_call.get("id", function_name)
+                 })
+
+         # Get final response from the model
+         return await self.ainvoke(messages)
+
+     def _update_token_usage(self, result: Dict[str, Any]):
+         """Update token usage statistics"""
+         self.last_token_usage = {
+             "prompt_tokens": result.get("prompt_eval_count", 0),
+             "completion_tokens": result.get("eval_count", 0),
+             "total_tokens": result.get("prompt_eval_count", 0) + result.get("eval_count", 0)
+         }
+
+         # Update total usage
+         self.total_token_usage["prompt_tokens"] += self.last_token_usage["prompt_tokens"]
+         self.total_token_usage["completion_tokens"] += self.last_token_usage["completion_tokens"]
+         self.total_token_usage["total_tokens"] += self.last_token_usage["total_tokens"]
+         self.total_token_usage["requests_count"] += 1
+
+     def get_token_usage(self) -> Dict[str, Any]:
          """Get total token usage statistics"""
-         return self.last_token_usage
+         return self.total_token_usage

      def get_last_token_usage(self) -> Dict[str, int]:
          """Get token usage from last request"""
          return self.last_token_usage
+
+     def get_model_info(self) -> Dict[str, Any]:
+         """Get information about the current model"""
+         return {
+             "name": self.model_name,
+             "max_tokens": self.config.get("max_tokens", 2048),
+             "supports_streaming": True,
+             "supports_functions": True,
+             "provider": "ollama"
+         }
+
+     def _has_bound_tools(self) -> bool:
+         """Check if this service has bound tools"""
+         return bool(self._bound_tools)
+
+     def _get_bound_tools(self) -> List[Any]:
+         """Get the bound tools schema"""
+         return self._bound_tools

      async def close(self):
-         """Close the backend client"""
-         await self.backend.close()
+         """Close the HTTP client"""
+         if hasattr(self, 'client') and self.client:
+             try:
+                 if not self.client.is_closed:
+                     await self.client.aclose()
+             except Exception as e:
+                 logger.warning(f"Error closing Ollama client: {e}")
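
For orientation: the 0.3.1 rewrite above replaces the old achat/acompletion pair with a single ainvoke entry point backed by httpx. The following is a minimal usage sketch based only on what the diff shows. The OllamaProvider constructor call and its config dict are assumptions for illustration; the diff only shows the service reading self.config and self.provider, while ainvoke, get_token_usage, get_last_token_usage, and close all appear above.

import asyncio

from isa_model.inference.providers.ollama_provider import OllamaProvider
from isa_model.inference.services.llm.ollama_llm_service import OllamaLLMService

async def main():
    # Hypothetical provider construction; the service only requires that
    # self.config supply base_url/timeout/temperature as read in __init__.
    provider = OllamaProvider(config={"base_url": "http://localhost:11434"})
    service = OllamaLLMService(provider, model_name="llama3.2:3b-instruct-fp16")
    try:
        # Plain string prompt -> string reply (see _prepare_messages/_format_response)
        print(await service.ainvoke("Why is the sky blue?"))

        # Message-history form is passed through unchanged
        print(await service.ainvoke([{"role": "user", "content": "One fun fact."}]))

        print(service.get_last_token_usage())  # per-request counters
        print(service.get_token_usage())       # cumulative totals, new in 0.3.1
    finally:
        await service.close()  # closes the underlying httpx.AsyncClient

asyncio.run(main())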