isa-model 0.2.0__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registries. It is provided for informational purposes only.
- isa_model/__init__.py +1 -1
- isa_model/core/storage/hf_storage.py +419 -0
- isa_model/deployment/__init__.py +52 -0
- isa_model/deployment/core/__init__.py +34 -0
- isa_model/deployment/core/deployment_config.py +356 -0
- isa_model/deployment/core/deployment_manager.py +549 -0
- isa_model/deployment/core/isa_deployment_service.py +401 -0
- isa_model/eval/factory.py +381 -140
- isa_model/inference/ai_factory.py +142 -240
- isa_model/inference/providers/ml_provider.py +50 -0
- isa_model/inference/services/audio/openai_tts_service.py +104 -3
- isa_model/inference/services/embedding/base_embed_service.py +112 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +28 -2
- isa_model/inference/services/llm/__init__.py +2 -0
- isa_model/inference/services/llm/base_llm_service.py +111 -1
- isa_model/inference/services/llm/ollama_llm_service.py +234 -26
- isa_model/inference/services/llm/openai_llm_service.py +243 -28
- isa_model/inference/services/llm/triton_llm_service.py +481 -0
- isa_model/inference/services/ml/base_ml_service.py +78 -0
- isa_model/inference/services/ml/sklearn_ml_service.py +140 -0
- isa_model/inference/services/vision/__init__.py +3 -3
- isa_model/inference/services/vision/base_image_gen_service.py +161 -0
- isa_model/inference/services/vision/base_vision_service.py +177 -0
- isa_model/inference/services/vision/ollama_vision_service.py +143 -17
- isa_model/inference/services/vision/replicate_image_gen_service.py +139 -7
- isa_model/training/__init__.py +62 -32
- isa_model/training/cloud/__init__.py +22 -0
- isa_model/training/cloud/job_orchestrator.py +402 -0
- isa_model/training/cloud/runpod_trainer.py +454 -0
- isa_model/training/cloud/storage_manager.py +482 -0
- isa_model/training/core/__init__.py +23 -0
- isa_model/training/core/config.py +181 -0
- isa_model/training/core/dataset.py +222 -0
- isa_model/training/core/trainer.py +720 -0
- isa_model/training/core/utils.py +213 -0
- isa_model/training/factory.py +229 -198
- isa_model-0.2.9.dist-info/METADATA +465 -0
- isa_model-0.2.9.dist-info/RECORD +86 -0
- isa_model/core/model_router.py +0 -226
- isa_model/core/model_version.py +0 -0
- isa_model/core/resource_manager.py +0 -202
- isa_model/deployment/gpu_fp16_ds8/models/deepseek_r1/1/model.py +0 -120
- isa_model/deployment/gpu_fp16_ds8/scripts/download_model.py +0 -18
- isa_model/training/engine/llama_factory/__init__.py +0 -39
- isa_model/training/engine/llama_factory/config.py +0 -115
- isa_model/training/engine/llama_factory/data_adapter.py +0 -284
- isa_model/training/engine/llama_factory/examples/__init__.py +0 -6
- isa_model/training/engine/llama_factory/examples/finetune_with_tracking.py +0 -185
- isa_model/training/engine/llama_factory/examples/rlhf_with_tracking.py +0 -163
- isa_model/training/engine/llama_factory/factory.py +0 -331
- isa_model/training/engine/llama_factory/rl.py +0 -254
- isa_model/training/engine/llama_factory/trainer.py +0 -171
- isa_model/training/image_model/configs/create_config.py +0 -37
- isa_model/training/image_model/configs/create_flux_config.py +0 -26
- isa_model/training/image_model/configs/create_lora_config.py +0 -21
- isa_model/training/image_model/prepare_massed_compute.py +0 -97
- isa_model/training/image_model/prepare_upload.py +0 -17
- isa_model/training/image_model/raw_data/create_captions.py +0 -16
- isa_model/training/image_model/raw_data/create_lora_captions.py +0 -20
- isa_model/training/image_model/raw_data/pre_processing.py +0 -200
- isa_model/training/image_model/train/train.py +0 -42
- isa_model/training/image_model/train/train_flux.py +0 -41
- isa_model/training/image_model/train/train_lora.py +0 -57
- isa_model/training/image_model/train_main.py +0 -25
- isa_model-0.2.0.dist-info/METADATA +0 -327
- isa_model-0.2.0.dist-info/RECORD +0 -92
- isa_model-0.2.0.dist-info/licenses/LICENSE +0 -21
- /isa_model/training/{llm_model/annotation → annotation}/annotation_schema.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/processors/annotation_processor.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_manager.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_schema.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/tests/test_annotation_flow.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio copy.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio_upload.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/views/annotation_controller.py +0 -0
- {isa_model-0.2.0.dist-info → isa_model-0.2.9.dist-info}/WHEEL +0 -0
- {isa_model-0.2.0.dist-info → isa_model-0.2.9.dist-info}/top_level.txt +0 -0
isa_model/inference/services/embedding/base_embed_service.py (new file):

@@ -0,0 +1,112 @@
+from abc import ABC, abstractmethod
+from typing import Dict, Any, List, Union, Optional
+from isa_model.inference.services.base_service import BaseService
+
+class BaseEmbedService(BaseService):
+    """Base class for embedding services"""
+
+    @abstractmethod
+    async def create_text_embedding(self, text: str) -> List[float]:
+        """
+        Create embedding for single text
+
+        Args:
+            text: Input text to embed
+
+        Returns:
+            List of float values representing the embedding vector
+        """
+        pass
+
+    @abstractmethod
+    async def create_text_embeddings(self, texts: List[str]) -> List[List[float]]:
+        """
+        Create embeddings for multiple texts
+
+        Args:
+            texts: List of input texts to embed
+
+        Returns:
+            List of embedding vectors, one for each input text
+        """
+        pass
+
+    @abstractmethod
+    async def create_chunks(self, text: str, metadata: Optional[Dict] = None) -> List[Dict]:
+        """
+        Create text chunks with embeddings
+
+        Args:
+            text: Input text to chunk and embed
+            metadata: Optional metadata to attach to chunks
+
+        Returns:
+            List of dictionaries containing:
+            - text: The chunk text
+            - embedding: The embedding vector
+            - metadata: Associated metadata
+            - start_index: Start position in original text
+            - end_index: End position in original text
+        """
+        pass
+
+    @abstractmethod
+    async def compute_similarity(self, embedding1: List[float], embedding2: List[float]) -> float:
+        """
+        Compute similarity between two embeddings
+
+        Args:
+            embedding1: First embedding vector
+            embedding2: Second embedding vector
+
+        Returns:
+            Similarity score (typically cosine similarity, range -1 to 1)
+        """
+        pass
+
+    @abstractmethod
+    async def find_similar_texts(
+        self,
+        query_embedding: List[float],
+        candidate_embeddings: List[List[float]],
+        top_k: int = 5
+    ) -> List[Dict[str, Any]]:
+        """
+        Find most similar texts based on embeddings
+
+        Args:
+            query_embedding: Query embedding vector
+            candidate_embeddings: List of candidate embedding vectors
+            top_k: Number of top similar results to return
+
+        Returns:
+            List of dictionaries containing:
+            - index: Index in candidate_embeddings
+            - similarity: Similarity score
+        """
+        pass
+
+    @abstractmethod
+    def get_embedding_dimension(self) -> int:
+        """
+        Get the dimension of embeddings produced by this service
+
+        Returns:
+            Integer dimension of embedding vectors
+        """
+        pass
+
+    @abstractmethod
+    def get_max_input_length(self) -> int:
+        """
+        Get maximum input text length supported
+
+        Returns:
+            Maximum number of characters/tokens supported
+        """
+        pass
+
+    @abstractmethod
+    async def close(self):
+        """Cleanup resources"""
+        pass
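The interface leaves `compute_similarity` to implementors, but its docstring pins down the expected semantics (typically cosine similarity in the range -1 to 1). A minimal illustrative sketch of that contract, not part of the package:

```python
# Illustrative only: plain cosine similarity matching the documented
# contract of BaseEmbedService.compute_similarity().
import math
from typing import List

def cosine_similarity(a: List[float], b: List[float]) -> float:
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    return dot / (norm_a * norm_b)

print(cosine_similarity([1.0, 0.0], [1.0, 1.0]))  # ~0.7071
```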
isa_model/inference/services/embedding/ollama_embed_service.py:

@@ -4,12 +4,12 @@ import asyncio
 from typing import List, Dict, Any, Optional
 
 # Keep the imports and framework structure you specified
-from isa_model.inference.services.
+from isa_model.inference.services.embedding.base_embed_service import BaseEmbedService
 from isa_model.inference.providers.base_provider import BaseProvider
 
 logger = logging.getLogger(__name__)
 
-class OllamaEmbedService(
+class OllamaEmbedService(BaseEmbedService):
     """
     Ollama embedding service.
     This class follows the base service architecture, but uses its own HTTP client to talk to the Ollama API,
@@ -91,6 +91,32 @@ class OllamaEmbedService(BaseEmbeddingService):
 
         return dot_product / (norm1 * norm2)
 
+    async def find_similar_texts(
+        self,
+        query_embedding: List[float],
+        candidate_embeddings: List[List[float]],
+        top_k: int = 5
+    ) -> List[Dict[str, Any]]:
+        """Find most similar texts based on embeddings"""
+        similarities = []
+        for i, candidate in enumerate(candidate_embeddings):
+            similarity = await self.compute_similarity(query_embedding, candidate)
+            similarities.append({"index": i, "similarity": similarity})
+
+        # Sort by similarity in descending order and return top_k
+        similarities.sort(key=lambda x: x["similarity"], reverse=True)
+        return similarities[:top_k]
+
+    def get_embedding_dimension(self) -> int:
+        """Get the dimension of embeddings produced by this service"""
+        # BGE-M3 produces 1024-dimensional embeddings
+        return 1024
+
+    def get_max_input_length(self) -> int:
+        """Get maximum input text length supported"""
+        # BGE-M3 supports up to 8192 tokens
+        return 8192
+
     async def close(self):
         """Close the built-in HTTP client"""
         await self.client.aclose()
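Together with the new base interface, these methods support a simple retrieval loop. A usage sketch (the diff does not show how the service is constructed, so `embed` is assumed to be an already-initialized `OllamaEmbedService`):

```python
async def search_demo(embed) -> None:
    # embed: an initialized OllamaEmbedService (construction not shown in this diff)
    query = await embed.create_text_embedding("capital of France?")
    candidates = await embed.create_text_embeddings([
        "Paris is the capital of France.",
        "The Eiffel Tower is 330 m tall.",
    ])
    # find_similar_texts returns [{"index": ..., "similarity": ...}],
    # sorted by similarity in descending order
    top = await embed.find_similar_texts(query, candidates, top_k=1)
    print(top[0]["index"], round(top[0]["similarity"], 3))
    await embed.close()
```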
isa_model/inference/services/llm/__init__.py:

@@ -5,8 +5,10 @@ LLM Services - Business logic services for Language Models
 # Import LLM services here when created
 from .ollama_llm_service import OllamaLLMService
 from .openai_llm_service import OpenAILLMService
+from .triton_llm_service import TritonLLMService
 
 __all__ = [
     "OllamaLLMService",
     "OpenAILLMService",
+    "TritonLLMService"
 ]
isa_model/inference/services/llm/base_llm_service.py:

@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Dict, Any, List, Union, Optional, AsyncGenerator, TypeVar
+from typing import Dict, Any, List, Union, Optional, AsyncGenerator, TypeVar, Callable
 from isa_model.inference.services.base_service import BaseService
 
 T = TypeVar('T')  # Generic type for responses
@@ -87,6 +87,116 @@ class BaseLLMService(BaseService):
         """
         pass
 
+    def bind_tools(self, tools: List[Union[Dict[str, Any], Callable]], **kwargs) -> 'BaseLLMService':
+        """
+        Bind tools to this LLM service for function calling (LangChain interface)
+
+        Args:
+            tools: List of tools to bind. Can be:
+                   - Dictionary with tool schema
+                   - Callable functions (will be converted to schema)
+            **kwargs: Additional tool binding parameters
+
+        Returns:
+            A new instance of the service with tools bound
+
+        Example:
+            def get_weather(location: str) -> str:
+                '''Get weather for a location'''
+                return f"Weather in {location}: Sunny, 25°C"
+
+            llm_with_tools = llm.bind_tools([get_weather])
+            response = await llm_with_tools.ainvoke("What's the weather in Paris?")
+        """
+        # Create a copy of the current service
+        bound_service = self._create_bound_copy()
+        bound_service._bound_tools = self._convert_tools_to_schema(tools)
+        bound_service._tool_binding_kwargs = kwargs
+        return bound_service
+
+    def _create_bound_copy(self) -> 'BaseLLMService':
+        """Create a copy of this service for tool binding"""
+        # Default implementation - subclasses should override if needed
+        bound_service = self.__class__(self.provider, self.model_name)
+        bound_service.config = self.config.copy()
+        return bound_service
+
+    def _convert_tools_to_schema(self, tools: List[Union[Dict[str, Any], Callable]]) -> List[Dict[str, Any]]:
+        """Convert tools to OpenAI function calling schema"""
+        schemas = []
+        for tool in tools:
+            if callable(tool):
+                schema = self._function_to_schema(tool)
+            elif isinstance(tool, dict):
+                schema = tool
+            else:
+                raise ValueError(f"Tool must be callable or dict, got {type(tool)}")
+            schemas.append(schema)
+        return schemas
+
+    def _function_to_schema(self, func: Callable) -> Dict[str, Any]:
+        """Convert a Python function to OpenAI function schema"""
+        import inspect
+        import json
+        from typing import get_type_hints
+
+        sig = inspect.signature(func)
+        type_hints = get_type_hints(func)
+
+        properties = {}
+        required = []
+
+        for param_name, param in sig.parameters.items():
+            param_type = type_hints.get(param_name, str)
+
+            # Convert Python types to JSON schema types
+            if param_type == str:
+                prop_type = "string"
+            elif param_type == int:
+                prop_type = "integer"
+            elif param_type == float:
+                prop_type = "number"
+            elif param_type == bool:
+                prop_type = "boolean"
+            elif param_type == list:
+                prop_type = "array"
+            elif param_type == dict:
+                prop_type = "object"
+            else:
+                prop_type = "string"  # Default fallback
+
+            properties[param_name] = {"type": prop_type}
+
+            # Add parameter to required if it has no default value
+            if param.default == inspect.Parameter.empty:
+                required.append(param_name)
+
+        return {
+            "type": "function",
+            "function": {
+                "name": func.__name__,
+                "description": func.__doc__ or f"Function {func.__name__}",
+                "parameters": {
+                    "type": "object",
+                    "properties": properties,
+                    "required": required
+                }
+            }
+        }
+
+    def _has_bound_tools(self) -> bool:
+        """Check if this service has bound tools"""
+        return hasattr(self, '_bound_tools') and self._bound_tools
+
+    def _get_bound_tools(self) -> List[Dict[str, Any]]:
+        """Get the bound tools schema"""
+        return getattr(self, '_bound_tools', [])
+
+    def _execute_tool_call(self, tool_name: str, arguments: Dict[str, Any]) -> Any:
+        """Execute a tool call by name with arguments"""
+        # This is a placeholder - subclasses should implement actual tool execution
+        raise NotImplementedError("Tool execution not implemented in base class")
+
     @abstractmethod
     def get_token_usage(self) -> Dict[str, Any]:
         """
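Tracing the `_function_to_schema` helper added above: for the `get_weather` example from the `bind_tools` docstring, it yields the standard OpenAI function-calling schema:

```python
{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get weather for a location",
        "parameters": {
            "type": "object",
            "properties": {"location": {"type": "string"}},
            "required": ["location"]  # no default value, so the parameter is required
        }
    }
}
```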
isa_model/inference/services/llm/ollama_llm_service.py:

@@ -1,18 +1,57 @@
 import logging
-
-
+import httpx
+import json
+from typing import Dict, Any, List, Union, AsyncGenerator, Optional, Callable
+from isa_model.inference.services.llm.base_llm_service import BaseLLMService
 from isa_model.inference.providers.base_provider import BaseProvider
 
 logger = logging.getLogger(__name__)
 
 class OllamaLLMService(BaseLLMService):
-    """Ollama LLM service using
+    """Ollama LLM service using HTTP client"""
 
     def __init__(self, provider: 'BaseProvider', model_name: str = "llama3.1"):
         super().__init__(provider, model_name)
+
+        # Create HTTP client for Ollama API
+        base_url = self.config.get("base_url", "http://localhost:11434")
+        timeout = self.config.get("timeout", 60)
+
+        self.client = httpx.AsyncClient(
+            base_url=base_url,
+            timeout=timeout
+        )
 
         self.last_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
-
+        self.total_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0, "requests_count": 0}
+
+        # Tool binding attributes
+        self._bound_tools: List[Dict[str, Any]] = []
+        self._tool_binding_kwargs: Dict[str, Any] = {}
+        self._tool_functions: Dict[str, Callable] = {}
+
+        logger.info(f"Initialized OllamaLLMService with model {model_name} at {base_url}")
+
+    def _create_bound_copy(self) -> 'OllamaLLMService':
+        """Create a copy of this service for tool binding"""
+        bound_service = OllamaLLMService(self.provider, self.model_name)
+        bound_service._bound_tools = self._bound_tools.copy()
+        bound_service._tool_binding_kwargs = self._tool_binding_kwargs.copy()
+        bound_service._tool_functions = self._tool_functions.copy()
+        return bound_service
+
+    def bind_tools(self, tools: List[Union[Dict[str, Any], Callable]], **kwargs) -> 'OllamaLLMService':
+        """Bind tools to this LLM service for function calling"""
+        bound_service = self._create_bound_copy()
+        bound_service._bound_tools = self._convert_tools_to_schema(tools)
+        bound_service._tool_binding_kwargs = kwargs
+
+        # Store the actual functions for execution
+        for tool in tools:
+            if callable(tool):
+                bound_service._tool_functions[tool.__name__] = tool
+
+        return bound_service
 
     async def ainvoke(self, prompt: Union[str, List[Dict[str, str]], Any]):
         """Universal invocation method"""
@@ -29,44 +68,130 @@ class OllamaLLMService(BaseLLMService):
             payload = {
                 "model": self.model_name,
                 "messages": messages,
-                "stream": False
+                "stream": False,
+                "options": {
+                    "temperature": self.config.get("temperature", 0.7),
+                    "top_p": self.config.get("top_p", 0.9),
+                    "num_predict": self.config.get("max_tokens", 2048)
+                }
             }
-
+
+            # Add tools if bound
+            if self._has_bound_tools():
+                payload["tools"] = self._get_bound_tools()
+
+            response = await self.client.post("/api/chat", json=payload)
+            response.raise_for_status()
+            result = response.json()
 
             # Update token usage if available
-            if "eval_count" in
+            if "eval_count" in result:
                 self.last_token_usage = {
-                    "prompt_tokens":
-                    "completion_tokens":
-                    "total_tokens":
+                    "prompt_tokens": result.get("prompt_eval_count", 0),
+                    "completion_tokens": result.get("eval_count", 0),
+                    "total_tokens": result.get("prompt_eval_count", 0) + result.get("eval_count", 0)
                 }
+
+                # Update total usage
+                self.total_token_usage["prompt_tokens"] += self.last_token_usage["prompt_tokens"]
+                self.total_token_usage["completion_tokens"] += self.last_token_usage["completion_tokens"]
+                self.total_token_usage["total_tokens"] += self.last_token_usage["total_tokens"]
+                self.total_token_usage["requests_count"] += 1
+
+            # Handle tool calls if present
+            message = result["message"]
+            if "tool_calls" in message and message["tool_calls"]:
+                return await self._handle_tool_calls(message, messages)
 
-            return
+            return message["content"]
 
+        except httpx.RequestError as e:
+            logger.error(f"HTTP request error in chat completion: {e}")
+            raise
         except Exception as e:
             logger.error(f"Error in chat completion: {e}")
             raise
 
+    async def _handle_tool_calls(self, assistant_message: Dict[str, Any], original_messages: List[Dict[str, str]]) -> str:
+        """Handle tool calls from the assistant"""
+        tool_calls = assistant_message.get("tool_calls", [])
+
+        # Add assistant message with tool calls to conversation
+        messages = original_messages + [assistant_message]
+
+        # Execute each tool call
+        for tool_call in tool_calls:
+            function_name = tool_call["function"]["name"]
+            arguments = tool_call["function"]["arguments"]
+
+            try:
+                # Parse arguments if they're a string
+                if isinstance(arguments, str):
+                    arguments = json.loads(arguments)
+
+                # Execute the tool
+                if function_name in self._tool_functions:
+                    result = self._tool_functions[function_name](**arguments)
+                    if hasattr(result, '__await__'):  # Handle async functions
+                        result = await result
+                else:
+                    result = f"Error: Function {function_name} not found"
+
+                # Add tool result to messages
+                messages.append({
+                    "role": "tool",
+                    "content": str(result),
+                    "tool_call_id": tool_call.get("id", function_name)
+                })
+
+            except Exception as e:
+                logger.error(f"Error executing tool {function_name}: {e}")
+                messages.append({
+                    "role": "tool",
+                    "content": f"Error executing {function_name}: {str(e)}",
+                    "tool_call_id": tool_call.get("id", function_name)
+                })
+
+        # Get final response from the model
+        return await self.achat(messages)
+
     async def acompletion(self, prompt: str):
         """Text completion method"""
         try:
             payload = {
                 "model": self.model_name,
                 "prompt": prompt,
-                "stream": False
+                "stream": False,
+                "options": {
+                    "temperature": self.config.get("temperature", 0.7),
+                    "top_p": self.config.get("top_p", 0.9),
+                    "num_predict": self.config.get("max_tokens", 2048)
+                }
             }
-
+
+            response = await self.client.post("/api/generate", json=payload)
+            response.raise_for_status()
+            result = response.json()
 
             # Update token usage if available
-            if "eval_count" in
+            if "eval_count" in result:
                 self.last_token_usage = {
-                    "prompt_tokens":
-                    "completion_tokens":
-                    "total_tokens":
+                    "prompt_tokens": result.get("prompt_eval_count", 0),
+                    "completion_tokens": result.get("eval_count", 0),
+                    "total_tokens": result.get("prompt_eval_count", 0) + result.get("eval_count", 0)
                 }
+
+                # Update total usage
+                self.total_token_usage["prompt_tokens"] += self.last_token_usage["prompt_tokens"]
+                self.total_token_usage["completion_tokens"] += self.last_token_usage["completion_tokens"]
+                self.total_token_usage["total_tokens"] += self.last_token_usage["total_tokens"]
+                self.total_token_usage["requests_count"] += 1
 
-            return
+            return result["response"]
 
+        except httpx.RequestError as e:
+            logger.error(f"HTTP request error in text completion: {e}")
+            raise
         except Exception as e:
             logger.error(f"Error in text completion: {e}")
             raise
@@ -81,19 +206,102 @@
 
     async def astream_chat(self, messages: List[Dict[str, str]]) -> AsyncGenerator[str, None]:
         """Stream chat responses"""
-
-
-
-
+        try:
+            payload = {
+                "model": self.model_name,
+                "messages": messages,
+                "stream": True,
+                "options": {
+                    "temperature": self.config.get("temperature", 0.7),
+                    "top_p": self.config.get("top_p", 0.9),
+                    "num_predict": self.config.get("max_tokens", 2048)
+                }
+            }
+
+            # Add tools if bound
+            if self._has_bound_tools():
+                payload["tools"] = self._get_bound_tools()
+
+            async with self.client.stream("POST", "/api/chat", json=payload) as response:
+                response.raise_for_status()
+                async for line in response.aiter_lines():
+                    if line.strip():
+                        try:
+                            chunk = json.loads(line)
+                            if "message" in chunk and "content" in chunk["message"]:
+                                content = chunk["message"]["content"]
+                                if content:
+                                    yield content
+                        except json.JSONDecodeError:
+                            continue
+
+        except httpx.RequestError as e:
+            logger.error(f"HTTP request error in stream chat: {e}")
+            raise
+        except Exception as e:
+            logger.error(f"Error in stream chat: {e}")
+            raise
 
-    def
+    async def astream_completion(self, prompt: str) -> AsyncGenerator[str, None]:
+        """Stream completion responses"""
+        try:
+            payload = {
+                "model": self.model_name,
+                "prompt": prompt,
+                "stream": True,
+                "options": {
+                    "temperature": self.config.get("temperature", 0.7),
+                    "top_p": self.config.get("top_p", 0.9),
+                    "num_predict": self.config.get("max_tokens", 2048)
+                }
+            }
+
+            async with self.client.stream("POST", "/api/generate", json=payload) as response:
+                response.raise_for_status()
+                async for line in response.aiter_lines():
+                    if line.strip():
+                        try:
+                            chunk = json.loads(line)
+                            if "response" in chunk:
+                                content = chunk["response"]
+                                if content:
+                                    yield content
+                        except json.JSONDecodeError:
+                            continue
+
+        except httpx.RequestError as e:
+            logger.error(f"HTTP request error in stream completion: {e}")
+            raise
+        except Exception as e:
+            logger.error(f"Error in stream completion: {e}")
+            raise
+
+    def get_token_usage(self) -> Dict[str, Any]:
         """Get total token usage statistics"""
-        return self.
+        return self.total_token_usage
 
     def get_last_token_usage(self) -> Dict[str, int]:
         """Get token usage from last request"""
         return self.last_token_usage
+
+    def get_model_info(self) -> Dict[str, Any]:
+        """Get information about the current model"""
+        return {
+            "name": self.model_name,
+            "max_tokens": self.config.get("max_tokens", 2048),
+            "supports_streaming": True,
+            "supports_functions": True,
+            "provider": "ollama"
+        }
+
+    def _has_bound_tools(self) -> bool:
+        """Check if this service has bound tools"""
+        return bool(self._bound_tools)
+
+    def _get_bound_tools(self) -> List[Dict[str, Any]]:
+        """Get the bound tools schema"""
+        return self._bound_tools
 
     async def close(self):
-        """Close the
-        await self.
+        """Close the HTTP client"""
+        await self.client.aclose()
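End to end, the new Ollama tool-calling path can be exercised roughly as below. A sketch: only `bind_tools`, `ainvoke`, `get_token_usage`, and `close` come from this diff; the `llm` instance is assumed to be constructed elsewhere, and the tool mirrors the docstring example.

```python
def get_weather(location: str) -> str:
    """Get weather for a location"""
    return f"Weather in {location}: Sunny, 25°C"

async def demo(llm) -> None:
    # llm: an initialized OllamaLLMService (construction not shown in this diff)
    llm_with_tools = llm.bind_tools([get_weather])

    # ainvoke posts the tool schema to /api/chat; if the model responds with
    # tool_calls, _handle_tool_calls runs get_weather and requests a final answer.
    answer = await llm_with_tools.ainvoke("What's the weather in Paris?")
    print(answer)

    # Cumulative usage, including requests_count, from total_token_usage
    print(llm_with_tools.get_token_usage())
    await llm_with_tools.close()
```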
|