isa-model 0.0.2__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in a supported public registry. It is provided for informational purposes only.
- isa_model/__init__.py +1 -1
- isa_model/core/model_manager.py +69 -4
- isa_model/core/model_registry.py +273 -46
- isa_model/core/storage/hf_storage.py +419 -0
- isa_model/deployment/__init__.py +52 -0
- isa_model/deployment/core/__init__.py +34 -0
- isa_model/deployment/core/deployment_config.py +356 -0
- isa_model/deployment/core/deployment_manager.py +549 -0
- isa_model/deployment/core/isa_deployment_service.py +401 -0
- isa_model/eval/factory.py +381 -140
- isa_model/inference/ai_factory.py +427 -236
- isa_model/inference/billing_tracker.py +406 -0
- isa_model/inference/providers/base_provider.py +51 -4
- isa_model/inference/providers/ml_provider.py +50 -0
- isa_model/inference/providers/ollama_provider.py +37 -18
- isa_model/inference/providers/openai_provider.py +65 -36
- isa_model/inference/providers/replicate_provider.py +42 -30
- isa_model/inference/services/audio/base_stt_service.py +21 -2
- isa_model/inference/services/audio/openai_realtime_service.py +353 -0
- isa_model/inference/services/audio/openai_stt_service.py +252 -0
- isa_model/inference/services/audio/openai_tts_service.py +149 -9
- isa_model/inference/services/audio/replicate_tts_service.py +239 -0
- isa_model/inference/services/base_service.py +36 -1
- isa_model/inference/services/embedding/base_embed_service.py +112 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +28 -2
- isa_model/inference/services/embedding/openai_embed_service.py +223 -0
- isa_model/inference/services/llm/__init__.py +2 -0
- isa_model/inference/services/llm/base_llm_service.py +158 -86
- isa_model/inference/services/llm/llm_adapter.py +414 -0
- isa_model/inference/services/llm/ollama_llm_service.py +252 -63
- isa_model/inference/services/llm/openai_llm_service.py +231 -93
- isa_model/inference/services/llm/triton_llm_service.py +481 -0
- isa_model/inference/services/ml/base_ml_service.py +78 -0
- isa_model/inference/services/ml/sklearn_ml_service.py +140 -0
- isa_model/inference/services/vision/__init__.py +3 -3
- isa_model/inference/services/vision/base_image_gen_service.py +161 -0
- isa_model/inference/services/vision/base_vision_service.py +177 -0
- isa_model/inference/services/vision/helpers/image_utils.py +4 -3
- isa_model/inference/services/vision/ollama_vision_service.py +151 -17
- isa_model/inference/services/vision/openai_vision_service.py +275 -41
- isa_model/inference/services/vision/replicate_image_gen_service.py +278 -118
- isa_model/training/__init__.py +62 -32
- isa_model/training/cloud/__init__.py +22 -0
- isa_model/training/cloud/job_orchestrator.py +402 -0
- isa_model/training/cloud/runpod_trainer.py +454 -0
- isa_model/training/cloud/storage_manager.py +482 -0
- isa_model/training/core/__init__.py +23 -0
- isa_model/training/core/config.py +181 -0
- isa_model/training/core/dataset.py +222 -0
- isa_model/training/core/trainer.py +720 -0
- isa_model/training/core/utils.py +213 -0
- isa_model/training/factory.py +229 -198
- isa_model-0.3.1.dist-info/METADATA +465 -0
- isa_model-0.3.1.dist-info/RECORD +91 -0
- isa_model/core/model_router.py +0 -226
- isa_model/core/model_version.py +0 -0
- isa_model/core/resource_manager.py +0 -202
- isa_model/deployment/gpu_fp16_ds8/models/deepseek_r1/1/model.py +0 -120
- isa_model/deployment/gpu_fp16_ds8/scripts/download_model.py +0 -18
- isa_model/training/engine/llama_factory/__init__.py +0 -39
- isa_model/training/engine/llama_factory/config.py +0 -115
- isa_model/training/engine/llama_factory/data_adapter.py +0 -284
- isa_model/training/engine/llama_factory/examples/__init__.py +0 -6
- isa_model/training/engine/llama_factory/examples/finetune_with_tracking.py +0 -185
- isa_model/training/engine/llama_factory/examples/rlhf_with_tracking.py +0 -163
- isa_model/training/engine/llama_factory/factory.py +0 -331
- isa_model/training/engine/llama_factory/rl.py +0 -254
- isa_model/training/engine/llama_factory/trainer.py +0 -171
- isa_model/training/image_model/configs/create_config.py +0 -37
- isa_model/training/image_model/configs/create_flux_config.py +0 -26
- isa_model/training/image_model/configs/create_lora_config.py +0 -21
- isa_model/training/image_model/prepare_massed_compute.py +0 -97
- isa_model/training/image_model/prepare_upload.py +0 -17
- isa_model/training/image_model/raw_data/create_captions.py +0 -16
- isa_model/training/image_model/raw_data/create_lora_captions.py +0 -20
- isa_model/training/image_model/raw_data/pre_processing.py +0 -200
- isa_model/training/image_model/train/train.py +0 -42
- isa_model/training/image_model/train/train_flux.py +0 -41
- isa_model/training/image_model/train/train_lora.py +0 -57
- isa_model/training/image_model/train_main.py +0 -25
- isa_model-0.0.2.dist-info/METADATA +0 -327
- isa_model-0.0.2.dist-info/RECORD +0 -92
- isa_model-0.0.2.dist-info/licenses/LICENSE +0 -21
- /isa_model/training/{llm_model/annotation → annotation}/annotation_schema.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/processors/annotation_processor.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_manager.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_schema.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/tests/test_annotation_flow.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio copy.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio_upload.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/views/annotation_controller.py +0 -0
- {isa_model-0.0.2.dist-info → isa_model-0.3.1.dist-info}/WHEEL +0 -0
- {isa_model-0.0.2.dist-info → isa_model-0.3.1.dist-info}/top_level.txt +0 -0
isa_model/inference/services/llm/ollama_llm_service.py

```diff
@@ -1,99 +1,288 @@
 import logging
-
-
+import httpx
+import json
+from typing import Dict, Any, List, Union, AsyncGenerator, Optional, Callable
+from isa_model.inference.services.llm.base_llm_service import BaseLLMService
 from isa_model.inference.providers.base_provider import BaseProvider
 
 logger = logging.getLogger(__name__)
 
 class OllamaLLMService(BaseLLMService):
-    """Ollama LLM service
+    """Ollama LLM service with unified invoke interface"""
 
-    def __init__(self, provider: 'BaseProvider', model_name: str = "llama3.
+    def __init__(self, provider: 'BaseProvider', model_name: str = "llama3.2:3b-instruct-fp16"):
         super().__init__(provider, model_name)
+
+        # Create HTTP client for Ollama API
+        base_url = self.config.get("base_url", "http://localhost:11434")
+        timeout = self.config.get("timeout", 60)
+
+        self.client = httpx.AsyncClient(
+            base_url=base_url,
+            timeout=timeout
+        )
 
         self.last_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
-
+        self.total_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0, "requests_count": 0}
+
+        # Tool binding attributes
+        self._bound_tools: List[Any] = []
+        self._tool_binding_kwargs: Dict[str, Any] = {}
+        self._tool_functions: Dict[str, Callable] = {}
+
+        logger.info(f"Initialized OllamaLLMService with model {model_name} at {base_url}")
 
-
-        """
-        if
-
-
-
-
-
+    def _ensure_client(self):
+        """Ensure the HTTP client is available and not closed"""
+        if not hasattr(self, 'client') or not self.client or self.client.is_closed:
+            base_url = self.config.get("base_url", "http://localhost:11434")
+            timeout = self.config.get("timeout", 60)
+            self.client = httpx.AsyncClient(base_url=base_url, timeout=timeout)
+
+    def _create_bound_copy(self) -> 'OllamaLLMService':
+        """Create a copy of this service for tool binding"""
+        bound_service = OllamaLLMService(self.provider, self.model_name)
+        bound_service._bound_tools = self._bound_tools.copy()
+        bound_service._tool_binding_kwargs = self._tool_binding_kwargs.copy()
+        bound_service._tool_functions = self._tool_functions.copy()
+        return bound_service
 
-
-        """
+    def bind_tools(self, tools: List[Union[Dict[str, Any], Callable]], **kwargs) -> 'OllamaLLMService':
+        """Bind tools to this LLM service for function calling"""
+        bound_service = self._create_bound_copy()
+        # Use the base class's adapter manager method
+        bound_service._bound_tools = tools
+        bound_service._tool_binding_kwargs = kwargs
+
+        return bound_service
+
+    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any]) -> Union[str, Any]:
+        """
+        Universal async invocation method that handles different input types
+
+        Args:
+            input_data: Can be:
+                - str: Simple text prompt
+                - list: Message history like [{"role": "user", "content": "hello"}]
+                - Any: LangChain message objects or other formats
+
+        Returns:
+            Model response (string for simple cases, object for complex cases)
+        """
         try:
+            # Ensure client is available
+            self._ensure_client()
+
+            # Convert input to messages format
+            messages = self._prepare_messages(input_data)
+
+            # Prepare request parameters
             payload = {
                 "model": self.model_name,
                 "messages": messages,
-                "stream":
+                "stream": self.streaming,
+                "options": {
+                    "temperature": self.config.get("temperature", 0.7),
+                    "top_p": self.config.get("top_p", 0.9),
+                    "num_predict": self.config.get("max_tokens", 2048)
+                }
             }
-            response = await self.backend.post("/api/chat", payload)
 
-            #
-
-
-
-                "completion_tokens": response.get("eval_count", 0),
-                "total_tokens": response.get("prompt_eval_count", 0) + response.get("eval_count", 0)
-            }
+            # Add tools if bound using adapter manager
+            tool_schemas = await self._prepare_tools_for_request()
+            if tool_schemas:
+                payload["tools"] = tool_schemas
 
-
+            # Handle streaming
+            if self.streaming:
+                return self._stream_response(payload)
 
-
-
-
-
-    async def acompletion(self, prompt: str):
-        """Text completion method"""
-        try:
-            payload = {
-                "model": self.model_name,
-                "prompt": prompt,
-                "stream": False
-            }
-            response = await self.backend.post("/api/generate", payload)
+            # Regular request
+            response = await self.client.post("/api/chat", json=payload)
+            response.raise_for_status()
+            result = response.json()
 
             # Update token usage if available
-            if "eval_count" in
-                self.
-
-
-
-
+            if "eval_count" in result:
+                self._update_token_usage(result)
+
+            # Handle tool calls if present
+            message = result["message"]
+            if "tool_calls" in message and message["tool_calls"]:
+                return await self._handle_tool_calls(message, messages)
 
-
+            # Return appropriate format based on input type
+            return self._format_response(message["content"], input_data)
 
+        except httpx.RequestError as e:
+            logger.error(f"HTTP request error in ainvoke: {e}")
+            raise
         except Exception as e:
-            logger.error(f"Error in
+            logger.error(f"Error in ainvoke: {e}")
             raise
 
-
-        """
-
-
-
-
-
+    def _prepare_messages(self, input_data: Union[str, List[Dict[str, str]], Any]) -> List[Dict[str, str]]:
+        """Convert various input formats to Ollama messages format (same as OpenAI)"""
+        if isinstance(input_data, str):
+            # Simple string prompt
+            return [{"role": "user", "content": input_data}]
+
+        elif isinstance(input_data, list):
+            if not input_data:
+                raise ValueError("Empty message list provided")
+
+            # Check if it's LangChain messages or standard messages
+            first_msg = input_data[0]
+            if hasattr(first_msg, 'content') and hasattr(first_msg, 'type'):
+                # LangChain message objects - use base class method
+                return self._convert_langchain_to_openai(input_data)
+            elif isinstance(first_msg, dict):
+                # Standard message dictionaries
+                return input_data
+            else:
+                # List of strings or other formats
+                messages = []
+                for i, msg in enumerate(input_data):
+                    if isinstance(msg, str):
+                        role = "user" if i % 2 == 0 else "assistant"
+                        messages.append({"role": role, "content": msg})
+                    elif isinstance(msg, dict):
+                        messages.append(msg)
+                    else:
+                        messages.append({"role": "user", "content": str(msg)})
+                return messages
+
+        else:
+            # Handle single LangChain message objects or other objects
+            if hasattr(input_data, 'content') and hasattr(input_data, 'type'):
+                return self._convert_langchain_to_openai([input_data])
+            else:
+                return [{"role": "user", "content": str(input_data)}]
 
-
-        """
-        #
-
-
-
+    def _format_response(self, content: str, original_input: Any) -> Union[str, Any]:
+        """Format response based on original input type"""
+        # For LangGraph compatibility, return AIMessage object if needed
+        if hasattr(original_input, 'type') or (isinstance(original_input, list) and
+                                               original_input and hasattr(original_input[0], 'type')):
+            try:
+                from langchain_core.messages import AIMessage
+                return AIMessage(content=content)
+            except ImportError:
+                # Fallback to simple object
+                class SimpleAIMessage:
+                    def __init__(self, content):
+                        self.content = content
+                        self.type = "ai"
+                return SimpleAIMessage(content)
+
+        # Default to string
+        return content
 
-    def
+    async def _stream_response(self, payload: Dict[str, Any]) -> AsyncGenerator[str, None]:
+        """Handle streaming responses"""
+        async def stream_generator():
+            try:
+                async with self.client.stream("POST", "/api/chat", json=payload) as response:
+                    response.raise_for_status()
+                    async for line in response.aiter_lines():
+                        if line.strip():
+                            try:
+                                chunk = json.loads(line)
+                                if "message" in chunk and "content" in chunk["message"]:
+                                    content = chunk["message"]["content"]
+                                    if content:
+                                        yield content
+                            except json.JSONDecodeError:
+                                continue
+            except Exception as e:
+                logger.error(f"Error in streaming: {e}")
+                raise
+
+        return stream_generator()
+
+    async def _handle_tool_calls(self, assistant_message: Dict[str, Any], original_messages: List[Dict[str, str]]) -> str:
+        """Handle tool calls from the assistant using adapter manager"""
+        tool_calls = assistant_message.get("tool_calls", [])
+
+        # Add assistant message with tool calls to conversation
+        messages = original_messages + [assistant_message]
+
+        # Execute each tool call using adapter manager
+        for tool_call in tool_calls:
+            function_name = tool_call["function"]["name"]
+
+            try:
+                # Parse arguments if they're a string
+                arguments = tool_call["function"]["arguments"]
+                if isinstance(arguments, str):
+                    arguments = json.loads(arguments)
+
+                # Use adapter manager to execute tool
+                result = await self._execute_tool_call(function_name, arguments)
+
+                # Add tool result to messages
+                messages.append({
+                    "role": "tool",
+                    "content": str(result),
+                    "tool_call_id": tool_call.get("id", function_name)
+                })
+
+            except Exception as e:
+                logger.error(f"Error executing tool {function_name}: {e}")
+                messages.append({
+                    "role": "tool",
+                    "content": f"Error executing {function_name}: {str(e)}",
+                    "tool_call_id": tool_call.get("id", function_name)
+                })
+
+        # Get final response from the model
+        return await self.ainvoke(messages)
+
+    def _update_token_usage(self, result: Dict[str, Any]):
+        """Update token usage statistics"""
+        self.last_token_usage = {
+            "prompt_tokens": result.get("prompt_eval_count", 0),
+            "completion_tokens": result.get("eval_count", 0),
+            "total_tokens": result.get("prompt_eval_count", 0) + result.get("eval_count", 0)
+        }
+
+        # Update total usage
+        self.total_token_usage["prompt_tokens"] += self.last_token_usage["prompt_tokens"]
+        self.total_token_usage["completion_tokens"] += self.last_token_usage["completion_tokens"]
+        self.total_token_usage["total_tokens"] += self.last_token_usage["total_tokens"]
+        self.total_token_usage["requests_count"] += 1
+
+    def get_token_usage(self) -> Dict[str, Any]:
         """Get total token usage statistics"""
-        return self.
+        return self.total_token_usage
 
     def get_last_token_usage(self) -> Dict[str, int]:
         """Get token usage from last request"""
         return self.last_token_usage
+
+    def get_model_info(self) -> Dict[str, Any]:
+        """Get information about the current model"""
+        return {
+            "name": self.model_name,
+            "max_tokens": self.config.get("max_tokens", 2048),
+            "supports_streaming": True,
+            "supports_functions": True,
+            "provider": "ollama"
+        }
+
+    def _has_bound_tools(self) -> bool:
+        """Check if this service has bound tools"""
+        return bool(self._bound_tools)
+
+    def _get_bound_tools(self) -> List[Any]:
+        """Get the bound tools schema"""
+        return self._bound_tools
 
     async def close(self):
-        """Close the
-
+        """Close the HTTP client"""
+        if hasattr(self, 'client') and self.client:
+            try:
+                if not self.client.is_closed:
+                    await self.client.aclose()
+            except Exception as e:
+                logger.warning(f"Error closing Ollama client: {e}")
```