isa-model 0.0.2__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff compares the contents of two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- isa_model/__init__.py +1 -1
- isa_model/core/model_manager.py +69 -4
- isa_model/core/model_registry.py +273 -46
- isa_model/core/storage/hf_storage.py +419 -0
- isa_model/deployment/__init__.py +52 -0
- isa_model/deployment/core/__init__.py +34 -0
- isa_model/deployment/core/deployment_config.py +356 -0
- isa_model/deployment/core/deployment_manager.py +549 -0
- isa_model/deployment/core/isa_deployment_service.py +401 -0
- isa_model/eval/factory.py +381 -140
- isa_model/inference/ai_factory.py +427 -236
- isa_model/inference/billing_tracker.py +406 -0
- isa_model/inference/providers/base_provider.py +51 -4
- isa_model/inference/providers/ml_provider.py +50 -0
- isa_model/inference/providers/ollama_provider.py +37 -18
- isa_model/inference/providers/openai_provider.py +65 -36
- isa_model/inference/providers/replicate_provider.py +42 -30
- isa_model/inference/services/audio/base_stt_service.py +21 -2
- isa_model/inference/services/audio/openai_realtime_service.py +353 -0
- isa_model/inference/services/audio/openai_stt_service.py +252 -0
- isa_model/inference/services/audio/openai_tts_service.py +149 -9
- isa_model/inference/services/audio/replicate_tts_service.py +239 -0
- isa_model/inference/services/base_service.py +36 -1
- isa_model/inference/services/embedding/base_embed_service.py +112 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +28 -2
- isa_model/inference/services/embedding/openai_embed_service.py +223 -0
- isa_model/inference/services/llm/__init__.py +2 -0
- isa_model/inference/services/llm/base_llm_service.py +158 -86
- isa_model/inference/services/llm/llm_adapter.py +414 -0
- isa_model/inference/services/llm/ollama_llm_service.py +252 -63
- isa_model/inference/services/llm/openai_llm_service.py +231 -93
- isa_model/inference/services/llm/triton_llm_service.py +481 -0
- isa_model/inference/services/ml/base_ml_service.py +78 -0
- isa_model/inference/services/ml/sklearn_ml_service.py +140 -0
- isa_model/inference/services/vision/__init__.py +3 -3
- isa_model/inference/services/vision/base_image_gen_service.py +161 -0
- isa_model/inference/services/vision/base_vision_service.py +177 -0
- isa_model/inference/services/vision/helpers/image_utils.py +4 -3
- isa_model/inference/services/vision/ollama_vision_service.py +151 -17
- isa_model/inference/services/vision/openai_vision_service.py +275 -41
- isa_model/inference/services/vision/replicate_image_gen_service.py +278 -118
- isa_model/training/__init__.py +62 -32
- isa_model/training/cloud/__init__.py +22 -0
- isa_model/training/cloud/job_orchestrator.py +402 -0
- isa_model/training/cloud/runpod_trainer.py +454 -0
- isa_model/training/cloud/storage_manager.py +482 -0
- isa_model/training/core/__init__.py +23 -0
- isa_model/training/core/config.py +181 -0
- isa_model/training/core/dataset.py +222 -0
- isa_model/training/core/trainer.py +720 -0
- isa_model/training/core/utils.py +213 -0
- isa_model/training/factory.py +229 -198
- isa_model-0.3.1.dist-info/METADATA +465 -0
- isa_model-0.3.1.dist-info/RECORD +91 -0
- isa_model/core/model_router.py +0 -226
- isa_model/core/model_version.py +0 -0
- isa_model/core/resource_manager.py +0 -202
- isa_model/deployment/gpu_fp16_ds8/models/deepseek_r1/1/model.py +0 -120
- isa_model/deployment/gpu_fp16_ds8/scripts/download_model.py +0 -18
- isa_model/training/engine/llama_factory/__init__.py +0 -39
- isa_model/training/engine/llama_factory/config.py +0 -115
- isa_model/training/engine/llama_factory/data_adapter.py +0 -284
- isa_model/training/engine/llama_factory/examples/__init__.py +0 -6
- isa_model/training/engine/llama_factory/examples/finetune_with_tracking.py +0 -185
- isa_model/training/engine/llama_factory/examples/rlhf_with_tracking.py +0 -163
- isa_model/training/engine/llama_factory/factory.py +0 -331
- isa_model/training/engine/llama_factory/rl.py +0 -254
- isa_model/training/engine/llama_factory/trainer.py +0 -171
- isa_model/training/image_model/configs/create_config.py +0 -37
- isa_model/training/image_model/configs/create_flux_config.py +0 -26
- isa_model/training/image_model/configs/create_lora_config.py +0 -21
- isa_model/training/image_model/prepare_massed_compute.py +0 -97
- isa_model/training/image_model/prepare_upload.py +0 -17
- isa_model/training/image_model/raw_data/create_captions.py +0 -16
- isa_model/training/image_model/raw_data/create_lora_captions.py +0 -20
- isa_model/training/image_model/raw_data/pre_processing.py +0 -200
- isa_model/training/image_model/train/train.py +0 -42
- isa_model/training/image_model/train/train_flux.py +0 -41
- isa_model/training/image_model/train/train_lora.py +0 -57
- isa_model/training/image_model/train_main.py +0 -25
- isa_model-0.0.2.dist-info/METADATA +0 -327
- isa_model-0.0.2.dist-info/RECORD +0 -92
- isa_model-0.0.2.dist-info/licenses/LICENSE +0 -21
- /isa_model/training/{llm_model/annotation → annotation}/annotation_schema.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/processors/annotation_processor.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_manager.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_schema.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/tests/test_annotation_flow.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio copy.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio_upload.py +0 -0
- /isa_model/training/{llm_model/annotation → annotation}/views/annotation_controller.py +0 -0
- {isa_model-0.0.2.dist-info → isa_model-0.3.1.dist-info}/WHEEL +0 -0
- {isa_model-0.0.2.dist-info → isa_model-0.3.1.dist-info}/top_level.txt +0 -0
```diff
--- a/isa_model/inference/services/llm/openai_llm_service.py
+++ b/isa_model/inference/services/llm/openai_llm_service.py
@@ -1,138 +1,276 @@
 import logging
 import os
-
+import json
+from typing import Dict, Any, List, Union, AsyncGenerator, Optional, Callable
 
-# Use the official OpenAI
+# Use the official OpenAI library
 from openai import AsyncOpenAI
-from dotenv import load_dotenv
 
-from isa_model.inference.services.
+from isa_model.inference.services.llm.base_llm_service import BaseLLMService
 from isa_model.inference.providers.base_provider import BaseProvider
-
-# Load environment variables from the .env.local file
-load_dotenv(dotenv_path='.env.local')
+from isa_model.inference.billing_tracker import ServiceType
 
 logger = logging.getLogger(__name__)
 
 class OpenAILLMService(BaseLLMService):
-    """OpenAI LLM service implementation"""
+    """OpenAI LLM service implementation with unified invoke interface"""
 
-    def __init__(self, provider: 'BaseProvider', model_name: str = "gpt-
+    def __init__(self, provider: 'BaseProvider', model_name: str = "gpt-4.1-nano"):
         super().__init__(provider, model_name)
 
-        #
+        # Get full configuration from provider (including sensitive data)
+        provider_config = provider.get_full_config()
+
+        # Initialize AsyncOpenAI client with provider configuration
         try:
-
-
+            if not provider_config.get("api_key"):
+                raise ValueError("OpenAI API key not found in provider configuration")
 
             self.client = AsyncOpenAI(
-                api_key=api_key,
-                base_url=base_url
+                api_key=provider_config["api_key"],
+                base_url=provider_config.get("base_url", "https://api.openai.com/v1"),
+                organization=provider_config.get("organization")
             )
-
-            logger.
-
+
+            logger.info(f"Initialized OpenAILLMService with model {self.model_name} and endpoint {self.client.base_url}")
+
+        except Exception as e:
+            logger.error(f"Failed to initialize OpenAI client: {e}")
+            raise ValueError(f"Failed to initialize OpenAI client. Check your API key configuration: {e}") from e
 
         self.last_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
-
+        self.total_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0, "requests_count": 0}
+
+        # Tool binding attributes
+        self._bound_tools: List[Dict[str, Any]] = []
+        self._tool_binding_kwargs: Dict[str, Any] = {}
+        self._tool_functions: Dict[str, Callable] = {}
 
-
-    """
-
-
-
-
-
-
+    def _create_bound_copy(self) -> 'OpenAILLMService':
+        """Create a copy of this service for tool binding"""
+        bound_service = OpenAILLMService(self.provider, self.model_name)
+        bound_service._bound_tools = self._bound_tools.copy()
+        bound_service._tool_binding_kwargs = self._tool_binding_kwargs.copy()
+        bound_service._tool_functions = self._tool_functions.copy()
+        return bound_service
+
+    def bind_tools(self, tools: List[Any], **kwargs) -> 'OpenAILLMService':
+        """
+        Bind tools to this LLM service for function calling
+
+        Args:
+            tools: List of tools (functions, dicts, or LangChain tools)
+            **kwargs: Additional arguments for tool binding
+
+        Returns:
+            New LLM service instance with tools bound
+        """
+        # Create a copy of this service
+        bound_service = self._create_bound_copy()
+
+        # Use the adapter manager to handle tools
+        bound_service._bound_tools = tools
+
+        return bound_service
 
-    async def
-    """
+    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any]) -> Union[str, Any]:
+        """Unified invoke method for all input types"""
         try:
-
-
+            # Use adapter manager to prepare messages
+            messages = self._prepare_messages(input_data)
 
-
-
-
-
-
-
+            # Prepare request kwargs
+            kwargs = {
+                "model": self.model_name,
+                "messages": messages,
+                "temperature": self.config.get("temperature", 0.7),
+                "max_tokens": self.config.get("max_tokens", 1024)
+            }
 
-            if
-
-
-
-
-            }
+            # Add tools if bound using adapter manager
+            tool_schemas = await self._prepare_tools_for_request()
+            if tool_schemas:
+                kwargs["tools"] = tool_schemas
+                kwargs["tool_choice"] = "auto"
 
-
+            # Handle streaming vs non-streaming
+            if self.streaming:
+                # Streaming mode - collect all chunks
+                content_chunks = []
+                async for chunk in await self._stream_response(kwargs):
+                    content_chunks.append(chunk)
+                content = "".join(content_chunks)
+
+                # Create a mock usage object for tracking
+                class MockUsage:
+                    def __init__(self):
+                        self.prompt_tokens = len(str(messages)) // 4  # Rough estimate
+                        self.completion_tokens = len(content) // 4  # Rough estimate
+                        self.total_tokens = self.prompt_tokens + self.completion_tokens
+
+                usage = MockUsage()
+                self._update_token_usage(usage)
+                self._track_billing(usage)
+
+                return self._format_response(content, input_data)
+            else:
+                # Non-streaming mode
+                response = await self.client.chat.completions.create(**kwargs)
+                message = response.choices[0].message
+
+                # Update usage tracking
+                if response.usage:
+                    self._update_token_usage(response.usage)
+                    self._track_billing(response.usage)
+
+                # Handle tool calls if present
+                if message.tool_calls:
+                    final_content = await self._handle_tool_calls(message, messages)
+                    return self._format_response(final_content, input_data)
+
+                # Return appropriate format based on input type
+                return self._format_response(message.content or "", input_data)
 
         except Exception as e:
-            logger.error(f"Error in
+            logger.error(f"Error in ainvoke: {e}")
             raise
 
-
-    """
-
-        return await self.achat(messages)
+    def _prepare_messages(self, input_data: Union[str, List[Dict[str, str]], Any]) -> List[Dict[str, str]]:
+        """Convert the message format via the adapter manager (overrides the base-class method for compatibility)"""
+        return self.adapter_manager.convert_messages(input_data)
 
-
-    """
-
-            temperature = self.config.get("temperature", 0.7)
-            max_tokens = self.config.get("max_tokens", 1024)
-
-            response = await self.client.chat.completions.create(
-                model=self.model_name,
-                messages=messages,
-                temperature=temperature,
-                max_tokens=max_tokens,
-                n=n
-            )
-
-            if response.usage:
-                self.last_token_usage = {
-                    "prompt_tokens": response.usage.prompt_tokens,
-                    "completion_tokens": response.usage.completion_tokens,
-                    "total_tokens": response.usage.total_tokens
-                }
-
-            return [choice.message.content or "" for choice in response.choices]
-        except Exception as e:
-            logger.error(f"Error in generate: {e}")
-            raise
+    def _format_response(self, content: str, original_input: Any) -> Union[str, Any]:
+        """Format the response via the adapter manager (overrides the base-class method for compatibility)"""
+        return self.adapter_manager.format_response(content, original_input)
 
-    async def
-    """
+    async def _stream_response(self, kwargs: Dict[str, Any]) -> AsyncGenerator[str, None]:
+        """Handle streaming responses"""
+        kwargs["stream"] = True
+
+        async def stream_generator():
+            try:
+                stream = await self.client.chat.completions.create(**kwargs)
+                async for chunk in stream:
+                    content = chunk.choices[0].delta.content
+                    if content:
+                        yield content
+            except Exception as e:
+                logger.error(f"Error in streaming: {e}")
+                raise
+
+        return stream_generator()
+
+    async def _handle_tool_calls(self, assistant_message, original_messages: List[Dict[str, str]]) -> str:
+        """Handle tool calls from the assistant using adapter manager"""
+        # Add assistant message with tool calls to conversation
+        messages = original_messages + [{
+            "role": "assistant",
+            "content": assistant_message.content or "",
+            "tool_calls": [
+                {
+                    "id": tc.id,
+                    "type": tc.type,
+                    "function": {
+                        "name": tc.function.name,
+                        "arguments": tc.function.arguments
+                    }
+                } for tc in assistant_message.tool_calls
+            ]
+        }]
+
+        # Execute each tool call using adapter manager
+        for tool_call in assistant_message.tool_calls:
+            function_name = tool_call.function.name
+            arguments = json.loads(tool_call.function.arguments)
+
+            try:
+                # Use adapter manager to execute tool
+                result = await self._execute_tool_call(function_name, arguments)
+
+                # Add tool result to messages
+                messages.append({
+                    "role": "tool",
+                    "content": str(result),
+                    "tool_call_id": tool_call.id
+                })
+
+            except Exception as e:
+                logger.error(f"Error executing tool {function_name}: {e}")
+                messages.append({
+                    "role": "tool",
+                    "content": f"Error executing {function_name}: {str(e)}",
+                    "tool_call_id": tool_call.id
+                })
+
+        # Get final response from the model
         try:
-
-
+            kwargs = {
+                "model": self.model_name,
+                "messages": messages,
+                "temperature": self.config.get("temperature", 0.7),
+                "max_tokens": self.config.get("max_tokens", 1024)
+            }
 
-
-
-                messages=messages,
-                temperature=temperature,
-                max_tokens=max_tokens,
-                stream=True
-            )
+            response = await self.client.chat.completions.create(**kwargs)
+            return response.choices[0].message.content or ""
 
-            async for chunk in stream:
-                content = chunk.choices[0].delta.content
-                if content:
-                    yield content
-
         except Exception as e:
-            logger.error(f"Error
+            logger.error(f"Error getting final response after tool calls: {e}")
             raise
 
-    def
+    def _update_token_usage(self, usage):
+        """Update token usage statistics"""
+        self.last_token_usage = {
+            "prompt_tokens": usage.prompt_tokens,
+            "completion_tokens": usage.completion_tokens,
+            "total_tokens": usage.total_tokens
+        }
+
+        # Update total usage
+        self.total_token_usage["prompt_tokens"] += self.last_token_usage["prompt_tokens"]
+        self.total_token_usage["completion_tokens"] += self.last_token_usage["completion_tokens"]
+        self.total_token_usage["total_tokens"] += self.last_token_usage["total_tokens"]
+        self.total_token_usage["requests_count"] += 1
+
+    def _track_billing(self, usage):
+        """Track billing information"""
+        self._track_usage(
+            service_type=ServiceType.LLM,
+            operation="chat",
+            input_tokens=usage.prompt_tokens,
+            output_tokens=usage.completion_tokens,
+            metadata={
+                "temperature": self.config.get("temperature", 0.7),
+                "max_tokens": self.config.get("max_tokens", 1024)
+            }
+        )
+
+    def get_token_usage(self) -> Dict[str, Any]:
         """Get total token usage statistics"""
-        return self.
+        return self.total_token_usage
 
     def get_last_token_usage(self) -> Dict[str, int]:
         """Get token usage from last request"""
         return self.last_token_usage
+
+    def get_model_info(self) -> Dict[str, Any]:
+        """Get information about the current model"""
+        return {
+            "name": self.model_name,
+            "max_tokens": self.config.get("max_tokens", 1024),
+            "supports_streaming": True,
+            "supports_functions": True,
+            "provider": "openai"
+        }
+
+    def _has_bound_tools(self) -> bool:
+        """Check if this service has bound tools"""
+        return bool(self._bound_tools)
+
+    def _get_bound_tools(self) -> List[Dict[str, Any]]:
+        """Get the bound tools schema"""
+        return self._bound_tools
 
     async def close(self):
         """Close the backend client"""
-        await self.client.
+        await self.client.close()
```