isa-model 0.0.2__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registry.
Files changed (93)
  1. isa_model/__init__.py +1 -1
  2. isa_model/core/model_manager.py +69 -4
  3. isa_model/core/model_registry.py +273 -46
  4. isa_model/core/storage/hf_storage.py +419 -0
  5. isa_model/deployment/__init__.py +52 -0
  6. isa_model/deployment/core/__init__.py +34 -0
  7. isa_model/deployment/core/deployment_config.py +356 -0
  8. isa_model/deployment/core/deployment_manager.py +549 -0
  9. isa_model/deployment/core/isa_deployment_service.py +401 -0
  10. isa_model/eval/factory.py +381 -140
  11. isa_model/inference/ai_factory.py +427 -236
  12. isa_model/inference/billing_tracker.py +406 -0
  13. isa_model/inference/providers/base_provider.py +51 -4
  14. isa_model/inference/providers/ml_provider.py +50 -0
  15. isa_model/inference/providers/ollama_provider.py +37 -18
  16. isa_model/inference/providers/openai_provider.py +65 -36
  17. isa_model/inference/providers/replicate_provider.py +42 -30
  18. isa_model/inference/services/audio/base_stt_service.py +21 -2
  19. isa_model/inference/services/audio/openai_realtime_service.py +353 -0
  20. isa_model/inference/services/audio/openai_stt_service.py +252 -0
  21. isa_model/inference/services/audio/openai_tts_service.py +149 -9
  22. isa_model/inference/services/audio/replicate_tts_service.py +239 -0
  23. isa_model/inference/services/base_service.py +36 -1
  24. isa_model/inference/services/embedding/base_embed_service.py +112 -0
  25. isa_model/inference/services/embedding/ollama_embed_service.py +28 -2
  26. isa_model/inference/services/embedding/openai_embed_service.py +223 -0
  27. isa_model/inference/services/llm/__init__.py +2 -0
  28. isa_model/inference/services/llm/base_llm_service.py +158 -86
  29. isa_model/inference/services/llm/llm_adapter.py +414 -0
  30. isa_model/inference/services/llm/ollama_llm_service.py +252 -63
  31. isa_model/inference/services/llm/openai_llm_service.py +231 -93
  32. isa_model/inference/services/llm/triton_llm_service.py +481 -0
  33. isa_model/inference/services/ml/base_ml_service.py +78 -0
  34. isa_model/inference/services/ml/sklearn_ml_service.py +140 -0
  35. isa_model/inference/services/vision/__init__.py +3 -3
  36. isa_model/inference/services/vision/base_image_gen_service.py +161 -0
  37. isa_model/inference/services/vision/base_vision_service.py +177 -0
  38. isa_model/inference/services/vision/helpers/image_utils.py +4 -3
  39. isa_model/inference/services/vision/ollama_vision_service.py +151 -17
  40. isa_model/inference/services/vision/openai_vision_service.py +275 -41
  41. isa_model/inference/services/vision/replicate_image_gen_service.py +278 -118
  42. isa_model/training/__init__.py +62 -32
  43. isa_model/training/cloud/__init__.py +22 -0
  44. isa_model/training/cloud/job_orchestrator.py +402 -0
  45. isa_model/training/cloud/runpod_trainer.py +454 -0
  46. isa_model/training/cloud/storage_manager.py +482 -0
  47. isa_model/training/core/__init__.py +23 -0
  48. isa_model/training/core/config.py +181 -0
  49. isa_model/training/core/dataset.py +222 -0
  50. isa_model/training/core/trainer.py +720 -0
  51. isa_model/training/core/utils.py +213 -0
  52. isa_model/training/factory.py +229 -198
  53. isa_model-0.3.1.dist-info/METADATA +465 -0
  54. isa_model-0.3.1.dist-info/RECORD +91 -0
  55. isa_model/core/model_router.py +0 -226
  56. isa_model/core/model_version.py +0 -0
  57. isa_model/core/resource_manager.py +0 -202
  58. isa_model/deployment/gpu_fp16_ds8/models/deepseek_r1/1/model.py +0 -120
  59. isa_model/deployment/gpu_fp16_ds8/scripts/download_model.py +0 -18
  60. isa_model/training/engine/llama_factory/__init__.py +0 -39
  61. isa_model/training/engine/llama_factory/config.py +0 -115
  62. isa_model/training/engine/llama_factory/data_adapter.py +0 -284
  63. isa_model/training/engine/llama_factory/examples/__init__.py +0 -6
  64. isa_model/training/engine/llama_factory/examples/finetune_with_tracking.py +0 -185
  65. isa_model/training/engine/llama_factory/examples/rlhf_with_tracking.py +0 -163
  66. isa_model/training/engine/llama_factory/factory.py +0 -331
  67. isa_model/training/engine/llama_factory/rl.py +0 -254
  68. isa_model/training/engine/llama_factory/trainer.py +0 -171
  69. isa_model/training/image_model/configs/create_config.py +0 -37
  70. isa_model/training/image_model/configs/create_flux_config.py +0 -26
  71. isa_model/training/image_model/configs/create_lora_config.py +0 -21
  72. isa_model/training/image_model/prepare_massed_compute.py +0 -97
  73. isa_model/training/image_model/prepare_upload.py +0 -17
  74. isa_model/training/image_model/raw_data/create_captions.py +0 -16
  75. isa_model/training/image_model/raw_data/create_lora_captions.py +0 -20
  76. isa_model/training/image_model/raw_data/pre_processing.py +0 -200
  77. isa_model/training/image_model/train/train.py +0 -42
  78. isa_model/training/image_model/train/train_flux.py +0 -41
  79. isa_model/training/image_model/train/train_lora.py +0 -57
  80. isa_model/training/image_model/train_main.py +0 -25
  81. isa_model-0.0.2.dist-info/METADATA +0 -327
  82. isa_model-0.0.2.dist-info/RECORD +0 -92
  83. isa_model-0.0.2.dist-info/licenses/LICENSE +0 -21
  84. /isa_model/training/{llm_model/annotation → annotation}/annotation_schema.py +0 -0
  85. /isa_model/training/{llm_model/annotation → annotation}/processors/annotation_processor.py +0 -0
  86. /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_manager.py +0 -0
  87. /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_schema.py +0 -0
  88. /isa_model/training/{llm_model/annotation → annotation}/tests/test_annotation_flow.py +0 -0
  89. /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio copy.py +0 -0
  90. /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio_upload.py +0 -0
  91. /isa_model/training/{llm_model/annotation → annotation}/views/annotation_controller.py +0 -0
  92. {isa_model-0.0.2.dist-info → isa_model-0.3.1.dist-info}/WHEEL +0 -0
  93. {isa_model-0.0.2.dist-info → isa_model-0.3.1.dist-info}/top_level.txt +0 -0
isa_model/inference/services/llm/openai_llm_service.py
@@ -1,138 +1,276 @@
 import logging
 import os
-from typing import Dict, Any, List, Union, AsyncGenerator, Optional
+import json
+from typing import Dict, Any, List, Union, AsyncGenerator, Optional, Callable
 
-# Use the official OpenAI library and dotenv
+# Use the official OpenAI library
 from openai import AsyncOpenAI
-from dotenv import load_dotenv
 
-from isa_model.inference.services.base_service import BaseLLMService
+from isa_model.inference.services.llm.base_llm_service import BaseLLMService
 from isa_model.inference.providers.base_provider import BaseProvider
-
-# Load environment variables from .env.local
-load_dotenv(dotenv_path='.env.local')
+from isa_model.inference.billing_tracker import ServiceType
 
 logger = logging.getLogger(__name__)
 
 class OpenAILLMService(BaseLLMService):
-    """OpenAI LLM service implementation"""
+    """OpenAI LLM service implementation with unified invoke interface"""
 
-    def __init__(self, provider: 'BaseProvider', model_name: str = "gpt-3.5-turbo"):
+    def __init__(self, provider: 'BaseProvider', model_name: str = "gpt-4.1-nano"):
         super().__init__(provider, model_name)
 
-        # Initialize the AsyncOpenAI client from the provider config
+        # Get full configuration from provider (including sensitive data)
+        provider_config = provider.get_full_config()
+
+        # Initialize AsyncOpenAI client with provider configuration
         try:
-            api_key = provider.config.get("api_key") or os.getenv("OPENAI_API_KEY")
-            base_url = provider.config.get("api_base") or os.getenv("OPENAI_API_BASE")
+            if not provider_config.get("api_key"):
+                raise ValueError("OpenAI API key not found in provider configuration")
 
             self.client = AsyncOpenAI(
-                api_key=api_key,
-                base_url=base_url
+                api_key=provider_config["api_key"],
+                base_url=provider_config.get("base_url", "https://api.openai.com/v1"),
+                organization=provider_config.get("organization")
             )
-        except TypeError as e:
-            logger.error("Failed to initialize the OpenAI client. Check that OPENAI_API_KEY is set correctly in your .env.local file.")
-            raise ValueError("OPENAI_API_KEY is not set.") from e
+
+            logger.info(f"Initialized OpenAILLMService with model {self.model_name} and endpoint {self.client.base_url}")
+
+        except Exception as e:
+            logger.error(f"Failed to initialize OpenAI client: {e}")
+            raise ValueError(f"Failed to initialize OpenAI client. Check your API key configuration: {e}") from e
 
         self.last_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
-        logger.info(f"Initialized OpenAILLMService with model {self.model_name} and endpoint {self.client.base_url}")
+        self.total_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0, "requests_count": 0}
+
+        # Tool binding attributes
+        self._bound_tools: List[Dict[str, Any]] = []
+        self._tool_binding_kwargs: Dict[str, Any] = {}
+        self._tool_functions: Dict[str, Callable] = {}
 
-    async def ainvoke(self, prompt: Union[str, List[Dict[str, str]], Any]) -> str:
-        """Universal invocation method"""
-        if isinstance(prompt, str):
-            return await self.acompletion(prompt)
-        elif isinstance(prompt, list):
-            return await self.achat(prompt)
-        else:
-            raise ValueError("Prompt must be a string or a list of messages")
+    def _create_bound_copy(self) -> 'OpenAILLMService':
+        """Create a copy of this service for tool binding"""
+        bound_service = OpenAILLMService(self.provider, self.model_name)
+        bound_service._bound_tools = self._bound_tools.copy()
+        bound_service._tool_binding_kwargs = self._tool_binding_kwargs.copy()
+        bound_service._tool_functions = self._tool_functions.copy()
+        return bound_service
+
+    def bind_tools(self, tools: List[Any], **kwargs) -> 'OpenAILLMService':
+        """
+        Bind tools to this LLM service for function calling
+
+        Args:
+            tools: List of tools (functions, dicts, or LangChain tools)
+            **kwargs: Additional arguments for tool binding
+
+        Returns:
+            New LLM service instance with tools bound
+        """
+        # Create a copy of this service
+        bound_service = self._create_bound_copy()
+
+        # Use the adapter manager to handle tools
+        bound_service._bound_tools = tools
+
+        return bound_service
 
-    async def achat(self, messages: List[Dict[str, str]]) -> str:
-        """Chat completion method"""
+    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any]) -> Union[str, Any]:
+        """Unified invoke method for all input types"""
        try:
-            temperature = self.config.get("temperature", 0.7)
-            max_tokens = self.config.get("max_tokens", 1024)
+            # Use adapter manager to prepare messages
+            messages = self._prepare_messages(input_data)
 
-            response = await self.client.chat.completions.create(
-                model=self.model_name,
-                messages=messages,
-                temperature=temperature,
-                max_tokens=max_tokens
-            )
+            # Prepare request kwargs
+            kwargs = {
+                "model": self.model_name,
+                "messages": messages,
+                "temperature": self.config.get("temperature", 0.7),
+                "max_tokens": self.config.get("max_tokens", 1024)
+            }
 
-            if response.usage:
-                self.last_token_usage = {
-                    "prompt_tokens": response.usage.prompt_tokens,
-                    "completion_tokens": response.usage.completion_tokens,
-                    "total_tokens": response.usage.total_tokens
-                }
+            # Add tools if bound using adapter manager
+            tool_schemas = await self._prepare_tools_for_request()
+            if tool_schemas:
+                kwargs["tools"] = tool_schemas
+                kwargs["tool_choice"] = "auto"
 
-            return response.choices[0].message.content or ""
+            # Handle streaming vs non-streaming
+            if self.streaming:
+                # Streaming mode - collect all chunks
+                content_chunks = []
+                async for chunk in await self._stream_response(kwargs):
+                    content_chunks.append(chunk)
+                content = "".join(content_chunks)
+
+                # Create a mock usage object for tracking
+                class MockUsage:
+                    def __init__(self):
+                        self.prompt_tokens = len(str(messages)) // 4  # Rough estimate
+                        self.completion_tokens = len(content) // 4  # Rough estimate
+                        self.total_tokens = self.prompt_tokens + self.completion_tokens
+
+                usage = MockUsage()
+                self._update_token_usage(usage)
+                self._track_billing(usage)
+
+                return self._format_response(content, input_data)
+            else:
+                # Non-streaming mode
+                response = await self.client.chat.completions.create(**kwargs)
+                message = response.choices[0].message
+
+                # Update usage tracking
+                if response.usage:
+                    self._update_token_usage(response.usage)
+                    self._track_billing(response.usage)
+
+                # Handle tool calls if present
+                if message.tool_calls:
+                    final_content = await self._handle_tool_calls(message, messages)
+                    return self._format_response(final_content, input_data)
+
+                # Return appropriate format based on input type
+                return self._format_response(message.content or "", input_data)
 
         except Exception as e:
-            logger.error(f"Error in chat completion: {e}")
+            logger.error(f"Error in ainvoke: {e}")
             raise
 
-    async def acompletion(self, prompt: str) -> str:
-        """Text completion method (using chat API)"""
-        messages = [{"role": "user", "content": prompt}]
-        return await self.achat(messages)
+    def _prepare_messages(self, input_data: Union[str, List[Dict[str, str]], Any]) -> List[Dict[str, str]]:
+        """Convert message formats via the adapter manager (overrides the base-class method for compatibility)"""
+        return self.adapter_manager.convert_messages(input_data)
 
-    async def agenerate(self, messages: List[Dict[str, str]], n: int = 1) -> List[str]:
-        """Generate multiple completions"""
-        try:
-            temperature = self.config.get("temperature", 0.7)
-            max_tokens = self.config.get("max_tokens", 1024)
-
-            response = await self.client.chat.completions.create(
-                model=self.model_name,
-                messages=messages,
-                temperature=temperature,
-                max_tokens=max_tokens,
-                n=n
-            )
-
-            if response.usage:
-                self.last_token_usage = {
-                    "prompt_tokens": response.usage.prompt_tokens,
-                    "completion_tokens": response.usage.completion_tokens,
-                    "total_tokens": response.usage.total_tokens
-                }
-
-            return [choice.message.content or "" for choice in response.choices]
-        except Exception as e:
-            logger.error(f"Error in generate: {e}")
-            raise
+    def _format_response(self, content: str, original_input: Any) -> Union[str, Any]:
+        """Format the response via the adapter manager (overrides the base-class method for compatibility)"""
+        return self.adapter_manager.format_response(content, original_input)
 
-    async def astream_chat(self, messages: List[Dict[str, str]]) -> AsyncGenerator[str, None]:
-        """Stream chat responses"""
+    async def _stream_response(self, kwargs: Dict[str, Any]) -> AsyncGenerator[str, None]:
+        """Handle streaming responses"""
+        kwargs["stream"] = True
+
+        async def stream_generator():
+            try:
+                stream = await self.client.chat.completions.create(**kwargs)
+                async for chunk in stream:
+                    content = chunk.choices[0].delta.content
+                    if content:
+                        yield content
+            except Exception as e:
+                logger.error(f"Error in streaming: {e}")
+                raise
+
+        return stream_generator()
+
+    async def _handle_tool_calls(self, assistant_message, original_messages: List[Dict[str, str]]) -> str:
+        """Handle tool calls from the assistant using adapter manager"""
+        # Add assistant message with tool calls to conversation
+        messages = original_messages + [{
+            "role": "assistant",
+            "content": assistant_message.content or "",
+            "tool_calls": [
+                {
+                    "id": tc.id,
+                    "type": tc.type,
+                    "function": {
+                        "name": tc.function.name,
+                        "arguments": tc.function.arguments
+                    }
+                } for tc in assistant_message.tool_calls
+            ]
+        }]
+
+        # Execute each tool call using adapter manager
+        for tool_call in assistant_message.tool_calls:
+            function_name = tool_call.function.name
+            arguments = json.loads(tool_call.function.arguments)
+
+            try:
+                # Use adapter manager to execute tool
+                result = await self._execute_tool_call(function_name, arguments)
+
+                # Add tool result to messages
+                messages.append({
+                    "role": "tool",
+                    "content": str(result),
+                    "tool_call_id": tool_call.id
+                })
+
+            except Exception as e:
+                logger.error(f"Error executing tool {function_name}: {e}")
+                messages.append({
+                    "role": "tool",
+                    "content": f"Error executing {function_name}: {str(e)}",
+                    "tool_call_id": tool_call.id
+                })
+
+        # Get final response from the model
         try:
-            temperature = self.config.get("temperature", 0.7)
-            max_tokens = self.config.get("max_tokens", 1024)
+            kwargs = {
+                "model": self.model_name,
+                "messages": messages,
+                "temperature": self.config.get("temperature", 0.7),
+                "max_tokens": self.config.get("max_tokens", 1024)
+            }
 
-            stream = await self.client.chat.completions.create(
-                model=self.model_name,
-                messages=messages,
-                temperature=temperature,
-                max_tokens=max_tokens,
-                stream=True
-            )
+            response = await self.client.chat.completions.create(**kwargs)
+            return response.choices[0].message.content or ""
 
-            async for chunk in stream:
-                content = chunk.choices[0].delta.content
-                if content:
-                    yield content
-
         except Exception as e:
-            logger.error(f"Error in stream chat: {e}")
+            logger.error(f"Error getting final response after tool calls: {e}")
             raise
 
-    def get_token_usage(self) -> Dict[str, int]:
+    def _update_token_usage(self, usage):
+        """Update token usage statistics"""
+        self.last_token_usage = {
+            "prompt_tokens": usage.prompt_tokens,
+            "completion_tokens": usage.completion_tokens,
+            "total_tokens": usage.total_tokens
+        }
+
+        # Update total usage
+        self.total_token_usage["prompt_tokens"] += self.last_token_usage["prompt_tokens"]
+        self.total_token_usage["completion_tokens"] += self.last_token_usage["completion_tokens"]
+        self.total_token_usage["total_tokens"] += self.last_token_usage["total_tokens"]
+        self.total_token_usage["requests_count"] += 1
+
+    def _track_billing(self, usage):
+        """Track billing information"""
+        self._track_usage(
+            service_type=ServiceType.LLM,
+            operation="chat",
+            input_tokens=usage.prompt_tokens,
+            output_tokens=usage.completion_tokens,
+            metadata={
+                "temperature": self.config.get("temperature", 0.7),
+                "max_tokens": self.config.get("max_tokens", 1024)
+            }
+        )
+
+    def get_token_usage(self) -> Dict[str, Any]:
         """Get total token usage statistics"""
-        return self.last_token_usage
+        return self.total_token_usage
 
     def get_last_token_usage(self) -> Dict[str, int]:
         """Get token usage from last request"""
         return self.last_token_usage
+
+    def get_model_info(self) -> Dict[str, Any]:
+        """Get information about the current model"""
+        return {
+            "name": self.model_name,
+            "max_tokens": self.config.get("max_tokens", 1024),
+            "supports_streaming": True,
+            "supports_functions": True,
+            "provider": "openai"
+        }
+
+    def _has_bound_tools(self) -> bool:
+        """Check if this service has bound tools"""
+        return bool(self._bound_tools)
+
+    def _get_bound_tools(self) -> List[Dict[str, Any]]:
+        """Get the bound tools schema"""
+        return self._bound_tools
 
     async def close(self):
         """Close the backend client"""
-        await self.client.aclose()
+        await self.client.close()
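
For context, here is a minimal caller-side sketch of the unified interface this diff introduces. The OpenAIProvider constructor shown is hypothetical (its signature is not part of this hunk), and it assumes the base class's adapter manager can wrap a plain Python callable as a tool, as the bind_tools() docstring suggests.

import asyncio

from isa_model.inference.providers.openai_provider import OpenAIProvider
from isa_model.inference.services.llm.openai_llm_service import OpenAILLMService

def get_weather(city: str) -> str:
    """Toy tool: bind_tools() accepts functions, dicts, or LangChain tools."""
    return f"It is sunny in {city}."

async def main():
    # Hypothetical construction; the package's usual entry point is AIFactory,
    # which wires provider config (including api_key) for you.
    provider = OpenAIProvider(config={"api_key": "sk-..."})
    llm = OpenAILLMService(provider, model_name="gpt-4.1-nano")

    # ainvoke() now accepts either a plain string or a chat message list;
    # the adapter manager normalizes the input and formats the response.
    print(await llm.ainvoke("Say hello."))
    print(await llm.ainvoke([{"role": "user", "content": "Say hello."}]))

    # bind_tools() returns a new service instance with tools attached.
    # Tool calls are executed inside ainvoke() and their results are fed
    # back to the model for a final answer.
    weather_llm = llm.bind_tools([get_weather])
    print(await weather_llm.ainvoke("What's the weather in Paris?"))

    # Cumulative usage is tracked per instance, including requests_count.
    print(llm.get_token_usage())

    await llm.close()
    await weather_llm.close()

asyncio.run(main())

Note the design choice: bind_tools() does not mutate the service but returns a bound copy (via _create_bound_copy), mirroring LangChain's bind_tools and keeping the original instance tool-free.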