isa-model 0.2.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. isa_model/__init__.py +1 -1
  2. isa_model/core/model_manager.py +69 -4
  3. isa_model/core/storage/hf_storage.py +419 -0
  4. isa_model/deployment/__init__.py +52 -0
  5. isa_model/deployment/core/__init__.py +34 -0
  6. isa_model/deployment/core/deployment_config.py +356 -0
  7. isa_model/deployment/core/deployment_manager.py +549 -0
  8. isa_model/deployment/core/isa_deployment_service.py +401 -0
  9. isa_model/eval/factory.py +381 -140
  10. isa_model/inference/ai_factory.py +427 -236
  11. isa_model/inference/billing_tracker.py +406 -0
  12. isa_model/inference/providers/base_provider.py +51 -4
  13. isa_model/inference/providers/ml_provider.py +50 -0
  14. isa_model/inference/providers/ollama_provider.py +37 -18
  15. isa_model/inference/providers/openai_provider.py +65 -36
  16. isa_model/inference/providers/replicate_provider.py +42 -30
  17. isa_model/inference/services/audio/base_stt_service.py +21 -2
  18. isa_model/inference/services/audio/openai_realtime_service.py +353 -0
  19. isa_model/inference/services/audio/openai_stt_service.py +252 -0
  20. isa_model/inference/services/audio/openai_tts_service.py +149 -9
  21. isa_model/inference/services/audio/replicate_tts_service.py +239 -0
  22. isa_model/inference/services/base_service.py +36 -1
  23. isa_model/inference/services/embedding/base_embed_service.py +112 -0
  24. isa_model/inference/services/embedding/ollama_embed_service.py +28 -2
  25. isa_model/inference/services/embedding/openai_embed_service.py +223 -0
  26. isa_model/inference/services/llm/__init__.py +2 -0
  27. isa_model/inference/services/llm/base_llm_service.py +158 -86
  28. isa_model/inference/services/llm/llm_adapter.py +414 -0
  29. isa_model/inference/services/llm/ollama_llm_service.py +252 -63
  30. isa_model/inference/services/llm/openai_llm_service.py +231 -93
  31. isa_model/inference/services/llm/triton_llm_service.py +481 -0
  32. isa_model/inference/services/ml/base_ml_service.py +78 -0
  33. isa_model/inference/services/ml/sklearn_ml_service.py +140 -0
  34. isa_model/inference/services/vision/__init__.py +3 -3
  35. isa_model/inference/services/vision/base_image_gen_service.py +161 -0
  36. isa_model/inference/services/vision/base_vision_service.py +177 -0
  37. isa_model/inference/services/vision/helpers/image_utils.py +4 -3
  38. isa_model/inference/services/vision/ollama_vision_service.py +151 -17
  39. isa_model/inference/services/vision/openai_vision_service.py +275 -41
  40. isa_model/inference/services/vision/replicate_image_gen_service.py +278 -118
  41. isa_model/training/__init__.py +62 -32
  42. isa_model/training/cloud/__init__.py +22 -0
  43. isa_model/training/cloud/job_orchestrator.py +402 -0
  44. isa_model/training/cloud/runpod_trainer.py +454 -0
  45. isa_model/training/cloud/storage_manager.py +482 -0
  46. isa_model/training/core/__init__.py +23 -0
  47. isa_model/training/core/config.py +181 -0
  48. isa_model/training/core/dataset.py +222 -0
  49. isa_model/training/core/trainer.py +720 -0
  50. isa_model/training/core/utils.py +213 -0
  51. isa_model/training/factory.py +229 -198
  52. isa_model-0.3.1.dist-info/METADATA +465 -0
  53. isa_model-0.3.1.dist-info/RECORD +91 -0
  54. isa_model/core/model_router.py +0 -226
  55. isa_model/core/model_version.py +0 -0
  56. isa_model/core/resource_manager.py +0 -202
  57. isa_model/deployment/gpu_fp16_ds8/models/deepseek_r1/1/model.py +0 -120
  58. isa_model/deployment/gpu_fp16_ds8/scripts/download_model.py +0 -18
  59. isa_model/training/engine/llama_factory/__init__.py +0 -39
  60. isa_model/training/engine/llama_factory/config.py +0 -115
  61. isa_model/training/engine/llama_factory/data_adapter.py +0 -284
  62. isa_model/training/engine/llama_factory/examples/__init__.py +0 -6
  63. isa_model/training/engine/llama_factory/examples/finetune_with_tracking.py +0 -185
  64. isa_model/training/engine/llama_factory/examples/rlhf_with_tracking.py +0 -163
  65. isa_model/training/engine/llama_factory/factory.py +0 -331
  66. isa_model/training/engine/llama_factory/rl.py +0 -254
  67. isa_model/training/engine/llama_factory/trainer.py +0 -171
  68. isa_model/training/image_model/configs/create_config.py +0 -37
  69. isa_model/training/image_model/configs/create_flux_config.py +0 -26
  70. isa_model/training/image_model/configs/create_lora_config.py +0 -21
  71. isa_model/training/image_model/prepare_massed_compute.py +0 -97
  72. isa_model/training/image_model/prepare_upload.py +0 -17
  73. isa_model/training/image_model/raw_data/create_captions.py +0 -16
  74. isa_model/training/image_model/raw_data/create_lora_captions.py +0 -20
  75. isa_model/training/image_model/raw_data/pre_processing.py +0 -200
  76. isa_model/training/image_model/train/train.py +0 -42
  77. isa_model/training/image_model/train/train_flux.py +0 -41
  78. isa_model/training/image_model/train/train_lora.py +0 -57
  79. isa_model/training/image_model/train_main.py +0 -25
  80. isa_model-0.2.0.dist-info/METADATA +0 -327
  81. isa_model-0.2.0.dist-info/RECORD +0 -92
  82. isa_model-0.2.0.dist-info/licenses/LICENSE +0 -21
  83. /isa_model/training/{llm_model/annotation → annotation}/annotation_schema.py +0 -0
  84. /isa_model/training/{llm_model/annotation → annotation}/processors/annotation_processor.py +0 -0
  85. /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_manager.py +0 -0
  86. /isa_model/training/{llm_model/annotation → annotation}/storage/dataset_schema.py +0 -0
  87. /isa_model/training/{llm_model/annotation → annotation}/tests/test_annotation_flow.py +0 -0
  88. /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio copy.py +0 -0
  89. /isa_model/training/{llm_model/annotation → annotation}/tests/test_minio_upload.py +0 -0
  90. /isa_model/training/{llm_model/annotation → annotation}/views/annotation_controller.py +0 -0
  91. {isa_model-0.2.0.dist-info → isa_model-0.3.1.dist-info}/WHEEL +0 -0
  92. {isa_model-0.2.0.dist-info → isa_model-0.3.1.dist-info}/top_level.txt +0 -0
isa_model/inference/services/llm/ollama_llm_service.py
@@ -1,99 +1,288 @@
  import logging
- from typing import Dict, Any, List, Union, AsyncGenerator, Optional
- from isa_model.inference.services.base_service import BaseLLMService
+ import httpx
+ import json
+ from typing import Dict, Any, List, Union, AsyncGenerator, Optional, Callable
+ from isa_model.inference.services.llm.base_llm_service import BaseLLMService
  from isa_model.inference.providers.base_provider import BaseProvider

  logger = logging.getLogger(__name__)

  class OllamaLLMService(BaseLLMService):
-     """Ollama LLM service using backend client"""
+     """Ollama LLM service with unified invoke interface"""

-     def __init__(self, provider: 'BaseProvider', model_name: str = "llama3.1"):
+     def __init__(self, provider: 'BaseProvider', model_name: str = "llama3.2:3b-instruct-fp16"):
          super().__init__(provider, model_name)
+
+         # Create HTTP client for Ollama API
+         base_url = self.config.get("base_url", "http://localhost:11434")
+         timeout = self.config.get("timeout", 60)
+
+         self.client = httpx.AsyncClient(
+             base_url=base_url,
+             timeout=timeout
+         )

          self.last_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
-         logger.info(f"Initialized OllamaLLMService with model {model_name}")
+         self.total_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0, "requests_count": 0}
+
+         # Tool binding attributes
+         self._bound_tools: List[Any] = []
+         self._tool_binding_kwargs: Dict[str, Any] = {}
+         self._tool_functions: Dict[str, Callable] = {}
+
+         logger.info(f"Initialized OllamaLLMService with model {model_name} at {base_url}")

-     async def ainvoke(self, prompt: Union[str, List[Dict[str, str]], Any]):
-         """Universal invocation method"""
-         if isinstance(prompt, str):
-             return await self.acompletion(prompt)
-         elif isinstance(prompt, list):
-             return await self.achat(prompt)
-         else:
-             raise ValueError("Prompt must be string or list of messages")
+     def _ensure_client(self):
+         """Ensure the HTTP client is available and not closed"""
+         if not hasattr(self, 'client') or not self.client or self.client.is_closed:
+             base_url = self.config.get("base_url", "http://localhost:11434")
+             timeout = self.config.get("timeout", 60)
+             self.client = httpx.AsyncClient(base_url=base_url, timeout=timeout)
+
+     def _create_bound_copy(self) -> 'OllamaLLMService':
+         """Create a copy of this service for tool binding"""
+         bound_service = OllamaLLMService(self.provider, self.model_name)
+         bound_service._bound_tools = self._bound_tools.copy()
+         bound_service._tool_binding_kwargs = self._tool_binding_kwargs.copy()
+         bound_service._tool_functions = self._tool_functions.copy()
+         return bound_service

-     async def achat(self, messages: List[Dict[str, str]]):
-         """Chat completion method"""
+     def bind_tools(self, tools: List[Union[Dict[str, Any], Callable]], **kwargs) -> 'OllamaLLMService':
+         """Bind tools to this LLM service for function calling"""
+         bound_service = self._create_bound_copy()
+         # Use the base class's adapter manager method
+         bound_service._bound_tools = tools
+         bound_service._tool_binding_kwargs = kwargs
+
+         return bound_service
+
+     async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any]) -> Union[str, Any]:
+         """
+         Universal async invocation method that handles different input types
+
+         Args:
+             input_data: Can be:
+                 - str: Simple text prompt
+                 - list: Message history like [{"role": "user", "content": "hello"}]
+                 - Any: LangChain message objects or other formats
+
+         Returns:
+             Model response (string for simple cases, object for complex cases)
+         """
          try:
+             # Ensure client is available
+             self._ensure_client()
+
+             # Convert input to messages format
+             messages = self._prepare_messages(input_data)
+
+             # Prepare request parameters
              payload = {
                  "model": self.model_name,
                  "messages": messages,
-                 "stream": False
+                 "stream": self.streaming,
+                 "options": {
+                     "temperature": self.config.get("temperature", 0.7),
+                     "top_p": self.config.get("top_p", 0.9),
+                     "num_predict": self.config.get("max_tokens", 2048)
+                 }
              }
-             response = await self.backend.post("/api/chat", payload)

-             # Update token usage if available
-             if "eval_count" in response:
-                 self.last_token_usage = {
-                     "prompt_tokens": response.get("prompt_eval_count", 0),
-                     "completion_tokens": response.get("eval_count", 0),
-                     "total_tokens": response.get("prompt_eval_count", 0) + response.get("eval_count", 0)
-                 }
+             # Add tools if bound using adapter manager
+             tool_schemas = await self._prepare_tools_for_request()
+             if tool_schemas:
+                 payload["tools"] = tool_schemas

-             return response["message"]["content"]
+             # Handle streaming
+             if self.streaming:
+                 return self._stream_response(payload)

-         except Exception as e:
-             logger.error(f"Error in chat completion: {e}")
-             raise
-
-     async def acompletion(self, prompt: str):
-         """Text completion method"""
-         try:
-             payload = {
-                 "model": self.model_name,
-                 "prompt": prompt,
-                 "stream": False
-             }
-             response = await self.backend.post("/api/generate", payload)
+             # Regular request
+             response = await self.client.post("/api/chat", json=payload)
+             response.raise_for_status()
+             result = response.json()

              # Update token usage if available
-             if "eval_count" in response:
-                 self.last_token_usage = {
-                     "prompt_tokens": response.get("prompt_eval_count", 0),
-                     "completion_tokens": response.get("eval_count", 0),
-                     "total_tokens": response.get("prompt_eval_count", 0) + response.get("eval_count", 0)
-                 }
+             if "eval_count" in result:
+                 self._update_token_usage(result)
+
+             # Handle tool calls if present
+             message = result["message"]
+             if "tool_calls" in message and message["tool_calls"]:
+                 return await self._handle_tool_calls(message, messages)

-             return response["response"]
+             # Return appropriate format based on input type
+             return self._format_response(message["content"], input_data)

+         except httpx.RequestError as e:
+             logger.error(f"HTTP request error in ainvoke: {e}")
+             raise
          except Exception as e:
-             logger.error(f"Error in text completion: {e}")
+             logger.error(f"Error in ainvoke: {e}")
              raise

-     async def agenerate(self, messages: List[Dict[str, str]], n: int = 1) -> List[str]:
-         """Generate multiple completions"""
-         results = []
-         for _ in range(n):
-             result = await self.achat(messages)
-             results.append(result)
-         return results
+     def _prepare_messages(self, input_data: Union[str, List[Dict[str, str]], Any]) -> List[Dict[str, str]]:
+         """Convert various input formats to Ollama messages format (same as OpenAI)"""
+         if isinstance(input_data, str):
+             # Simple string prompt
+             return [{"role": "user", "content": input_data}]
+
+         elif isinstance(input_data, list):
+             if not input_data:
+                 raise ValueError("Empty message list provided")
+
+             # Check if it's LangChain messages or standard messages
+             first_msg = input_data[0]
+             if hasattr(first_msg, 'content') and hasattr(first_msg, 'type'):
+                 # LangChain message objects - use base class method
+                 return self._convert_langchain_to_openai(input_data)
+             elif isinstance(first_msg, dict):
+                 # Standard message dictionaries
+                 return input_data
+             else:
+                 # List of strings or other formats
+                 messages = []
+                 for i, msg in enumerate(input_data):
+                     if isinstance(msg, str):
+                         role = "user" if i % 2 == 0 else "assistant"
+                         messages.append({"role": role, "content": msg})
+                     elif isinstance(msg, dict):
+                         messages.append(msg)
+                     else:
+                         messages.append({"role": "user", "content": str(msg)})
+                 return messages
+
+         else:
+             # Handle single LangChain message objects or other objects
+             if hasattr(input_data, 'content') and hasattr(input_data, 'type'):
+                 return self._convert_langchain_to_openai([input_data])
+             else:
+                 return [{"role": "user", "content": str(input_data)}]

-     async def astream_chat(self, messages: List[Dict[str, str]]) -> AsyncGenerator[str, None]:
-         """Stream chat responses"""
-         # Note: This would require modifying the backend to support streaming
-         # For now, return the full response
-         response = await self.achat(messages)
-         yield response
+     def _format_response(self, content: str, original_input: Any) -> Union[str, Any]:
+         """Format response based on original input type"""
+         # For LangGraph compatibility, return AIMessage object if needed
+         if hasattr(original_input, 'type') or (isinstance(original_input, list) and
+                                                original_input and hasattr(original_input[0], 'type')):
+             try:
+                 from langchain_core.messages import AIMessage
+                 return AIMessage(content=content)
+             except ImportError:
+                 # Fallback to simple object
+                 class SimpleAIMessage:
+                     def __init__(self, content):
+                         self.content = content
+                         self.type = "ai"
+                 return SimpleAIMessage(content)
+
+         # Default to string
+         return content

-     def get_token_usage(self):
+     async def _stream_response(self, payload: Dict[str, Any]) -> AsyncGenerator[str, None]:
+         """Handle streaming responses"""
+         async def stream_generator():
+             try:
+                 async with self.client.stream("POST", "/api/chat", json=payload) as response:
+                     response.raise_for_status()
+                     async for line in response.aiter_lines():
+                         if line.strip():
+                             try:
+                                 chunk = json.loads(line)
+                                 if "message" in chunk and "content" in chunk["message"]:
+                                     content = chunk["message"]["content"]
+                                     if content:
+                                         yield content
+                             except json.JSONDecodeError:
+                                 continue
+             except Exception as e:
+                 logger.error(f"Error in streaming: {e}")
+                 raise
+
+         return stream_generator()
+
+     async def _handle_tool_calls(self, assistant_message: Dict[str, Any], original_messages: List[Dict[str, str]]) -> str:
+         """Handle tool calls from the assistant using adapter manager"""
+         tool_calls = assistant_message.get("tool_calls", [])
+
+         # Add assistant message with tool calls to conversation
+         messages = original_messages + [assistant_message]
+
+         # Execute each tool call using adapter manager
+         for tool_call in tool_calls:
+             function_name = tool_call["function"]["name"]
+
+             try:
+                 # Parse arguments if they're a string
+                 arguments = tool_call["function"]["arguments"]
+                 if isinstance(arguments, str):
+                     arguments = json.loads(arguments)
+
+                 # Use adapter manager to execute tool
+                 result = await self._execute_tool_call(function_name, arguments)
+
+                 # Add tool result to messages
+                 messages.append({
+                     "role": "tool",
+                     "content": str(result),
+                     "tool_call_id": tool_call.get("id", function_name)
+                 })
+
+             except Exception as e:
+                 logger.error(f"Error executing tool {function_name}: {e}")
+                 messages.append({
+                     "role": "tool",
+                     "content": f"Error executing {function_name}: {str(e)}",
+                     "tool_call_id": tool_call.get("id", function_name)
+                 })
+
+         # Get final response from the model
+         return await self.ainvoke(messages)
+
+     def _update_token_usage(self, result: Dict[str, Any]):
+         """Update token usage statistics"""
+         self.last_token_usage = {
+             "prompt_tokens": result.get("prompt_eval_count", 0),
+             "completion_tokens": result.get("eval_count", 0),
+             "total_tokens": result.get("prompt_eval_count", 0) + result.get("eval_count", 0)
+         }
+
+         # Update total usage
+         self.total_token_usage["prompt_tokens"] += self.last_token_usage["prompt_tokens"]
+         self.total_token_usage["completion_tokens"] += self.last_token_usage["completion_tokens"]
+         self.total_token_usage["total_tokens"] += self.last_token_usage["total_tokens"]
+         self.total_token_usage["requests_count"] += 1
+
+     def get_token_usage(self) -> Dict[str, Any]:
          """Get total token usage statistics"""
-         return self.last_token_usage
+         return self.total_token_usage

      def get_last_token_usage(self) -> Dict[str, int]:
          """Get token usage from last request"""
          return self.last_token_usage
+
+     def get_model_info(self) -> Dict[str, Any]:
+         """Get information about the current model"""
+         return {
+             "name": self.model_name,
+             "max_tokens": self.config.get("max_tokens", 2048),
+             "supports_streaming": True,
+             "supports_functions": True,
+             "provider": "ollama"
+         }
+
+     def _has_bound_tools(self) -> bool:
+         """Check if this service has bound tools"""
+         return bool(self._bound_tools)
+
+     def _get_bound_tools(self) -> List[Any]:
+         """Get the bound tools schema"""
+         return self._bound_tools

      async def close(self):
-         """Close the backend client"""
-         await self.backend.close()
+         """Close the HTTP client"""
+         if hasattr(self, 'client') and self.client:
+             try:
+                 if not self.client.is_closed:
+                     await self.client.aclose()
+             except Exception as e:
+                 logger.warning(f"Error closing Ollama client: {e}")
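
For orientation: the 0.3.1 rewrite above replaces the old achat/acompletion pair with a single ainvoke entry point backed by httpx. The following is a minimal usage sketch based only on what the diff shows. The OllamaProvider constructor call and its config dict are assumptions for illustration; the diff only shows the service reading self.config and self.provider, while ainvoke, get_token_usage, get_last_token_usage, and close all appear above.

import asyncio

from isa_model.inference.providers.ollama_provider import OllamaProvider
from isa_model.inference.services.llm.ollama_llm_service import OllamaLLMService

async def main():
    # Hypothetical provider construction; the service only requires that
    # self.config supply base_url/timeout/temperature as read in __init__.
    provider = OllamaProvider(config={"base_url": "http://localhost:11434"})
    service = OllamaLLMService(provider, model_name="llama3.2:3b-instruct-fp16")
    try:
        # Plain string prompt -> string reply (see _prepare_messages/_format_response)
        print(await service.ainvoke("Why is the sky blue?"))

        # Message-history form is passed through unchanged
        print(await service.ainvoke([{"role": "user", "content": "One fun fact."}]))

        print(service.get_last_token_usage())  # per-request counters
        print(service.get_token_usage())       # cumulative totals, new in 0.3.1
    finally:
        await service.close()  # closes the underlying httpx.AsyncClient

asyncio.run(main())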