isa-model 0.3.5__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. isa_model/__init__.py +30 -1
  2. isa_model/client.py +770 -0
  3. isa_model/core/config/__init__.py +16 -0
  4. isa_model/core/config/config_manager.py +514 -0
  5. isa_model/core/config.py +426 -0
  6. isa_model/core/models/model_billing_tracker.py +476 -0
  7. isa_model/core/models/model_manager.py +399 -0
  8. isa_model/core/{storage/supabase_storage.py → models/model_repo.py} +72 -73
  9. isa_model/core/pricing_manager.py +426 -0
  10. isa_model/core/services/__init__.py +19 -0
  11. isa_model/core/services/intelligent_model_selector.py +547 -0
  12. isa_model/core/types.py +291 -0
  13. isa_model/deployment/__init__.py +2 -0
  14. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +157 -3
  15. isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
  16. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +104 -3
  17. isa_model/deployment/cloud/modal/register_models.py +321 -0
  18. isa_model/deployment/runtime/deployed_service.py +338 -0
  19. isa_model/deployment/services/__init__.py +9 -0
  20. isa_model/deployment/services/auto_deploy_vision_service.py +537 -0
  21. isa_model/deployment/services/model_service.py +332 -0
  22. isa_model/deployment/services/service_monitor.py +356 -0
  23. isa_model/deployment/services/service_registry.py +527 -0
  24. isa_model/eval/__init__.py +80 -44
  25. isa_model/eval/config/__init__.py +10 -0
  26. isa_model/eval/config/evaluation_config.py +108 -0
  27. isa_model/eval/evaluators/__init__.py +18 -0
  28. isa_model/eval/evaluators/base_evaluator.py +503 -0
  29. isa_model/eval/evaluators/llm_evaluator.py +472 -0
  30. isa_model/eval/factory.py +417 -709
  31. isa_model/eval/infrastructure/__init__.py +24 -0
  32. isa_model/eval/infrastructure/experiment_tracker.py +466 -0
  33. isa_model/eval/metrics.py +191 -21
  34. isa_model/inference/ai_factory.py +181 -605
  35. isa_model/inference/services/audio/base_stt_service.py +65 -1
  36. isa_model/inference/services/audio/base_tts_service.py +75 -1
  37. isa_model/inference/services/audio/openai_stt_service.py +189 -151
  38. isa_model/inference/services/audio/openai_tts_service.py +12 -10
  39. isa_model/inference/services/audio/replicate_tts_service.py +61 -56
  40. isa_model/inference/services/base_service.py +55 -17
  41. isa_model/inference/services/embedding/base_embed_service.py +65 -1
  42. isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
  43. isa_model/inference/services/embedding/openai_embed_service.py +8 -10
  44. isa_model/inference/services/helpers/stacked_config.py +148 -0
  45. isa_model/inference/services/img/__init__.py +18 -0
  46. isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -1
  47. isa_model/inference/services/{stacked → img}/flux_professional_service.py +25 -1
  48. isa_model/inference/services/{stacked → img/helpers}/base_stacked_service.py +40 -35
  49. isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +44 -31
  50. isa_model/inference/services/llm/__init__.py +3 -3
  51. isa_model/inference/services/llm/base_llm_service.py +492 -40
  52. isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
  53. isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
  54. isa_model/inference/services/llm/ollama_llm_service.py +51 -17
  55. isa_model/inference/services/llm/openai_llm_service.py +70 -19
  56. isa_model/inference/services/llm/yyds_llm_service.py +24 -23
  57. isa_model/inference/services/vision/__init__.py +38 -4
  58. isa_model/inference/services/vision/base_vision_service.py +218 -117
  59. isa_model/inference/services/vision/{isA_vision_service.py → disabled/isA_vision_service.py} +98 -0
  60. isa_model/inference/services/{stacked → vision}/doc_analysis_service.py +1 -1
  61. isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
  62. isa_model/inference/services/vision/helpers/image_utils.py +272 -3
  63. isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
  64. isa_model/inference/services/vision/openai_vision_service.py +104 -307
  65. isa_model/inference/services/vision/replicate_vision_service.py +140 -325
  66. isa_model/inference/services/{stacked → vision}/ui_analysis_service.py +2 -498
  67. isa_model/scripts/register_models.py +370 -0
  68. isa_model/scripts/register_models_with_embeddings.py +510 -0
  69. isa_model/serving/api/fastapi_server.py +6 -1
  70. isa_model/serving/api/routes/unified.py +202 -0
  71. {isa_model-0.3.5.dist-info → isa_model-0.3.6.dist-info}/METADATA +4 -1
  72. {isa_model-0.3.5.dist-info → isa_model-0.3.6.dist-info}/RECORD +77 -53
  73. isa_model/config/__init__.py +0 -9
  74. isa_model/config/config_manager.py +0 -213
  75. isa_model/core/model_manager.py +0 -213
  76. isa_model/core/model_registry.py +0 -375
  77. isa_model/core/vision_models_init.py +0 -116
  78. isa_model/inference/billing_tracker.py +0 -406
  79. isa_model/inference/services/llm/triton_llm_service.py +0 -481
  80. isa_model/inference/services/stacked/__init__.py +0 -26
  81. isa_model/inference/services/stacked/config.py +0 -426
  82. isa_model/inference/services/vision/ollama_vision_service.py +0 -194
  83. /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
  84. /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
  85. /isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +0 -0
  86. {isa_model-0.3.5.dist-info → isa_model-0.3.6.dist-info}/WHEEL +0 -0
  87. {isa_model-0.3.5.dist-info → isa_model-0.3.6.dist-info}/top_level.txt +0 -0
isa_model/inference/services/llm/ollama_llm_service.py

@@ -3,19 +3,21 @@ import httpx
 import json
 from typing import Dict, Any, List, Union, AsyncGenerator, Optional, Callable
 from isa_model.inference.services.llm.base_llm_service import BaseLLMService
-from isa_model.inference.providers.base_provider import BaseProvider
 
 logger = logging.getLogger(__name__)
 
 class OllamaLLMService(BaseLLMService):
     """Ollama LLM service with unified invoke interface and proper adapter support"""
 
-    def __init__(self, provider: 'BaseProvider', model_name: str = "llama3.2:3b-instruct-fp16"):
-        super().__init__(provider, model_name)
+    def __init__(self, provider_name: str, model_name: str = "llama3.2:3b-instruct-fp16", **kwargs):
+        super().__init__(provider_name, model_name, **kwargs)
+
+        # Get configuration from centralized config manager
+        provider_config = self.get_provider_config()
 
         # Create HTTP client for Ollama API
-        base_url = self.config.get("base_url", "http://localhost:11434")
-        timeout = self.config.get("timeout", 60)
+        base_url = provider_config.get("base_url", "http://localhost:11434")
+        timeout = provider_config.get("timeout", 60)
 
         self.client = httpx.AsyncClient(
             base_url=base_url,
@@ -31,13 +33,14 @@ class OllamaLLMService(BaseLLMService):
     def _ensure_client(self):
         """Ensure the HTTP client is available and not closed"""
         if not hasattr(self, 'client') or not self.client or self.client.is_closed:
-            base_url = self.config.get("base_url", "http://localhost:11434")
-            timeout = self.config.get("timeout", 60)
+            provider_config = self.get_provider_config()
+            base_url = provider_config.get("base_url", "http://localhost:11434")
+            timeout = provider_config.get("timeout", 60)
             self.client = httpx.AsyncClient(base_url=base_url, timeout=timeout)
 
     def _create_bound_copy(self) -> 'OllamaLLMService':
         """Create a copy of this service for tool binding"""
-        bound_service = OllamaLLMService(self.provider, self.model_name)
+        bound_service = OllamaLLMService(self.provider_name, self.model_name)
         bound_service._bound_tools = self._bound_tools.copy()
         return bound_service
 
@@ -70,14 +73,15 @@
         messages = self._prepare_messages(input_data)
 
         # Prepare request parameters
+        provider_config = self.get_provider_config()
         payload = {
             "model": self.model_name,
             "messages": messages,
             "stream": self.streaming,
             "options": {
-                "temperature": self.config.get("temperature", 0.7),
-                "top_p": self.config.get("top_p", 0.9),
-                "num_predict": self.config.get("max_tokens", 2048)
+                "temperature": provider_config.get("temperature", 0.7),
+                "top_p": provider_config.get("top_p", 0.9),
+                "num_predict": provider_config.get("max_tokens", 2048)
             }
         }
 
@@ -104,6 +108,7 @@
             # Update token usage if available
             if "eval_count" in result:
                 self._update_token_usage(result)
+                await self._track_ollama_billing(result)
 
             # Handle tool calls if present - let adapter process the complete message
             message = result["message"]
@@ -196,7 +201,7 @@
         # Get final response from the model
         return await self.ainvoke(messages)
 
-    def _track_streaming_usage(self, messages: List[Dict[str, str]], content: str):
+    async def _track_streaming_usage(self, messages: List[Dict[str, str]], content: str):
         """Track usage for streaming requests (estimated)"""
         # Create a mock usage object for tracking
         class MockUsage:
@@ -207,6 +212,18 @@
 
         usage = MockUsage()
         self._update_token_usage_from_mock(usage)
+
+        # Track billing
+        await self._track_llm_usage(
+            operation="chat_stream",
+            input_tokens=usage.prompt_tokens,
+            output_tokens=usage.completion_tokens,
+            metadata={
+                "model": self.model_name,
+                "provider": "ollama",
+                "streaming": True
+            }
+        )
 
     def _update_token_usage_from_mock(self, usage):
         """Update token usage statistics from mock usage object"""
@@ -236,6 +253,21 @@
         self.total_token_usage["total_tokens"] += self.last_token_usage["total_tokens"]
         self.total_token_usage["requests_count"] += 1
 
+    async def _track_ollama_billing(self, result: Dict[str, Any]):
+        """Track billing information for Ollama requests"""
+        prompt_tokens = result.get("prompt_eval_count", 0)
+        completion_tokens = result.get("eval_count", 0)
+
+        await self._track_llm_usage(
+            operation="chat",
+            input_tokens=prompt_tokens,
+            output_tokens=completion_tokens,
+            metadata={
+                "model": self.model_name,
+                "provider": "ollama"
+            }
+        )
+
     def get_token_usage(self) -> Dict[str, Any]:
         """Get total token usage statistics"""
         return self.total_token_usage
@@ -246,9 +278,10 @@
 
     def get_model_info(self) -> Dict[str, Any]:
         """Get information about the current model"""
+        provider_config = self.get_provider_config()
         return {
             "name": self.model_name,
-            "max_tokens": self.config.get("max_tokens", 2048),
+            "max_tokens": provider_config.get("max_tokens", 2048),
             "supports_streaming": True,
             "supports_functions": True,
             "provider": "ollama"
@@ -285,14 +318,15 @@
         messages = self._prepare_messages(input_data)
 
         # Prepare request parameters for streaming
+        provider_config = self.get_provider_config()
         payload = {
             "model": self.model_name,
             "messages": messages,
             "stream": True,  # Force streaming for astream
             "options": {
-                "temperature": self.config.get("temperature", 0.7),
-                "top_p": self.config.get("top_p", 0.9),
-                "num_predict": self.config.get("max_tokens", 2048)
+                "temperature": provider_config.get("temperature", 0.7),
+                "top_p": provider_config.get("top_p", 0.9),
+                "num_predict": provider_config.get("max_tokens", 2048)
            }
         }
 
@@ -320,7 +354,7 @@
 
             # Track usage after streaming is complete (estimated)
             full_content = "".join(content_chunks)
-            self._track_streaming_usage(messages, full_content)
+            await self._track_streaming_usage(messages, full_content)
 
         except Exception as e:
             logger.error(f"Error in streaming: {e}")
isa_model/inference/services/llm/openai_llm_service.py

@@ -7,19 +7,18 @@ from typing import Dict, Any, List, Union, AsyncGenerator, Optional, Callable
 from openai import AsyncOpenAI
 
 from isa_model.inference.services.llm.base_llm_service import BaseLLMService
-from isa_model.inference.providers.base_provider import BaseProvider
-from isa_model.inference.billing_tracker import ServiceType
+from ....core.types import ServiceType
 
 logger = logging.getLogger(__name__)
 
 class OpenAILLMService(BaseLLMService):
     """OpenAI LLM service implementation with unified invoke interface"""
 
-    def __init__(self, provider: 'BaseProvider', model_name: str = "gpt-4.1-nano"):
-        super().__init__(provider, model_name)
+    def __init__(self, model_name: str = "gpt-4o-mini", provider_name: str = "openai", **kwargs):
+        super().__init__(provider_name, model_name, **kwargs)
 
-        # Get full configuration from provider (including sensitive data)
-        provider_config = provider.get_full_config()
+        # Get configuration from centralized config manager
+        provider_config = self.get_provider_config()
 
         # Initialize AsyncOpenAI client with provider configuration
         try:
@@ -28,7 +27,7 @@ class OpenAILLMService(BaseLLMService):
 
             self.client = AsyncOpenAI(
                 api_key=provider_config["api_key"],
-                base_url=provider_config.get("base_url", "https://api.openai.com/v1"),
+                base_url=provider_config.get("api_base_url", "https://api.openai.com/v1"),
                 organization=provider_config.get("organization")
             )
 
@@ -44,7 +43,7 @@ class OpenAILLMService(BaseLLMService):
 
     def _create_bound_copy(self) -> 'OpenAILLMService':
         """Create a copy of this service for tool binding"""
-        bound_service = OpenAILLMService(self.provider, self.model_name)
+        bound_service = OpenAILLMService(self.model_name, self.provider_name)
         bound_service._bound_tools = self._bound_tools.copy()
         return bound_service
 
@@ -82,11 +81,12 @@
         messages = self._prepare_messages(input_data)
 
         # Prepare request kwargs
+        provider_config = self.get_provider_config()
         kwargs = {
             "model": self.model_name,
             "messages": messages,
-            "temperature": self.config.get("temperature", 0.7),
-            "max_tokens": self.config.get("max_tokens", 1024),
+            "temperature": provider_config.get("temperature", 0.7),
+            "max_tokens": provider_config.get("max_tokens", 1024),
             "stream": True
         }
 
@@ -125,11 +125,12 @@
         messages = self._prepare_messages(input_data)
 
         # Prepare request kwargs
+        provider_config = self.get_provider_config()
         kwargs = {
             "model": self.model_name,
             "messages": messages,
-            "temperature": self.config.get("temperature", 0.7),
-            "max_tokens": self.config.get("max_tokens", 1024)
+            "temperature": provider_config.get("temperature", 0.7),
+            "max_tokens": provider_config.get("max_tokens", 1024)
         }
 
         # Add tools if bound using adapter manager
@@ -155,7 +156,7 @@
             # Update usage tracking
             if response.usage:
                 self._update_token_usage(response.usage)
-                self._track_billing(response.usage)
+                await self._track_billing(response.usage)
 
             # Handle tool calls if present - let adapter process the complete message
             if message.tool_calls:
@@ -180,7 +181,14 @@
 
         usage = MockUsage()
         self._update_token_usage(usage)
-        self._track_billing(usage)
+        # Fire and forget async tracking
+        import asyncio
+        try:
+            loop = asyncio.get_event_loop()
+            loop.create_task(self._track_billing(usage))
+        except:
+            # If no event loop, skip tracking
+            pass
 
     async def _stream_response(self, kwargs: Dict[str, Any]) -> AsyncGenerator[str, None]:
         """Handle streaming responses - DEPRECATED: Use astream() instead"""
@@ -214,16 +222,17 @@
         self.total_token_usage["total_tokens"] += self.last_token_usage["total_tokens"]
         self.total_token_usage["requests_count"] += 1
 
-    def _track_billing(self, usage):
+    async def _track_billing(self, usage):
         """Track billing information"""
-        self._track_usage(
+        provider_config = self.get_provider_config()
+        await self._track_usage(
             service_type=ServiceType.LLM,
             operation="chat",
             input_tokens=usage.prompt_tokens,
             output_tokens=usage.completion_tokens,
             metadata={
-                "temperature": self.config.get("temperature", 0.7),
-                "max_tokens": self.config.get("max_tokens", 1024)
+                "temperature": provider_config.get("temperature", 0.7),
+                "max_tokens": provider_config.get("max_tokens", 1024)
             }
         )
 
@@ -237,15 +246,57 @@
 
     def get_model_info(self) -> Dict[str, Any]:
         """Get information about the current model"""
+        provider_config = self.get_provider_config()
         return {
             "name": self.model_name,
-            "max_tokens": self.config.get("max_tokens", 1024),
+            "max_tokens": provider_config.get("max_tokens", 1024),
             "supports_streaming": True,
             "supports_functions": True,
             "provider": "openai"
         }
 
 
+    async def chat(
+        self,
+        input_data: Union[str, List[Dict[str, str]], Any],
+        max_tokens: Optional[int] = None
+    ) -> Dict[str, Any]:
+        """
+        Chat method that wraps ainvoke for compatibility with base class
+
+        Args:
+            input_data: Input messages
+            max_tokens: Maximum tokens to generate
+
+        Returns:
+            Dict containing chat response
+        """
+        try:
+            # Call ainvoke and get the response
+            response = await self.ainvoke(input_data)
+
+            # Return in expected format
+            return {
+                "text": response if isinstance(response, str) else str(response),
+                "success": True,
+                "metadata": {
+                    "model": self.model_name,
+                    "provider": self.provider_name,
+                    "max_tokens": max_tokens or self.max_tokens
+                }
+            }
+        except Exception as e:
+            logger.error(f"Chat method failed: {e}")
+            return {
+                "text": "",
+                "success": False,
+                "error": str(e),
+                "metadata": {
+                    "model": self.model_name,
+                    "provider": self.provider_name
+                }
+            }
+
     async def close(self):
         """Close the backend client"""
         await self.client.close()
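Besides the async billing path, 0.3.6 adds a chat() wrapper on OpenAILLMService that delegates to ainvoke() and reports failures in the returned dict instead of raising. A hedged usage sketch, assuming the provider's api_key is already available to the centralized config manager:

    import asyncio
    from isa_model.inference.services.llm.openai_llm_service import OpenAILLMService

    async def main():
        # provider_name defaults to "openai"; model_name defaults to "gpt-4o-mini"
        service = OpenAILLMService(model_name="gpt-4o-mini")
        result = await service.chat("Say hello in one short sentence.")
        if result["success"]:
            print(result["text"])
        else:
            print("chat failed:", result["error"])
        await service.close()

    asyncio.run(main())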
isa_model/inference/services/llm/yyds_llm_service.py

@@ -5,19 +5,17 @@ from typing import Dict, Any, List, Union, AsyncGenerator
 from openai import AsyncOpenAI
 
 from isa_model.inference.services.llm.base_llm_service import BaseLLMService
-from isa_model.inference.providers.base_provider import BaseProvider
-from isa_model.inference.billing_tracker import ServiceType
 
 logger = logging.getLogger(__name__)
 
 class YydsLLMService(BaseLLMService):
     """YYDS LLM service implementation with unified invoke interface"""
 
-    def __init__(self, provider: 'BaseProvider', model_name: str = "claude-sonnet-4-20250514"):
-        super().__init__(provider, model_name)
+    def __init__(self, provider_name: str, model_name: str = "claude-sonnet-4-20250514", **kwargs):
+        super().__init__(provider_name, model_name, **kwargs)
 
-        # Get full configuration from provider (including sensitive data)
-        provider_config = provider.get_full_config()
+        # Get configuration from centralized config manager
+        provider_config = self.get_provider_config()
 
         # Initialize AsyncOpenAI client with provider configuration
         try:
@@ -26,7 +24,7 @@ class YydsLLMService(BaseLLMService):
 
             self.client = AsyncOpenAI(
                 api_key=provider_config["api_key"],
-                base_url=provider_config.get("base_url", "https://api.yyds.com/v1"),
+                base_url=provider_config.get("base_url") or provider_config.get("api_base_url", "https://api.yyds.com/v1"),
                 organization=provider_config.get("organization")
             )
 
@@ -42,7 +40,7 @@
 
     def _create_bound_copy(self) -> 'YydsLLMService':
         """Create a copy of this service for tool binding"""
-        bound_service = YydsLLMService(self.provider, self.model_name)
+        bound_service = YydsLLMService(self.provider_name, self.model_name)
         bound_service._bound_tools = self._bound_tools.copy()
         return bound_service
 
@@ -80,11 +78,12 @@
         messages = self._prepare_messages(input_data)
 
         # Prepare request kwargs
+        provider_config = self.get_provider_config()
         kwargs = {
             "model": self.model_name,
             "messages": messages,
-            "temperature": self.config.get("temperature", 0.7),
-            "max_tokens": self.config.get("max_tokens", 1024),
+            "temperature": provider_config.get("temperature", 0.7),
+            "max_tokens": provider_config.get("max_tokens", 1024),
             "stream": True
         }
 
@@ -106,7 +105,7 @@
 
             # Track usage after streaming is complete
             full_content = "".join(content_chunks)
-            self._track_streaming_usage(messages, full_content)
+            await self._track_streaming_usage(messages, full_content)
 
         except Exception as e:
             logger.error(f"Error in streaming: {e}")
@@ -123,11 +122,12 @@
         messages = self._prepare_messages(input_data)
 
         # Prepare request kwargs
+        provider_config = self.get_provider_config()
         kwargs = {
             "model": self.model_name,
             "messages": messages,
-            "temperature": self.config.get("temperature", 0.7),
-            "max_tokens": self.config.get("max_tokens", 1024)
+            "temperature": provider_config.get("temperature", 0.7),
+            "max_tokens": provider_config.get("max_tokens", 1024)
         }
 
         # Add tools if bound using adapter manager
@@ -153,7 +153,7 @@
             # Update usage tracking
             if response.usage:
                 self._update_token_usage(response.usage)
-                self._track_billing(response.usage)
+                await self._track_billing(response.usage)
 
             # Handle tool calls if present - let adapter process the complete message
             if message.tool_calls:
@@ -167,7 +167,7 @@
             logger.error(f"Error in ainvoke: {e}")
             raise
 
-    def _track_streaming_usage(self, messages: List[Dict[str, str]], content: str):
+    async def _track_streaming_usage(self, messages: List[Dict[str, str]], content: str):
         """Track usage for streaming requests (estimated)"""
         # Create a mock usage object for tracking
         class MockUsage:
@@ -178,7 +178,7 @@
 
         usage = MockUsage()
         self._update_token_usage(usage)
-        self._track_billing(usage)
+        await self._track_billing(usage)
 
     async def _stream_response(self, kwargs: Dict[str, Any]) -> AsyncGenerator[str, None]:
         """Handle streaming responses - DEPRECATED: Use astream() instead"""
@@ -212,16 +212,16 @@
         self.total_token_usage["total_tokens"] += self.last_token_usage["total_tokens"]
         self.total_token_usage["requests_count"] += 1
 
-    def _track_billing(self, usage):
-        """Track billing information"""
-        self._track_usage(
-            service_type=ServiceType.LLM,
+    async def _track_billing(self, usage):
+        """Track billing information using unified billing system"""
+        provider_config = self.get_provider_config()
+        await self._track_llm_usage(
             operation="chat",
             input_tokens=usage.prompt_tokens,
             output_tokens=usage.completion_tokens,
             metadata={
-                "temperature": self.config.get("temperature", 0.7),
-                "max_tokens": self.config.get("max_tokens", 1024)
+                "temperature": provider_config.get("temperature", 0.7),
+                "max_tokens": provider_config.get("max_tokens", 1024)
             }
         )
 
@@ -235,9 +235,10 @@
 
     def get_model_info(self) -> Dict[str, Any]:
         """Get information about the current model"""
+        provider_config = self.get_provider_config()
         return {
             "name": self.model_name,
-            "max_tokens": self.config.get("max_tokens", 1024),
+            "max_tokens": provider_config.get("max_tokens", 1024),
             "supports_streaming": True,
             "supports_functions": True,
             "provider": "yyds",
isa_model/inference/services/vision/__init__.py

@@ -3,10 +3,44 @@
 
 """
 Vision服务包
-包含所有视觉相关服务模块
+包含所有视觉相关服务模块,包括stacked services
 """
 
-# 导出ReplicateImageGenService
-from isa_model.inference.services.vision.replicate_image_gen_service import ReplicateImageGenService
+# Vision understanding services
+from .base_vision_service import BaseVisionService
+from .openai_vision_service import OpenAIVisionService
+from .replicate_vision_service import ReplicateVisionService
 
-__all__ = ["ReplicateImageGenService"]
+# Stacked Vision Services
+from .doc_analysis_service import DocAnalysisStackedService
+from .ui_analysis_service import UIAnalysisService
+
+# ISA Vision service
+try:
+    from .isA_vision_service import ISAVisionService
+    ISA_VISION_AVAILABLE = True
+except ImportError:
+    ISAVisionService = None
+    ISA_VISION_AVAILABLE = False
+
+# Optional services - import only if available
+try:
+    from .ollama_vision_service import OllamaVisionService
+    OLLAMA_VISION_AVAILABLE = True
+except ImportError:
+    OllamaVisionService = None
+    OLLAMA_VISION_AVAILABLE = False
+
+__all__ = [
+    "BaseVisionService",
+    "OpenAIVisionService",
+    "ReplicateVisionService",
+    "DocAnalysisStackedService",
+    "UIAnalysisService"
+]
+
+if ISA_VISION_AVAILABLE:
+    __all__.append("ISAVisionService")
+
+if OLLAMA_VISION_AVAILABLE:
+    __all__.append("OllamaVisionService")