isa-model: 0.3.5-py3-none-any.whl → 0.3.6-py3-none-any.whl
This diff covers the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as published.
- isa_model/__init__.py +30 -1
- isa_model/client.py +770 -0
- isa_model/core/config/__init__.py +16 -0
- isa_model/core/config/config_manager.py +514 -0
- isa_model/core/config.py +426 -0
- isa_model/core/models/model_billing_tracker.py +476 -0
- isa_model/core/models/model_manager.py +399 -0
- isa_model/core/{storage/supabase_storage.py → models/model_repo.py} +72 -73
- isa_model/core/pricing_manager.py +426 -0
- isa_model/core/services/__init__.py +19 -0
- isa_model/core/services/intelligent_model_selector.py +547 -0
- isa_model/core/types.py +291 -0
- isa_model/deployment/__init__.py +2 -0
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +157 -3
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +104 -3
- isa_model/deployment/cloud/modal/register_models.py +321 -0
- isa_model/deployment/runtime/deployed_service.py +338 -0
- isa_model/deployment/services/__init__.py +9 -0
- isa_model/deployment/services/auto_deploy_vision_service.py +537 -0
- isa_model/deployment/services/model_service.py +332 -0
- isa_model/deployment/services/service_monitor.py +356 -0
- isa_model/deployment/services/service_registry.py +527 -0
- isa_model/eval/__init__.py +80 -44
- isa_model/eval/config/__init__.py +10 -0
- isa_model/eval/config/evaluation_config.py +108 -0
- isa_model/eval/evaluators/__init__.py +18 -0
- isa_model/eval/evaluators/base_evaluator.py +503 -0
- isa_model/eval/evaluators/llm_evaluator.py +472 -0
- isa_model/eval/factory.py +417 -709
- isa_model/eval/infrastructure/__init__.py +24 -0
- isa_model/eval/infrastructure/experiment_tracker.py +466 -0
- isa_model/eval/metrics.py +191 -21
- isa_model/inference/ai_factory.py +181 -605
- isa_model/inference/services/audio/base_stt_service.py +65 -1
- isa_model/inference/services/audio/base_tts_service.py +75 -1
- isa_model/inference/services/audio/openai_stt_service.py +189 -151
- isa_model/inference/services/audio/openai_tts_service.py +12 -10
- isa_model/inference/services/audio/replicate_tts_service.py +61 -56
- isa_model/inference/services/base_service.py +55 -17
- isa_model/inference/services/embedding/base_embed_service.py +65 -1
- isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
- isa_model/inference/services/embedding/openai_embed_service.py +8 -10
- isa_model/inference/services/helpers/stacked_config.py +148 -0
- isa_model/inference/services/img/__init__.py +18 -0
- isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -1
- isa_model/inference/services/{stacked → img}/flux_professional_service.py +25 -1
- isa_model/inference/services/{stacked → img/helpers}/base_stacked_service.py +40 -35
- isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +44 -31
- isa_model/inference/services/llm/__init__.py +3 -3
- isa_model/inference/services/llm/base_llm_service.py +492 -40
- isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
- isa_model/inference/services/llm/ollama_llm_service.py +51 -17
- isa_model/inference/services/llm/openai_llm_service.py +70 -19
- isa_model/inference/services/llm/yyds_llm_service.py +24 -23
- isa_model/inference/services/vision/__init__.py +38 -4
- isa_model/inference/services/vision/base_vision_service.py +218 -117
- isa_model/inference/services/vision/{isA_vision_service.py → disabled/isA_vision_service.py} +98 -0
- isa_model/inference/services/{stacked → vision}/doc_analysis_service.py +1 -1
- isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
- isa_model/inference/services/vision/helpers/image_utils.py +272 -3
- isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
- isa_model/inference/services/vision/openai_vision_service.py +104 -307
- isa_model/inference/services/vision/replicate_vision_service.py +140 -325
- isa_model/inference/services/{stacked → vision}/ui_analysis_service.py +2 -498
- isa_model/scripts/register_models.py +370 -0
- isa_model/scripts/register_models_with_embeddings.py +510 -0
- isa_model/serving/api/fastapi_server.py +6 -1
- isa_model/serving/api/routes/unified.py +202 -0
- {isa_model-0.3.5.dist-info → isa_model-0.3.6.dist-info}/METADATA +4 -1
- {isa_model-0.3.5.dist-info → isa_model-0.3.6.dist-info}/RECORD +77 -53
- isa_model/config/__init__.py +0 -9
- isa_model/config/config_manager.py +0 -213
- isa_model/core/model_manager.py +0 -213
- isa_model/core/model_registry.py +0 -375
- isa_model/core/vision_models_init.py +0 -116
- isa_model/inference/billing_tracker.py +0 -406
- isa_model/inference/services/llm/triton_llm_service.py +0 -481
- isa_model/inference/services/stacked/__init__.py +0 -26
- isa_model/inference/services/stacked/config.py +0 -426
- isa_model/inference/services/vision/ollama_vision_service.py +0 -194
- /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
- /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
- /isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +0 -0
- {isa_model-0.3.5.dist-info → isa_model-0.3.6.dist-info}/WHEEL +0 -0
- {isa_model-0.3.5.dist-info → isa_model-0.3.6.dist-info}/top_level.txt +0 -0
isa_model/inference/services/llm/ollama_llm_service.py

@@ -3,19 +3,21 @@ import httpx
 import json
 from typing import Dict, Any, List, Union, AsyncGenerator, Optional, Callable
 from isa_model.inference.services.llm.base_llm_service import BaseLLMService
-from isa_model.inference.providers.base_provider import BaseProvider
 
 logger = logging.getLogger(__name__)
 
 class OllamaLLMService(BaseLLMService):
     """Ollama LLM service with unified invoke interface and proper adapter support"""
 
-    def __init__(self, …
-        super().__init__(…
+    def __init__(self, provider_name: str, model_name: str = "llama3.2:3b-instruct-fp16", **kwargs):
+        super().__init__(provider_name, model_name, **kwargs)
+
+        # Get configuration from centralized config manager
+        provider_config = self.get_provider_config()
 
         # Create HTTP client for Ollama API
-        base_url = …
-        timeout = …
+        base_url = provider_config.get("base_url", "http://localhost:11434")
+        timeout = provider_config.get("timeout", 60)
 
         self.client = httpx.AsyncClient(
             base_url=base_url,

@@ -31,13 +33,14 @@ class OllamaLLMService(BaseLLMService):
     def _ensure_client(self):
         """Ensure the HTTP client is available and not closed"""
         if not hasattr(self, 'client') or not self.client or self.client.is_closed:
-            …
-            …
+            provider_config = self.get_provider_config()
+            base_url = provider_config.get("base_url", "http://localhost:11434")
+            timeout = provider_config.get("timeout", 60)
             self.client = httpx.AsyncClient(base_url=base_url, timeout=timeout)
 
     def _create_bound_copy(self) -> 'OllamaLLMService':
         """Create a copy of this service for tool binding"""
-        bound_service = OllamaLLMService(self.…
+        bound_service = OllamaLLMService(self.provider_name, self.model_name)
         bound_service._bound_tools = self._bound_tools.copy()
         return bound_service
 

@@ -70,14 +73,15 @@ class OllamaLLMService(BaseLLMService):
         messages = self._prepare_messages(input_data)
 
         # Prepare request parameters
+        provider_config = self.get_provider_config()
         payload = {
             "model": self.model_name,
             "messages": messages,
             "stream": self.streaming,
             "options": {
-                "temperature": …
-                "top_p": …
-                "num_predict": …
+                "temperature": provider_config.get("temperature", 0.7),
+                "top_p": provider_config.get("top_p", 0.9),
+                "num_predict": provider_config.get("max_tokens", 2048)
             }
         }
 

@@ -104,6 +108,7 @@ class OllamaLLMService(BaseLLMService):
         # Update token usage if available
         if "eval_count" in result:
             self._update_token_usage(result)
+            await self._track_ollama_billing(result)
 
         # Handle tool calls if present - let adapter process the complete message
         message = result["message"]

@@ -196,7 +201,7 @@ class OllamaLLMService(BaseLLMService):
         # Get final response from the model
         return await self.ainvoke(messages)
 
-    def _track_streaming_usage(self, messages: List[Dict[str, str]], content: str):
+    async def _track_streaming_usage(self, messages: List[Dict[str, str]], content: str):
         """Track usage for streaming requests (estimated)"""
         # Create a mock usage object for tracking
         class MockUsage:

@@ -207,6 +212,18 @@ class OllamaLLMService(BaseLLMService):
 
         usage = MockUsage()
         self._update_token_usage_from_mock(usage)
+
+        # Track billing
+        await self._track_llm_usage(
+            operation="chat_stream",
+            input_tokens=usage.prompt_tokens,
+            output_tokens=usage.completion_tokens,
+            metadata={
+                "model": self.model_name,
+                "provider": "ollama",
+                "streaming": True
+            }
+        )
 
     def _update_token_usage_from_mock(self, usage):
         """Update token usage statistics from mock usage object"""

@@ -236,6 +253,21 @@ class OllamaLLMService(BaseLLMService):
         self.total_token_usage["total_tokens"] += self.last_token_usage["total_tokens"]
         self.total_token_usage["requests_count"] += 1
 
+    async def _track_ollama_billing(self, result: Dict[str, Any]):
+        """Track billing information for Ollama requests"""
+        prompt_tokens = result.get("prompt_eval_count", 0)
+        completion_tokens = result.get("eval_count", 0)
+
+        await self._track_llm_usage(
+            operation="chat",
+            input_tokens=prompt_tokens,
+            output_tokens=completion_tokens,
+            metadata={
+                "model": self.model_name,
+                "provider": "ollama"
+            }
+        )
+
     def get_token_usage(self) -> Dict[str, Any]:
         """Get total token usage statistics"""
         return self.total_token_usage

@@ -246,9 +278,10 @@ class OllamaLLMService(BaseLLMService):
 
     def get_model_info(self) -> Dict[str, Any]:
         """Get information about the current model"""
+        provider_config = self.get_provider_config()
         return {
             "name": self.model_name,
-            "max_tokens": …
+            "max_tokens": provider_config.get("max_tokens", 2048),
             "supports_streaming": True,
             "supports_functions": True,
             "provider": "ollama"

@@ -285,14 +318,15 @@ class OllamaLLMService(BaseLLMService):
         messages = self._prepare_messages(input_data)
 
         # Prepare request parameters for streaming
+        provider_config = self.get_provider_config()
         payload = {
             "model": self.model_name,
             "messages": messages,
             "stream": True,  # Force streaming for astream
             "options": {
-                "temperature": …
-                "top_p": …
-                "num_predict": …
+                "temperature": provider_config.get("temperature", 0.7),
+                "top_p": provider_config.get("top_p", 0.9),
+                "num_predict": provider_config.get("max_tokens", 2048)
             }
         }
 

@@ -320,7 +354,7 @@ class OllamaLLMService(BaseLLMService):
 
             # Track usage after streaming is complete (estimated)
             full_content = "".join(content_chunks)
-            self._track_streaming_usage(messages, full_content)
+            await self._track_streaming_usage(messages, full_content)
 
         except Exception as e:
             logger.error(f"Error in streaming: {e}")
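For context, the 0.3.6 constructor shown above takes the provider name first and reads base_url, timeout, temperature, top_p, and max_tokens from the centralized config manager via get_provider_config(), while billing is now tracked through the awaited _track_llm_usage/_track_ollama_billing hooks. A minimal usage sketch, assuming a default local Ollama install, that ainvoke accepts a plain string (as the type hints elsewhere in this diff suggest), and that direct construction rather than the AIFactory/client layer is acceptable:

    import asyncio
    from isa_model.inference.services.llm.ollama_llm_service import OllamaLLMService

    async def main():
        # Provider first, model second; model_name defaults to "llama3.2:3b-instruct-fp16".
        service = OllamaLLMService("ollama")
        # base_url falls back to http://localhost:11434 and timeout to 60s when not configured.
        reply = await service.ainvoke("Say hello in one sentence.")
        print(reply)
        print(service.get_token_usage())  # cumulative token counters maintained by the service

    asyncio.run(main())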
isa_model/inference/services/llm/openai_llm_service.py

@@ -7,19 +7,18 @@ from typing import Dict, Any, List, Union, AsyncGenerator, Optional, Callable
 from openai import AsyncOpenAI
 
 from isa_model.inference.services.llm.base_llm_service import BaseLLMService
-from …
-from isa_model.inference.billing_tracker import ServiceType
+from ....core.types import ServiceType
 
 logger = logging.getLogger(__name__)
 
 class OpenAILLMService(BaseLLMService):
     """OpenAI LLM service implementation with unified invoke interface"""
 
-    def __init__(self, …
-        super().__init__(…
+    def __init__(self, model_name: str = "gpt-4o-mini", provider_name: str = "openai", **kwargs):
+        super().__init__(provider_name, model_name, **kwargs)
 
-        # Get …
-        provider_config = …
+        # Get configuration from centralized config manager
+        provider_config = self.get_provider_config()
 
         # Initialize AsyncOpenAI client with provider configuration
         try:

@@ -28,7 +27,7 @@ class OpenAILLMService(BaseLLMService):
 
             self.client = AsyncOpenAI(
                 api_key=provider_config["api_key"],
-                base_url=provider_config.get("…
+                base_url=provider_config.get("api_base_url", "https://api.openai.com/v1"),
                 organization=provider_config.get("organization")
             )
 

@@ -44,7 +43,7 @@ class OpenAILLMService(BaseLLMService):
 
     def _create_bound_copy(self) -> 'OpenAILLMService':
         """Create a copy of this service for tool binding"""
-        bound_service = OpenAILLMService(self.…
+        bound_service = OpenAILLMService(self.model_name, self.provider_name)
         bound_service._bound_tools = self._bound_tools.copy()
         return bound_service
 

@@ -82,11 +81,12 @@ class OpenAILLMService(BaseLLMService):
         messages = self._prepare_messages(input_data)
 
         # Prepare request kwargs
+        provider_config = self.get_provider_config()
         kwargs = {
             "model": self.model_name,
             "messages": messages,
-            "temperature": …
-            "max_tokens": …
+            "temperature": provider_config.get("temperature", 0.7),
+            "max_tokens": provider_config.get("max_tokens", 1024),
             "stream": True
         }
 

@@ -125,11 +125,12 @@ class OpenAILLMService(BaseLLMService):
         messages = self._prepare_messages(input_data)
 
         # Prepare request kwargs
+        provider_config = self.get_provider_config()
         kwargs = {
             "model": self.model_name,
             "messages": messages,
-            "temperature": …
-            "max_tokens": …
+            "temperature": provider_config.get("temperature", 0.7),
+            "max_tokens": provider_config.get("max_tokens", 1024)
         }
 
         # Add tools if bound using adapter manager

@@ -155,7 +156,7 @@ class OpenAILLMService(BaseLLMService):
             # Update usage tracking
             if response.usage:
                 self._update_token_usage(response.usage)
-                self._track_billing(response.usage)
+                await self._track_billing(response.usage)
 
             # Handle tool calls if present - let adapter process the complete message
             if message.tool_calls:

@@ -180,7 +181,14 @@ class OpenAILLMService(BaseLLMService):
 
         usage = MockUsage()
         self._update_token_usage(usage)
-        …
+        # Fire and forget async tracking
+        import asyncio
+        try:
+            loop = asyncio.get_event_loop()
+            loop.create_task(self._track_billing(usage))
+        except:
+            # If no event loop, skip tracking
+            pass
 
     async def _stream_response(self, kwargs: Dict[str, Any]) -> AsyncGenerator[str, None]:
         """Handle streaming responses - DEPRECATED: Use astream() instead"""

@@ -214,16 +222,17 @@ class OpenAILLMService(BaseLLMService):
         self.total_token_usage["total_tokens"] += self.last_token_usage["total_tokens"]
         self.total_token_usage["requests_count"] += 1
 
-    def _track_billing(self, usage):
+    async def _track_billing(self, usage):
         """Track billing information"""
-        self.…
+        provider_config = self.get_provider_config()
+        await self._track_usage(
             service_type=ServiceType.LLM,
             operation="chat",
             input_tokens=usage.prompt_tokens,
             output_tokens=usage.completion_tokens,
             metadata={
-                "temperature": …
-                "max_tokens": …
+                "temperature": provider_config.get("temperature", 0.7),
+                "max_tokens": provider_config.get("max_tokens", 1024)
             }
         )
 

@@ -237,15 +246,57 @@ class OpenAILLMService(BaseLLMService):
 
     def get_model_info(self) -> Dict[str, Any]:
         """Get information about the current model"""
+        provider_config = self.get_provider_config()
         return {
             "name": self.model_name,
-            "max_tokens": …
+            "max_tokens": provider_config.get("max_tokens", 1024),
             "supports_streaming": True,
             "supports_functions": True,
             "provider": "openai"
         }
 
 
+    async def chat(
+        self,
+        input_data: Union[str, List[Dict[str, str]], Any],
+        max_tokens: Optional[int] = None
+    ) -> Dict[str, Any]:
+        """
+        Chat method that wraps ainvoke for compatibility with base class
+
+        Args:
+            input_data: Input messages
+            max_tokens: Maximum tokens to generate
+
+        Returns:
+            Dict containing chat response
+        """
+        try:
+            # Call ainvoke and get the response
+            response = await self.ainvoke(input_data)
+
+            # Return in expected format
+            return {
+                "text": response if isinstance(response, str) else str(response),
+                "success": True,
+                "metadata": {
+                    "model": self.model_name,
+                    "provider": self.provider_name,
+                    "max_tokens": max_tokens or self.max_tokens
+                }
+            }
+        except Exception as e:
+            logger.error(f"Chat method failed: {e}")
+            return {
+                "text": "",
+                "success": False,
+                "error": str(e),
+                "metadata": {
+                    "model": self.model_name,
+                    "provider": self.provider_name
+                }
+            }
+
     async def close(self):
         """Close the backend client"""
         await self.client.close()
isa_model/inference/services/llm/yyds_llm_service.py

@@ -5,19 +5,17 @@ from typing import Dict, Any, List, Union, AsyncGenerator
 from openai import AsyncOpenAI
 
 from isa_model.inference.services.llm.base_llm_service import BaseLLMService
-from isa_model.inference.providers.base_provider import BaseProvider
-from isa_model.inference.billing_tracker import ServiceType
 
 logger = logging.getLogger(__name__)
 
 class YydsLLMService(BaseLLMService):
     """YYDS LLM service implementation with unified invoke interface"""
 
-    def __init__(self, …
-        super().__init__(…
+    def __init__(self, provider_name: str, model_name: str = "claude-sonnet-4-20250514", **kwargs):
+        super().__init__(provider_name, model_name, **kwargs)
 
-        # Get …
-        provider_config = …
+        # Get configuration from centralized config manager
+        provider_config = self.get_provider_config()
 
         # Initialize AsyncOpenAI client with provider configuration
         try:

@@ -26,7 +24,7 @@ class YydsLLMService(BaseLLMService):
 
             self.client = AsyncOpenAI(
                 api_key=provider_config["api_key"],
-                base_url=provider_config.get("base_url", "https://api.yyds.com/v1"),
+                base_url=provider_config.get("base_url") or provider_config.get("api_base_url", "https://api.yyds.com/v1"),
                 organization=provider_config.get("organization")
             )
 

@@ -42,7 +40,7 @@ class YydsLLMService(BaseLLMService):
 
     def _create_bound_copy(self) -> 'YydsLLMService':
         """Create a copy of this service for tool binding"""
-        bound_service = YydsLLMService(self.…
+        bound_service = YydsLLMService(self.provider_name, self.model_name)
         bound_service._bound_tools = self._bound_tools.copy()
         return bound_service
 

@@ -80,11 +78,12 @@ class YydsLLMService(BaseLLMService):
         messages = self._prepare_messages(input_data)
 
         # Prepare request kwargs
+        provider_config = self.get_provider_config()
         kwargs = {
             "model": self.model_name,
             "messages": messages,
-            "temperature": …
-            "max_tokens": …
+            "temperature": provider_config.get("temperature", 0.7),
+            "max_tokens": provider_config.get("max_tokens", 1024),
             "stream": True
         }
 

@@ -106,7 +105,7 @@ class YydsLLMService(BaseLLMService):
 
             # Track usage after streaming is complete
             full_content = "".join(content_chunks)
-            self._track_streaming_usage(messages, full_content)
+            await self._track_streaming_usage(messages, full_content)
 
         except Exception as e:
             logger.error(f"Error in streaming: {e}")

@@ -123,11 +122,12 @@ class YydsLLMService(BaseLLMService):
         messages = self._prepare_messages(input_data)
 
         # Prepare request kwargs
+        provider_config = self.get_provider_config()
         kwargs = {
             "model": self.model_name,
             "messages": messages,
-            "temperature": …
-            "max_tokens": …
+            "temperature": provider_config.get("temperature", 0.7),
+            "max_tokens": provider_config.get("max_tokens", 1024)
         }
 
         # Add tools if bound using adapter manager

@@ -153,7 +153,7 @@ class YydsLLMService(BaseLLMService):
             # Update usage tracking
             if response.usage:
                 self._update_token_usage(response.usage)
-                self._track_billing(response.usage)
+                await self._track_billing(response.usage)
 
             # Handle tool calls if present - let adapter process the complete message
             if message.tool_calls:

@@ -167,7 +167,7 @@ class YydsLLMService(BaseLLMService):
             logger.error(f"Error in ainvoke: {e}")
             raise
 
-    def _track_streaming_usage(self, messages: List[Dict[str, str]], content: str):
+    async def _track_streaming_usage(self, messages: List[Dict[str, str]], content: str):
         """Track usage for streaming requests (estimated)"""
         # Create a mock usage object for tracking
         class MockUsage:

@@ -178,7 +178,7 @@ class YydsLLMService(BaseLLMService):
 
         usage = MockUsage()
         self._update_token_usage(usage)
-        self._track_billing(usage)
+        await self._track_billing(usage)
 
     async def _stream_response(self, kwargs: Dict[str, Any]) -> AsyncGenerator[str, None]:
         """Handle streaming responses - DEPRECATED: Use astream() instead"""

@@ -212,16 +212,16 @@ class YydsLLMService(BaseLLMService):
         self.total_token_usage["total_tokens"] += self.last_token_usage["total_tokens"]
         self.total_token_usage["requests_count"] += 1
 
-    def _track_billing(self, usage):
-        """Track billing information"""
-        self.…
-        …
+    async def _track_billing(self, usage):
+        """Track billing information using unified billing system"""
+        provider_config = self.get_provider_config()
+        await self._track_llm_usage(
             operation="chat",
             input_tokens=usage.prompt_tokens,
             output_tokens=usage.completion_tokens,
             metadata={
-                "temperature": …
-                "max_tokens": …
+                "temperature": provider_config.get("temperature", 0.7),
+                "max_tokens": provider_config.get("max_tokens", 1024)
             }
         )
 

@@ -235,9 +235,10 @@ class YydsLLMService(BaseLLMService):
 
     def get_model_info(self) -> Dict[str, Any]:
         """Get information about the current model"""
+        provider_config = self.get_provider_config()
         return {
             "name": self.model_name,
-            "max_tokens": …
+            "max_tokens": provider_config.get("max_tokens", 1024),
             "supports_streaming": True,
             "supports_functions": True,
             "provider": "yyds",
isa_model/inference/services/vision/__init__.py

@@ -3,10 +3,44 @@
 
 """
 Vision服务包
-…
+包含所有视觉相关服务模块,包括stacked services
 """
 
-# …
-from …
+# Vision understanding services
+from .base_vision_service import BaseVisionService
+from .openai_vision_service import OpenAIVisionService
+from .replicate_vision_service import ReplicateVisionService
 
-…
+# Stacked Vision Services
+from .doc_analysis_service import DocAnalysisStackedService
+from .ui_analysis_service import UIAnalysisService
+
+# ISA Vision service
+try:
+    from .isA_vision_service import ISAVisionService
+    ISA_VISION_AVAILABLE = True
+except ImportError:
+    ISAVisionService = None
+    ISA_VISION_AVAILABLE = False
+
+# Optional services - import only if available
+try:
+    from .ollama_vision_service import OllamaVisionService
+    OLLAMA_VISION_AVAILABLE = True
+except ImportError:
+    OllamaVisionService = None
+    OLLAMA_VISION_AVAILABLE = False
+
+__all__ = [
+    "BaseVisionService",
+    "OpenAIVisionService",
+    "ReplicateVisionService",
+    "DocAnalysisStackedService",
+    "UIAnalysisService"
+]
+
+if ISA_VISION_AVAILABLE:
+    __all__.append("ISAVisionService")
+
+if OLLAMA_VISION_AVAILABLE:
+    __all__.append("OllamaVisionService")