isa-model 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff compares publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- isa_model/__init__.py +30 -1
- isa_model/client.py +770 -0
- isa_model/core/config/__init__.py +16 -0
- isa_model/core/config/config_manager.py +514 -0
- isa_model/core/config.py +426 -0
- isa_model/core/models/model_billing_tracker.py +476 -0
- isa_model/core/models/model_manager.py +399 -0
- isa_model/core/models/model_repo.py +343 -0
- isa_model/core/pricing_manager.py +426 -0
- isa_model/core/services/__init__.py +19 -0
- isa_model/core/services/intelligent_model_selector.py +547 -0
- isa_model/core/types.py +291 -0
- isa_model/deployment/__init__.py +2 -0
- isa_model/deployment/cloud/__init__.py +9 -0
- isa_model/deployment/cloud/modal/__init__.py +10 -0
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +766 -0
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +406 -0
- isa_model/deployment/cloud/modal/register_models.py +321 -0
- isa_model/deployment/runtime/deployed_service.py +338 -0
- isa_model/deployment/services/__init__.py +9 -0
- isa_model/deployment/services/auto_deploy_vision_service.py +537 -0
- isa_model/deployment/services/model_service.py +332 -0
- isa_model/deployment/services/service_monitor.py +356 -0
- isa_model/deployment/services/service_registry.py +527 -0
- isa_model/eval/__init__.py +80 -44
- isa_model/eval/config/__init__.py +10 -0
- isa_model/eval/config/evaluation_config.py +108 -0
- isa_model/eval/evaluators/__init__.py +18 -0
- isa_model/eval/evaluators/base_evaluator.py +503 -0
- isa_model/eval/evaluators/llm_evaluator.py +472 -0
- isa_model/eval/factory.py +417 -709
- isa_model/eval/infrastructure/__init__.py +24 -0
- isa_model/eval/infrastructure/experiment_tracker.py +466 -0
- isa_model/eval/metrics.py +191 -21
- isa_model/inference/ai_factory.py +187 -387
- isa_model/inference/providers/modal_provider.py +109 -0
- isa_model/inference/providers/yyds_provider.py +108 -0
- isa_model/inference/services/__init__.py +2 -1
- isa_model/inference/services/audio/base_stt_service.py +65 -1
- isa_model/inference/services/audio/base_tts_service.py +75 -1
- isa_model/inference/services/audio/openai_stt_service.py +189 -151
- isa_model/inference/services/audio/openai_tts_service.py +12 -10
- isa_model/inference/services/audio/replicate_tts_service.py +61 -56
- isa_model/inference/services/base_service.py +55 -55
- isa_model/inference/services/embedding/base_embed_service.py +65 -1
- isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
- isa_model/inference/services/embedding/openai_embed_service.py +8 -10
- isa_model/inference/services/helpers/stacked_config.py +148 -0
- isa_model/inference/services/img/__init__.py +18 -0
- isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -35
- isa_model/inference/services/img/flux_professional_service.py +603 -0
- isa_model/inference/services/img/helpers/base_stacked_service.py +274 -0
- isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +210 -69
- isa_model/inference/services/llm/__init__.py +3 -3
- isa_model/inference/services/llm/base_llm_service.py +519 -35
- isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +40 -0
- isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
- isa_model/inference/services/llm/ollama_llm_service.py +150 -15
- isa_model/inference/services/llm/openai_llm_service.py +134 -31
- isa_model/inference/services/llm/yyds_llm_service.py +255 -0
- isa_model/inference/services/vision/__init__.py +38 -4
- isa_model/inference/services/vision/base_vision_service.py +241 -96
- isa_model/inference/services/vision/disabled/isA_vision_service.py +500 -0
- isa_model/inference/services/vision/doc_analysis_service.py +640 -0
- isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
- isa_model/inference/services/vision/helpers/image_utils.py +272 -3
- isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
- isa_model/inference/services/vision/openai_vision_service.py +109 -170
- isa_model/inference/services/vision/replicate_vision_service.py +508 -0
- isa_model/inference/services/vision/ui_analysis_service.py +823 -0
- isa_model/scripts/register_models.py +370 -0
- isa_model/scripts/register_models_with_embeddings.py +510 -0
- isa_model/serving/__init__.py +19 -0
- isa_model/serving/api/__init__.py +10 -0
- isa_model/serving/api/fastapi_server.py +89 -0
- isa_model/serving/api/middleware/__init__.py +9 -0
- isa_model/serving/api/middleware/request_logger.py +88 -0
- isa_model/serving/api/routes/__init__.py +5 -0
- isa_model/serving/api/routes/health.py +82 -0
- isa_model/serving/api/routes/llm.py +19 -0
- isa_model/serving/api/routes/ui_analysis.py +223 -0
- isa_model/serving/api/routes/unified.py +202 -0
- isa_model/serving/api/routes/vision.py +19 -0
- isa_model/serving/api/schemas/__init__.py +17 -0
- isa_model/serving/api/schemas/common.py +33 -0
- isa_model/serving/api/schemas/ui_analysis.py +78 -0
- {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/METADATA +4 -1
- isa_model-0.3.6.dist-info/RECORD +147 -0
- isa_model/core/model_manager.py +0 -208
- isa_model/core/model_registry.py +0 -342
- isa_model/inference/billing_tracker.py +0 -406
- isa_model/inference/services/llm/triton_llm_service.py +0 -481
- isa_model/inference/services/vision/ollama_vision_service.py +0 -194
- isa_model-0.3.4.dist-info/RECORD +0 -91
- /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
- /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
- {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/WHEEL +0 -0
- {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/top_level.txt +0 -0
isa_model/inference/services/llm/yyds_llm_service.py
@@ -0,0 +1,255 @@
+import logging
+from typing import Dict, Any, List, Union, AsyncGenerator
+
+# OpenAI-compatible async client
+from openai import AsyncOpenAI
+
+from isa_model.inference.services.llm.base_llm_service import BaseLLMService
+
+logger = logging.getLogger(__name__)
+
+class YydsLLMService(BaseLLMService):
+    """YYDS LLM service implementation with unified invoke interface"""
+
+    def __init__(self, provider_name: str, model_name: str = "claude-sonnet-4-20250514", **kwargs):
+        super().__init__(provider_name, model_name, **kwargs)
+
+        # Get configuration from centralized config manager
+        provider_config = self.get_provider_config()
+
+        # Initialize AsyncOpenAI client with provider configuration
+        try:
+            if not provider_config.get("api_key"):
+                raise ValueError("YYDS API key not found in provider configuration")
+
+            self.client = AsyncOpenAI(
+                api_key=provider_config["api_key"],
+                base_url=provider_config.get("base_url") or provider_config.get("api_base_url", "https://api.yyds.com/v1"),
+                organization=provider_config.get("organization")
+            )
+
+            logger.info(f"Initialized YydsLLMService with model {self.model_name} and endpoint {self.client.base_url}")
+
+        except Exception as e:
+            logger.error(f"Failed to initialize YYDS client: {e}")
+            raise ValueError(f"Failed to initialize YYDS client. Check your API key configuration: {e}") from e
+
+        self.last_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
+        self.total_token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0, "requests_count": 0}
+
+
+    def _create_bound_copy(self) -> 'YydsLLMService':
+        """Create a copy of this service for tool binding"""
+        bound_service = YydsLLMService(self.provider_name, self.model_name)
+        bound_service._bound_tools = self._bound_tools.copy()
+        return bound_service
+
+    def bind_tools(self, tools: List[Any], **kwargs) -> 'YydsLLMService':
+        """
+        Bind tools to this LLM service for function calling
+
+        Args:
+            tools: List of tools (functions, dicts, or LangChain tools)
+            **kwargs: Additional arguments for tool binding
+
+        Returns:
+            New LLM service instance with tools bound
+        """
+        # Create a copy of this service
+        bound_service = self._create_bound_copy()
+
+        # Use base class method to bind tools
+        bound_service._bound_tools = tools
+
+        return bound_service
+
+    async def astream(self, input_data: Union[str, List[Dict[str, str]], Any]) -> AsyncGenerator[str, None]:
+        """
+        True streaming method - yields tokens one by one as they arrive
+
+        Args:
+            input_data: Same as ainvoke
+
+        Yields:
+            Individual tokens as they arrive from the API
+        """
+        try:
+            # Use adapter manager to prepare messages
+            messages = self._prepare_messages(input_data)
+
+            # Prepare request kwargs
+            provider_config = self.get_provider_config()
+            kwargs = {
+                "model": self.model_name,
+                "messages": messages,
+                "temperature": provider_config.get("temperature", 0.7),
+                "max_tokens": provider_config.get("max_tokens", 1024),
+                "stream": True
+            }
+
+            # Add tools if bound using adapter manager
+            tool_schemas = await self._prepare_tools_for_request()
+            if tool_schemas:
+                kwargs["tools"] = tool_schemas
+                kwargs["tool_choice"] = "auto"
+
+            # Stream tokens one by one
+            content_chunks = []
+            try:
+                stream = await self.client.chat.completions.create(**kwargs)
+                async for chunk in stream:
+                    content = chunk.choices[0].delta.content
+                    if content:
+                        content_chunks.append(content)
+                        yield content
+
+                # Track usage after streaming is complete
+                full_content = "".join(content_chunks)
+                await self._track_streaming_usage(messages, full_content)
+
+            except Exception as e:
+                logger.error(f"Error in streaming: {e}")
+                raise
+
+        except Exception as e:
+            logger.error(f"Error in astream: {e}")
+            raise
+
+    async def ainvoke(self, input_data: Union[str, List[Dict[str, str]], Any]) -> Union[str, Any]:
+        """Unified invoke method for all input types"""
+        try:
+            # Use adapter manager to prepare messages
+            messages = self._prepare_messages(input_data)
+
+            # Prepare request kwargs
+            provider_config = self.get_provider_config()
+            kwargs = {
+                "model": self.model_name,
+                "messages": messages,
+                "temperature": provider_config.get("temperature", 0.7),
+                "max_tokens": provider_config.get("max_tokens", 1024)
+            }
+
+            # Add tools if bound using adapter manager
+            tool_schemas = await self._prepare_tools_for_request()
+            if tool_schemas:
+                kwargs["tools"] = tool_schemas
+                kwargs["tool_choice"] = "auto"
+
+            # Handle streaming vs non-streaming
+            if self.streaming:
+                # TRUE STREAMING MODE - collect all chunks from the stream
+                content_chunks = []
+                async for token in self.astream(input_data):
+                    content_chunks.append(token)
+                content = "".join(content_chunks)
+
+                return self._format_response(content, input_data)
+            else:
+                # Non-streaming mode
+                response = await self.client.chat.completions.create(**kwargs)
+                message = response.choices[0].message
+
+                # Update usage tracking
+                if response.usage:
+                    self._update_token_usage(response.usage)
+                    await self._track_billing(response.usage)
+
+                # Handle tool calls if present - let adapter process the complete message
+                if message.tool_calls:
+                    # Pass the complete message object to adapter for proper tool_calls handling
+                    return self._format_response(message, input_data)
+
+                # Return appropriate format based on input type
+                return self._format_response(message.content or "", input_data)
+
+        except Exception as e:
+            logger.error(f"Error in ainvoke: {e}")
+            raise
+
+    async def _track_streaming_usage(self, messages: List[Dict[str, str]], content: str):
+        """Track usage for streaming requests (estimated)"""
+        # Create a mock usage object for tracking
+        class MockUsage:
+            def __init__(self):
+                self.prompt_tokens = len(str(messages)) // 4  # Rough estimate
+                self.completion_tokens = len(content) // 4  # Rough estimate
+                self.total_tokens = self.prompt_tokens + self.completion_tokens
+
+        usage = MockUsage()
+        self._update_token_usage(usage)
+        await self._track_billing(usage)
+
+    async def _stream_response(self, kwargs: Dict[str, Any]) -> AsyncGenerator[str, None]:
+        """Handle streaming responses - DEPRECATED: Use astream() instead"""
+        kwargs["stream"] = True
+
+        async def stream_generator():
+            try:
+                stream = await self.client.chat.completions.create(**kwargs)
+                async for chunk in stream:
+                    content = chunk.choices[0].delta.content
+                    if content:
+                        yield content
+            except Exception as e:
+                logger.error(f"Error in streaming: {e}")
+                raise
+
+        return stream_generator()
+
+
+    def _update_token_usage(self, usage):
+        """Update token usage statistics"""
+        self.last_token_usage = {
+            "prompt_tokens": usage.prompt_tokens,
+            "completion_tokens": usage.completion_tokens,
+            "total_tokens": usage.total_tokens
+        }
+
+        # Update total usage
+        self.total_token_usage["prompt_tokens"] += self.last_token_usage["prompt_tokens"]
+        self.total_token_usage["completion_tokens"] += self.last_token_usage["completion_tokens"]
+        self.total_token_usage["total_tokens"] += self.last_token_usage["total_tokens"]
+        self.total_token_usage["requests_count"] += 1
+
+    async def _track_billing(self, usage):
+        """Track billing information using unified billing system"""
+        provider_config = self.get_provider_config()
+        await self._track_llm_usage(
+            operation="chat",
+            input_tokens=usage.prompt_tokens,
+            output_tokens=usage.completion_tokens,
+            metadata={
+                "temperature": provider_config.get("temperature", 0.7),
+                "max_tokens": provider_config.get("max_tokens", 1024)
+            }
+        )
+
+    def get_token_usage(self) -> Dict[str, Any]:
+        """Get total token usage statistics"""
+        return self.total_token_usage
+
+    def get_last_token_usage(self) -> Dict[str, int]:
+        """Get token usage from last request"""
+        return self.last_token_usage
+
+    def get_model_info(self) -> Dict[str, Any]:
+        """Get information about the current model"""
+        provider_config = self.get_provider_config()
+        return {
+            "name": self.model_name,
+            "max_tokens": provider_config.get("max_tokens", 1024),
+            "supports_streaming": True,
+            "supports_functions": True,
+            "provider": "yyds",
+            "pricing": {
+                "input_tokens_per_1k": 0.0045,
+                "output_tokens_per_1k": 0.0225,
+                "currency": "USD"
+            }
+        }
+
+
+    async def close(self):
+        """Close the backend client"""
+        await self.client.close()
isa_model/inference/services/vision/__init__.py
@@ -3,10 +3,44 @@
 
 """
 Vision service package
-
+Contains all vision-related service modules, including stacked services
 """
 
-#
-from
+# Vision understanding services
+from .base_vision_service import BaseVisionService
+from .openai_vision_service import OpenAIVisionService
+from .replicate_vision_service import ReplicateVisionService
 
-
+# Stacked Vision Services
+from .doc_analysis_service import DocAnalysisStackedService
+from .ui_analysis_service import UIAnalysisService
+
+# ISA Vision service
+try:
+    from .isA_vision_service import ISAVisionService
+    ISA_VISION_AVAILABLE = True
+except ImportError:
+    ISAVisionService = None
+    ISA_VISION_AVAILABLE = False
+
+# Optional services - import only if available
+try:
+    from .ollama_vision_service import OllamaVisionService
+    OLLAMA_VISION_AVAILABLE = True
+except ImportError:
+    OllamaVisionService = None
+    OLLAMA_VISION_AVAILABLE = False
+
+__all__ = [
+    "BaseVisionService",
+    "OpenAIVisionService",
+    "ReplicateVisionService",
+    "DocAnalysisStackedService",
+    "UIAnalysisService"
+]
+
+if ISA_VISION_AVAILABLE:
+    __all__.append("ISAVisionService")
+
+if OLLAMA_VISION_AVAILABLE:
+    __all__.append("OllamaVisionService")