isa-model 0.3.91__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +732 -573
- isa_model/core/cache/redis_cache.py +401 -0
- isa_model/core/config/config_manager.py +53 -10
- isa_model/core/config.py +1 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/migrations.py +277 -0
- isa_model/core/database/supabase_client.py +123 -0
- isa_model/core/models/__init__.py +37 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +36 -18
- isa_model/core/models/model_repo.py +44 -38
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +101 -370
- isa_model/core/storage/hf_storage.py +1 -1
- isa_model/core/types.py +7 -0
- isa_model/deployment/cloud/modal/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/cloud/modal/isa_audio_fish_service.py +0 -0
- isa_model/deployment/cloud/modal/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/cloud/modal/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/cloud/modal/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/cloud/modal/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/cloud/modal/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/cloud/modal/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +467 -323
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +607 -180
- isa_model/deployment/cloud/modal/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/core/deployment_manager.py +6 -4
- isa_model/deployment/services/auto_hf_modal_deployer.py +894 -0
- isa_model/eval/benchmarks/__init__.py +27 -0
- isa_model/eval/benchmarks/multimodal_datasets.py +460 -0
- isa_model/eval/benchmarks.py +244 -12
- isa_model/eval/evaluators/__init__.py +8 -2
- isa_model/eval/evaluators/audio_evaluator.py +727 -0
- isa_model/eval/evaluators/embedding_evaluator.py +742 -0
- isa_model/eval/evaluators/vision_evaluator.py +564 -0
- isa_model/eval/example_evaluation.py +395 -0
- isa_model/eval/factory.py +272 -5
- isa_model/eval/isa_benchmarks.py +700 -0
- isa_model/eval/isa_integration.py +582 -0
- isa_model/eval/metrics.py +159 -6
- isa_model/eval/tests/unit/test_basic.py +396 -0
- isa_model/inference/ai_factory.py +44 -8
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +32 -6
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/base_llm_service.py +30 -6
- isa_model/inference/services/llm/helpers/llm_adapter.py +63 -9
- isa_model/inference/services/llm/ollama_llm_service.py +2 -1
- isa_model/inference/services/llm/openai_llm_service.py +652 -55
- isa_model/inference/services/llm/yyds_llm_service.py +2 -1
- isa_model/inference/services/vision/__init__.py +5 -5
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/helpers/image_utils.py +11 -5
- isa_model/inference/services/vision/isa_vision_service.py +573 -0
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/serving/api/fastapi_server.py +88 -16
- isa_model/serving/api/middleware/auth.py +311 -0
- isa_model/serving/api/middleware/security.py +278 -0
- isa_model/serving/api/routes/analytics.py +486 -0
- isa_model/serving/api/routes/deployments.py +339 -0
- isa_model/serving/api/routes/evaluations.py +579 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/unified.py +324 -165
- isa_model/serving/api/startup.py +304 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/training/__init__.py +100 -6
- isa_model/training/core/__init__.py +4 -1
- isa_model/training/examples/intelligent_training_example.py +281 -0
- isa_model/training/intelligent/__init__.py +25 -0
- isa_model/training/intelligent/decision_engine.py +643 -0
- isa_model/training/intelligent/intelligent_factory.py +888 -0
- isa_model/training/intelligent/knowledge_base.py +751 -0
- isa_model/training/intelligent/resource_optimizer.py +839 -0
- isa_model/training/intelligent/task_classifier.py +576 -0
- isa_model/training/storage/__init__.py +24 -0
- isa_model/training/storage/core_integration.py +439 -0
- isa_model/training/storage/training_repository.py +552 -0
- isa_model/training/storage/training_storage.py +628 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.0.dist-info}/METADATA +13 -1
- isa_model-0.4.0.dist-info/RECORD +182 -0
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model-0.3.91.dist-info/RECORD +0 -138
- {isa_model-0.3.91.dist-info → isa_model-0.4.0.dist-info}/WHEEL +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.0.dist-info}/top_level.txt +0 -0
isa_model/client.py
CHANGED
@@ -2,15 +2,78 @@
 # -*- coding: utf-8 -*-

 """
-ISA Model Client - Unified
-
+ISA Model Client - Unified AI Service Interface
+===============================================
+
+Overview:
+    Unified client interface for the ISA Model platform, providing intelligent model selection and simplified API calls.
+
+Key features:
+- Unified interface for multimodal AI services: text, vision, audio, image generation, embeddings
+- Automatic model selection: picks the best-suited model from the task type and input data
+- Streaming support: real-time streaming text generation for a better user experience
+- Remote/local modes: supports both local service calls and remote API calls
+- Cost tracking: automatically computes and tracks API call costs
+- Tool support: LangChain tool integration to extend model capabilities
+- Caching: service instances are cached for better performance
+
+Inputs:
+- input_data: input of several types (text, image path, audio file, raw bytes, etc.)
+- task: task type (chat, analyze, generate_speech, transcribe, etc.)
+- service_type: service type (text, vision, audio, image, embedding)
+- model: optional model name (selected intelligently if omitted)
+- provider: optional provider name (openai, ollama, replicate, etc.)
+
+Outputs:
+- A unified response dictionary containing result and metadata
+- Streaming responses: contain a stream async generator
+- Non-streaming responses: contain the result data
+- metadata: includes model info, billing info, selection reason, etc.
+
+Core dependencies:
+- isa_model.inference.ai_factory: AI service factory
+- isa_model.core.services.intelligent_model_selector: intelligent model selector
+- aiohttp: HTTP client (remote API mode)
+- asyncio: async programming support
+
+Usage examples:
+```python
+# Create a client
+client = ISAModelClient()
+
+# Streaming text generation
+result = await client.invoke("Write a story", "chat", "text")
+async for token in result["stream"]:
+    print(token, end="", flush=True)
+
+# Image analysis
+result = await client.invoke("image.jpg", "analyze", "vision")
+print(result["result"])
+
+# Speech synthesis
+result = await client.invoke("Hello world", "generate_speech", "audio")
+print(result["result"])
+```
+
+Architecture notes:
+- Singleton pattern: ensures configuration consistency
+- Async support: all operations are asynchronous
+- Error handling: unified error handling and response format
+- Extensibility: supports new service providers and models
+
+Improvement ideas:
+1. Add request retries to handle unstable networks
+2. Add rate limiting to avoid exceeding API limits
+3. Improve the caching strategy with LRU eviction and TTL expiry
+4. Add monitoring metrics such as latency and success rate
+5. Support batching to process large request volumes efficiently
+6. Validate API keys and configuration at startup
 """

 import logging
 import asyncio
 from typing import Any, Dict, Optional, List, Union
 from pathlib import Path
-import aiohttp

 from isa_model.inference.ai_factory import AIFactory

@@ -36,41 +99,104 @@ class ISAModelClient:
         response = await client.invoke("audio.mp3", "transcribe", "audio")
     """

+    # Consolidated task mappings for all service types
+    TASK_MAPPINGS = {
+        "vision": {
+            # Core tasks (direct mapping)
+            "analyze": "analyze",
+            "describe": "describe",
+            "extract": "extract",
+            "detect": "detect",
+            "classify": "classify",
+            "compare": "compare",
+
+            # Common aliases (backward compatibility)
+            "analyze_image": "analyze",
+            "describe_image": "describe",
+            "extract_text": "extract",
+            "extract_table": "extract",
+            "detect_objects": "detect",
+            "detect_ui": "detect",
+            "detect_ui_elements": "detect",
+            "get_coordinates": "detect",
+            "ocr": "extract",
+            "ui_analysis": "analyze",
+            "navigation": "analyze"
+        },
+        "audio": {
+            "generate_speech": "synthesize",
+            "text_to_speech": "synthesize",
+            "tts": "synthesize",
+            "transcribe": "transcribe",
+            "speech_to_text": "transcribe",
+            "stt": "transcribe",
+            "translate": "translate",
+            "detect_language": "detect_language"
+        },
+        "text": {
+            "chat": "chat",
+            "generate": "generate",
+            "complete": "complete",
+            "translate": "translate",
+            "summarize": "summarize",
+            "analyze": "analyze",
+            "extract": "extract",
+            "classify": "classify"
+        },
+        "image": {
+            "generate_image": "generate",
+            "generate": "generate",
+            "img2img": "img2img",
+            "image_to_image": "img2img",
+            "generate_batch": "generate_batch"
+        },
+        "embedding": {
+            "create_embedding": "embed",
+            "embed": "embed",
+            "embed_batch": "embed_batch",
+            "chunk_and_embed": "chunk_and_embed",
+            "similarity": "similarity",
+            "find_similar": "find_similar",
+            "rerank": "rerank",
+            "rerank_documents": "rerank_documents",
+            "document_ranking": "document_ranking"
+        }
+    }
+
+    # Service type configuration
+    SUPPORTED_SERVICE_TYPES = {"vision", "audio", "text", "image", "embedding"}
+
     def __init__(self,
                  config: Optional[Dict[str, Any]] = None,
-
-                 api_url: Optional[str] = None,
+                 service_endpoint: Optional[str] = None,
                  api_key: Optional[str] = None):
         """Initialize ISA Model Client

         Args:
             config: Optional configuration override
-
-
-            api_key: API key for authentication (optional)
+            service_endpoint: Optional service endpoint URL (if None, uses local AI Factory)
+            api_key: Optional API key for authentication (can also be set via ISA_API_KEY env var)
         """
         self.config = config or {}
-        self.
-        self.api_url = api_url.rstrip('/') if api_url else None
-        self.api_key = api_key
+        self.service_endpoint = service_endpoint

-        #
-
-
-
-
-
-            "User-Agent": "ISA-Model-Client/1.0.0"
-        }
-        if self.api_key:
-            self.headers["Authorization"] = f"Bearer {self.api_key}"
+        # Handle API key authentication
+        import os
+        self.api_key = api_key or os.getenv("ISA_API_KEY")
+        if self.api_key:
+            logger.info("API key provided for authentication")
+        else:
+            logger.debug("No API key provided - using anonymous access")

-        # Initialize AI Factory for
-        if self.
+        # Initialize AI Factory for direct service access (when service_endpoint is None)
+        if not self.service_endpoint:
             self.ai_factory = AIFactory.get_instance()
         else:
             self.ai_factory = None
+            logger.info(f"Using remote service endpoint: {self.service_endpoint}")
+
+        # HTTP client for remote API calls
+        self._http_session = None

         # Initialize intelligent model selector
         self.model_selector = None
@@ -89,166 +215,352 @@ class ISAModelClient:

         logger.info("ISA Model Client initialized")

-    async def
+    async def _get_http_session(self):
+        """Get or create HTTP session for remote API calls"""
+        if self._http_session is None:
+            import aiohttp
+            headers = {}
+
+            # Add API key authentication if available
+            if self.api_key:
+                headers["Authorization"] = f"Bearer {self.api_key}"
+                headers["X-API-Key"] = self.api_key
+
+            self._http_session = aiohttp.ClientSession(headers=headers)
+
+        return self._http_session
+
+    async def _make_api_request(self, endpoint: str, data: Dict[str, Any]) -> Dict[str, Any]:
+        """Make HTTP request to remote API endpoint"""
+        if not self.service_endpoint:
+            raise ValueError("Service endpoint not configured for remote API calls")
+
+        session = await self._get_http_session()
+        url = f"{self.service_endpoint.rstrip('/')}/{endpoint.lstrip('/')}"
+
+        try:
+            async with session.post(url, json=data) as response:
+                if response.status == 401:
+                    raise Exception("Authentication required or invalid API key")
+                elif response.status == 403:
+                    raise Exception("Insufficient permissions")
+                elif not response.ok:
+                    error_detail = await response.text()
+                    raise Exception(f"API request failed ({response.status}): {error_detail}")
+
+                return await response.json()
+
+        except Exception as e:
+            logger.error(f"Remote API request failed: {e}")
+            raise
+
+    async def close(self):
+        """Close HTTP session and cleanup resources"""
+        if self._http_session:
+            await self._http_session.close()
+            self._http_session = None
+
+    async def _invoke_remote_api(
         self,
-        input_data: Union[str, bytes, Path, Dict[str, Any]],
-        task: str,
+        input_data: Union[str, bytes, Path, Dict[str, Any], List[Any]],
+        task: str,
         service_type: str,
-
-
+        model: Optional[str] = None,
+        provider: Optional[str] = None,
+        stream: Optional[bool] = None,
         **kwargs
-    ):
-        """
-        Streaming invoke method that yields tokens in real-time
-
-        Args:
-            input_data: Input data (text for LLM streaming)
-            task: Task to perform
-            service_type: Type of service (only "text" supports streaming)
-            model_hint: Optional model preference
-            provider_hint: Optional provider preference
-            **kwargs: Additional parameters
-
-        Yields:
-            Individual tokens as they arrive from the model
-
-        Example:
-            async for token in client.stream("Hello world", "chat", "text"):
-                print(token, end="", flush=True)
-        """
-        if service_type != "text":
-            raise ValueError("Streaming is only supported for text/LLM services")
-
+    ) -> Dict[str, Any]:
+        """Invoke remote API endpoint"""
         try:
-
-
-
+            # Prepare request data for unified API
+            request_data = {
+                "task": task,
+                "service_type": service_type,
+                **kwargs
+            }
+
+            # Add model and provider if specified
+            if model:
+                request_data["model"] = model
+            if provider:
+                request_data["provider"] = provider
+            # For remote API, disable streaming to get JSON response
+            request_data["stream"] = False
+
+            # Handle different input data types
+            if isinstance(input_data, (str, Path)):
+                request_data["input_data"] = str(input_data)
+            elif isinstance(input_data, (dict, list)):
+                request_data["input_data"] = input_data
             else:
-
-
+                # For binary data, convert to base64
+                import base64
+                if isinstance(input_data, bytes):
+                    request_data["input_data"] = base64.b64encode(input_data).decode()
+                    request_data["data_type"] = "base64"
+                else:
+                    request_data["input_data"] = str(input_data)
+
+            # Make API request
+            response = await self._make_api_request("api/v1/invoke", request_data)
+
+            return response
+
         except Exception as e:
-            logger.error(f"
-
-
+            logger.error(f"Remote API invocation failed: {e}")
+            return {
+                "success": False,
+                "error": str(e),
+                "metadata": {
+                    "task": task,
+                    "service_type": service_type,
+                    "endpoint": "remote"
+                }
+            }
+
     async def invoke(
         self,
-        input_data: Union[str, bytes, Path, Dict[str, Any]],
+        input_data: Union[str, bytes, Path, Dict[str, Any], List[Any]],
         task: str,
         service_type: str,
-
-
-        stream: bool =
-
+        model: Optional[str] = None,
+        provider: Optional[str] = None,
+        stream: Optional[bool] = None,
+        show_reasoning: Optional[bool] = False,
         **kwargs
-    ) ->
+    ) -> Dict[str, Any]:
         """
         Unified invoke method with intelligent model selection

         Args:
-            input_data: Input data (image path,
-            task: Task to perform (analyze_image, generate_speech, transcribe, etc.)
-            service_type: Type of service (vision, audio,
-
-
-            stream: Enable streaming for text
-
-            **kwargs: Additional task-specific parameters
+            input_data: Input data (str, LangChain messages, image path, audio, etc.)
+            task: Task to perform (chat, analyze_image, generate_speech, transcribe, etc.)
+            service_type: Type of service (text, vision, audio, image, embedding)
+            model: Model name (if None, uses intelligent selection)
+            provider: Provider name (if None, uses intelligent selection)
+            stream: Enable streaming for text tasks (default True for chat/generate tasks, supports tools)
+            show_reasoning: Show reasoning process for O4 models (uses Responses API)
+            **kwargs: Additional task-specific parameters (including tools for LangChain)

         Returns:
-
-
+            Unified response dictionary with result and metadata
+            For streaming: result["stream"] contains async generator
+            For non-streaming: result["result"] contains the response

         Examples:
-            #
-            await client.invoke("
-
-
-
-
-
-            await client.invoke("audio.mp3", "transcribe", "audio")
-
-            # Text tasks
-            await client.invoke("Translate this text", "translate", "text")
-            await client.invoke("What is AI?", "chat", "text")
+            # Text tasks with streaming (default for chat)
+            result = await client.invoke("Write a story", "chat", "text")
+            if "stream" in result:
+                async for chunk in result["stream"]:
+                    print(chunk, end="", flush=True)
+            else:
+                print(result["result"])

-            #
-
-
+            # Text tasks with tools (also supports streaming)
+            result = await client.invoke("What's the weather?", "chat", "text", tools=[get_weather])
+            if "stream" in result:
+                async for chunk in result["stream"]:
+                    print(chunk, end="", flush=True)
+            else:
+                print(result["result"])

-            #
-            await client.invoke("
+            # Vision tasks (always non-streaming)
+            result = await client.invoke("image.jpg", "analyze", "vision")
+            print(result["result"])

-            #
-
-
+            # Audio tasks
+            result = await client.invoke("Hello world", "generate_speech", "audio")
+            print(result["result"])

             # Image generation
-            await client.invoke("A beautiful sunset", "generate_image", "image")
+            result = await client.invoke("A beautiful sunset", "generate_image", "image")
+            print(result["result"])

             # Embedding
-            await client.invoke("Text to embed", "create_embedding", "embedding")
+            result = await client.invoke("Text to embed", "create_embedding", "embedding")
+            print(result["result"])
         """
         try:
-            #
-            if
-
-
-
-
-
-
-
-
-
-
-
-
+            # If using remote service endpoint, make API call
+            if self.service_endpoint:
+                return await self._invoke_remote_api(
+                    input_data=input_data,
+                    task=task,
+                    service_type=service_type,
+                    model=model,
+                    provider=provider,
+                    stream=stream,
+                    **kwargs
+                )
+
+            # Set default streaming for text tasks
+            if stream is None and service_type == "text":
+                if task in ["chat", "generate"]:
+                    stream = True  # Enable streaming for chat and generate tasks
                 else:
-
-                input_data=input_data,
-                task=task,
-                service_type=service_type,
-                model_hint=model_hint,
-                provider_hint=provider_hint,
-                tools=tools,
-                **kwargs
-            )
+                    stream = False  # Disable for other text tasks

-            #
-            if
-                return await self.
+            # If streaming is enabled for text tasks, return streaming response
+            if stream and service_type == "text":
+                return await self._invoke_service_streaming(
                     input_data=input_data,
                     task=task,
                     service_type=service_type,
-                    model_hint=
-                    provider_hint=
-
+                    model_hint=model,
+                    provider_hint=provider,
+                    show_reasoning=show_reasoning,  # Explicitly pass show_reasoning
                     **kwargs
                 )
             else:
-
+                # Use regular non-streaming service
+                return await self._invoke_service(
                     input_data=input_data,
                     task=task,
                     service_type=service_type,
-                    model_hint=
-                    provider_hint=
-
+                    model_hint=model,
+                    provider_hint=provider,
+                    stream=False,  # Force non-streaming
                     **kwargs
                 )

         except Exception as e:
-
-
-                "
-                "
-                "
-
-
-
-
+            return self._handle_error(e, {
+                "operation": "invoke",
+                "task": task,
+                "service_type": service_type,
+                "input_type": type(input_data).__name__
+            })
+
+    async def invoke_stream(
+        self,
+        input_data: Union[str, bytes, Path, Dict[str, Any], List[Any]],
+        task: str,
+        service_type: str,
+        model: Optional[str] = None,
+        provider: Optional[str] = None,
+        return_metadata: bool = False,
+        **kwargs
+    ):
+        """
+        Unified streaming invoke method - returns async generator for real-time token streaming
+
+        Args:
+            input_data: Input data (str, LangChain messages, image path, audio, etc.)
+            task: Task to perform (chat, analyze_image, generate_speech, transcribe, etc.)
+            service_type: Type of service (text, vision, audio, image, embedding)
+            model: Model name (if None, uses intelligent selection)
+            provider: Provider name (if None, uses intelligent selection)
+            return_metadata: If True, yields ('metadata', metadata_dict) as final item
+            **kwargs: Additional task-specific parameters (including tools for LangChain)
+
+        Returns:
+            For text services: AsyncGenerator[Union[str, Tuple[str, Dict]], None] - yields tokens as they arrive
+            - Normal items: token strings
+            - Final item (if return_metadata=True): ('metadata', metadata_dict) with billing info
+            For other services: Raises ValueError (streaming not supported)
+
+        Examples:
+            # Simple streaming
+            async for token in client.invoke_stream("Hello!", "chat", "text"):
+                print(token, end='', flush=True)
+
+            # Streaming with metadata
+            async for item in client.invoke_stream("Hello!", "chat", "text", return_metadata=True):
+                if isinstance(item, tuple) and item[0] == 'metadata':
+                    print(f"\nBilling: {item[1]['billing']}")
+                else:
+                    print(item, end='', flush=True)
+        """
+        try:
+            # Only text services support streaming
+            if service_type != "text":
+                raise ValueError(f"Streaming not supported for service type: {service_type}")
+
+            # Tools are supported with streaming
+
+            # Step 1: Select best model for this task
+            selected_model = await self._select_model(
+                input_data=input_data,
+                task=task,
+                service_type=service_type,
+                model_hint=model,
+                provider_hint=provider
+            )
+
+            # Step 2: Get appropriate service
+            service = await self._get_service(
+                service_type=service_type,
+                model_name=selected_model["model_id"],
+                provider=selected_model["provider"],
+                task=task,
+                use_cache=False  # Don't cache for streaming to avoid state issues
+            )
+
+            # Step 3: Ensure service supports streaming
+            if not hasattr(service, 'astream'):
+                raise ValueError(f"Service {selected_model['provider']}/{selected_model['model_id']} does not support streaming")
+
+            # Step 4: Enable streaming on the service
+            if hasattr(service, 'streaming'):
+                service.streaming = True
+
+            # Step 5: Stream tokens and collect for billing
+            content_chunks = []
+            async for token in service.astream(input_data):
+                content_chunks.append(token)
+                yield token
+
+            # Step 6: After streaming is complete, calculate billing info and optionally return metadata
+            try:
+                await asyncio.sleep(0.01)  # Small delay to ensure billing tracking completes
+
+                # Get billing info (similar to _invoke_service)
+                billing_info = self._get_billing_info(service, selected_model["model_id"])
+
+                # Log billing info for tracking
+                logger.info(f"Streaming completed - Model: {selected_model['model_id']}, "
+                            f"Tokens: {billing_info.get('total_tokens', 'N/A')}, "
+                            f"Cost: ${billing_info.get('cost_usd', 0):.4f}")
+
+                # Return metadata if requested
+                if return_metadata:
+                    metadata = {
+                        "model_used": selected_model["model_id"],
+                        "provider": selected_model["provider"],
+                        "task": task,
+                        "service_type": service_type,
+                        "selection_reason": selected_model.get("reason", "Default selection"),
+                        "billing": billing_info,
+                        "streaming": True,
+                        "tokens_streamed": len(content_chunks),
+                        "content_length": len("".join(content_chunks))
+                    }
+                    yield ('metadata', metadata)
+
+            except Exception as billing_error:
+                logger.warning(f"Failed to track billing for streaming: {billing_error}")
+                if return_metadata:
+                    # Return fallback metadata even if billing fails
+                    fallback_metadata = {
+                        "model_used": selected_model["model_id"],
+                        "provider": selected_model["provider"],
+                        "task": task,
+                        "service_type": service_type,
+                        "selection_reason": selected_model.get("reason", "Default selection"),
+                        "billing": {
+                            "cost_usd": 0.0,
+                            "error": str(billing_error),
+                            "currency": "USD"
+                        },
+                        "streaming": True,
+                        "tokens_streamed": len(content_chunks),
+                        "content_length": len("".join(content_chunks))
+                    }
+                    yield ('metadata', fallback_metadata)
+
+        except Exception as e:
+            logger.error(f"Streaming invoke failed: {e}")
+            raise

     async def _select_model(
         self,
@@ -268,8 +580,26 @@ class ISAModelClient:
                 "reason": "User specified"
             }

+        # If model_hint provided but no provider_hint, handle special cases
+        if model_hint:
+            # Special handling for hybrid service
+            if model_hint == "hybrid":
+                return {
+                    "model_id": model_hint,
+                    "provider": "hybrid",
+                    "reason": "Hybrid service requested"
+                }
+            # If only model_hint provided, use default provider for that service type
+            elif provider_hint is None:
+                default_provider = self._get_default_provider(service_type)
+                return {
+                    "model_id": model_hint,
+                    "provider": default_provider,
+                    "reason": "Model specified with default provider"
+                }
+
         # Use intelligent model selector if available
-        if INTELLIGENT_SELECTOR_AVAILABLE:
+        if INTELLIGENT_SELECTOR_AVAILABLE and get_model_selector:
             try:
                 # Initialize model selector if not already done
                 if self.model_selector is None:
@@ -304,6 +634,17 @@ class ISAModelClient:
             # Fallback to default model selection
             return self._get_default_model(service_type, task, provider_hint)

+    def _get_default_provider(self, service_type: str) -> str:
+        """Get default provider for service type"""
+        defaults = {
+            "vision": "openai",
+            "audio": "openai",
+            "text": "openai",
+            "image": "replicate",
+            "embedding": "openai"
+        }
+        return defaults.get(service_type, "openai")
+
     def _get_default_model(
         self,
         service_type: str,
@@ -314,7 +655,7 @@ class ISAModelClient:

         defaults = {
             "vision": {
-                "model_id": "gpt-
+                "model_id": "gpt-4.1-nano",
                 "provider": "openai"
             },
             "audio": {
@@ -323,7 +664,7 @@ class ISAModelClient:
                 "default": {"model_id": "whisper-1", "provider": "openai"}
             },
             "text": {
-                "model_id": "gpt-4.1-
+                "model_id": "gpt-4.1-nano",
                 "provider": "openai"
             },
             "image": {
@@ -331,8 +672,9 @@ class ISAModelClient:
                 "provider": "replicate"
             },
             "embedding": {
-                "model_id": "text-embedding-3-small",
-                "provider": "
+                "embed": {"model_id": "text-embedding-3-small", "provider": "openai"},
+                "rerank": {"model_id": "isa-jina-reranker-v2-service", "provider": "isa"},
+                "default": {"model_id": "text-embedding-3-small", "provider": "openai"}
             }
         }

@@ -344,6 +686,14 @@ class ISAModelClient:
                 default = defaults["audio"]["stt"]
             else:
                 default = defaults["audio"]["default"]
+        # Handle embedding service type with task-specific models
+        elif service_type == "embedding":
+            if "rerank" in task:
+                default = defaults["embedding"]["rerank"]
+            elif "embed" in task:
+                default = defaults["embedding"]["embed"]
+            else:
+                default = defaults["embedding"]["default"]
         else:
             default = defaults.get(service_type, defaults["vision"])

@@ -363,59 +713,56 @@ class ISAModelClient:
         model_name: str,
         provider: str,
         task: str,
-
+        use_cache: bool = True
     ) -> Any:
         """Get appropriate service instance"""

         cache_key = f"{service_type}_{provider}_{model_name}"

-        # Check cache first
-        if cache_key in self._service_cache:
-
-            # If tools are needed, bind them to the service
-            if tools and service_type == "text":
-                return service.bind_tools(tools)
-            return service
+        # Check cache first (if caching is enabled)
+        if use_cache and cache_key in self._service_cache:
+            return self._service_cache[cache_key]

         try:
+            # Validate service type
+            self._validate_service_type(service_type)
+
             # Route to appropriate AIFactory method
             if service_type == "vision":
                 service = self.ai_factory.get_vision(model_name, provider)
-
             elif service_type == "audio":
                 if "speech" in task or "tts" in task:
                     service = self.ai_factory.get_tts(model_name, provider)
                 elif "transcribe" in task or "stt" in task:
                     service = self.ai_factory.get_stt(model_name, provider)
                 else:
-                    # Default to STT for unknown audio tasks
                     service = self.ai_factory.get_stt(model_name, provider)
-
             elif service_type == "text":
                 service = self.ai_factory.get_llm(model_name, provider)
-
             elif service_type == "image":
                 service = self.ai_factory.get_img("t2i", model_name, provider)
-
             elif service_type == "embedding":
                 service = self.ai_factory.get_embed(model_name, provider)

-
-
-
-            # Cache the service
-            self._service_cache[cache_key] = service
-
-            # If tools are needed, bind them to the service
-            if tools and service_type == "text":
-                return service.bind_tools(tools)
-
+            # Cache the service (if caching is enabled)
+            if use_cache:
+                self._service_cache[cache_key] = service
             return service

         except Exception as e:
             logger.error(f"Failed to get service {service_type}/{provider}/{model_name}: {e}")
             raise

+    def _validate_service_type(self, service_type: str) -> None:
+        """Validate service type is supported"""
+        if service_type not in self.SUPPORTED_SERVICE_TYPES:
+            raise ValueError(f"Unsupported service type: {service_type}")
+
+    def _map_task(self, task: str, service_type: str) -> str:
+        """Map common task names to unified task names"""
+        task_mapping = self.TASK_MAPPINGS.get(service_type, {})
+        return task_mapping.get(task, task)
+
     async def _execute_task(
         self,
         service: Any,
@@ -427,166 +774,69 @@ class ISAModelClient:
         """Execute the task using the appropriate service"""

         try:
+            self._validate_service_type(service_type)
+            unified_task = self._map_task(task, service_type)
+
             if service_type == "vision":
-                return await
+                return await service.invoke(
+                    image=input_data,
+                    task=unified_task,
+                    **kwargs
+                )

             elif service_type == "audio":
-
+                if unified_task in ["synthesize", "text_to_speech", "tts"]:
+                    return await service.invoke(
+                        text=input_data,
+                        task=unified_task,
+                        **kwargs
+                    )
+                else:
+                    return await service.invoke(
+                        audio_input=input_data,
+                        task=unified_task,
+                        **kwargs
+                    )

             elif service_type == "text":
-
+                # Extract show_reasoning from kwargs if present
+                show_reasoning = kwargs.pop('show_reasoning', False)
+                result = await service.invoke(
+                    input_data=input_data,
+                    task=unified_task,
+                    show_reasoning=show_reasoning,
+                    **kwargs
+                )
+
+                logger.debug(f"Service result type: {type(result)}")
+                logger.debug(f"Service result: {result}")
+
+                if isinstance(result, dict) and 'message' in result:
+                    message = result['message']
+                    logger.debug(f"Extracted message type: {type(message)}")
+                    logger.debug(f"Extracted message: {message}")
+                    return message
+                else:
+                    return result

             elif service_type == "image":
-                return await
+                return await service.invoke(
+                    prompt=input_data,
+                    task=unified_task,
+                    **kwargs
+                )

             elif service_type == "embedding":
-                return await
-
-
-
+                return await service.invoke(
+                    input_data=input_data,
+                    task=unified_task,
+                    **kwargs
+                )

         except Exception as e:
             logger.error(f"Task execution failed: {e}")
             raise

-    async def _execute_vision_task(self, service, input_data, task, **kwargs):
-        """Execute vision-related tasks using unified invoke method"""
-
-        # Map common task names to unified task names
-        task_mapping = {
-            "analyze_image": "analyze_image",
-            "detect_ui_elements": "detect_ui",
-            "extract_table": "extract_table",
-            "extract_text": "extract_text",
-            "ocr": "extract_text",
-            "describe": "analyze_image"
-        }
-
-        unified_task = task_mapping.get(task, task)
-
-        # Use unified invoke method with proper parameters
-        return await service.invoke(
-            image=input_data,
-            task=unified_task,
-            **kwargs
-        )
-
-    async def _execute_audio_task(self, service, input_data, task, **kwargs):
-        """Execute audio-related tasks using unified invoke method"""
-
-        # Map common task names to unified task names
-        task_mapping = {
-            "generate_speech": "synthesize",
-            "text_to_speech": "synthesize",
-            "tts": "synthesize",
-            "transcribe": "transcribe",
-            "speech_to_text": "transcribe",
-            "stt": "transcribe",
-            "translate": "translate",
-            "detect_language": "detect_language"
-        }
-
-        unified_task = task_mapping.get(task, task)
-
-        # Use unified invoke method with correct parameter name based on task type
-        if unified_task in ["synthesize", "text_to_speech", "tts"]:
-            # TTS services expect 'text' parameter
-            return await service.invoke(
-                text=input_data,
-                task=unified_task,
-                **kwargs
-            )
-        else:
-            # STT services expect 'audio_input' parameter
-            return await service.invoke(
-                audio_input=input_data,
-                task=unified_task,
-                **kwargs
-            )
-
-    async def _execute_text_task(self, service, input_data, task, **kwargs):
-        """Execute text-related tasks using unified invoke method"""
-
-        # Map common task names to unified task names
-        task_mapping = {
-            "chat": "chat",
-            "generate": "generate",
-            "complete": "complete",
-            "translate": "translate",
-            "summarize": "summarize",
-            "analyze": "analyze",
-            "extract": "extract",
-            "classify": "classify"
-        }
-
-        unified_task = task_mapping.get(task, task)
-
-        # Use unified invoke method
-        result = await service.invoke(
-            input_data=input_data,
-            task=unified_task,
-            **kwargs
-        )
-
-        # Handle the new response format from LLM services
-        # LLM services now return {"message": ..., "success": ..., "metadata": ...}
-        if isinstance(result, dict) and "message" in result:
-            # Extract the message content (convert AIMessage to string)
-            message = result["message"]
-            if hasattr(message, 'content'):
-                # Handle langchain AIMessage objects
-                return message.content
-            elif isinstance(message, str):
-                return message
-            else:
-                # Fallback: convert to string
-                return str(message)
-
-        # Fallback for other service types or legacy format
-        return result
-
-    async def _execute_image_task(self, service, input_data, task, **kwargs):
-        """Execute image generation tasks using unified invoke method"""
-
-        # Map common task names to unified task names
-        task_mapping = {
-            "generate_image": "generate",
-            "generate": "generate",
-            "img2img": "img2img",
-            "image_to_image": "img2img",
-            "generate_batch": "generate_batch"
-        }
-
-        unified_task = task_mapping.get(task, task)
-
-        # Use unified invoke method
-        return await service.invoke(
-            prompt=input_data,
-            task=unified_task,
-            **kwargs
-        )
-
-    async def _execute_embedding_task(self, service, input_data, task, **kwargs):
-        """Execute embedding tasks using unified invoke method"""
-
-        # Map common task names to unified task names
-        task_mapping = {
-            "create_embedding": "embed",
-            "embed": "embed",
-            "embed_batch": "embed_batch",
-            "chunk_and_embed": "chunk_and_embed",
-            "similarity": "similarity",
-            "find_similar": "find_similar"
-        }
-
-        unified_task = task_mapping.get(task, task)
-
-        # Use unified invoke method
-        return await service.invoke(
-            input_data=input_data,
-            task=unified_task,
-            **kwargs
-        )

     def clear_cache(self):
         """Clear service cache"""
@@ -602,7 +852,7 @@ class ISAModelClient:
         Returns:
             List of available models with metadata
         """
-        if INTELLIGENT_SELECTOR_AVAILABLE:
+        if INTELLIGENT_SELECTOR_AVAILABLE and get_model_selector:
             try:
                 if self.model_selector is None:
                     self.model_selector = await get_model_selector(self.config)
@@ -649,17 +899,26 @@ class ISAModelClient:
                 "error": str(e)
             }

-
+    def _handle_error(self, e: Exception, context: Dict[str, Any]) -> Dict[str, Any]:
+        """Handle errors consistently across methods"""
+        error_msg = f"Failed to {context.get('operation', 'execute')} {context.get('task', '')} on {context.get('service_type', '')}: {e}"
+        logger.error(error_msg)
+        return {
+            "success": False,
+            "error": str(e),
+            "metadata": context
+        }
+
+    async def _invoke_service_streaming(
         self,
-        input_data: Union[str, bytes, Path, Dict[str, Any]],
+        input_data: Union[str, bytes, Path, Dict[str, Any], List[Any]],
         task: str,
         service_type: str,
         model_hint: Optional[str] = None,
         provider_hint: Optional[str] = None,
-        tools: Optional[List[Any]] = None,
         **kwargs
     ) -> Dict[str, Any]:
-        """
+        """Service invoke that returns streaming response with async generator"""
         try:
             # Step 1: Select best model for this task
             selected_model = await self._select_model(
@@ -676,305 +935,205 @@ class ISAModelClient:
                 model_name=selected_model["model_id"],
                 provider=selected_model["provider"],
                 task=task,
-
+                use_cache=False  # Don't cache for streaming to avoid state issues
             )

-            # Step 3:
-
-
-
-
-
-
+            # Step 3: Handle tools for LLM services (bind tools if provided)
+            tools = kwargs.pop("tools", None)
+            if service_type == "text" and tools:
+                service = await self._get_service(
+                    service_type=service_type,
+                    model_name=selected_model["model_id"],
+                    provider=selected_model["provider"],
+                    task=task,
+                    use_cache=False
+                )
+                service = service.bind_tools(tools)
+
+            # Step 4: Ensure service supports streaming
+            if not hasattr(service, 'astream'):
+                raise ValueError(f"Service {selected_model['provider']}/{selected_model['model_id']} does not support streaming")
+
+            # Step 5: Enable streaming on the service
+            if hasattr(service, 'streaming'):
+                service.streaming = True

-            # Step
+            # Step 6: Create async generator wrapper that yields tokens
+            async def stream_generator():
+                # Pass show_reasoning parameter if available for LLM services
+                if service_type == "text" and hasattr(service, 'astream'):
+                    show_reasoning = kwargs.get('show_reasoning', False)
+                    logger.debug(f"Stream generator: show_reasoning={show_reasoning}")
+                    if 'show_reasoning' in kwargs:
+                        async for token in service.astream(input_data, show_reasoning=show_reasoning):
+                            yield token
+                    else:
+                        async for token in service.astream(input_data):
+                            yield token
+                else:
+                    async for token in service.astream(input_data):
+                        yield token
+
+            # Return response with stream generator and metadata
             return {
                 "success": True,
-                "
+                "stream": stream_generator(),
                 "metadata": {
                     "model_used": selected_model["model_id"],
                     "provider": selected_model["provider"],
                     "task": task,
                     "service_type": service_type,
-                    "selection_reason": selected_model.get("reason", "Default selection")
+                    "selection_reason": selected_model.get("reason", "Default selection"),
+                    "streaming": True
                 }
             }
         except Exception as e:
-            logger.error(f"
+            logger.error(f"Streaming service invoke failed: {e}")
             raise
-
-    async def
+
+    async def _invoke_service(
         self,
-        input_data: Union[str, bytes, Path, Dict[str, Any]],
+        input_data: Union[str, bytes, Path, Dict[str, Any], List[Any]],
         task: str,
         service_type: str,
         model_hint: Optional[str] = None,
         provider_hint: Optional[str] = None,
+        stream: Optional[bool] = None,
         **kwargs
     ) -> Dict[str, Any]:
-        """
-
-
-
-
-
-            task=task,
+        """Direct service invoke - passes LangChain objects and tools directly to services"""
+        try:
+            # Step 1: Select best model for this task
+            selected_model = await self._select_model(
+                input_data=input_data,
+                task=task,
                 service_type=service_type,
                 model_hint=model_hint,
-                provider_hint=provider_hint
-                **kwargs
+                provider_hint=provider_hint
             )
-
-
-
-
-
+
+            # Step 2: Get appropriate service
+            service = await self._get_service(
+                service_type=service_type,
+                model_name=selected_model["model_id"],
+                provider=selected_model["provider"],
+                task=task
+            )
+
+            # Step 3: Handle tools for LLM services (bind tools if provided)
+            tools = kwargs.pop("tools", None)
+            if service_type == "text" and tools:
+                service = await self._get_service(
+                    service_type=service_type,
+                    model_name=selected_model["model_id"],
+                    provider=selected_model["provider"],
+                    task=task,
+                    use_cache=False
+                )
+                service = service.bind_tools(tools)
+                # Note: streaming is still supported with tools
+
+            # Step 4: Set streaming for text services
+            if service_type == "text" and stream is not None:
+                if hasattr(service, 'streaming'):
+                    service.streaming = stream
+
+            # Step 5: Execute task with unified interface
+            result = await self._execute_task(
+                service=service,
+                input_data=input_data,
                 task=task,
                 service_type=service_type,
-                model_hint=model_hint,
-                provider_hint=provider_hint,
                 **kwargs
             )
-
-
-
-                "
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            else:
-                error_data = await response.text()
-                raise Exception(f"API error {response.status}: {error_data}")
-
-        except Exception as e:
-            logger.error(f"API invoke failed: {e}")
-            raise
+
+            # Step 6: Wait for billing tracking to complete, then get billing information
+            await asyncio.sleep(0.01)  # Small delay to ensure billing tracking completes
+            billing_info = self._get_billing_info(service, selected_model["model_id"])
+
+            # Return unified response
+            return {
+                "success": True,
+                "result": result,
+                "metadata": {
+                    "model_used": selected_model["model_id"],
+                    "provider": selected_model["provider"],
+                    "task": task,
+                    "service_type": service_type,
+                    "selection_reason": selected_model.get("reason", "Default selection"),
+                    "billing": billing_info
+                }
+            }
+        except Exception as e:
+            logger.error(f"Service invoke failed: {e}")
+            raise

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            data.add_field('service_type', service_type)
-
-            if model_hint:
-                data.add_field('model_hint', model_hint)
-            if provider_hint:
-                data.add_field('provider_hint', provider_hint)
-
-            data.add_field('file',
-                           open(file_path, 'rb'),
-                           filename=file_path.name,
-                           content_type='application/octet-stream')
-
-            headers = {k: v for k, v in self.headers.items() if k != "Content-Type"}
-
-            async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=300)) as session:
-                try:
-                    async with session.post(
-                        f"{self.api_url}/api/v1/invoke-file",
-                        data=data,
-                        headers=headers
-                    ) as response:
+    def _get_billing_info(self, service: Any, model_id: str) -> Dict[str, Any]:
+        """Extract billing information from service after task execution"""
+        try:
+            # Check if service has model_manager with billing_tracker
+            if hasattr(service, 'model_manager') and hasattr(service.model_manager, 'billing_tracker'):
+                billing_tracker = service.model_manager.billing_tracker
+
+                # Get the latest usage record for this model
+                model_records = [
+                    record for record in billing_tracker.usage_records
+                    if record.model_id == model_id
+                ]
+
+                if model_records:
+                    # Get the most recent record
+                    latest_record = max(model_records, key=lambda r: r.timestamp)

-
-
-
-
-
-
-
-
-
+                    return {
+                        "cost_usd": latest_record.cost_usd,
+                        "input_tokens": latest_record.input_tokens,
+                        "output_tokens": latest_record.output_tokens,
+                        "total_tokens": latest_record.total_tokens,
+                        "operation": latest_record.operation,
+                        "timestamp": latest_record.timestamp,
+                        "currency": "USD"
+                    }
+
+            # Fallback: no billing info available
+            return {
+                "cost_usd": 0.0,
+                "input_tokens": None,
+                "output_tokens": None,
+                "total_tokens": None,
+                "operation": None,
+                "timestamp": None,
+                "currency": "USD",
+                "note": "Billing information not available"
+            }
+
+        except Exception as e:
+            logger.warning(f"Failed to get billing info: {e}")
+            return {
+                "cost_usd": 0.0,
+                "error": str(e),
+                "currency": "USD"
+            }

-    async def _invoke_api_binary(
-        self,
-        data: bytes,
-        task: str,
-        service_type: str,
-        model_hint: Optional[str] = None,
-        provider_hint: Optional[str] = None,
-        **kwargs
-    ) -> Dict[str, Any]:
-        """API binary upload"""
-
-        form_data = aiohttp.FormData()
-        form_data.add_field('task', task)
-        form_data.add_field('service_type', service_type)
-
-        if model_hint:
-            form_data.add_field('model_hint', model_hint)
-        if provider_hint:
-            form_data.add_field('provider_hint', provider_hint)
-
-        form_data.add_field('file',
-                            data,
-                            filename='data.bin',
-                            content_type='application/octet-stream')
-
-        headers = {k: v for k, v in self.headers.items() if k != "Content-Type"}
-
-        async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=300)) as session:
-            try:
-                async with session.post(
-                    f"{self.api_url}/api/v1/invoke-file",
-                    data=form_data,
-                    headers=headers
-                ) as response:
-
-                    if response.status == 200:
-                        return await response.json()
-                    else:
-                        error_data = await response.text()
-                        raise Exception(f"API error {response.status}: {error_data}")
-
-            except Exception as e:
-                logger.error(f"API binary upload failed: {e}")
-                raise
-
-    async def _stream_local(
-        self,
-        input_data: Union[str, bytes, Path, Dict[str, Any]],
-        task: str,
-        service_type: str,
-        model_hint: Optional[str] = None,
-        provider_hint: Optional[str] = None,
-        tools: Optional[List[Any]] = None,
-        **kwargs
-    ):
-        """Local streaming using AI Factory"""
-        # Step 1: Select best model for this task
-        selected_model = await self._select_model(
-            input_data=input_data,
-            task=task,
-            service_type=service_type,
-            model_hint=model_hint,
-            provider_hint=provider_hint
-        )
-
-        # Step 2: Get appropriate service
-        service = await self._get_service(
-            service_type=service_type,
-            model_name=selected_model["model_id"],
-            provider=selected_model["provider"],
-            task=task,
-            tools=tools
-        )
-
-        # Step 3: Yield tokens from the stream
-        async for token in service.astream(input_data):
-            yield token
-
-    async def _stream_api(
-        self,
-        input_data: Union[str, bytes, Path, Dict[str, Any]],
-        task: str,
-        service_type: str,
-        model_hint: Optional[str] = None,
-        provider_hint: Optional[str] = None,
-        **kwargs
-    ):
-        """API streaming using Server-Sent Events (SSE)"""
-
-        # Only support text streaming for now
-        if not isinstance(input_data, (str, dict)):
-            raise ValueError("API streaming only supports text input")
-
-        payload = {
-            "input_data": input_data,
-            "task": task,
-            "service_type": service_type,
-            "model_hint": model_hint,
-            "provider_hint": provider_hint,
-            "stream": True,
-            "parameters": kwargs
-        }
-
-        async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=300)) as session:
-            try:
-                async with session.post(
-                    f"{self.api_url}/api/v1/stream",
-                    json=payload,
-                    headers=self.headers
-                ) as response:
-
-                    if response.status == 200:
-                        # Parse SSE stream
-                        async for line in response.content:
-                            if line:
-                                line_str = line.decode().strip()
-                                if line_str.startswith("data: "):
-                                    try:
-                                        # Parse SSE data
-                                        import json
-                                        json_str = line_str[6:]  # Remove "data: " prefix
-                                        data = json.loads(json_str)
-
-                                        if data.get("type") == "token" and "token" in data:
-                                            yield data["token"]
-                                        elif data.get("type") == "completion":
-                                            # End of stream
-                                            break
-                                        elif data.get("type") == "error":
-                                            raise Exception(f"Server error: {data.get('error')}")
-
-                                    except json.JSONDecodeError:
-                                        # Skip malformed lines
-                                        continue
-                    else:
-                        error_data = await response.text()
-                        raise Exception(f"API streaming error {response.status}: {error_data}")
-
-            except Exception as e:
-                logger.error(f"API streaming failed: {e}")
-                raise


 # Convenience function for quick access
 def create_client(
     config: Optional[Dict[str, Any]] = None,
-
-    api_url: Optional[str] = None,
+    service_endpoint: Optional[str] = None,
     api_key: Optional[str] = None
 ) -> ISAModelClient:
     """Create ISA Model Client instance

     Args:
         config: Optional configuration
-
-
-        api_key: API key for authentication (optional)
+        service_endpoint: Optional service endpoint URL (if None, uses local AI Factory)
+        api_key: Optional API key for authentication (can also be set via ISA_API_KEY env var)

     Returns:
         ISAModelClient instance
     """
-    return ISAModelClient(config=config,
+    return ISAModelClient(config=config, service_endpoint=service_endpoint, api_key=api_key)


 # Export for easy import