isa-model 0.3.91__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. isa_model/client.py +732 -573
  2. isa_model/core/cache/redis_cache.py +401 -0
  3. isa_model/core/config/config_manager.py +53 -10
  4. isa_model/core/config.py +1 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/migrations.py +277 -0
  7. isa_model/core/database/supabase_client.py +123 -0
  8. isa_model/core/models/__init__.py +37 -0
  9. isa_model/core/models/model_billing_tracker.py +60 -88
  10. isa_model/core/models/model_manager.py +36 -18
  11. isa_model/core/models/model_repo.py +44 -38
  12. isa_model/core/models/model_statistics_tracker.py +234 -0
  13. isa_model/core/models/model_storage.py +0 -1
  14. isa_model/core/models/model_version_manager.py +959 -0
  15. isa_model/core/pricing_manager.py +2 -249
  16. isa_model/core/resilience/circuit_breaker.py +366 -0
  17. isa_model/core/security/secrets.py +358 -0
  18. isa_model/core/services/__init__.py +2 -4
  19. isa_model/core/services/intelligent_model_selector.py +101 -370
  20. isa_model/core/storage/hf_storage.py +1 -1
  21. isa_model/core/types.py +7 -0
  22. isa_model/deployment/cloud/modal/isa_audio_chatTTS_service.py +520 -0
  23. isa_model/deployment/cloud/modal/isa_audio_fish_service.py +0 -0
  24. isa_model/deployment/cloud/modal/isa_audio_openvoice_service.py +758 -0
  25. isa_model/deployment/cloud/modal/isa_audio_service_v2.py +1044 -0
  26. isa_model/deployment/cloud/modal/isa_embed_rerank_service.py +296 -0
  27. isa_model/deployment/cloud/modal/isa_video_hunyuan_service.py +423 -0
  28. isa_model/deployment/cloud/modal/isa_vision_ocr_service.py +519 -0
  29. isa_model/deployment/cloud/modal/isa_vision_qwen25_service.py +709 -0
  30. isa_model/deployment/cloud/modal/isa_vision_table_service.py +467 -323
  31. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +607 -180
  32. isa_model/deployment/cloud/modal/isa_vision_ui_service_optimized.py +660 -0
  33. isa_model/deployment/core/deployment_manager.py +6 -4
  34. isa_model/deployment/services/auto_hf_modal_deployer.py +894 -0
  35. isa_model/eval/benchmarks/__init__.py +27 -0
  36. isa_model/eval/benchmarks/multimodal_datasets.py +460 -0
  37. isa_model/eval/benchmarks.py +244 -12
  38. isa_model/eval/evaluators/__init__.py +8 -2
  39. isa_model/eval/evaluators/audio_evaluator.py +727 -0
  40. isa_model/eval/evaluators/embedding_evaluator.py +742 -0
  41. isa_model/eval/evaluators/vision_evaluator.py +564 -0
  42. isa_model/eval/example_evaluation.py +395 -0
  43. isa_model/eval/factory.py +272 -5
  44. isa_model/eval/isa_benchmarks.py +700 -0
  45. isa_model/eval/isa_integration.py +582 -0
  46. isa_model/eval/metrics.py +159 -6
  47. isa_model/eval/tests/unit/test_basic.py +396 -0
  48. isa_model/inference/ai_factory.py +44 -8
  49. isa_model/inference/services/audio/__init__.py +21 -0
  50. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  51. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  52. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  53. isa_model/inference/services/audio/openai_stt_service.py +32 -6
  54. isa_model/inference/services/base_service.py +17 -1
  55. isa_model/inference/services/embedding/__init__.py +13 -0
  56. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  57. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  58. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  59. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  60. isa_model/inference/services/img/__init__.py +2 -2
  61. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  62. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  63. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  64. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  65. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  66. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  67. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  68. isa_model/inference/services/llm/base_llm_service.py +30 -6
  69. isa_model/inference/services/llm/helpers/llm_adapter.py +63 -9
  70. isa_model/inference/services/llm/ollama_llm_service.py +2 -1
  71. isa_model/inference/services/llm/openai_llm_service.py +652 -55
  72. isa_model/inference/services/llm/yyds_llm_service.py +2 -1
  73. isa_model/inference/services/vision/__init__.py +5 -5
  74. isa_model/inference/services/vision/base_vision_service.py +118 -185
  75. isa_model/inference/services/vision/helpers/image_utils.py +11 -5
  76. isa_model/inference/services/vision/isa_vision_service.py +573 -0
  77. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  78. isa_model/serving/api/fastapi_server.py +88 -16
  79. isa_model/serving/api/middleware/auth.py +311 -0
  80. isa_model/serving/api/middleware/security.py +278 -0
  81. isa_model/serving/api/routes/analytics.py +486 -0
  82. isa_model/serving/api/routes/deployments.py +339 -0
  83. isa_model/serving/api/routes/evaluations.py +579 -0
  84. isa_model/serving/api/routes/logs.py +430 -0
  85. isa_model/serving/api/routes/settings.py +582 -0
  86. isa_model/serving/api/routes/unified.py +324 -165
  87. isa_model/serving/api/startup.py +304 -0
  88. isa_model/serving/modal_proxy_server.py +249 -0
  89. isa_model/training/__init__.py +100 -6
  90. isa_model/training/core/__init__.py +4 -1
  91. isa_model/training/examples/intelligent_training_example.py +281 -0
  92. isa_model/training/intelligent/__init__.py +25 -0
  93. isa_model/training/intelligent/decision_engine.py +643 -0
  94. isa_model/training/intelligent/intelligent_factory.py +888 -0
  95. isa_model/training/intelligent/knowledge_base.py +751 -0
  96. isa_model/training/intelligent/resource_optimizer.py +839 -0
  97. isa_model/training/intelligent/task_classifier.py +576 -0
  98. isa_model/training/storage/__init__.py +24 -0
  99. isa_model/training/storage/core_integration.py +439 -0
  100. isa_model/training/storage/training_repository.py +552 -0
  101. isa_model/training/storage/training_storage.py +628 -0
  102. {isa_model-0.3.91.dist-info → isa_model-0.4.0.dist-info}/METADATA +13 -1
  103. isa_model-0.4.0.dist-info/RECORD +182 -0
  104. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  105. isa_model/deployment/cloud/modal/register_models.py +0 -321
  106. isa_model/inference/adapter/unified_api.py +0 -248
  107. isa_model/inference/services/helpers/stacked_config.py +0 -148
  108. isa_model/inference/services/img/flux_professional_service.py +0 -603
  109. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  110. isa_model/inference/services/others/table_transformer_service.py +0 -61
  111. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  112. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  113. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  114. isa_model/scripts/inference_tracker.py +0 -283
  115. isa_model/scripts/mlflow_manager.py +0 -379
  116. isa_model/scripts/model_registry.py +0 -465
  117. isa_model/scripts/register_models.py +0 -370
  118. isa_model/scripts/register_models_with_embeddings.py +0 -510
  119. isa_model/scripts/start_mlflow.py +0 -95
  120. isa_model/scripts/training_tracker.py +0 -257
  121. isa_model-0.3.91.dist-info/RECORD +0 -138
  122. {isa_model-0.3.91.dist-info → isa_model-0.4.0.dist-info}/WHEEL +0 -0
  123. {isa_model-0.3.91.dist-info → isa_model-0.4.0.dist-info}/top_level.txt +0 -0
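The most consequential change for callers is in isa_model/client.py: the 0.3.91 constructor's mode="local"/"api" switch and api_url parameter are replaced by a single optional service_endpoint, and the API key can now also come from the ISA_API_KEY environment variable. A minimal migration sketch follows; the 0.4.0 signature is taken from the diff below, while the endpoint URL and key are placeholders, not values from this release:

```python
from isa_model.client import ISAModelClient

# 0.3.91: remote calls required mode="api" plus api_url
# client = ISAModelClient(mode="api", api_url="https://models.example.com", api_key="sk-...")

# 0.4.0: passing a service_endpoint implies remote HTTP mode; omitting it uses
# the local AIFactory. api_key may be omitted if ISA_API_KEY is set.
client = ISAModelClient(service_endpoint="https://models.example.com", api_key="sk-...")
```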
isa_model/client.py CHANGED
@@ -2,15 +2,78 @@
  # -*- coding: utf-8 -*-

  """
- ISA Model Client - Unified interface for all AI services
- Provides intelligent model selection and simplified API
+ ISA Model Client - Unified AI Service Interface
+ ===============================================
+
+ Overview:
+ The unified client interface for the ISA Model platform, providing intelligent model selection and a simplified API.
+
+ Key features:
+ - Unified interface for multimodal AI services: text, vision, audio, image generation, embeddings
+ - Automatic model selection: chooses the best-suited model from the task type and input data
+ - Streaming support: real-time streaming text generation for a better user experience
+ - Remote/local operation: supports both local service calls and remote API calls
+ - Cost tracking: automatically computes and tracks API call costs
+ - Tool support: LangChain tool integration to extend model capabilities
+ - Caching: service instances are cached to improve performance
+
+ Inputs:
+ - input_data: input data of several types (text, image paths, audio files, raw bytes, etc.)
+ - task: task type (chat, analyze, generate_speech, transcribe, etc.)
+ - service_type: service type (text, vision, audio, image, embedding)
+ - model: optional model name (selected intelligently if omitted)
+ - provider: optional provider name (openai, ollama, replicate, etc.)
+
+ Outputs:
+ - A unified response dictionary containing result and metadata
+ - Streaming responses: contain a "stream" async generator
+ - Non-streaming responses: contain the "result" data
+ - metadata: model info, billing info, selection reason, etc.
+
+ Core dependencies:
+ - isa_model.inference.ai_factory: AI service factory
+ - isa_model.core.services.intelligent_model_selector: intelligent model selector
+ - aiohttp: HTTP client (remote API mode)
+ - asyncio: async programming support
+
+ Usage examples:
+ ```python
+ # Create a client
+ client = ISAModelClient()
+
+ # Streaming text generation
+ result = await client.invoke("Write a story", "chat", "text")
+ async for token in result["stream"]:
+     print(token, end="", flush=True)
+
+ # Image analysis
+ result = await client.invoke("image.jpg", "analyze", "vision")
+ print(result["result"])
+
+ # Speech synthesis
+ result = await client.invoke("Hello world", "generate_speech", "audio")
+ print(result["result"])
+ ```
+
+ Architecture notes:
+ - Singleton pattern: ensures configuration consistency
+ - Async support: all operations are asynchronous
+ - Error handling: unified error handling and response format
+ - Extensibility: supports new service providers and models
+
+ Suggested improvements:
+ 1. Add request retries: handle unstable network conditions
+ 2. Add rate limiting: avoid exceeding API limits
+ 3. Improve the caching strategy: support LRU caching and TTL expiry
+ 4. Add monitoring metrics: record latency, success rate, and similar indicators
+ 5. Support batching: improve throughput for large request volumes
+ 6. Add configuration validation: verify API keys and configuration at startup
  """

  import logging
  import asyncio
  from typing import Any, Dict, Optional, List, Union
  from pathlib import Path
- import aiohttp

  from isa_model.inference.ai_factory import AIFactory

@@ -36,41 +99,104 @@ class ISAModelClient:
          response = await client.invoke("audio.mp3", "transcribe", "audio")
      """

+     # Consolidated task mappings for all service types
+     TASK_MAPPINGS = {
+         "vision": {
+             # Core tasks (direct mapping)
+             "analyze": "analyze",
+             "describe": "describe",
+             "extract": "extract",
+             "detect": "detect",
+             "classify": "classify",
+             "compare": "compare",
+
+             # Common aliases (backward compatibility)
+             "analyze_image": "analyze",
+             "describe_image": "describe",
+             "extract_text": "extract",
+             "extract_table": "extract",
+             "detect_objects": "detect",
+             "detect_ui": "detect",
+             "detect_ui_elements": "detect",
+             "get_coordinates": "detect",
+             "ocr": "extract",
+             "ui_analysis": "analyze",
+             "navigation": "analyze"
+         },
+         "audio": {
+             "generate_speech": "synthesize",
+             "text_to_speech": "synthesize",
+             "tts": "synthesize",
+             "transcribe": "transcribe",
+             "speech_to_text": "transcribe",
+             "stt": "transcribe",
+             "translate": "translate",
+             "detect_language": "detect_language"
+         },
+         "text": {
+             "chat": "chat",
+             "generate": "generate",
+             "complete": "complete",
+             "translate": "translate",
+             "summarize": "summarize",
+             "analyze": "analyze",
+             "extract": "extract",
+             "classify": "classify"
+         },
+         "image": {
+             "generate_image": "generate",
+             "generate": "generate",
+             "img2img": "img2img",
+             "image_to_image": "img2img",
+             "generate_batch": "generate_batch"
+         },
+         "embedding": {
+             "create_embedding": "embed",
+             "embed": "embed",
+             "embed_batch": "embed_batch",
+             "chunk_and_embed": "chunk_and_embed",
+             "similarity": "similarity",
+             "find_similar": "find_similar",
+             "rerank": "rerank",
+             "rerank_documents": "rerank_documents",
+             "document_ranking": "document_ranking"
+         }
+     }
+
+     # Service type configuration
+     SUPPORTED_SERVICE_TYPES = {"vision", "audio", "text", "image", "embedding"}
+
      def __init__(self,
                   config: Optional[Dict[str, Any]] = None,
-                  mode: str = "local",
-                  api_url: Optional[str] = None,
+                  service_endpoint: Optional[str] = None,
                   api_key: Optional[str] = None):
          """Initialize ISA Model Client

          Args:
              config: Optional configuration override
-             mode: "local" for direct AI Factory, "api" for HTTP API calls
-             api_url: API base URL (required if mode="api")
-             api_key: API key for authentication (optional)
+             service_endpoint: Optional service endpoint URL (if None, uses local AI Factory)
+             api_key: Optional API key for authentication (can also be set via ISA_API_KEY env var)
          """
          self.config = config or {}
-         self.mode = mode
-         self.api_url = api_url.rstrip('/') if api_url else None
-         self.api_key = api_key
+         self.service_endpoint = service_endpoint

-         # Setup HTTP headers for API mode
-         if self.mode == "api":
-             if not self.api_url:
-                 raise ValueError("api_url is required when mode='api'")
-
-             self.headers = {
-                 "Content-Type": "application/json",
-                 "User-Agent": "ISA-Model-Client/1.0.0"
-             }
-             if self.api_key:
-                 self.headers["Authorization"] = f"Bearer {self.api_key}"
+         # Handle API key authentication
+         import os
+         self.api_key = api_key or os.getenv("ISA_API_KEY")
+         if self.api_key:
+             logger.info("API key provided for authentication")
+         else:
+             logger.debug("No API key provided - using anonymous access")

-         # Initialize AI Factory for local mode
-         if self.mode == "local":
+         # Initialize AI Factory for direct service access (when service_endpoint is None)
+         if not self.service_endpoint:
              self.ai_factory = AIFactory.get_instance()
          else:
              self.ai_factory = None
+             logger.info(f"Using remote service endpoint: {self.service_endpoint}")
+
+         # HTTP client for remote API calls
+         self._http_session = None

          # Initialize intelligent model selector
          self.model_selector = None
@@ -89,166 +215,352 @@ class ISAModelClient:

          logger.info("ISA Model Client initialized")

-     async def stream(
+     async def _get_http_session(self):
+         """Get or create HTTP session for remote API calls"""
+         if self._http_session is None:
+             import aiohttp
+             headers = {}
+
+             # Add API key authentication if available
+             if self.api_key:
+                 headers["Authorization"] = f"Bearer {self.api_key}"
+                 headers["X-API-Key"] = self.api_key
+
+             self._http_session = aiohttp.ClientSession(headers=headers)
+
+         return self._http_session
+
+     async def _make_api_request(self, endpoint: str, data: Dict[str, Any]) -> Dict[str, Any]:
+         """Make HTTP request to remote API endpoint"""
+         if not self.service_endpoint:
+             raise ValueError("Service endpoint not configured for remote API calls")
+
+         session = await self._get_http_session()
+         url = f"{self.service_endpoint.rstrip('/')}/{endpoint.lstrip('/')}"
+
+         try:
+             async with session.post(url, json=data) as response:
+                 if response.status == 401:
+                     raise Exception("Authentication required or invalid API key")
+                 elif response.status == 403:
+                     raise Exception("Insufficient permissions")
+                 elif not response.ok:
+                     error_detail = await response.text()
+                     raise Exception(f"API request failed ({response.status}): {error_detail}")
+
+                 return await response.json()
+
+         except Exception as e:
+             logger.error(f"Remote API request failed: {e}")
+             raise
+
+     async def close(self):
+         """Close HTTP session and cleanup resources"""
+         if self._http_session:
+             await self._http_session.close()
+             self._http_session = None
+
+     async def _invoke_remote_api(
          self,
-         input_data: Union[str, bytes, Path, Dict[str, Any]],
-         task: str,
+         input_data: Union[str, bytes, Path, Dict[str, Any], List[Any]],
+         task: str,
          service_type: str,
-         model_hint: Optional[str] = None,
-         provider_hint: Optional[str] = None,
+         model: Optional[str] = None,
+         provider: Optional[str] = None,
+         stream: Optional[bool] = None,
          **kwargs
-     ):
-         """
-         Streaming invoke method that yields tokens in real-time
-
-         Args:
-             input_data: Input data (text for LLM streaming)
-             task: Task to perform
-             service_type: Type of service (only "text" supports streaming)
-             model_hint: Optional model preference
-             provider_hint: Optional provider preference
-             **kwargs: Additional parameters
-
-         Yields:
-             Individual tokens as they arrive from the model
-
-         Example:
-             async for token in client.stream("Hello world", "chat", "text"):
-                 print(token, end="", flush=True)
-         """
-         if service_type != "text":
-             raise ValueError("Streaming is only supported for text/LLM services")
-
+     ) -> Dict[str, Any]:
+         """Invoke remote API endpoint"""
          try:
-             if self.mode == "api":
-                 async for token in self._stream_api(input_data, task, service_type, model_hint, provider_hint, **kwargs):
-                     yield token
+             # Prepare request data for unified API
+             request_data = {
+                 "task": task,
+                 "service_type": service_type,
+                 **kwargs
+             }
+
+             # Add model and provider if specified
+             if model:
+                 request_data["model"] = model
+             if provider:
+                 request_data["provider"] = provider
+             # For remote API, disable streaming to get JSON response
+             request_data["stream"] = False
+
+             # Handle different input data types
+             if isinstance(input_data, (str, Path)):
+                 request_data["input_data"] = str(input_data)
+             elif isinstance(input_data, (dict, list)):
+                 request_data["input_data"] = input_data
              else:
-                 async for token in self._stream_local(input_data, task, service_type, model_hint, provider_hint, **kwargs):
-                     yield token
+                 # For binary data, convert to base64
+                 import base64
+                 if isinstance(input_data, bytes):
+                     request_data["input_data"] = base64.b64encode(input_data).decode()
+                     request_data["data_type"] = "base64"
+                 else:
+                     request_data["input_data"] = str(input_data)
+
+             # Make API request
+             response = await self._make_api_request("api/v1/invoke", request_data)
+
+             return response
+
          except Exception as e:
-             logger.error(f"Failed to stream {task} on {service_type}: {e}")
-             raise
-
+             logger.error(f"Remote API invocation failed: {e}")
+             return {
+                 "success": False,
+                 "error": str(e),
+                 "metadata": {
+                     "task": task,
+                     "service_type": service_type,
+                     "endpoint": "remote"
+                 }
+             }
+
      async def invoke(
          self,
-         input_data: Union[str, bytes, Path, Dict[str, Any]],
+         input_data: Union[str, bytes, Path, Dict[str, Any], List[Any]],
          task: str,
          service_type: str,
-         model_hint: Optional[str] = None,
-         provider_hint: Optional[str] = None,
-         stream: bool = False,
-         tools: Optional[List[Any]] = None,
+         model: Optional[str] = None,
+         provider: Optional[str] = None,
+         stream: Optional[bool] = None,
+         show_reasoning: Optional[bool] = False,
          **kwargs
-     ) -> Union[Dict[str, Any], object]:
+     ) -> Dict[str, Any]:
          """
          Unified invoke method with intelligent model selection

          Args:
-             input_data: Input data (image path, text, audio, etc.)
-             task: Task to perform (analyze_image, generate_speech, transcribe, etc.)
-             service_type: Type of service (vision, audio, text, image, embedding)
-             model_hint: Optional model preference
-             provider_hint: Optional provider preference
-             stream: Enable streaming for text services (returns AsyncGenerator)
-             tools: Optional list of tools for function calling (only for text services)
-             **kwargs: Additional task-specific parameters
+             input_data: Input data (str, LangChain messages, image path, audio, etc.)
+             task: Task to perform (chat, analyze_image, generate_speech, transcribe, etc.)
+             service_type: Type of service (text, vision, audio, image, embedding)
+             model: Model name (if None, uses intelligent selection)
+             provider: Provider name (if None, uses intelligent selection)
+             stream: Enable streaming for text tasks (default True for chat/generate tasks, supports tools)
+             show_reasoning: Show reasoning process for O4 models (uses Responses API)
+             **kwargs: Additional task-specific parameters (including tools for LangChain)

          Returns:
-             If stream=False: Unified response dictionary with result and metadata
-             If stream=True: AsyncGenerator yielding tokens (only for text services)
+             Unified response dictionary with result and metadata
+             For streaming: result["stream"] contains async generator
+             For non-streaming: result["result"] contains the response

          Examples:
-             # Vision tasks
-             await client.invoke("image.jpg", "analyze_image", "vision")
-             await client.invoke("screenshot.png", "detect_ui_elements", "vision")
-             await client.invoke("document.pdf", "extract_table", "vision")
-
-             # Audio tasks
-             await client.invoke("Hello world", "generate_speech", "audio")
-             await client.invoke("audio.mp3", "transcribe", "audio")
-
-             # Text tasks
-             await client.invoke("Translate this text", "translate", "text")
-             await client.invoke("What is AI?", "chat", "text")
+             # Text tasks with streaming (default for chat)
+             result = await client.invoke("Write a story", "chat", "text")
+             if "stream" in result:
+                 async for chunk in result["stream"]:
+                     print(chunk, end="", flush=True)
+             else:
+                 print(result["result"])

-             # Streaming text
-             async for token in await client.invoke("Hello", "chat", "text", stream=True):
-                 print(token, end="", flush=True)
+             # Text tasks with tools (also supports streaming)
+             result = await client.invoke("What's the weather?", "chat", "text", tools=[get_weather])
+             if "stream" in result:
+                 async for chunk in result["stream"]:
+                     print(chunk, end="", flush=True)
+             else:
+                 print(result["result"])

-             # Text with tools
-             await client.invoke("What's 5+3?", "chat", "text", tools=[calculator_function])
+             # Vision tasks (always non-streaming)
+             result = await client.invoke("image.jpg", "analyze", "vision")
+             print(result["result"])

-             # Streaming with tools
-             async for token in await client.invoke("What's 5+3?", "chat", "text", stream=True, tools=[calculator_function]):
-                 print(token, end="")
+             # Audio tasks
+             result = await client.invoke("Hello world", "generate_speech", "audio")
+             print(result["result"])

              # Image generation
-             await client.invoke("A beautiful sunset", "generate_image", "image")
+             result = await client.invoke("A beautiful sunset", "generate_image", "image")
+             print(result["result"])

              # Embedding
-             await client.invoke("Text to embed", "create_embedding", "embedding")
+             result = await client.invoke("Text to embed", "create_embedding", "embedding")
+             print(result["result"])
          """
          try:
-             # Handle streaming case
-             if stream:
-                 if service_type != "text":
-                     raise ValueError("Streaming is only supported for text services")
-
-                 if self.mode == "api":
-                     return self._stream_api(
-                         input_data=input_data,
-                         task=task,
-                         service_type=service_type,
-                         model_hint=model_hint,
-                         provider_hint=provider_hint,
-                         tools=tools,
-                         **kwargs
-                     )
+             # If using remote service endpoint, make API call
+             if self.service_endpoint:
+                 return await self._invoke_remote_api(
+                     input_data=input_data,
+                     task=task,
+                     service_type=service_type,
+                     model=model,
+                     provider=provider,
+                     stream=stream,
+                     **kwargs
+                 )
+
+             # Set default streaming for text tasks
+             if stream is None and service_type == "text":
+                 if task in ["chat", "generate"]:
+                     stream = True  # Enable streaming for chat and generate tasks
                  else:
-                     return self._stream_local(
-                         input_data=input_data,
-                         task=task,
-                         service_type=service_type,
-                         model_hint=model_hint,
-                         provider_hint=provider_hint,
-                         tools=tools,
-                         **kwargs
-                     )
+                     stream = False  # Disable for other text tasks

-             # Route to appropriate mode for non-streaming
-             if self.mode == "api":
-                 return await self._invoke_api(
+             # If streaming is enabled for text tasks, return streaming response
+             if stream and service_type == "text":
+                 return await self._invoke_service_streaming(
                      input_data=input_data,
                      task=task,
                      service_type=service_type,
-                     model_hint=model_hint,
-                     provider_hint=provider_hint,
-                     tools=tools,
+                     model_hint=model,
+                     provider_hint=provider,
+                     show_reasoning=show_reasoning,  # Explicitly pass show_reasoning
                      **kwargs
                  )
              else:
-                 return await self._invoke_local(
+                 # Use regular non-streaming service
+                 return await self._invoke_service(
                      input_data=input_data,
                      task=task,
                      service_type=service_type,
-                     model_hint=model_hint,
-                     provider_hint=provider_hint,
-                     tools=tools,
+                     model_hint=model,
+                     provider_hint=provider,
+                     stream=False,  # Force non-streaming
                      **kwargs
                  )

          except Exception as e:
-             logger.error(f"Failed to invoke {task} on {service_type}: {e}")
-             return {
-                 "success": False,
-                 "error": str(e),
-                 "metadata": {
-                     "task": task,
-                     "service_type": service_type,
-                     "input_type": type(input_data).__name__
-                 }
-             }
+             return self._handle_error(e, {
+                 "operation": "invoke",
+                 "task": task,
+                 "service_type": service_type,
+                 "input_type": type(input_data).__name__
+             })
+
+     async def invoke_stream(
+         self,
+         input_data: Union[str, bytes, Path, Dict[str, Any], List[Any]],
+         task: str,
+         service_type: str,
+         model: Optional[str] = None,
+         provider: Optional[str] = None,
+         return_metadata: bool = False,
+         **kwargs
+     ):
+         """
+         Unified streaming invoke method - returns async generator for real-time token streaming
+
+         Args:
+             input_data: Input data (str, LangChain messages, image path, audio, etc.)
+             task: Task to perform (chat, analyze_image, generate_speech, transcribe, etc.)
+             service_type: Type of service (text, vision, audio, image, embedding)
+             model: Model name (if None, uses intelligent selection)
+             provider: Provider name (if None, uses intelligent selection)
+             return_metadata: If True, yields ('metadata', metadata_dict) as final item
+             **kwargs: Additional task-specific parameters (including tools for LangChain)
+
+         Returns:
+             For text services: AsyncGenerator[Union[str, Tuple[str, Dict]], None] - yields tokens as they arrive
+             - Normal items: token strings
+             - Final item (if return_metadata=True): ('metadata', metadata_dict) with billing info
+             For other services: Raises ValueError (streaming not supported)
+
+         Examples:
+             # Simple streaming
+             async for token in client.invoke_stream("Hello!", "chat", "text"):
+                 print(token, end='', flush=True)
+
+             # Streaming with metadata
+             async for item in client.invoke_stream("Hello!", "chat", "text", return_metadata=True):
+                 if isinstance(item, tuple) and item[0] == 'metadata':
+                     print(f"\nBilling: {item[1]['billing']}")
+                 else:
+                     print(item, end='', flush=True)
+         """
+         try:
+             # Only text services support streaming
+             if service_type != "text":
+                 raise ValueError(f"Streaming not supported for service type: {service_type}")
+
+             # Tools are supported with streaming
+
+             # Step 1: Select best model for this task
+             selected_model = await self._select_model(
+                 input_data=input_data,
+                 task=task,
+                 service_type=service_type,
+                 model_hint=model,
+                 provider_hint=provider
+             )
+
+             # Step 2: Get appropriate service
+             service = await self._get_service(
+                 service_type=service_type,
+                 model_name=selected_model["model_id"],
+                 provider=selected_model["provider"],
+                 task=task,
+                 use_cache=False  # Don't cache for streaming to avoid state issues
+             )
+
+             # Step 3: Ensure service supports streaming
+             if not hasattr(service, 'astream'):
+                 raise ValueError(f"Service {selected_model['provider']}/{selected_model['model_id']} does not support streaming")
+
+             # Step 4: Enable streaming on the service
+             if hasattr(service, 'streaming'):
+                 service.streaming = True
+
+             # Step 5: Stream tokens and collect for billing
+             content_chunks = []
+             async for token in service.astream(input_data):
+                 content_chunks.append(token)
+                 yield token
+
+             # Step 6: After streaming is complete, calculate billing info and optionally return metadata
+             try:
+                 await asyncio.sleep(0.01)  # Small delay to ensure billing tracking completes
+
+                 # Get billing info (similar to _invoke_service)
+                 billing_info = self._get_billing_info(service, selected_model["model_id"])
+
+                 # Log billing info for tracking
+                 logger.info(f"Streaming completed - Model: {selected_model['model_id']}, "
+                             f"Tokens: {billing_info.get('total_tokens', 'N/A')}, "
+                             f"Cost: ${billing_info.get('cost_usd', 0):.4f}")
+
+                 # Return metadata if requested
+                 if return_metadata:
+                     metadata = {
+                         "model_used": selected_model["model_id"],
+                         "provider": selected_model["provider"],
+                         "task": task,
+                         "service_type": service_type,
+                         "selection_reason": selected_model.get("reason", "Default selection"),
+                         "billing": billing_info,
+                         "streaming": True,
+                         "tokens_streamed": len(content_chunks),
+                         "content_length": len("".join(content_chunks))
+                     }
+                     yield ('metadata', metadata)
+
+             except Exception as billing_error:
+                 logger.warning(f"Failed to track billing for streaming: {billing_error}")
+                 if return_metadata:
+                     # Return fallback metadata even if billing fails
+                     fallback_metadata = {
+                         "model_used": selected_model["model_id"],
+                         "provider": selected_model["provider"],
+                         "task": task,
+                         "service_type": service_type,
+                         "selection_reason": selected_model.get("reason", "Default selection"),
+                         "billing": {
+                             "cost_usd": 0.0,
+                             "error": str(billing_error),
+                             "currency": "USD"
+                         },
+                         "streaming": True,
+                         "tokens_streamed": len(content_chunks),
+                         "content_length": len("".join(content_chunks))
+                     }
+                     yield ('metadata', fallback_metadata)
+
+         except Exception as e:
+             logger.error(f"Streaming invoke failed: {e}")
+             raise

      async def _select_model(
          self,
@@ -268,8 +580,26 @@
                  "reason": "User specified"
              }

+         # If model_hint provided but no provider_hint, handle special cases
+         if model_hint:
+             # Special handling for hybrid service
+             if model_hint == "hybrid":
+                 return {
+                     "model_id": model_hint,
+                     "provider": "hybrid",
+                     "reason": "Hybrid service requested"
+                 }
+             # If only model_hint provided, use default provider for that service type
+             elif provider_hint is None:
+                 default_provider = self._get_default_provider(service_type)
+                 return {
+                     "model_id": model_hint,
+                     "provider": default_provider,
+                     "reason": "Model specified with default provider"
+                 }
+
          # Use intelligent model selector if available
-         if INTELLIGENT_SELECTOR_AVAILABLE:
+         if INTELLIGENT_SELECTOR_AVAILABLE and get_model_selector:
              try:
                  # Initialize model selector if not already done
                  if self.model_selector is None:
@@ -304,6 +634,17 @@
          # Fallback to default model selection
          return self._get_default_model(service_type, task, provider_hint)

+     def _get_default_provider(self, service_type: str) -> str:
+         """Get default provider for service type"""
+         defaults = {
+             "vision": "openai",
+             "audio": "openai",
+             "text": "openai",
+             "image": "replicate",
+             "embedding": "openai"
+         }
+         return defaults.get(service_type, "openai")
+
      def _get_default_model(
          self,
          service_type: str,
@@ -314,7 +655,7 @@

          defaults = {
              "vision": {
-                 "model_id": "gpt-4o-mini",
+                 "model_id": "gpt-4.1-nano",
                  "provider": "openai"
              },
              "audio": {
@@ -323,7 +664,7 @@
                  "default": {"model_id": "whisper-1", "provider": "openai"}
              },
              "text": {
-                 "model_id": "gpt-4.1-mini",
+                 "model_id": "gpt-4.1-nano",
                  "provider": "openai"
              },
              "image": {
@@ -331,8 +672,9 @@
                  "provider": "replicate"
              },
              "embedding": {
-                 "model_id": "text-embedding-3-small",
-                 "provider": "openai"
+                 "embed": {"model_id": "text-embedding-3-small", "provider": "openai"},
+                 "rerank": {"model_id": "isa-jina-reranker-v2-service", "provider": "isa"},
+                 "default": {"model_id": "text-embedding-3-small", "provider": "openai"}
              }
          }

@@ -344,6 +686,14 @@
                  default = defaults["audio"]["stt"]
              else:
                  default = defaults["audio"]["default"]
+         # Handle embedding service type with task-specific models
+         elif service_type == "embedding":
+             if "rerank" in task:
+                 default = defaults["embedding"]["rerank"]
+             elif "embed" in task:
+                 default = defaults["embedding"]["embed"]
+             else:
+                 default = defaults["embedding"]["default"]
          else:
              default = defaults.get(service_type, defaults["vision"])

@@ -363,59 +713,56 @@
          model_name: str,
          provider: str,
          task: str,
-         tools: Optional[List[Any]] = None
+         use_cache: bool = True
      ) -> Any:
          """Get appropriate service instance"""

          cache_key = f"{service_type}_{provider}_{model_name}"

-         # Check cache first
-         if cache_key in self._service_cache:
-             service = self._service_cache[cache_key]
-             # If tools are needed, bind them to the service
-             if tools and service_type == "text":
-                 return service.bind_tools(tools)
-             return service
+         # Check cache first (if caching is enabled)
+         if use_cache and cache_key in self._service_cache:
+             return self._service_cache[cache_key]

          try:
+             # Validate service type
+             self._validate_service_type(service_type)
+
              # Route to appropriate AIFactory method
              if service_type == "vision":
                  service = self.ai_factory.get_vision(model_name, provider)
-
              elif service_type == "audio":
                  if "speech" in task or "tts" in task:
                      service = self.ai_factory.get_tts(model_name, provider)
                  elif "transcribe" in task or "stt" in task:
                      service = self.ai_factory.get_stt(model_name, provider)
                  else:
-                     # Default to STT for unknown audio tasks
                      service = self.ai_factory.get_stt(model_name, provider)
-
              elif service_type == "text":
                  service = self.ai_factory.get_llm(model_name, provider)
-
              elif service_type == "image":
                  service = self.ai_factory.get_img("t2i", model_name, provider)
-
              elif service_type == "embedding":
                  service = self.ai_factory.get_embed(model_name, provider)

-             else:
-                 raise ValueError(f"Unsupported service type: {service_type}")
-
-             # Cache the service
-             self._service_cache[cache_key] = service
-
-             # If tools are needed, bind them to the service
-             if tools and service_type == "text":
-                 return service.bind_tools(tools)
-
+             # Cache the service (if caching is enabled)
+             if use_cache:
+                 self._service_cache[cache_key] = service
              return service

          except Exception as e:
              logger.error(f"Failed to get service {service_type}/{provider}/{model_name}: {e}")
              raise

+     def _validate_service_type(self, service_type: str) -> None:
+         """Validate service type is supported"""
+         if service_type not in self.SUPPORTED_SERVICE_TYPES:
+             raise ValueError(f"Unsupported service type: {service_type}")
+
+     def _map_task(self, task: str, service_type: str) -> str:
+         """Map common task names to unified task names"""
+         task_mapping = self.TASK_MAPPINGS.get(service_type, {})
+         return task_mapping.get(task, task)
+
      async def _execute_task(
          self,
          service: Any,
@@ -427,166 +774,69 @@
          """Execute the task using the appropriate service"""

          try:
+             self._validate_service_type(service_type)
+             unified_task = self._map_task(task, service_type)
+
              if service_type == "vision":
-                 return await self._execute_vision_task(service, input_data, task, **kwargs)
+                 return await service.invoke(
+                     image=input_data,
+                     task=unified_task,
+                     **kwargs
+                 )

              elif service_type == "audio":
-                 return await self._execute_audio_task(service, input_data, task, **kwargs)
+                 if unified_task in ["synthesize", "text_to_speech", "tts"]:
+                     return await service.invoke(
+                         text=input_data,
+                         task=unified_task,
+                         **kwargs
+                     )
+                 else:
+                     return await service.invoke(
+                         audio_input=input_data,
+                         task=unified_task,
+                         **kwargs
+                     )

              elif service_type == "text":
-                 return await self._execute_text_task(service, input_data, task, **kwargs)
+                 # Extract show_reasoning from kwargs if present
+                 show_reasoning = kwargs.pop('show_reasoning', False)
+                 result = await service.invoke(
+                     input_data=input_data,
+                     task=unified_task,
+                     show_reasoning=show_reasoning,
+                     **kwargs
+                 )
+
+                 logger.debug(f"Service result type: {type(result)}")
+                 logger.debug(f"Service result: {result}")
+
+                 if isinstance(result, dict) and 'message' in result:
+                     message = result['message']
+                     logger.debug(f"Extracted message type: {type(message)}")
+                     logger.debug(f"Extracted message: {message}")
+                     return message
+                 else:
+                     return result

              elif service_type == "image":
-                 return await self._execute_image_task(service, input_data, task, **kwargs)
+                 return await service.invoke(
+                     prompt=input_data,
+                     task=unified_task,
+                     **kwargs
+                 )

              elif service_type == "embedding":
-                 return await self._execute_embedding_task(service, input_data, task, **kwargs)
-
-             else:
-                 raise ValueError(f"Unsupported service type: {service_type}")
+                 return await service.invoke(
+                     input_data=input_data,
+                     task=unified_task,
+                     **kwargs
+                 )

          except Exception as e:
              logger.error(f"Task execution failed: {e}")
              raise

-     async def _execute_vision_task(self, service, input_data, task, **kwargs):
-         """Execute vision-related tasks using unified invoke method"""
-
-         # Map common task names to unified task names
-         task_mapping = {
-             "analyze_image": "analyze_image",
-             "detect_ui_elements": "detect_ui",
-             "extract_table": "extract_table",
-             "extract_text": "extract_text",
-             "ocr": "extract_text",
-             "describe": "analyze_image"
-         }
-
-         unified_task = task_mapping.get(task, task)
-
-         # Use unified invoke method with proper parameters
-         return await service.invoke(
-             image=input_data,
-             task=unified_task,
-             **kwargs
-         )
-
-     async def _execute_audio_task(self, service, input_data, task, **kwargs):
-         """Execute audio-related tasks using unified invoke method"""
-
-         # Map common task names to unified task names
-         task_mapping = {
-             "generate_speech": "synthesize",
-             "text_to_speech": "synthesize",
-             "tts": "synthesize",
-             "transcribe": "transcribe",
-             "speech_to_text": "transcribe",
-             "stt": "transcribe",
-             "translate": "translate",
-             "detect_language": "detect_language"
-         }
-
-         unified_task = task_mapping.get(task, task)
-
-         # Use unified invoke method with correct parameter name based on task type
-         if unified_task in ["synthesize", "text_to_speech", "tts"]:
-             # TTS services expect 'text' parameter
-             return await service.invoke(
-                 text=input_data,
-                 task=unified_task,
-                 **kwargs
-             )
-         else:
-             # STT services expect 'audio_input' parameter
-             return await service.invoke(
-                 audio_input=input_data,
-                 task=unified_task,
-                 **kwargs
-             )
-
-     async def _execute_text_task(self, service, input_data, task, **kwargs):
-         """Execute text-related tasks using unified invoke method"""
-
-         # Map common task names to unified task names
-         task_mapping = {
-             "chat": "chat",
-             "generate": "generate",
-             "complete": "complete",
-             "translate": "translate",
-             "summarize": "summarize",
-             "analyze": "analyze",
-             "extract": "extract",
-             "classify": "classify"
-         }
-
-         unified_task = task_mapping.get(task, task)
-
-         # Use unified invoke method
-         result = await service.invoke(
-             input_data=input_data,
-             task=unified_task,
-             **kwargs
-         )
-
-         # Handle the new response format from LLM services
-         # LLM services now return {"message": ..., "success": ..., "metadata": ...}
-         if isinstance(result, dict) and "message" in result:
-             # Extract the message content (convert AIMessage to string)
-             message = result["message"]
-             if hasattr(message, 'content'):
-                 # Handle langchain AIMessage objects
-                 return message.content
-             elif isinstance(message, str):
-                 return message
-             else:
-                 # Fallback: convert to string
-                 return str(message)
-
-         # Fallback for other service types or legacy format
-         return result
-
-     async def _execute_image_task(self, service, input_data, task, **kwargs):
-         """Execute image generation tasks using unified invoke method"""
-
-         # Map common task names to unified task names
-         task_mapping = {
-             "generate_image": "generate",
-             "generate": "generate",
-             "img2img": "img2img",
-             "image_to_image": "img2img",
-             "generate_batch": "generate_batch"
-         }
-
-         unified_task = task_mapping.get(task, task)
-
-         # Use unified invoke method
-         return await service.invoke(
-             prompt=input_data,
-             task=unified_task,
-             **kwargs
-         )
-
-     async def _execute_embedding_task(self, service, input_data, task, **kwargs):
-         """Execute embedding tasks using unified invoke method"""
-
-         # Map common task names to unified task names
-         task_mapping = {
-             "create_embedding": "embed",
-             "embed": "embed",
-             "embed_batch": "embed_batch",
-             "chunk_and_embed": "chunk_and_embed",
-             "similarity": "similarity",
-             "find_similar": "find_similar"
-         }
-
-         unified_task = task_mapping.get(task, task)
-
-         # Use unified invoke method
-         return await service.invoke(
-             input_data=input_data,
-             task=unified_task,
-             **kwargs
-         )

      def clear_cache(self):
          """Clear service cache"""
@@ -602,7 +852,7 @@
          Returns:
              List of available models with metadata
          """
-         if INTELLIGENT_SELECTOR_AVAILABLE:
+         if INTELLIGENT_SELECTOR_AVAILABLE and get_model_selector:
              try:
                  if self.model_selector is None:
                      self.model_selector = await get_model_selector(self.config)
@@ -649,17 +899,26 @@
                  "error": str(e)
              }

-     async def _invoke_local(
+     def _handle_error(self, e: Exception, context: Dict[str, Any]) -> Dict[str, Any]:
+         """Handle errors consistently across methods"""
+         error_msg = f"Failed to {context.get('operation', 'execute')} {context.get('task', '')} on {context.get('service_type', '')}: {e}"
+         logger.error(error_msg)
+         return {
+             "success": False,
+             "error": str(e),
+             "metadata": context
+         }
+
+     async def _invoke_service_streaming(
          self,
-         input_data: Union[str, bytes, Path, Dict[str, Any]],
+         input_data: Union[str, bytes, Path, Dict[str, Any], List[Any]],
          task: str,
          service_type: str,
          model_hint: Optional[str] = None,
          provider_hint: Optional[str] = None,
-         tools: Optional[List[Any]] = None,
          **kwargs
      ) -> Dict[str, Any]:
-         """Local invoke using AI Factory (original logic)"""
+         """Service invoke that returns streaming response with async generator"""
          try:
              # Step 1: Select best model for this task
              selected_model = await self._select_model(
@@ -676,305 +935,205 @@
                  model_name=selected_model["model_id"],
                  provider=selected_model["provider"],
                  task=task,
-                 tools=tools
+                 use_cache=False  # Don't cache for streaming to avoid state issues
              )

-             # Step 3: Execute task with unified interface
-             result = await self._execute_task(
-                 service=service,
-                 input_data=input_data,
-                 task=task,
-                 service_type=service_type,
-                 **kwargs
-             )
+             # Step 3: Handle tools for LLM services (bind tools if provided)
+             tools = kwargs.pop("tools", None)
+             if service_type == "text" and tools:
+                 service = await self._get_service(
+                     service_type=service_type,
+                     model_name=selected_model["model_id"],
+                     provider=selected_model["provider"],
+                     task=task,
+                     use_cache=False
+                 )
+                 service = service.bind_tools(tools)
+
+             # Step 4: Ensure service supports streaming
+             if not hasattr(service, 'astream'):
+                 raise ValueError(f"Service {selected_model['provider']}/{selected_model['model_id']} does not support streaming")
+
+             # Step 5: Enable streaming on the service
+             if hasattr(service, 'streaming'):
+                 service.streaming = True

-             # Step 4: Return unified response
+             # Step 6: Create async generator wrapper that yields tokens
+             async def stream_generator():
+                 # Pass show_reasoning parameter if available for LLM services
+                 if service_type == "text" and hasattr(service, 'astream'):
+                     show_reasoning = kwargs.get('show_reasoning', False)
+                     logger.debug(f"Stream generator: show_reasoning={show_reasoning}")
+                     if 'show_reasoning' in kwargs:
+                         async for token in service.astream(input_data, show_reasoning=show_reasoning):
+                             yield token
+                     else:
+                         async for token in service.astream(input_data):
+                             yield token
+                 else:
+                     async for token in service.astream(input_data):
+                         yield token
+
+             # Return response with stream generator and metadata
              return {
                  "success": True,
-                 "result": result,
+                 "stream": stream_generator(),
                  "metadata": {
                      "model_used": selected_model["model_id"],
                      "provider": selected_model["provider"],
                      "task": task,
                      "service_type": service_type,
-                     "selection_reason": selected_model.get("reason", "Default selection")
+                     "selection_reason": selected_model.get("reason", "Default selection"),
+                     "streaming": True
                  }
              }
          except Exception as e:
-             logger.error(f"Local invoke failed: {e}")
+             logger.error(f"Streaming service invoke failed: {e}")
              raise
-
-     async def _invoke_api(
+
+     async def _invoke_service(
          self,
-         input_data: Union[str, bytes, Path, Dict[str, Any]],
+         input_data: Union[str, bytes, Path, Dict[str, Any], List[Any]],
          task: str,
          service_type: str,
          model_hint: Optional[str] = None,
          provider_hint: Optional[str] = None,
+         stream: Optional[bool] = None,
          **kwargs
      ) -> Dict[str, Any]:
-         """API invoke using HTTP requests"""
-
-         # Handle file inputs
-         if isinstance(input_data, Path):
-             return await self._invoke_api_file(
-                 file_path=input_data,
-                 task=task,
+         """Direct service invoke - passes LangChain objects and tools directly to services"""
+         try:
+             # Step 1: Select best model for this task
+             selected_model = await self._select_model(
+                 input_data=input_data,
+                 task=task,
                  service_type=service_type,
                  model_hint=model_hint,
-                 provider_hint=provider_hint,
-                 **kwargs
+                 provider_hint=provider_hint
              )
-
-         # Handle binary data
-         if isinstance(input_data, bytes):
-             return await self._invoke_api_binary(
-                 data=input_data,
+
+             # Step 2: Get appropriate service
+             service = await self._get_service(
+                 service_type=service_type,
+                 model_name=selected_model["model_id"],
+                 provider=selected_model["provider"],
+                 task=task
+             )
+
+             # Step 3: Handle tools for LLM services (bind tools if provided)
+             tools = kwargs.pop("tools", None)
+             if service_type == "text" and tools:
+                 service = await self._get_service(
+                     service_type=service_type,
+                     model_name=selected_model["model_id"],
+                     provider=selected_model["provider"],
+                     task=task,
+                     use_cache=False
+                 )
+                 service = service.bind_tools(tools)
+                 # Note: streaming is still supported with tools
+
+             # Step 4: Set streaming for text services
+             if service_type == "text" and stream is not None:
+                 if hasattr(service, 'streaming'):
+                     service.streaming = stream
+
+             # Step 5: Execute task with unified interface
+             result = await self._execute_task(
+                 service=service,
+                 input_data=input_data,
                  task=task,
                  service_type=service_type,
-                 model_hint=model_hint,
-                 provider_hint=provider_hint,
                  **kwargs
              )
-
-         # Handle text/JSON data
-         payload = {
-             "input_data": input_data,
-             "task": task,
-             "service_type": service_type,
-             "model_hint": model_hint,
-             "provider_hint": provider_hint,
-             "parameters": kwargs
-         }
-
-         async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=300)) as session:
-             try:
-                 async with session.post(
-                     f"{self.api_url}/api/v1/invoke",
-                     json=payload,
-                     headers=self.headers
-                 ) as response:
-
-                     if response.status == 200:
-                         return await response.json()
-                     else:
-                         error_data = await response.text()
-                         raise Exception(f"API error {response.status}: {error_data}")
-
-             except Exception as e:
-                 logger.error(f"API invoke failed: {e}")
-                 raise
+
+             # Step 6: Wait for billing tracking to complete, then get billing information
+             await asyncio.sleep(0.01)  # Small delay to ensure billing tracking completes
+             billing_info = self._get_billing_info(service, selected_model["model_id"])
+
+             # Return unified response
+             return {
+                 "success": True,
+                 "result": result,
+                 "metadata": {
+                     "model_used": selected_model["model_id"],
+                     "provider": selected_model["provider"],
+                     "task": task,
+                     "service_type": service_type,
+                     "selection_reason": selected_model.get("reason", "Default selection"),
+                     "billing": billing_info
+                 }
+             }
+         except Exception as e:
+             logger.error(f"Service invoke failed: {e}")
+             raise

-     async def _invoke_api_file(
-         self,
-         file_path: Path,
-         task: str,
-         service_type: str,
-         model_hint: Optional[str] = None,
-         provider_hint: Optional[str] = None,
-         **kwargs
-     ) -> Dict[str, Any]:
-         """API file upload"""
-
-         if not file_path.exists():
-             raise FileNotFoundError(f"File not found: {file_path}")
-
-         data = aiohttp.FormData()
-         data.add_field('task', task)
-         data.add_field('service_type', service_type)
-
-         if model_hint:
-             data.add_field('model_hint', model_hint)
-         if provider_hint:
-             data.add_field('provider_hint', provider_hint)
-
-         data.add_field('file',
-                        open(file_path, 'rb'),
-                        filename=file_path.name,
-                        content_type='application/octet-stream')
-
-         headers = {k: v for k, v in self.headers.items() if k != "Content-Type"}
-
-         async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=300)) as session:
-             try:
-                 async with session.post(
-                     f"{self.api_url}/api/v1/invoke-file",
-                     data=data,
-                     headers=headers
-                 ) as response:
+     def _get_billing_info(self, service: Any, model_id: str) -> Dict[str, Any]:
+         """Extract billing information from service after task execution"""
+         try:
+             # Check if service has model_manager with billing_tracker
+             if hasattr(service, 'model_manager') and hasattr(service.model_manager, 'billing_tracker'):
+                 billing_tracker = service.model_manager.billing_tracker
+
+                 # Get the latest usage record for this model
+                 model_records = [
+                     record for record in billing_tracker.usage_records
+                     if record.model_id == model_id
+                 ]
+
+                 if model_records:
+                     # Get the most recent record
+                     latest_record = max(model_records, key=lambda r: r.timestamp)

-                     if response.status == 200:
-                         return await response.json()
-                     else:
-                         error_data = await response.text()
-                         raise Exception(f"API error {response.status}: {error_data}")
-
-             except Exception as e:
-                 logger.error(f"API file upload failed: {e}")
-                 raise
+                     return {
+                         "cost_usd": latest_record.cost_usd,
+                         "input_tokens": latest_record.input_tokens,
+                         "output_tokens": latest_record.output_tokens,
+                         "total_tokens": latest_record.total_tokens,
+                         "operation": latest_record.operation,
+                         "timestamp": latest_record.timestamp,
+                         "currency": "USD"
+                     }
+
+             # Fallback: no billing info available
+             return {
+                 "cost_usd": 0.0,
+                 "input_tokens": None,
+                 "output_tokens": None,
+                 "total_tokens": None,
+                 "operation": None,
+                 "timestamp": None,
+                 "currency": "USD",
+                 "note": "Billing information not available"
+             }
+
+         except Exception as e:
+             logger.warning(f"Failed to get billing info: {e}")
+             return {
+                 "cost_usd": 0.0,
+                 "error": str(e),
+                 "currency": "USD"
+             }

-     async def _invoke_api_binary(
-         self,
-         data: bytes,
-         task: str,
-         service_type: str,
-         model_hint: Optional[str] = None,
-         provider_hint: Optional[str] = None,
-         **kwargs
-     ) -> Dict[str, Any]:
-         """API binary upload"""
-
-         form_data = aiohttp.FormData()
-         form_data.add_field('task', task)
-         form_data.add_field('service_type', service_type)
-
-         if model_hint:
-             form_data.add_field('model_hint', model_hint)
-         if provider_hint:
-             form_data.add_field('provider_hint', provider_hint)
-
-         form_data.add_field('file',
-                             data,
-                             filename='data.bin',
-                             content_type='application/octet-stream')
-
-         headers = {k: v for k, v in self.headers.items() if k != "Content-Type"}
-
-         async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=300)) as session:
-             try:
-                 async with session.post(
-                     f"{self.api_url}/api/v1/invoke-file",
-                     data=form_data,
-                     headers=headers
-                 ) as response:
-
-                     if response.status == 200:
-                         return await response.json()
-                     else:
-                         error_data = await response.text()
-                         raise Exception(f"API error {response.status}: {error_data}")
-
-             except Exception as e:
-                 logger.error(f"API binary upload failed: {e}")
-                 raise
-
-     async def _stream_local(
-         self,
-         input_data: Union[str, bytes, Path, Dict[str, Any]],
-         task: str,
-         service_type: str,
-         model_hint: Optional[str] = None,
-         provider_hint: Optional[str] = None,
-         tools: Optional[List[Any]] = None,
-         **kwargs
-     ):
-         """Local streaming using AI Factory"""
-         # Step 1: Select best model for this task
-         selected_model = await self._select_model(
-             input_data=input_data,
-             task=task,
-             service_type=service_type,
-             model_hint=model_hint,
-             provider_hint=provider_hint
-         )
-
-         # Step 2: Get appropriate service
-         service = await self._get_service(
-             service_type=service_type,
-             model_name=selected_model["model_id"],
-             provider=selected_model["provider"],
-             task=task,
-             tools=tools
-         )
-
-         # Step 3: Yield tokens from the stream
-         async for token in service.astream(input_data):
-             yield token
-
-     async def _stream_api(
-         self,
-         input_data: Union[str, bytes, Path, Dict[str, Any]],
-         task: str,
-         service_type: str,
-         model_hint: Optional[str] = None,
-         provider_hint: Optional[str] = None,
-         **kwargs
-     ):
-         """API streaming using Server-Sent Events (SSE)"""
-
-         # Only support text streaming for now
-         if not isinstance(input_data, (str, dict)):
-             raise ValueError("API streaming only supports text input")
-
-         payload = {
-             "input_data": input_data,
-             "task": task,
-             "service_type": service_type,
-             "model_hint": model_hint,
-             "provider_hint": provider_hint,
-             "stream": True,
-             "parameters": kwargs
-         }
-
-         async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=300)) as session:
-             try:
-                 async with session.post(
-                     f"{self.api_url}/api/v1/stream",
-                     json=payload,
-                     headers=self.headers
-                 ) as response:
-
-                     if response.status == 200:
-                         # Parse SSE stream
-                         async for line in response.content:
-                             if line:
-                                 line_str = line.decode().strip()
-                                 if line_str.startswith("data: "):
-                                     try:
-                                         # Parse SSE data
-                                         import json
-                                         json_str = line_str[6:]  # Remove "data: " prefix
-                                         data = json.loads(json_str)
-
-                                         if data.get("type") == "token" and "token" in data:
-                                             yield data["token"]
-                                         elif data.get("type") == "completion":
-                                             # End of stream
-                                             break
-                                         elif data.get("type") == "error":
-                                             raise Exception(f"Server error: {data.get('error')}")
-
-                                     except json.JSONDecodeError:
-                                         # Skip malformed lines
-                                         continue
-                     else:
-                         error_data = await response.text()
-                         raise Exception(f"API streaming error {response.status}: {error_data}")
-
-             except Exception as e:
-                 logger.error(f"API streaming failed: {e}")
-                 raise


  # Convenience function for quick access
  def create_client(
      config: Optional[Dict[str, Any]] = None,
-     mode: str = "local",
-     api_url: Optional[str] = None,
+     service_endpoint: Optional[str] = None,
      api_key: Optional[str] = None
  ) -> ISAModelClient:
      """Create ISA Model Client instance

      Args:
          config: Optional configuration
-         mode: "local" for direct AI Factory, "api" for HTTP API calls
-         api_url: API base URL (required if mode="api")
-         api_key: API key for authentication (optional)
+         service_endpoint: Optional service endpoint URL (if None, uses local AI Factory)
+         api_key: Optional API key for authentication (can also be set via ISA_API_KEY env var)

      Returns:
          ISAModelClient instance
      """
-     return ISAModelClient(config=config, mode=mode, api_url=api_url, api_key=api_key)
+     return ISAModelClient(config=config, service_endpoint=service_endpoint, api_key=api_key)


  # Export for easy import
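For reference, a minimal sketch of driving the new invoke_stream surface added in this diff. It assumes a locally configured environment (providers and API keys already set up); the billing fields read here are the ones _get_billing_info populates, and close() releases the aiohttp session the client opens for remote endpoints:

```python
import asyncio
from isa_model.client import create_client

async def main():
    client = create_client()  # no service_endpoint: local AIFactory mode

    # invoke_stream yields token strings, then ('metadata', {...}) as the
    # final item when return_metadata=True.
    async for item in client.invoke_stream("Hello!", "chat", "text", return_metadata=True):
        if isinstance(item, tuple) and item[0] == "metadata":
            print(f"\nCost: ${item[1]['billing'].get('cost_usd', 0):.4f}")
        else:
            print(item, end="", flush=True)

    await client.close()  # closes the HTTP session if one was created

asyncio.run(main())
```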