isa-model 0.3.9__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. isa_model/__init__.py +1 -1
  2. isa_model/client.py +732 -565
  3. isa_model/core/cache/redis_cache.py +401 -0
  4. isa_model/core/config/config_manager.py +53 -10
  5. isa_model/core/config.py +1 -1
  6. isa_model/core/database/__init__.py +1 -0
  7. isa_model/core/database/migrations.py +277 -0
  8. isa_model/core/database/supabase_client.py +123 -0
  9. isa_model/core/models/__init__.py +37 -0
  10. isa_model/core/models/model_billing_tracker.py +60 -88
  11. isa_model/core/models/model_manager.py +36 -18
  12. isa_model/core/models/model_repo.py +44 -38
  13. isa_model/core/models/model_statistics_tracker.py +234 -0
  14. isa_model/core/models/model_storage.py +0 -1
  15. isa_model/core/models/model_version_manager.py +959 -0
  16. isa_model/core/pricing_manager.py +2 -249
  17. isa_model/core/resilience/circuit_breaker.py +366 -0
  18. isa_model/core/security/secrets.py +358 -0
  19. isa_model/core/services/__init__.py +2 -4
  20. isa_model/core/services/intelligent_model_selector.py +101 -370
  21. isa_model/core/storage/hf_storage.py +1 -1
  22. isa_model/core/types.py +7 -0
  23. isa_model/deployment/cloud/modal/isa_audio_chatTTS_service.py +520 -0
  24. isa_model/deployment/cloud/modal/isa_audio_fish_service.py +0 -0
  25. isa_model/deployment/cloud/modal/isa_audio_openvoice_service.py +758 -0
  26. isa_model/deployment/cloud/modal/isa_audio_service_v2.py +1044 -0
  27. isa_model/deployment/cloud/modal/isa_embed_rerank_service.py +296 -0
  28. isa_model/deployment/cloud/modal/isa_video_hunyuan_service.py +423 -0
  29. isa_model/deployment/cloud/modal/isa_vision_ocr_service.py +519 -0
  30. isa_model/deployment/cloud/modal/isa_vision_qwen25_service.py +709 -0
  31. isa_model/deployment/cloud/modal/isa_vision_table_service.py +467 -323
  32. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +607 -180
  33. isa_model/deployment/cloud/modal/isa_vision_ui_service_optimized.py +660 -0
  34. isa_model/deployment/core/deployment_manager.py +6 -4
  35. isa_model/deployment/services/auto_hf_modal_deployer.py +894 -0
  36. isa_model/eval/benchmarks/__init__.py +27 -0
  37. isa_model/eval/benchmarks/multimodal_datasets.py +460 -0
  38. isa_model/eval/benchmarks.py +244 -12
  39. isa_model/eval/evaluators/__init__.py +8 -2
  40. isa_model/eval/evaluators/audio_evaluator.py +727 -0
  41. isa_model/eval/evaluators/embedding_evaluator.py +742 -0
  42. isa_model/eval/evaluators/vision_evaluator.py +564 -0
  43. isa_model/eval/example_evaluation.py +395 -0
  44. isa_model/eval/factory.py +272 -5
  45. isa_model/eval/isa_benchmarks.py +700 -0
  46. isa_model/eval/isa_integration.py +582 -0
  47. isa_model/eval/metrics.py +159 -6
  48. isa_model/eval/tests/unit/test_basic.py +396 -0
  49. isa_model/inference/ai_factory.py +44 -8
  50. isa_model/inference/services/audio/__init__.py +21 -0
  51. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  52. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  53. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  54. isa_model/inference/services/audio/openai_stt_service.py +32 -6
  55. isa_model/inference/services/base_service.py +17 -1
  56. isa_model/inference/services/embedding/__init__.py +13 -0
  57. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  58. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  59. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  60. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  61. isa_model/inference/services/img/__init__.py +2 -2
  62. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  63. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  64. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  65. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  66. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  67. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  68. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  69. isa_model/inference/services/llm/base_llm_service.py +30 -6
  70. isa_model/inference/services/llm/helpers/llm_adapter.py +63 -9
  71. isa_model/inference/services/llm/ollama_llm_service.py +2 -1
  72. isa_model/inference/services/llm/openai_llm_service.py +652 -55
  73. isa_model/inference/services/llm/yyds_llm_service.py +2 -1
  74. isa_model/inference/services/vision/__init__.py +5 -5
  75. isa_model/inference/services/vision/base_vision_service.py +118 -185
  76. isa_model/inference/services/vision/helpers/image_utils.py +11 -5
  77. isa_model/inference/services/vision/isa_vision_service.py +573 -0
  78. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  79. isa_model/serving/api/fastapi_server.py +88 -16
  80. isa_model/serving/api/middleware/auth.py +311 -0
  81. isa_model/serving/api/middleware/security.py +278 -0
  82. isa_model/serving/api/routes/analytics.py +486 -0
  83. isa_model/serving/api/routes/deployments.py +339 -0
  84. isa_model/serving/api/routes/evaluations.py +579 -0
  85. isa_model/serving/api/routes/logs.py +430 -0
  86. isa_model/serving/api/routes/settings.py +582 -0
  87. isa_model/serving/api/routes/unified.py +324 -165
  88. isa_model/serving/api/startup.py +304 -0
  89. isa_model/serving/modal_proxy_server.py +249 -0
  90. isa_model/training/__init__.py +100 -6
  91. isa_model/training/core/__init__.py +4 -1
  92. isa_model/training/examples/intelligent_training_example.py +281 -0
  93. isa_model/training/intelligent/__init__.py +25 -0
  94. isa_model/training/intelligent/decision_engine.py +643 -0
  95. isa_model/training/intelligent/intelligent_factory.py +888 -0
  96. isa_model/training/intelligent/knowledge_base.py +751 -0
  97. isa_model/training/intelligent/resource_optimizer.py +839 -0
  98. isa_model/training/intelligent/task_classifier.py +576 -0
  99. isa_model/training/storage/__init__.py +24 -0
  100. isa_model/training/storage/core_integration.py +439 -0
  101. isa_model/training/storage/training_repository.py +552 -0
  102. isa_model/training/storage/training_storage.py +628 -0
  103. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/METADATA +13 -1
  104. isa_model-0.4.0.dist-info/RECORD +182 -0
  105. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  106. isa_model/deployment/cloud/modal/register_models.py +0 -321
  107. isa_model/inference/adapter/unified_api.py +0 -248
  108. isa_model/inference/services/helpers/stacked_config.py +0 -148
  109. isa_model/inference/services/img/flux_professional_service.py +0 -603
  110. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  111. isa_model/inference/services/others/table_transformer_service.py +0 -61
  112. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  113. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  114. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  115. isa_model/scripts/inference_tracker.py +0 -283
  116. isa_model/scripts/mlflow_manager.py +0 -379
  117. isa_model/scripts/model_registry.py +0 -465
  118. isa_model/scripts/register_models.py +0 -370
  119. isa_model/scripts/register_models_with_embeddings.py +0 -510
  120. isa_model/scripts/start_mlflow.py +0 -95
  121. isa_model/scripts/training_tracker.py +0 -257
  122. isa_model-0.3.9.dist-info/RECORD +0 -138
  123. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/WHEEL +0 -0
  124. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/top_level.txt +0 -0
@@ -9,13 +9,16 @@ This is the main API that handles all types of AI requests:
  - Embedding tasks
  """

- from fastapi import APIRouter, HTTPException, UploadFile, File, Form
+ from fastapi import APIRouter, HTTPException, UploadFile, File, Form, Request, Depends
  from fastapi.responses import StreamingResponse
  from pydantic import BaseModel, Field
- from typing import Optional, Dict, Any, Union, List
+ from typing import Optional, Dict, Any, Union, List, AsyncGenerator
  import logging
+ from ..middleware.auth import optional_auth, require_read_access
+ from ..middleware.security import rate_limit_standard, rate_limit_heavy, sanitize_input
  import asyncio
  import json
+ import time
  from pathlib import Path

  from isa_model.client import ISAModelClient
@@ -24,30 +27,152 @@ logger = logging.getLogger(__name__)
  router = APIRouter()

  class UnifiedRequest(BaseModel):
- """Unified request model for all AI services"""
- input_data: Union[str, Dict[str, Any]] = Field(..., description="Input data (text, image URL, etc.)")
- task: str = Field(..., description="Task to perform (chat, analyze_image, generate_speech, etc.)")
- service_type: str = Field(..., description="Service type (text, vision, audio, image, embedding)")
- model_hint: Optional[str] = Field(None, description="Optional model preference")
- provider_hint: Optional[str] = Field(None, description="Optional provider preference")
- stream: Optional[bool] = Field(False, description="Enable streaming for text services")
- parameters: Optional[Dict[str, Any]] = Field(default_factory=dict, description="Additional task parameters")
+ """
+ **Unified request model - supports all AI service types**
+
+ This model provides a single request interface for every AI service (text, vision, audio, image generation, embedding).
+
+ **Supported service types**:
+ - `text`: text services (chat, generation, translation)
+ - `vision`: vision services (image analysis, OCR, UI detection)
+ - `audio`: audio services (TTS, STT, transcription)
+ - `image`: image generation services (text-to-image, image transformation)
+ - `embedding`: embedding services (text vectorization, similarity computation)
+
+ **Example request**:
+ ```json
+ {
+ "input_data": "Hello, world!",
+ "task": "chat",
+ "service_type": "text",
+ "model": "gpt-4o-mini",
+ "provider": "openai"
+ }
+ ```
+ """
+ input_data: Union[str, Dict[str, Any]] = Field(
+ ...,
+ description="Input data in any supported format: text string, LangChain message list, image URL/path, audio file path, etc. The exact format depends on service_type.",
+ examples=["Hello, world!", "https://example.com/image.jpg", "/path/to/audio.mp3"]
+ )
+ task: str = Field(
+ ...,
+ description="Task to perform. Common tasks: chat, analyze_image, generate_speech, create_embedding, etc.",
+ examples=["chat", "analyze_image", "generate_speech", "transcribe", "generate_image", "create_embedding"]
+ )
+ service_type: str = Field(
+ ...,
+ description="Service type, which determines the AI service used. Allowed values: text, vision, audio, image, embedding.",
+ examples=["text", "vision", "audio", "image", "embedding"]
+ )
+ model: Optional[str] = Field(
+ None,
+ description="Optional model override. If set, the system will try to use this model. Common models: gpt-4o-mini, gpt-4o, whisper-1, flux-schnell, etc.",
+ examples=["gpt-4o-mini", "gpt-4o", "whisper-1", "tts-1", "flux-schnell", "text-embedding-3-small"]
+ )
+ provider: Optional[str] = Field(
+ None,
+ description="Optional provider override. If set, the system will try to use this provider. Common providers: openai, replicate, anthropic, etc.",
+ examples=["openai", "replicate", "anthropic"]
+ )
+ stream: Optional[bool] = Field(
+ None,
+ description="Whether to enable a streaming response. Text services only. Streaming is on by default for text+chat tasks and is automatically disabled when tool calls are used, to guarantee complete responses."
+ )
+ tools: Optional[List[Dict[str, Any]]] = Field(
+ None,
+ description="Optional list of tools for function calling. Text services only. The tool format follows the LangChain tool specification. Streaming is automatically disabled when tools are used.",
+ examples=[[
+ {
+ "name": "get_weather",
+ "description": "Get weather information",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {"type": "string", "description": "City name"}
+ },
+ "required": ["location"]
+ }
+ }
+ ]]
+ )
+ parameters: Optional[Dict[str, Any]] = Field(
+ default_factory=dict,
+ description="Additional task parameters for fine-grained control of service behavior. Available parameters depend on the service type, e.g. temperature, max_tokens, voice.",
+ examples=[{"temperature": 0.7, "max_tokens": 1000}, {"voice": "alloy", "speed": 1.0}, {"width": 1024, "height": 1024}]
+ )

  class UnifiedResponse(BaseModel):
- """Unified response model for all AI services"""
- success: bool
- result: Optional[Any] = None
- error: Optional[str] = None
- metadata: Dict[str, Any]
+ """
+ **Unified response model - the standard response format for all AI services**
+
+ Provides a consistent success/failure status, result payload, and metadata.
+
+ **Example success response**:
+ ```json
+ {
+ "success": true,
+ "result": {
+ "content": "Hello! I am an AI assistant.",
+ "tool_calls": [],
+ "response_metadata": {
+ "token_usage": {
+ "prompt_tokens": 15,
+ "completion_tokens": 10,
+ "total_tokens": 25
+ }
+ }
+ },
+ "error": null,
+ "metadata": {
+ "model_used": "gpt-4o-mini",
+ "provider": "openai",
+ "task": "chat",
+ "service_type": "text",
+ "processing_time": 1.23
+ }
+ }
+ ```
+
+ **Example error response**:
+ ```json
+ {
+ "success": false,
+ "result": null,
+ "error": "Model 'invalid-model' not found",
+ "metadata": {
+ "error_code": "MODEL_NOT_FOUND",
+ "task": "chat",
+ "service_type": "text"
+ }
+ }
+ ```
+ """
+ success: bool = Field(
+ ...,
+ description="Whether the request executed successfully: true on success, false on failure."
+ )
+ result: Optional[Any] = Field(
+ None,
+ description="Service execution result. Holds the actual data on success and is null on failure. The data type depends on the service: text services return an AIMessage object, vision services return analysis text, audio services return a file path or text, image services return an image URL, and embedding services return a vector array."
+ )
+ error: Optional[str] = Field(
+ None,
+ description="Error message. Null on success; contains a detailed description of the failure otherwise."
+ )
+ metadata: Dict[str, Any] = Field(
+ ...,
+ description="Response metadata with execution details such as the model used, provider, processing time, and token usage. The contents depend on the service type and the outcome of the call."
+ )

  # Global ISA client instance for server-side processing
  _isa_client = None

  def get_isa_client():
- """Get or create ISA client for local processing"""
+ """Get or create ISA client for service processing"""
  global _isa_client
  if _isa_client is None:
- _isa_client = ISAModelClient(mode="local") # Use local mode
+ _isa_client = ISAModelClient() # Use direct service mode
  return _isa_client

  @router.get("/")
@@ -61,11 +186,16 @@ async def unified_info():
  "version": "1.0.0"
  }

- @router.post("/invoke", response_model=UnifiedResponse)
- async def unified_invoke(request: UnifiedRequest) -> UnifiedResponse:
+ @router.post("/invoke")
+ @rate_limit_standard()
+ async def unified_invoke(request: Request, user: Dict = Depends(require_read_access)):
  """
  **Unified API endpoint for all AI services**

+ Supports both JSON and multipart/form-data requests:
+ - JSON: Standard API request with UnifiedRequest body
+ - Form: File upload with form parameters
+
  This single endpoint handles:
  - Vision: image analysis, OCR, UI detection
  - Text: chat, generation, translation
@@ -76,167 +206,196 @@ async def unified_invoke(request: UnifiedRequest) -> UnifiedResponse:
  **Uses ISAModelClient in local mode - all the complex logic is in client.py**
  """
  try:
- # Get ISA client instance (local mode)
+ # Get ISA client instance (service mode)
  client = get_isa_client()

- # Use client's local invoke method directly
- # This handles all the complexity: model selection, service routing, execution
- result = await client._invoke_local(
- input_data=request.input_data,
- task=request.task,
- service_type=request.service_type,
- model_hint=request.model_hint,
- provider_hint=request.provider_hint,
- **request.parameters
- )
-
- # Return the result in our API format
- return UnifiedResponse(
- success=result["success"],
- result=result.get("result"),
- error=result.get("error"),
- metadata=result["metadata"]
- )
-
- except Exception as e:
- logger.error(f"Unified invoke failed: {e}")
- return UnifiedResponse(
- success=False,
- error=str(e),
- metadata={
- "task": request.task,
- "service_type": request.service_type,
- "model_hint": request.model_hint,
- "provider_hint": request.provider_hint
- }
- )
-
- @router.post("/stream")
- async def unified_stream(request: UnifiedRequest):
- """
- **Unified streaming endpoint for text services**
-
- Returns Server-Sent Events (SSE) stream for real-time token generation.
- Only supports text service types.
- """
- try:
- # Validate streaming request
- if request.service_type != "text":
- raise HTTPException(status_code=400, detail="Streaming only supported for text services")
+ # Check content type to determine request format
+ content_type = request.headers.get("content-type", "")

- # Get ISA client instance (local mode)
- client = get_isa_client()
+ if content_type.startswith("multipart/form-data"):
+ # Handle form data with file upload
+ form = await request.form()
+
+ # Extract required fields
+ task = form.get("task")
+ service_type = form.get("service_type")
+ model = form.get("model")
+ provider = form.get("provider")
+ parameters = form.get("parameters")
+ file = form.get("file")
+
+ if not task or not service_type:
+ raise HTTPException(status_code=400, detail="task and service_type are required")
+
+ if file is None:
+ raise HTTPException(status_code=400, detail="file is required for multipart requests")
+
+ # Read file data
+ file_data = await file.read()
+
+ # Parse parameters if provided as JSON string
+ parsed_params = {}
+ if parameters:
+ try:
+ parsed_params = json.loads(parameters)
+ except json.JSONDecodeError:
+ parsed_params = {}
+
+ result = await client._invoke_service(
+ input_data=file_data,
+ task=task,
+ service_type=service_type,
+ model_hint=model,
+ provider_hint=provider,
+ filename=file.filename,
+ content_type=file.content_type,
+ file_size=len(file_data),
+ **parsed_params
+ )
+
+ # Return the result in our API format
+ return UnifiedResponse(
+ success=result["success"],
+ result=result.get("result"),
+ error=result.get("error"),
+ metadata={
+ **result["metadata"],
+ "filename": file.filename,
+ "content_type": file.content_type,
+ "file_size": len(file_data)
+ }
+ )

- async def generate_stream():
- """Generator for SSE streaming"""
+ else:
+ # Handle JSON request
  try:
- # Use client's streaming method
- stream_gen = await client.invoke(
- input_data=request.input_data,
- task=request.task,
- service_type=request.service_type,
- model_hint=request.model_hint,
- provider_hint=request.provider_hint,
- stream=True,
- **request.parameters
- )
+ json_body = await request.json()
+ unified_request = UnifiedRequest(**json_body)

- # Stream tokens as SSE format
- async for token in stream_gen:
- # SSE format: "data: {json}\n\n"
- token_data = {
- "token": token,
- "type": "token"
- }
- yield f"data: {json.dumps(token_data)}\n\n"
-
- # Send completion signal
- completion_data = {
- "type": "completion",
- "status": "finished"
- }
- yield f"data: {json.dumps(completion_data)}\n\n"
+ # Sanitize string inputs to prevent XSS and injection attacks
+ if isinstance(unified_request.input_data, str):
+ unified_request.input_data = sanitize_input(unified_request.input_data)

  except Exception as e:
- logger.error(f"Streaming error: {e}")
- error_data = {
- "type": "error",
- "error": str(e)
- }
- yield f"data: {json.dumps(error_data)}\n\n"
-
- # Return SSE stream response
- return StreamingResponse(
- generate_stream(),
- media_type="text/plain",
- headers={
- "Cache-Control": "no-cache",
- "Connection": "keep-alive",
- "Content-Type": "text/plain; charset=utf-8"
- }
- )
-
- except Exception as e:
- logger.error(f"Streaming setup failed: {e}")
- raise HTTPException(status_code=500, detail=str(e))
-
- @router.post("/invoke-file", response_model=UnifiedResponse)
- async def unified_invoke_file(
- task: str = Form(...),
- service_type: str = Form(...),
- model_hint: Optional[str] = Form(None),
- provider_hint: Optional[str] = Form(None),
- file: UploadFile = File(...)
- ) -> UnifiedResponse:
- """
- Unified file upload endpoint
-
- For tasks that require file input (images, audio, documents)
- """
- try:
- # Read file data
- file_data = await file.read()
-
- # Get ISA client instance (local mode)
- client = get_isa_client()
-
- # Use client's local invoke method with binary data
- result = await client._invoke_local(
- input_data=file_data, # Binary data
- task=task,
- service_type=service_type,
- model_hint=model_hint,
- provider_hint=provider_hint,
- filename=file.filename,
- content_type=file.content_type,
- file_size=len(file_data)
- )
-
- # Return the result in our API format
- return UnifiedResponse(
- success=result["success"],
- result=result.get("result"),
- error=result.get("error"),
- metadata={
- **result["metadata"],
- "filename": file.filename,
- "content_type": file.content_type,
- "file_size": len(file_data)
- }
- )
+ raise HTTPException(status_code=400, detail=f"Invalid JSON request: {e}")
+
+ # Prepare parameters, ensuring tools isn't duplicated
+ params = dict(unified_request.parameters) if unified_request.parameters else {}
+ if unified_request.tools:
+ params.pop("tools", None) # Remove tools from parameters if present
+ params["tools"] = unified_request.tools
+
+ # Check if this should be a streaming response
+ # Default to streaming for text+chat unless explicitly disabled
+ is_text_chat = (unified_request.service_type == "text" and unified_request.task == "chat")
+ stream_setting = unified_request.stream if unified_request.stream is not None else is_text_chat
+
+ should_stream = (
+ is_text_chat and
+ not unified_request.tools and # No tools
+ stream_setting # Stream enabled by default for text+chat or explicitly
+ )
+
+ if should_stream:
+ # Return streaming response for text chat
+ async def generate_stream():
+ try:
+ # Use streaming invoke but track metadata manually
+ collected_tokens = []
+ selected_model = None
+ service_info = None
+ start_time = time.time()
+
+ # Get model selection info first (lightweight operation)
+ try:
+ selected_model = await client._select_model(
+ input_data=unified_request.input_data,
+ task=unified_request.task,
+ service_type=unified_request.service_type,
+ model_hint=unified_request.model,
+ provider_hint=unified_request.provider
+ )
+ service_info = {
+ "model_used": selected_model["model_id"],
+ "provider": selected_model["provider"],
+ "task": unified_request.task,
+ "service_type": unified_request.service_type,
+ "selection_reason": selected_model.get("reason", "Default selection"),
+ "streaming": True
+ }
+ except Exception:
+ pass
+
+ # Stream the tokens and get metadata
+ processing_time = 0
+ async for item in client.invoke_stream(
+ input_data=unified_request.input_data,
+ task=unified_request.task,
+ service_type=unified_request.service_type,
+ model=unified_request.model,
+ provider=unified_request.provider,
+ return_metadata=True, # Request metadata with billing info
+ **params
+ ):
+ if isinstance(item, tuple) and item[0] == 'metadata':
+ # This is the final metadata with billing info
+ metadata = item[1]
+ processing_time = time.time() - start_time
+ metadata["processing_time"] = processing_time
+ yield f"data: {json.dumps({'metadata': metadata})}\n\n"
+ else:
+ # This is a token
+ collected_tokens.append(item)
+ yield f"data: {json.dumps({'token': item})}\n\n"
+
+ except Exception as e:
+ # Send error as final event
+ yield f"data: {json.dumps({'error': str(e)})}\n\n"
+ finally:
+ # Send end-of-stream marker
+ yield f"data: {json.dumps({'done': True})}\n\n"
+
+ return StreamingResponse(
+ generate_stream(),
+ media_type="text/event-stream",
+ headers={
+ "Cache-Control": "no-cache",
+ "Connection": "keep-alive",
+ "X-Accel-Buffering": "no" # Disable nginx buffering
+ }
+ )
+ else:
+ # Non-streaming response (original behavior)
+ result = await client._invoke_service(
+ input_data=unified_request.input_data,
+ task=unified_request.task,
+ service_type=unified_request.service_type,
+ model_hint=unified_request.model,
+ provider_hint=unified_request.provider,
+ **params
+ )
+
+ # Return the result in our API format
+ return UnifiedResponse(
+ success=result["success"],
+ result=result.get("result"),
+ error=result.get("error"),
+ metadata=result["metadata"]
+ )

+ except HTTPException:
+ raise
  except Exception as e:
- logger.error(f"File invoke failed: {e}")
+ logger.error(f"Unified invoke failed: {e}")
  return UnifiedResponse(
  success=False,
  error=str(e),
- metadata={
- "task": task,
- "service_type": service_type,
- "filename": file.filename if file else None
- }
+ metadata={}
  )

+
+
  @router.get("/models")
  async def get_available_models(service_type: Optional[str] = None):
  """Get available models (optional filter by service type)"""