isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228)
  1. isa_model/client.py +1166 -584
  2. isa_model/core/cache/redis_cache.py +410 -0
  3. isa_model/core/config/config_manager.py +282 -12
  4. isa_model/core/config.py +91 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/direct_db_client.py +114 -0
  7. isa_model/core/database/migration_manager.py +563 -0
  8. isa_model/core/database/migrations.py +297 -0
  9. isa_model/core/database/supabase_client.py +258 -0
  10. isa_model/core/dependencies.py +316 -0
  11. isa_model/core/discovery/__init__.py +19 -0
  12. isa_model/core/discovery/consul_discovery.py +190 -0
  13. isa_model/core/logging/__init__.py +54 -0
  14. isa_model/core/logging/influx_logger.py +523 -0
  15. isa_model/core/logging/loki_logger.py +160 -0
  16. isa_model/core/models/__init__.py +46 -0
  17. isa_model/core/models/config_models.py +625 -0
  18. isa_model/core/models/deployment_billing_tracker.py +430 -0
  19. isa_model/core/models/model_billing_tracker.py +60 -88
  20. isa_model/core/models/model_manager.py +66 -25
  21. isa_model/core/models/model_metadata.py +690 -0
  22. isa_model/core/models/model_repo.py +217 -55
  23. isa_model/core/models/model_statistics_tracker.py +234 -0
  24. isa_model/core/models/model_storage.py +0 -1
  25. isa_model/core/models/model_version_manager.py +959 -0
  26. isa_model/core/models/system_models.py +857 -0
  27. isa_model/core/pricing_manager.py +2 -249
  28. isa_model/core/repositories/__init__.py +9 -0
  29. isa_model/core/repositories/config_repository.py +912 -0
  30. isa_model/core/resilience/circuit_breaker.py +366 -0
  31. isa_model/core/security/secrets.py +358 -0
  32. isa_model/core/services/__init__.py +2 -4
  33. isa_model/core/services/intelligent_model_selector.py +479 -370
  34. isa_model/core/storage/hf_storage.py +2 -2
  35. isa_model/core/types.py +8 -0
  36. isa_model/deployment/__init__.py +5 -48
  37. isa_model/deployment/core/__init__.py +2 -31
  38. isa_model/deployment/core/deployment_manager.py +1278 -368
  39. isa_model/deployment/local/__init__.py +31 -0
  40. isa_model/deployment/local/config.py +248 -0
  41. isa_model/deployment/local/gpu_gateway.py +607 -0
  42. isa_model/deployment/local/health_checker.py +428 -0
  43. isa_model/deployment/local/provider.py +586 -0
  44. isa_model/deployment/local/tensorrt_service.py +621 -0
  45. isa_model/deployment/local/transformers_service.py +644 -0
  46. isa_model/deployment/local/vllm_service.py +527 -0
  47. isa_model/deployment/modal/__init__.py +8 -0
  48. isa_model/deployment/modal/config.py +136 -0
  49. isa_model/deployment/modal/deployer.py +894 -0
  50. isa_model/deployment/modal/services/__init__.py +3 -0
  51. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  52. isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
  53. isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
  54. isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
  55. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  56. isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
  57. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  58. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  59. isa_model/deployment/modal/services/video/__init__.py +1 -0
  60. isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
  61. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  62. isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
  63. isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
  64. isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
  65. isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
  66. isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
  67. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  68. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  69. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  70. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  71. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  72. isa_model/deployment/storage/__init__.py +5 -0
  73. isa_model/deployment/storage/deployment_repository.py +824 -0
  74. isa_model/deployment/triton/__init__.py +10 -0
  75. isa_model/deployment/triton/config.py +196 -0
  76. isa_model/deployment/triton/configs/__init__.py +1 -0
  77. isa_model/deployment/triton/provider.py +512 -0
  78. isa_model/deployment/triton/scripts/__init__.py +1 -0
  79. isa_model/deployment/triton/templates/__init__.py +1 -0
  80. isa_model/inference/__init__.py +47 -1
  81. isa_model/inference/ai_factory.py +179 -16
  82. isa_model/inference/legacy_services/__init__.py +21 -0
  83. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  84. isa_model/inference/legacy_services/model_service.py +573 -0
  85. isa_model/inference/legacy_services/model_serving.py +717 -0
  86. isa_model/inference/legacy_services/model_training.py +561 -0
  87. isa_model/inference/models/__init__.py +21 -0
  88. isa_model/inference/models/inference_config.py +551 -0
  89. isa_model/inference/models/inference_record.py +675 -0
  90. isa_model/inference/models/performance_models.py +714 -0
  91. isa_model/inference/repositories/__init__.py +9 -0
  92. isa_model/inference/repositories/inference_repository.py +828 -0
  93. isa_model/inference/services/audio/__init__.py +21 -0
  94. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  95. isa_model/inference/services/audio/base_stt_service.py +184 -11
  96. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  97. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  98. isa_model/inference/services/audio/openai_stt_service.py +53 -11
  99. isa_model/inference/services/base_service.py +17 -1
  100. isa_model/inference/services/custom_model_manager.py +277 -0
  101. isa_model/inference/services/embedding/__init__.py +13 -0
  102. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  103. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  104. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  105. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  106. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  107. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  108. isa_model/inference/services/img/__init__.py +2 -2
  109. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  110. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  111. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  112. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  113. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  114. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  115. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  116. isa_model/inference/services/llm/__init__.py +10 -2
  117. isa_model/inference/services/llm/base_llm_service.py +361 -26
  118. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  119. isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
  120. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  121. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  122. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  123. isa_model/inference/services/llm/local_llm_service.py +747 -0
  124. isa_model/inference/services/llm/ollama_llm_service.py +11 -3
  125. isa_model/inference/services/llm/openai_llm_service.py +670 -56
  126. isa_model/inference/services/llm/yyds_llm_service.py +10 -3
  127. isa_model/inference/services/vision/__init__.py +27 -6
  128. isa_model/inference/services/vision/base_vision_service.py +118 -185
  129. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  130. isa_model/inference/services/vision/helpers/image_utils.py +19 -10
  131. isa_model/inference/services/vision/isa_vision_service.py +634 -0
  132. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  133. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  134. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  135. isa_model/serving/api/cache_manager.py +245 -0
  136. isa_model/serving/api/dependencies/__init__.py +1 -0
  137. isa_model/serving/api/dependencies/auth.py +194 -0
  138. isa_model/serving/api/dependencies/database.py +139 -0
  139. isa_model/serving/api/error_handlers.py +284 -0
  140. isa_model/serving/api/fastapi_server.py +240 -18
  141. isa_model/serving/api/middleware/auth.py +317 -0
  142. isa_model/serving/api/middleware/security.py +268 -0
  143. isa_model/serving/api/middleware/tenant_context.py +414 -0
  144. isa_model/serving/api/routes/analytics.py +489 -0
  145. isa_model/serving/api/routes/config.py +645 -0
  146. isa_model/serving/api/routes/deployment_billing.py +315 -0
  147. isa_model/serving/api/routes/deployments.py +475 -0
  148. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  149. isa_model/serving/api/routes/health.py +32 -12
  150. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  151. isa_model/serving/api/routes/local_deployments.py +448 -0
  152. isa_model/serving/api/routes/logs.py +430 -0
  153. isa_model/serving/api/routes/settings.py +582 -0
  154. isa_model/serving/api/routes/tenants.py +575 -0
  155. isa_model/serving/api/routes/unified.py +992 -171
  156. isa_model/serving/api/routes/webhooks.py +479 -0
  157. isa_model/serving/api/startup.py +318 -0
  158. isa_model/serving/modal_proxy_server.py +249 -0
  159. isa_model/utils/gpu_utils.py +311 -0
  160. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
  161. isa_model-0.4.3.dist-info/RECORD +193 -0
  162. isa_model/deployment/cloud/__init__.py +0 -9
  163. isa_model/deployment/cloud/modal/__init__.py +0 -10
  164. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  165. isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
  166. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
  167. isa_model/deployment/cloud/modal/register_models.py +0 -321
  168. isa_model/deployment/core/deployment_config.py +0 -356
  169. isa_model/deployment/core/isa_deployment_service.py +0 -401
  170. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  171. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  172. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  173. isa_model/deployment/runtime/deployed_service.py +0 -338
  174. isa_model/deployment/services/__init__.py +0 -9
  175. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  176. isa_model/deployment/services/model_service.py +0 -332
  177. isa_model/deployment/services/service_monitor.py +0 -356
  178. isa_model/deployment/services/service_registry.py +0 -527
  179. isa_model/eval/__init__.py +0 -92
  180. isa_model/eval/benchmarks.py +0 -469
  181. isa_model/eval/config/__init__.py +0 -10
  182. isa_model/eval/config/evaluation_config.py +0 -108
  183. isa_model/eval/evaluators/__init__.py +0 -18
  184. isa_model/eval/evaluators/base_evaluator.py +0 -503
  185. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  186. isa_model/eval/factory.py +0 -531
  187. isa_model/eval/infrastructure/__init__.py +0 -24
  188. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  189. isa_model/eval/metrics.py +0 -798
  190. isa_model/inference/adapter/unified_api.py +0 -248
  191. isa_model/inference/services/helpers/stacked_config.py +0 -148
  192. isa_model/inference/services/img/flux_professional_service.py +0 -603
  193. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  194. isa_model/inference/services/others/table_transformer_service.py +0 -61
  195. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  196. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  197. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  198. isa_model/scripts/inference_tracker.py +0 -283
  199. isa_model/scripts/mlflow_manager.py +0 -379
  200. isa_model/scripts/model_registry.py +0 -465
  201. isa_model/scripts/register_models.py +0 -370
  202. isa_model/scripts/register_models_with_embeddings.py +0 -510
  203. isa_model/scripts/start_mlflow.py +0 -95
  204. isa_model/scripts/training_tracker.py +0 -257
  205. isa_model/training/__init__.py +0 -74
  206. isa_model/training/annotation/annotation_schema.py +0 -47
  207. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  208. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  209. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  210. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  211. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  212. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  213. isa_model/training/annotation/views/annotation_controller.py +0 -158
  214. isa_model/training/cloud/__init__.py +0 -22
  215. isa_model/training/cloud/job_orchestrator.py +0 -402
  216. isa_model/training/cloud/runpod_trainer.py +0 -454
  217. isa_model/training/cloud/storage_manager.py +0 -482
  218. isa_model/training/core/__init__.py +0 -23
  219. isa_model/training/core/config.py +0 -181
  220. isa_model/training/core/dataset.py +0 -222
  221. isa_model/training/core/trainer.py +0 -720
  222. isa_model/training/core/utils.py +0 -213
  223. isa_model/training/factory.py +0 -424
  224. isa_model-0.3.91.dist-info/RECORD +0 -138
  225. /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
  226. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  227. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  228. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -9,13 +9,16 @@ This is the main API that handles all types of AI requests:
 - Embedding tasks
 """
 
-from fastapi import APIRouter, HTTPException, UploadFile, File, Form
+from fastapi import APIRouter, HTTPException, UploadFile, File, Form, Request, Depends, Query
 from fastapi.responses import StreamingResponse
 from pydantic import BaseModel, Field
-from typing import Optional, Dict, Any, Union, List
+from typing import Optional, Dict, Any, Union, List, AsyncGenerator
 import logging
+from ..middleware.auth import optional_auth, require_read_access, require_write_access
+from ..middleware.security import rate_limit_standard, rate_limit_heavy, sanitize_input
 import asyncio
 import json
+import time
 from pathlib import Path
 
 from isa_model.client import ISAModelClient
@@ -24,30 +27,176 @@ logger = logging.getLogger(__name__)
24
27
  router = APIRouter()
25
28
 
26
29
  class UnifiedRequest(BaseModel):
27
- """Unified request model for all AI services"""
28
- input_data: Union[str, Dict[str, Any]] = Field(..., description="Input data (text, image URL, etc.)")
29
- task: str = Field(..., description="Task to perform (chat, analyze_image, generate_speech, etc.)")
30
- service_type: str = Field(..., description="Service type (text, vision, audio, image, embedding)")
31
- model_hint: Optional[str] = Field(None, description="Optional model preference")
32
- provider_hint: Optional[str] = Field(None, description="Optional provider preference")
33
- stream: Optional[bool] = Field(False, description="Enable streaming for text services")
34
- parameters: Optional[Dict[str, Any]] = Field(default_factory=dict, description="Additional task parameters")
30
+ """
31
+ **统一请求模型 - 支持所有AI服务类型**
32
+
33
+ 这个模型为所有AI服务(文本、视觉、音频、图像生成、嵌入)提供统一的请求接口。
34
+
35
+ **支持的服务类型**:
36
+ - `text`: 文本服务 (聊天、生成、翻译)
37
+ - `vision`: 视觉服务 (图像分析、OCR、UI检测)
38
+ - `audio`: 音频服务 (TTS、STT、转录)
39
+ - `image`: 图像生成服务 (文本生成图像、图像转换)
40
+ - `embedding`: 嵌入服务 (文本向量化、相似度计算)
41
+
42
+ **请求示例**:
43
+ ```json
44
+ {
45
+ "input_data": "你好,世界!",
46
+ "task": "chat",
47
+ "service_type": "text",
48
+ "model": "gpt-4o-mini",
49
+ "provider": "openai"
50
+ }
51
+ ```
52
+ """
53
+ input_data: Union[str, Dict[str, Any]] = Field(
54
+ ...,
55
+ description="输入数据,支持多种格式:文本字符串、LangChain消息列表、图像URL/路径、音频文件路径等。根据service_type确定具体格式。",
56
+ examples=["你好,世界!", "https://example.com/image.jpg", "/path/to/audio.mp3"]
57
+ )
58
+ task: str = Field(
59
+ ...,
60
+ description="要执行的任务类型。常见任务:chat(聊天)、analyze_image(图像分析)、generate_speech(语音生成)、create_embedding(创建嵌入)等。",
61
+ examples=["chat", "analyze_image", "generate_speech", "transcribe", "generate_image", "create_embedding"]
62
+ )
63
+ service_type: str = Field(
64
+ ...,
65
+ description="服务类型,决定使用哪种AI服务。可选值:text、vision、audio、image、embedding。",
66
+ examples=["text", "vision", "audio", "image", "embedding"]
67
+ )
68
+ model: Optional[str] = Field(
69
+ None,
70
+ description="可选的模型指定。如果指定,系统将尝试使用该模型。常见模型:gpt-4o-mini、gpt-4o、whisper-1、flux-schnell等。",
71
+ examples=["gpt-4o-mini", "gpt-4o", "whisper-1", "tts-1", "flux-schnell", "text-embedding-3-small"]
72
+ )
73
+ provider: Optional[str] = Field(
74
+ None,
75
+ description="可选的服务提供商指定。如果指定,系统将尝试使用该提供商。常见提供商:openai、replicate、anthropic等。",
76
+ examples=["openai", "replicate", "anthropic"]
77
+ )
78
+ stream: Optional[bool] = Field(
79
+ None,
80
+ description="是否启用流式响应。仅适用于文本服务。text+chat任务默认启用流式。当使用工具调用时会自动禁用流式响应以确保完整性。"
81
+ )
82
+ tools: Optional[List[Dict[str, Any]]] = Field(
83
+ None,
84
+ description="可选的工具列表,用于函数调用功能。仅适用于文本服务。工具格式遵循LangChain工具规范。使用工具时会自动禁用流式响应。",
85
+ examples=[[
86
+ {
87
+ "name": "get_weather",
88
+ "description": "获取天气信息",
89
+ "parameters": {
90
+ "type": "object",
91
+ "properties": {
92
+ "location": {"type": "string", "description": "城市名称"}
93
+ },
94
+ "required": ["location"]
95
+ }
96
+ }
97
+ ]]
98
+ )
99
+ output_format: Optional[str] = Field(
100
+ None,
101
+ description="输出格式控制。支持的格式:json(JSON结构化输出)、markdown(Markdown格式)、code(代码块提取)、structured(智能结构化解析)。主要用于文本服务的响应格式化。",
102
+ examples=["json", "markdown", "code", "structured"]
103
+ )
104
+ json_schema: Optional[Dict[str, Any]] = Field(
105
+ None,
106
+ description="JSON模式验证。当output_format='json'时使用,用于验证和约束JSON输出格式。遵循JSON Schema规范。",
107
+ examples=[{
108
+ "type": "object",
109
+ "properties": {
110
+ "sentiment": {"type": "string", "enum": ["positive", "negative", "neutral"]},
111
+ "confidence": {"type": "number", "minimum": 0, "maximum": 1}
112
+ },
113
+ "required": ["sentiment", "confidence"]
114
+ }]
115
+ )
116
+ repair_attempts: Optional[int] = Field(
117
+ 3,
118
+ ge=0,
119
+ le=10,
120
+ description="JSON修复尝试次数。当解析JSON失败时,系统会尝试修复常见的JSON格式错误。0表示不进行修复尝试。",
121
+ examples=[3, 0, 5]
122
+ )
123
+ parameters: Optional[Dict[str, Any]] = Field(
124
+ default_factory=dict,
125
+ description="额外的任务参数,用于精细控制服务行为。参数内容根据具体服务类型而定,如temperature、max_tokens、voice等。",
126
+ examples=[{"temperature": 0.7, "max_tokens": 1000}, {"voice": "alloy", "speed": 1.0}, {"width": 1024, "height": 1024}]
127
+ )
35
128
 
36
129
  class UnifiedResponse(BaseModel):
37
- """Unified response model for all AI services"""
38
- success: bool
39
- result: Optional[Any] = None
40
- error: Optional[str] = None
41
- metadata: Dict[str, Any]
130
+ """
131
+ **统一响应模型 - 所有AI服务的标准响应格式**
132
+
133
+ 提供一致的成功/失败状态、结果数据和元数据信息。
134
+
135
+ **成功响应示例**:
136
+ ```json
137
+ {
138
+ "success": true,
139
+ "result": {
140
+ "content": "你好!我是AI助手。",
141
+ "tool_calls": [],
142
+ "response_metadata": {
143
+ "token_usage": {
144
+ "prompt_tokens": 15,
145
+ "completion_tokens": 10,
146
+ "total_tokens": 25
147
+ }
148
+ }
149
+ },
150
+ "error": null,
151
+ "metadata": {
152
+ "model_used": "gpt-4o-mini",
153
+ "provider": "openai",
154
+ "task": "chat",
155
+ "service_type": "text",
156
+ "processing_time": 1.23
157
+ }
158
+ }
159
+ ```
160
+
161
+ **错误响应示例**:
162
+ ```json
163
+ {
164
+ "success": false,
165
+ "result": null,
166
+ "error": "Model 'invalid-model' not found",
167
+ "metadata": {
168
+ "error_code": "MODEL_NOT_FOUND",
169
+ "task": "chat",
170
+ "service_type": "text"
171
+ }
172
+ }
173
+ ```
174
+ """
175
+ success: bool = Field(
176
+ ...,
177
+ description="请求是否成功执行。true表示成功,false表示失败。"
178
+ )
179
+ result: Optional[Any] = Field(
180
+ None,
181
+ description="服务执行结果。成功时包含实际数据,失败时为null。数据类型根据服务类型而定:文本服务返回AIMessage对象,视觉服务返回分析文本,音频服务返回文件路径或文本,图像服务返回图像URL,嵌入服务返回向量数组。"
182
+ )
183
+ error: Optional[str] = Field(
184
+ None,
185
+ description="错误信息描述。成功时为null,失败时包含详细的错误说明。"
186
+ )
187
+ metadata: Dict[str, Any] = Field(
188
+ ...,
189
+ description="响应元数据,包含执行信息如使用的模型、提供商、处理时间、token使用量等。元数据内容根据服务类型和执行情况而定。"
190
+ )
42
191
 
43
192
  # Global ISA client instance for server-side processing
44
193
  _isa_client = None
45
194
 
46
195
  def get_isa_client():
47
- """Get or create ISA client for local processing"""
196
+ """Get or create ISA client for service processing"""
48
197
  global _isa_client
49
198
  if _isa_client is None:
50
- _isa_client = ISAModelClient(mode="local") # Use local mode
199
+ _isa_client = ISAModelClient() # Use direct service mode
51
200
  return _isa_client
52
201
 
53
202
  @router.get("/")
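For orientation, here is a minimal client-side sketch (not part of the package) of the 0.4.x request shape defined above: `model_hint`/`provider_hint` became `model`/`provider`, and `output_format`/`json_schema` drive structured output. The base URL and mount prefix are assumptions about a local deployment.

```python
# Hypothetical caller of the unified endpoint; URL and prefix are assumed.
import requests

payload = {
    "input_data": "Classify the sentiment of: 'I love this product!'",
    "task": "chat",
    "service_type": "text",
    "model": "gpt-4o-mini",    # optional override (was model_hint in 0.3.91)
    "provider": "openai",      # optional override (was provider_hint in 0.3.91)
    "stream": False,           # text+chat defaults to streaming; disable for plain JSON
    "output_format": "json",
    "json_schema": {
        "type": "object",
        "properties": {
            "sentiment": {"type": "string", "enum": ["positive", "negative", "neutral"]},
            "confidence": {"type": "number", "minimum": 0, "maximum": 1},
        },
        "required": ["sentiment", "confidence"],
    },
}

resp = requests.post("http://localhost:8000/invoke", json=payload, timeout=60)
body = resp.json()
print(body["success"], body.get("result"), body["metadata"].get("model_used"))
```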
@@ -61,11 +210,16 @@ async def unified_info():
         "version": "1.0.0"
     }
 
-@router.post("/invoke", response_model=UnifiedResponse)
-async def unified_invoke(request: UnifiedRequest) -> UnifiedResponse:
+@router.post("/invoke")
+@rate_limit_standard()
+async def unified_invoke(request: Request, user: Dict = Depends(require_read_access)):
     """
     **Unified API endpoint for all AI services**
 
+    Supports both JSON and multipart/form-data requests:
+    - JSON: Standard API request with UnifiedRequest body
+    - Form: File upload with form parameters
+
     This single endpoint handles:
     - Vision: image analysis, OCR, UI detection
     - Text: chat, generation, translation
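The multipart branch mentioned in this docstring replaces the removed /invoke-file endpoint; the handler in the next hunk reads the form fields task, service_type, optional model/provider, a `parameters` JSON string, and the uploaded `file`. A minimal sketch (not part of the package; the URL is an assumed local deployment):

```python
# Hypothetical multipart upload to /invoke; mirrors the form fields the
# handler extracts below. The parameters field is a JSON string that the
# server parses with json.loads.
import requests

with open("photo.jpg", "rb") as f:
    resp = requests.post(
        "http://localhost:8000/invoke",
        data={
            "task": "analyze_image",
            "service_type": "vision",
            "parameters": '{"max_tokens": 300}',
        },
        files={"file": ("photo.jpg", f, "image/jpeg")},
    )
print(resp.json())
```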
@@ -76,185 +230,314 @@ async def unified_invoke(request: UnifiedRequest) -> UnifiedResponse:
     **Uses ISAModelClient in local mode - all the complex logic is in client.py**
     """
     try:
-        # Get ISA client instance (local mode)
+        # Get ISA client instance (service mode)
         client = get_isa_client()
 
-        # Use client's local invoke method directly
-        # This handles all the complexity: model selection, service routing, execution
-        result = await client._invoke_local(
-            input_data=request.input_data,
-            task=request.task,
-            service_type=request.service_type,
-            model_hint=request.model_hint,
-            provider_hint=request.provider_hint,
-            **request.parameters
-        )
-
-        # Return the result in our API format
-        return UnifiedResponse(
-            success=result["success"],
-            result=result.get("result"),
-            error=result.get("error"),
-            metadata=result["metadata"]
-        )
+        # Check content type to determine request format
+        content_type = request.headers.get("content-type", "")
 
-    except Exception as e:
-        logger.error(f"Unified invoke failed: {e}")
-        return UnifiedResponse(
-            success=False,
-            error=str(e),
-            metadata={
-                "task": request.task,
-                "service_type": request.service_type,
-                "model_hint": request.model_hint,
-                "provider_hint": request.provider_hint
-            }
-        )
-
-@router.post("/stream")
-async def unified_stream(request: UnifiedRequest):
-    """
-    **Unified streaming endpoint for text services**
-
-    Returns Server-Sent Events (SSE) stream for real-time token generation.
-    Only supports text service types.
-    """
-    try:
-        # Validate streaming request
-        if request.service_type != "text":
-            raise HTTPException(status_code=400, detail="Streaming only supported for text services")
-
-        # Get ISA client instance (local mode)
-        client = get_isa_client()
+        if content_type.startswith("multipart/form-data"):
+            # Handle form data with file upload
+            form = await request.form()
+
+            # Extract required fields
+            task = form.get("task")
+            service_type = form.get("service_type")
+            model = form.get("model")
+            provider = form.get("provider")
+            parameters = form.get("parameters")
+            file = form.get("file")
+
+            if not task or not service_type:
+                raise HTTPException(status_code=400, detail="task and service_type are required")
+
+            if file is None:
+                raise HTTPException(status_code=400, detail="file is required for multipart requests")
+
+            # Read file data
+            file_data = await file.read()
+
+            # Parse parameters if provided as JSON string
+            parsed_params = {}
+            if parameters:
+                try:
+                    parsed_params = json.loads(parameters)
+                except json.JSONDecodeError:
+                    parsed_params = {}
+
+            result = await client._invoke_service(
+                input_data=file_data,
+                task=task,
+                service_type=service_type,
+                model_hint=model,
+                provider_hint=provider,
+                filename=file.filename,
+                content_type=file.content_type,
+                file_size=len(file_data),
+                **parsed_params
+            )
+
+            # Return the result in our API format
+            return UnifiedResponse(
+                success=result["success"],
+                result=result.get("result"),
+                error=result.get("error"),
+                metadata={
+                    **result["metadata"],
+                    "filename": file.filename,
+                    "content_type": file.content_type,
+                    "file_size": len(file_data)
+                }
+            )
 
-        async def generate_stream():
-            """Generator for SSE streaming"""
+        else:
+            # Handle JSON request
             try:
-                # Use client's streaming method
-                stream_gen = await client.invoke(
-                    input_data=request.input_data,
-                    task=request.task,
-                    service_type=request.service_type,
-                    model_hint=request.model_hint,
-                    provider_hint=request.provider_hint,
-                    stream=True,
-                    **request.parameters
-                )
+                json_body = await request.json()
+                unified_request = UnifiedRequest(**json_body)
 
-                # Stream tokens as SSE format
-                async for token in stream_gen:
-                    # SSE format: "data: {json}\n\n"
-                    token_data = {
-                        "token": token,
-                        "type": "token"
-                    }
-                    yield f"data: {json.dumps(token_data)}\n\n"
-
-                # Send completion signal
-                completion_data = {
-                    "type": "completion",
-                    "status": "finished"
-                }
-                yield f"data: {json.dumps(completion_data)}\n\n"
+                # Sanitize string inputs to prevent XSS and injection attacks
+                if isinstance(unified_request.input_data, str):
+                    unified_request.input_data = sanitize_input(unified_request.input_data)
 
             except Exception as e:
-                logger.error(f"Streaming error: {e}")
-                error_data = {
-                    "type": "error",
-                    "error": str(e)
-                }
-                yield f"data: {json.dumps(error_data)}\n\n"
-
-        # Return SSE stream response
-        return StreamingResponse(
-            generate_stream(),
-            media_type="text/plain",
-            headers={
-                "Cache-Control": "no-cache",
-                "Connection": "keep-alive",
-                "Content-Type": "text/plain; charset=utf-8"
-            }
-        )
+                from ..error_handlers import handle_validation_error, create_http_exception, ErrorCode
+                if hasattr(e, 'errors'):  # Pydantic validation error
+                    error_response = handle_validation_error(e)
+                    raise HTTPException(status_code=400, detail=error_response)
+                else:
+                    raise create_http_exception(
+                        f"Invalid JSON in request: {str(e)}",
+                        400,
+                        ErrorCode.INVALID_INPUT,
+                        {"suggestion": "Please check the JSON format and required fields"}
+                    )
+
+            # Prepare parameters, ensuring tools isn't duplicated
+            params = dict(unified_request.parameters) if unified_request.parameters else {}
+            if unified_request.tools:
+                params.pop("tools", None)  # Remove tools from parameters if present
+                params["tools"] = unified_request.tools
+
+            # Add JSON output formatting parameters
+            if unified_request.output_format:
+                params["output_format"] = unified_request.output_format
+            if unified_request.json_schema:
+                params["json_schema"] = unified_request.json_schema
+            if unified_request.repair_attempts is not None:
+                params["repair_attempts"] = unified_request.repair_attempts
+
+            # Check if this should be a streaming response
+            # Default to streaming for text+chat unless explicitly disabled
+            is_text_chat = (unified_request.service_type == "text" and unified_request.task == "chat")
+            stream_setting = unified_request.stream if unified_request.stream is not None else is_text_chat
+
+            should_stream = (
+                is_text_chat and
+                not unified_request.tools and  # No tools
+                stream_setting  # Stream enabled by default for text+chat or explicitly
+            )
+
+            if should_stream:
+                # Return streaming response for text chat
+                async def generate_stream():
+                    try:
+                        # Use streaming invoke but track metadata manually
+                        collected_tokens = []
+                        selected_model = None
+                        service_info = None
+                        start_time = time.time()
+
+                        # Get model selection info first (lightweight operation)
+                        try:
+                            selected_model = await client._select_model(
+                                input_data=unified_request.input_data,
+                                task=unified_request.task,
+                                service_type=unified_request.service_type,
+                                model_hint=unified_request.model,
+                                provider_hint=unified_request.provider
+                            )
+                            service_info = {
+                                "model_used": selected_model["model_id"],
+                                "provider": selected_model["provider"],
+                                "task": unified_request.task,
+                                "service_type": unified_request.service_type,
+                                "selection_reason": selected_model.get("reason", "Default selection"),
+                                "streaming": True
+                            }
+                        except Exception:
+                            pass
+
+                        # Stream the tokens and get metadata
+                        processing_time = 0
+                        async for item in client.invoke_stream(
+                            input_data=unified_request.input_data,
+                            task=unified_request.task,
+                            service_type=unified_request.service_type,
+                            model=unified_request.model,
+                            provider=unified_request.provider,
+                            return_metadata=True,  # Request metadata with billing info
+                            **params
+                        ):
+                            if isinstance(item, tuple) and item[0] == 'metadata':
+                                # This is the final metadata with billing info
+                                metadata = item[1]
+                                processing_time = time.time() - start_time
+                                metadata["processing_time"] = processing_time
+                                yield f"data: {json.dumps({'metadata': metadata})}\n\n"
+                            else:
+                                # This is a token
+                                collected_tokens.append(item)
+                                yield f"data: {json.dumps({'token': item})}\n\n"
+
+                    except Exception as e:
+                        from ..error_handlers import create_error_response, ErrorCode
+                        # Create detailed error response for streaming
+                        error_response = create_error_response(
+                            error=e,
+                            error_code=ErrorCode.INFERENCE_FAILED,
+                            details={
+                                "service_type": unified_request.service_type,
+                                "model": unified_request.model,
+                                "provider": unified_request.provider,
+                                "streaming": True
+                            }
+                        )
+                        # Send structured error as final event
+                        yield f"data: {json.dumps({'error': error_response})}\n\n"
+                    finally:
+                        # Send end-of-stream marker
+                        yield f"data: {json.dumps({'done': True})}\n\n"
+
+                return StreamingResponse(
+                    generate_stream(),
+                    media_type="text/event-stream",
+                    headers={
+                        "Cache-Control": "no-cache",
+                        "Connection": "keep-alive",
+                        "X-Accel-Buffering": "no"  # Disable nginx buffering
+                    }
+                )
+            else:
+                # Non-streaming response (original behavior)
+                result = await client._invoke_service(
+                    input_data=unified_request.input_data,
+                    task=unified_request.task,
+                    service_type=unified_request.service_type,
+                    model_hint=unified_request.model,
+                    provider_hint=unified_request.provider,
+                    **params
+                )
+
+                # Return the result in our API format
+                return UnifiedResponse(
+                    success=result["success"],
+                    result=result.get("result"),
+                    error=result.get("error"),
+                    metadata=result["metadata"]
+                )
 
+    except HTTPException:
+        raise
     except Exception as e:
-        logger.error(f"Streaming setup failed: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-
-@router.post("/invoke-file", response_model=UnifiedResponse)
-async def unified_invoke_file(
-    task: str = Form(...),
-    service_type: str = Form(...),
-    model_hint: Optional[str] = Form(None),
-    provider_hint: Optional[str] = Form(None),
-    file: UploadFile = File(...)
-) -> UnifiedResponse:
-    """
-    Unified file upload endpoint
-
-    For tasks that require file input (images, audio, documents)
-    """
-    try:
-        # Read file data
-        file_data = await file.read()
-
-        # Get ISA client instance (local mode)
-        client = get_isa_client()
-
-        # Use client's local invoke method with binary data
-        result = await client._invoke_local(
-            input_data=file_data,  # Binary data
-            task=task,
-            service_type=service_type,
-            model_hint=model_hint,
-            provider_hint=provider_hint,
-            filename=file.filename,
-            content_type=file.content_type,
-            file_size=len(file_data)
-        )
+        from ..error_handlers import create_error_response, ErrorCode
+        logger.error(f"Unified invoke failed: {e}")
 
-        # Return the result in our API format
-        return UnifiedResponse(
-            success=result["success"],
-            result=result.get("result"),
-            error=result.get("error"),
-            metadata={
-                **result["metadata"],
-                "filename": file.filename,
-                "content_type": file.content_type,
-                "file_size": len(file_data)
+        # Create detailed error response
+        error_response = create_error_response(
+            error=e,
+            status_code=500,
+            error_code=ErrorCode.INFERENCE_FAILED,
+            details={
+                "service_type": getattr(unified_request, 'service_type', 'unknown'),
+                "model": getattr(unified_request, 'model', 'unknown'),
+                "provider": getattr(unified_request, 'provider', 'unknown'),
+                "task": getattr(unified_request, 'task', 'unknown')
             }
         )
 
-    except Exception as e:
-        logger.error(f"File invoke failed: {e}")
         return UnifiedResponse(
             success=False,
-            error=str(e),
+            error=error_response.get("error"),
             metadata={
-                "task": task,
-                "service_type": service_type,
-                "filename": file.filename if file else None
+                "error_code": error_response.get("error_code"),
+                "user_message": error_response.get("user_message"),
+                "details": error_response.get("details", {})
             }
         )
 
+
+
 @router.get("/models")
 async def get_available_models(service_type: Optional[str] = None):
     """Get available models (optional filter by service type)"""
     try:
-        client = get_isa_client()
-        return await client.get_available_models(service_type)
+        from ..cache_manager import cached, model_list_cache_key
+
+        @cached(ttl=600.0, cache_key_func=lambda st=service_type: model_list_cache_key(st))  # 10 minutes cache
+        async def _get_models(service_type_param):
+            client = get_isa_client()
+            return await client.get_available_models(service_type_param)
+
+        models_list = await _get_models(service_type)
+
+        # Ensure we return the expected format
+        if isinstance(models_list, list):
+            return {
+                "success": True,
+                "models": models_list,
+                "total_count": len(models_list),
+                "service_type_filter": service_type
+            }
+        elif isinstance(models_list, dict) and "models" in models_list:
+            # Already in correct format
+            return models_list
+        else:
+            # Unknown format, convert to expected format
+            return {
+                "success": True,
+                "models": models_list if isinstance(models_list, list) else [],
+                "total_count": len(models_list) if isinstance(models_list, list) else 0,
+                "service_type_filter": service_type
+            }
     except Exception as e:
         logger.error(f"Failed to get available models: {e}")
         # Fallback static model list
+        # Load custom models
+        custom_models = []
+        try:
+            from isa_model.inference.services.custom_model_manager import get_custom_model_manager
+            custom_model_manager = get_custom_model_manager()
+            custom_models = custom_model_manager.get_models_for_api()
+            logger.debug(f"Loaded {len(custom_models)} custom models")
+        except Exception as e:
+            logger.warning(f"Failed to load custom models: {e}")
+
+        # Base fallback models
+        base_models = [
+            {"service_type": "vision", "provider": "openai", "model_id": "gpt-4o-mini"},
+            {"service_type": "text", "provider": "openai", "model_id": "gpt-4o-mini"},
+            {"service_type": "audio", "provider": "openai", "model_id": "whisper-1"},
+            {"service_type": "audio", "provider": "openai", "model_id": "tts-1"},
+            {"service_type": "embedding", "provider": "openai", "model_id": "text-embedding-3-small"},
+            {"service_type": "image", "provider": "replicate", "model_id": "black-forest-labs/flux-schnell"}
+        ]
+
+        # Combine base models with custom models
+        fallback_models = base_models + custom_models
+
+        # Filter by service_type if provided
+        if service_type:
+            fallback_models = [m for m in fallback_models if m["service_type"] == service_type]
+
         return {
-            "models": [
-                {"service_type": "vision", "provider": "openai", "model_id": "gpt-4.1-mini"},
-                {"service_type": "text", "provider": "openai", "model_id": "gpt-4.1-mini"},
-                {"service_type": "audio", "provider": "openai", "model_id": "whisper-1"},
-                {"service_type": "audio", "provider": "openai", "model_id": "tts-1"},
-                {"service_type": "embedding", "provider": "openai", "model_id": "text-embedding-3-small"},
-                {"service_type": "image", "provider": "replicate", "model_id": "black-forest-labs/flux-schnell"}
-            ]
+            "success": False,
+            "error": f"Failed to get models: {str(e)}",
+            "models": fallback_models,
+            "total_count": len(fallback_models),
+            "service_type_filter": service_type,
+            "fallback": True
         }
 
 @router.get("/health")
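The streaming branch above emits Server-Sent Events, one `data: <json>` line per event, carrying either a token, the final metadata (with billing info), a structured error, or a `done` marker. A minimal consumer sketch (not part of the package; the URL is an assumed local deployment):

```python
# Hypothetical SSE consumer for the /invoke streaming path; event shapes
# ({"token": ...}, {"metadata": ...}, {"error": ...}, {"done": true}) match
# the generator above.
import json
import requests

payload = {"input_data": "Tell me a joke", "task": "chat", "service_type": "text"}

with requests.post("http://localhost:8000/invoke", json=payload,
                   stream=True, timeout=60) as resp:
    for raw in resp.iter_lines():
        if not raw or not raw.startswith(b"data: "):
            continue  # skip blank separators between events
        event = json.loads(raw[len(b"data: "):])
        if "token" in event:
            print(event["token"], end="", flush=True)
        elif "metadata" in event:
            print("\n[metadata]", event["metadata"])
        elif "error" in event:
            print("\n[stream error]", event["error"])
        elif event.get("done"):
            break
```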
@@ -271,4 +554,542 @@ async def health_check():
     return {
         "api": "error",
         "error": str(e)
-    }
+    }
+
+# Enhanced Model Management API Endpoints
+
+@router.get("/models/search")
+async def search_models(
+    query: str = Query(..., description="Search query"),
+    model_type: Optional[str] = Query(None, description="Filter by model type"),
+    provider: Optional[str] = Query(None, description="Filter by provider"),
+    capabilities: Optional[List[str]] = Query(None, description="Filter by capabilities"),
+    limit: int = Query(50, ge=1, le=200, description="Maximum number of results"),
+    user = Depends(optional_auth)
+):
+    """Search models by query and filters"""
+    try:
+        # Try database search first
+        try:
+            from isa_model.core.models.model_repo import ModelRepo
+
+            repo = ModelRepo()
+
+            # Convert capabilities from query parameter
+            capability_list = None
+            if capabilities:
+                capability_list = [cap.strip() for cap in capabilities if cap.strip()]
+
+            results = repo.search_models(
+                query=query,
+                model_type=model_type,
+                provider=provider,
+                capabilities=capability_list,
+                limit=limit
+            )
+
+            # If we got results from the database, return them
+            if results:
+                return {
+                    "success": True,
+                    "query": query,
+                    "filters": {
+                        "model_type": model_type,
+                        "provider": provider,
+                        "capabilities": capability_list
+                    },
+                    "results": [
+                        {
+                            "model_id": model.model_id,
+                            "model_type": model.model_type,
+                            "provider": model.provider,
+                            "description": model.metadata.get("description", ""),
+                            "capabilities": model.capabilities,
+                            "updated_at": model.updated_at.isoformat() if model.updated_at else None
+                        }
+                        for model in results
+                    ],
+                    "total_results": len(results)
+                }
+
+        except Exception as db_error:
+            logger.warning(f"Database search failed, using fallback: {db_error}")
+
+        # Fallback: search in our hardcoded model list + custom models
+        # Load custom models
+        custom_models_for_search = []
+        try:
+            from isa_model.inference.services.custom_model_manager import get_custom_model_manager
+            custom_model_manager = get_custom_model_manager()
+            custom_models_for_search = custom_model_manager.get_models_for_api()
+            # Convert format for search
+            for model in custom_models_for_search:
+                model["model_type"] = model.get("service_type", "text")
+        except Exception as e:
+            logger.warning(f"Failed to load custom models for search: {e}")
+
+        fallback_models = [
+            {
+                "model_id": "gpt-4o-mini",
+                "model_type": "text",
+                "provider": "openai",
+                "description": "Small, fast GPT-4 model optimized for efficiency",
+                "capabilities": ["chat", "text_generation", "reasoning"],
+                "service_type": "text"
+            },
+            {
+                "model_id": "gpt-4o",
+                "model_type": "text",
+                "provider": "openai",
+                "description": "Large GPT-4 model with enhanced capabilities",
+                "capabilities": ["chat", "text_generation", "reasoning", "image_understanding"],
+                "service_type": "text"
+            },
+            {
+                "model_id": "text-embedding-3-small",
+                "model_type": "embedding",
+                "provider": "openai",
+                "description": "Small embedding model for text vectorization",
+                "capabilities": ["embedding", "similarity"],
+                "service_type": "embedding"
+            },
+            {
+                "model_id": "whisper-1",
+                "model_type": "audio",
+                "provider": "openai",
+                "description": "Speech recognition and transcription model",
+                "capabilities": ["speech_to_text", "audio_transcription"],
+                "service_type": "audio"
+            },
+            {
+                "model_id": "tts-1",
+                "model_type": "audio",
+                "provider": "openai",
+                "description": "Text-to-speech generation model",
+                "capabilities": ["text_to_speech"],
+                "service_type": "audio"
+            },
+            {
+                "model_id": "flux-schnell",
+                "model_type": "image",
+                "provider": "replicate",
+                "description": "Fast image generation model",
+                "capabilities": ["image_generation"],
+                "service_type": "image"
+            },
+            {
+                "model_id": "isa-llm-service",
+                "model_type": "text",
+                "provider": "isa",
+                "description": "ISA custom LLM service for trained models",
+                "capabilities": ["chat", "text_generation"],
+                "service_type": "text"
+            },
+            {
+                "model_id": "isa-omniparser-ui-detection",
+                "model_type": "vision",
+                "provider": "isa",
+                "description": "UI element detection and analysis",
+                "capabilities": ["ui_detection", "image_analysis"],
+                "service_type": "vision"
+            }
+        ]
+
+        # Add custom models to search list
+        fallback_models.extend(custom_models_for_search)
+
+        # Apply search filters
+        query_lower = query.lower()
+        filtered_models = []
+
+        for model in fallback_models:
+            # Check if query matches
+            query_match = (
+                query_lower in model["model_id"].lower() or
+                query_lower in model["provider"].lower() or
+                query_lower in model["description"].lower() or
+                any(query_lower in cap.lower() for cap in model["capabilities"])
+            )
+
+            if not query_match:
+                continue
+
+            # Apply type filter
+            if model_type and model["model_type"] != model_type:
+                continue
+
+            # Apply provider filter
+            if provider and model["provider"] != provider:
+                continue
+
+            # Apply capabilities filter
+            if capabilities:
+                if not any(cap in model["capabilities"] for cap in capabilities):
+                    continue
+
+            filtered_models.append({
+                "model_id": model["model_id"],
+                "model_type": model["model_type"],
+                "provider": model["provider"],
+                "description": model["description"],
+                "capabilities": model["capabilities"],
+                "updated_at": None
+            })
+
+        # Apply limit
+        limited_results = filtered_models[:limit]
+
+        return {
+            "success": True,
+            "query": query,
+            "filters": {
+                "model_type": model_type,
+                "provider": provider,
+                "capabilities": capabilities
+            },
+            "results": limited_results,
+            "total_results": len(limited_results),
+            "fallback": True,
+            "message": "Using fallback search - database search unavailable"
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to search models: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to search models: {str(e)}")
+
+@router.get("/models/providers")
+async def get_model_providers(user = Depends(optional_auth)):
+    """Get list of available model providers"""
+    try:
+        from ..cache_manager import cached, provider_list_cache_key
+
+        @cached(ttl=600.0, cache_key_func=lambda: provider_list_cache_key())  # 10 minutes cache
+        async def _get_providers():
+            try:
+                from isa_model.core.models.model_repo import ModelRepo
+                repo = ModelRepo()
+                return repo.get_providers_summary()
+            except Exception as e:
+                logger.warning(f"ModelRepo failed, using fallback: {e}")
+                # Fallback to basic provider list
+                return [
+                    {
+                        "provider": "openai",
+                        "model_count": 4,
+                        "model_types": ["text", "vision", "audio", "embedding"],
+                        "capabilities": ["chat", "completion", "embedding", "vision", "audio"]
+                    },
+                    {
+                        "provider": "isa",
+                        "model_count": 3,
+                        "model_types": ["text", "vision", "embedding"],
+                        "capabilities": ["chat", "completion", "ui_detection", "ocr"]
+                    },
+                    {
+                        "provider": "replicate",
+                        "model_count": 2,
+                        "model_types": ["image", "video"],
+                        "capabilities": ["image_generation", "video_generation"]
+                    }
+                ]
+
+        providers = await _get_providers()
+
+        return {
+            "success": True,
+            "providers": providers,
+            "total_count": len(providers),
+            "cached": True
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to get model providers: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get model providers: {str(e)}")
+
+@router.get("/models/custom")
+async def get_custom_models(
+    model_type: Optional[str] = Query(None, description="Filter by model type"),
+    provider: Optional[str] = Query(None, description="Filter by provider"),
+    user = Depends(optional_auth)
+):
+    """Get list of custom trained models"""
+    try:
+        from ..cache_manager import cached, custom_models_cache_key
+        from isa_model.inference.services.custom_model_manager import get_custom_model_manager
+
+        @cached(ttl=300.0, cache_key_func=lambda mt=model_type, p=provider: custom_models_cache_key(mt, p))  # 5 minutes cache
+        async def _get_custom_models(model_type_param, provider_param):
+            custom_model_manager = get_custom_model_manager()
+            return custom_model_manager.list_models(model_type=model_type_param, provider=provider_param)
+
+        models = await _get_custom_models(model_type, provider)
+
+        # Convert to API format
+        api_models = []
+        for model in models:
+            api_model = {
+                "model_id": model.model_id,
+                "model_name": model.model_name,
+                "model_type": model.model_type,
+                "provider": model.provider,
+                "base_model": model.base_model,
+                "training_date": model.training_date,
+                "description": model.metadata.get("description", ""),
+                "capabilities": model.capabilities,
+                "custom": True
+            }
+
+            if model.performance_metrics:
+                api_model["performance_metrics"] = model.performance_metrics
+
+            if model.deployment_config:
+                api_model["deployment_status"] = "configured"
+
+            api_models.append(api_model)
+
+        return {
+            "success": True,
+            "custom_models": api_models,
+            "total_count": len(api_models),
+            "filters": {
+                "model_type": model_type,
+                "provider": provider
+            },
+            "stats": custom_model_manager.get_stats()
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to get custom models: {e}")
+        return {
+            "success": False,
+            "error": str(e),
+            "custom_models": [],
+            "total_count": 0
+        }
+
+@router.get("/models/capabilities")
+async def get_model_capabilities(user = Depends(optional_auth)):
+    """Get list of all available model capabilities"""
+    try:
+        from ..cache_manager import cached
+
+        @cached(ttl=3600.0, cache_key_func=lambda: "model_capabilities")  # 1 hour cache (static data)
+        async def _get_capabilities():
+            from isa_model.core.models.model_repo import ModelCapability
+
+            return [
+                {
+                    "capability": cap.value,
+                    "description": cap.value.replace("_", " ").title()
+                }
+                for cap in ModelCapability
+            ]
+
+        capabilities = await _get_capabilities()
+
+        return {
+            "success": True,
+            "capabilities": capabilities
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to get model capabilities: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get model capabilities: {str(e)}")
+
+@router.get("/models/{model_id}")
+async def get_model_details(model_id: str, user = Depends(optional_auth)):
+    """Get detailed information about a specific model"""
+    try:
+        from ..cache_manager import cached
+        from isa_model.core.models.model_repo import ModelRepo
+
+        @cached(ttl=900.0, cache_key_func=lambda mid=model_id: f"model_details_{mid}")  # 15 minutes cache
+        async def _get_model_details(model_id_param):
+            repo = ModelRepo()
+            return repo.get_model_by_id(model_id_param)
+
+        model = await _get_model_details(model_id)
+
+        if not model:
+            raise HTTPException(status_code=404, detail=f"Model not found: {model_id}")
+
+        return {
+            "success": True,
+            "model": {
+                "model_id": model.model_id,
+                "model_type": model.model_type,
+                "provider": model.provider,
+                "metadata": model.metadata,
+                "capabilities": model.capabilities,
+                "created_at": model.created_at.isoformat() if model.created_at else None,
+                "updated_at": model.updated_at.isoformat() if model.updated_at else None
+            }
+        }
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Failed to get model details for {model_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get model details: {str(e)}")
+
+@router.get("/models/{model_id}/versions")
+async def get_model_versions(model_id: str, user = Depends(optional_auth)):
+    """Get version history for a specific model"""
+    try:
+        from isa_model.core.models.model_version_manager import ModelVersionManager
+
+        version_manager = ModelVersionManager()
+        versions = version_manager.get_model_versions(model_id)
+
+        return {
+            "success": True,
+            "model_id": model_id,
+            "versions": [
+                {
+                    "version": v.version,
+                    "created_at": v.created_at.isoformat(),
+                    "metadata": v.metadata,
+                    "is_active": v.is_active
+                }
+                for v in versions
+            ],
+            "total_versions": len(versions)
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to get model versions for {model_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get model versions: {str(e)}")
+
+@router.post("/models/{model_id}/versions")
+async def create_model_version(
+    model_id: str,
+    version_data: Dict[str, Any],
+    user = Depends(require_write_access)
+):
+    """Create a new version for a model"""
+    try:
+        from isa_model.core.models.model_version_manager import ModelVersionManager
+
+        version_manager = ModelVersionManager()
+        new_version = version_manager.create_version(
+            model_id=model_id,
+            metadata=version_data.get("metadata", {}),
+            user_id=user.get("user_id") if user else None
+        )
+
+        return {
+            "success": True,
+            "message": f"New version created for model {model_id}",
+            "version": {
+                "version": new_version.version,
+                "created_at": new_version.created_at.isoformat(),
+                "metadata": new_version.metadata
+            }
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to create model version for {model_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to create model version: {str(e)}")
+
+@router.get("/models/{model_id}/billing")
+async def get_model_billing_info(
+    model_id: str,
+    start_date: Optional[str] = Query(None, description="Start date (ISO format)"),
+    end_date: Optional[str] = Query(None, description="End date (ISO format)"),
+    user = Depends(optional_auth)
+):
+    """Get billing information for a specific model"""
+    try:
+        from isa_model.core.models.model_billing_tracker import ModelBillingTracker
+        from datetime import datetime, timedelta
+
+        # Parse dates
+        if start_date:
+            start_dt = datetime.fromisoformat(start_date.replace('Z', '+00:00'))
+        else:
+            start_dt = datetime.now() - timedelta(days=30)
+
+        if end_date:
+            end_dt = datetime.fromisoformat(end_date.replace('Z', '+00:00'))
+        else:
+            end_dt = datetime.now()
+
+        billing_tracker = ModelBillingTracker()
+        billing_info = billing_tracker.get_model_billing_summary(
+            model_id=model_id,
+            start_date=start_dt,
+            end_date=end_dt
+        )
+
+        return {
+            "success": True,
+            "model_id": model_id,
+            "billing_period": {
+                "start_date": start_dt.isoformat(),
+                "end_date": end_dt.isoformat()
+            },
+            "billing_summary": billing_info
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to get billing info for {model_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get billing info: {str(e)}")
+
+@router.put("/models/{model_id}/metadata")
+async def update_model_metadata(
+    model_id: str,
+    metadata_update: Dict[str, Any],
+    user = Depends(require_write_access)
+):
+    """Update metadata for a specific model"""
+    try:
+        from isa_model.core.models.model_repo import ModelRepo
+
+        repo = ModelRepo()
+        success = repo.update_model_metadata(
+            model_id=model_id,
+            metadata_updates=metadata_update,
+            updated_by=user.get("user_id") if user else None
+        )
+
+        if not success:
+            raise HTTPException(status_code=404, detail=f"Model not found: {model_id}")
+
+        return {
+            "success": True,
+            "message": f"Metadata updated for model {model_id}",
+            "updated_fields": list(metadata_update.keys())
+        }
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Failed to update metadata for {model_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to update metadata: {str(e)}")
+
+@router.get("/models/{model_id}/statistics")
+async def get_model_statistics(
+    model_id: str,
+    days: int = Query(30, ge=1, le=365, description="Number of days for statistics"),
+    user = Depends(optional_auth)
+):
+    """Get usage statistics for a specific model"""
+    try:
+        from isa_model.core.models.model_statistics_tracker import ModelStatisticsTracker
+
+        stats_tracker = ModelStatisticsTracker()
+        statistics = stats_tracker.get_model_statistics(
+            model_id=model_id,
+            days=days
+        )
+
+        return {
+            "success": True,
+            "model_id": model_id,
+            "period_days": days,
+            "statistics": statistics
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to get statistics for {model_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get model statistics: {str(e)}")
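For completeness, a minimal sketch (not part of the package) of exercising the new model-management endpoints added above. The paths match the routes in this hunk; the host, port, and mount prefix are assumptions about a local deployment.

```python
# Hypothetical calls against the 0.4.3 model-management routes.
import requests

BASE = "http://localhost:8000"

# Search models; the server falls back to a hardcoded list if the database is unavailable
r = requests.get(f"{BASE}/models/search",
                 params={"query": "gpt", "model_type": "text", "limit": 10})
print(r.json()["total_results"], "matches")

# Provider summary (cached server-side for 10 minutes)
print(requests.get(f"{BASE}/models/providers").json())

# Details, version history, and default 30-day statistics for one model
for suffix in ("", "/versions", "/statistics"):
    print(requests.get(f"{BASE}/models/gpt-4o-mini{suffix}").json())
```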