isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +1166 -584
- isa_model/core/cache/redis_cache.py +410 -0
- isa_model/core/config/config_manager.py +282 -12
- isa_model/core/config.py +91 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +297 -0
- isa_model/core/database/supabase_client.py +258 -0
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +46 -0
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +66 -25
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +217 -55
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +479 -370
- isa_model/core/storage/hf_storage.py +2 -2
- isa_model/core/types.py +8 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -368
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/modal/deployer.py +894 -0
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +179 -16
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +53 -11
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +361 -26
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +11 -3
- isa_model/inference/services/llm/openai_llm_service.py +670 -56
- isa_model/inference/services/llm/yyds_llm_service.py +10 -3
- isa_model/inference/services/vision/__init__.py +27 -6
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +19 -10
- isa_model/inference/services/vision/isa_vision_service.py +634 -0
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +240 -18
- isa_model/serving/api/middleware/auth.py +317 -0
- isa_model/serving/api/middleware/security.py +268 -0
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +489 -0
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +475 -0
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +992 -171
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +318 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks.py +0 -469
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -18
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/factory.py +0 -531
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/metrics.py +0 -798
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model/training/__init__.py +0 -74
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -23
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/factory.py +0 -424
- isa_model-0.3.91.dist-info/RECORD +0 -138
- /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
The largest change in this release is the rewrite of `isa_model/serving/api/routes/unified.py` (+992/-171), shown below as unified diff hunks; removed lines whose content the diff viewer did not render are marked `(… N lines not captured)`.

```diff
@@ -9,13 +9,16 @@ This is the main API that handles all types of AI requests:
 - Embedding tasks
 """
 
-from fastapi import APIRouter, HTTPException, UploadFile, File, Form
+from fastapi import APIRouter, HTTPException, UploadFile, File, Form, Request, Depends, Query
 from fastapi.responses import StreamingResponse
 from pydantic import BaseModel, Field
-from typing import Optional, Dict, Any, Union, List
+from typing import Optional, Dict, Any, Union, List, AsyncGenerator
 import logging
+from ..middleware.auth import optional_auth, require_read_access, require_write_access
+from ..middleware.security import rate_limit_standard, rate_limit_heavy, sanitize_input
 import asyncio
 import json
+import time
 from pathlib import Path
 
 from isa_model.client import ISAModelClient
```
````diff
@@ -24,30 +27,176 @@ logger = logging.getLogger(__name__)
 router = APIRouter()
 
 class UnifiedRequest(BaseModel):
-    """
- (… 7 lines not captured by the diff viewer)
+    """
+    **Unified request model - supports all AI service types**
+
+    This model provides a single request interface for all AI services
+    (text, vision, audio, image generation, embedding).
+
+    **Supported service types**:
+    - `text`: text services (chat, generation, translation)
+    - `vision`: vision services (image analysis, OCR, UI detection)
+    - `audio`: audio services (TTS, STT, transcription)
+    - `image`: image generation services (text-to-image, image transformation)
+    - `embedding`: embedding services (text vectorization, similarity computation)
+
+    **Request example**:
+    ```json
+    {
+        "input_data": "Hello, world!",
+        "task": "chat",
+        "service_type": "text",
+        "model": "gpt-4o-mini",
+        "provider": "openai"
+    }
+    ```
+    """
+    input_data: Union[str, Dict[str, Any]] = Field(
+        ...,
+        description="Input data in one of several formats: text string, LangChain message list, image URL/path, audio file path, etc. The exact format depends on service_type.",
+        examples=["Hello, world!", "https://example.com/image.jpg", "/path/to/audio.mp3"]
+    )
+    task: str = Field(
+        ...,
+        description="Task to perform. Common tasks: chat, analyze_image, generate_speech, transcribe, create_embedding, etc.",
+        examples=["chat", "analyze_image", "generate_speech", "transcribe", "generate_image", "create_embedding"]
+    )
+    service_type: str = Field(
+        ...,
+        description="Service type; decides which AI service handles the request. Allowed values: text, vision, audio, image, embedding.",
+        examples=["text", "vision", "audio", "image", "embedding"]
+    )
+    model: Optional[str] = Field(
+        None,
+        description="Optional model override. If given, the system tries to use this model. Common models: gpt-4o-mini, gpt-4o, whisper-1, flux-schnell, etc.",
+        examples=["gpt-4o-mini", "gpt-4o", "whisper-1", "tts-1", "flux-schnell", "text-embedding-3-small"]
+    )
+    provider: Optional[str] = Field(
+        None,
+        description="Optional provider override. If given, the system tries to use this provider. Common providers: openai, replicate, anthropic, etc.",
+        examples=["openai", "replicate", "anthropic"]
+    )
+    stream: Optional[bool] = Field(
+        None,
+        description="Whether to stream the response. Text services only. text+chat tasks stream by default. Streaming is disabled automatically when tools are used, to keep responses complete."
+    )
+    tools: Optional[List[Dict[str, Any]]] = Field(
+        None,
+        description="Optional tool list for function calling. Text services only. Tools follow the LangChain tool spec. Streaming is disabled automatically when tools are used.",
+        examples=[[
+            {
+                "name": "get_weather",
+                "description": "Get weather information",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "location": {"type": "string", "description": "City name"}
+                    },
+                    "required": ["location"]
+                }
+            }
+        ]]
+    )
+    output_format: Optional[str] = Field(
+        None,
+        description="Output format control. Supported formats: json (structured JSON output), markdown (Markdown formatting), code (code-block extraction), structured (smart structured parsing). Mainly used to format text-service responses.",
+        examples=["json", "markdown", "code", "structured"]
+    )
+    json_schema: Optional[Dict[str, Any]] = Field(
+        None,
+        description="JSON schema validation. Used when output_format='json' to validate and constrain the JSON output. Follows the JSON Schema specification.",
+        examples=[{
+            "type": "object",
+            "properties": {
+                "sentiment": {"type": "string", "enum": ["positive", "negative", "neutral"]},
+                "confidence": {"type": "number", "minimum": 0, "maximum": 1}
+            },
+            "required": ["sentiment", "confidence"]
+        }]
+    )
+    repair_attempts: Optional[int] = Field(
+        3,
+        ge=0,
+        le=10,
+        description="Number of JSON repair attempts. When JSON parsing fails, the system tries to repair common JSON formatting errors. 0 disables repair.",
+        examples=[3, 0, 5]
+    )
+    parameters: Optional[Dict[str, Any]] = Field(
+        default_factory=dict,
+        description="Extra task parameters for fine-grained control of service behavior. Contents depend on the service type, e.g. temperature, max_tokens, voice.",
+        examples=[{"temperature": 0.7, "max_tokens": 1000}, {"voice": "alloy", "speed": 1.0}, {"width": 1024, "height": 1024}]
+    )
 
 class UnifiedResponse(BaseModel):
-    """
- (… 4 lines not captured by the diff viewer)
+    """
+    **Unified response model - the standard response format for all AI services**
+
+    Provides a consistent success/failure status, result payload, and metadata.
+
+    **Success response example**:
+    ```json
+    {
+        "success": true,
+        "result": {
+            "content": "Hello! I'm an AI assistant.",
+            "tool_calls": [],
+            "response_metadata": {
+                "token_usage": {
+                    "prompt_tokens": 15,
+                    "completion_tokens": 10,
+                    "total_tokens": 25
+                }
+            }
+        },
+        "error": null,
+        "metadata": {
+            "model_used": "gpt-4o-mini",
+            "provider": "openai",
+            "task": "chat",
+            "service_type": "text",
+            "processing_time": 1.23
+        }
+    }
+    ```
+
+    **Error response example**:
+    ```json
+    {
+        "success": false,
+        "result": null,
+        "error": "Model 'invalid-model' not found",
+        "metadata": {
+            "error_code": "MODEL_NOT_FOUND",
+            "task": "chat",
+            "service_type": "text"
+        }
+    }
+    ```
+    """
+    success: bool = Field(
+        ...,
+        description="Whether the request executed successfully: true on success, false on failure."
+    )
+    result: Optional[Any] = Field(
+        None,
+        description="Service execution result. Contains the actual data on success, null on failure. The data type depends on the service: text services return an AIMessage object, vision services return analysis text, audio services return a file path or text, image services return image URLs, embedding services return vector arrays."
+    )
+    error: Optional[str] = Field(
+        None,
+        description="Error description: null on success, a detailed error message on failure."
+    )
+    metadata: Dict[str, Any] = Field(
+        ...,
+        description="Response metadata with execution details such as the model used, provider, processing time, and token usage. Contents depend on the service type and execution."
+    )
 
 # Global ISA client instance for server-side processing
 _isa_client = None
 
 def get_isa_client():
-    """Get or create ISA client for
+    """Get or create ISA client for service processing"""
     global _isa_client
     if _isa_client is None:
-        _isa_client = ISAModelClient(
+        _isa_client = ISAModelClient()  # Use direct service mode
     return _isa_client
 
 @router.get("/")
````
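For orientation, here is a minimal client-side sketch of the JSON contract these two models define. It is not part of the package: the base URL, port, and `/unified` mount prefix are assumptions (the router's real prefix is set where it is mounted in `fastapi_server.py`), so adjust them to your deployment.

```python
# Hypothetical call against the unified endpoint; URL and prefix are assumed.
import httpx

payload = {
    "input_data": "Hello, world!",
    "task": "chat",
    "service_type": "text",
    "model": "gpt-4o-mini",
    "provider": "openai",
    "stream": False,  # ask for one UnifiedResponse instead of an SSE stream
    "parameters": {"temperature": 0.7, "max_tokens": 1000},
}

resp = httpx.post("http://localhost:8000/unified/invoke", json=payload, timeout=60.0)
data = resp.json()  # shaped like UnifiedResponse
if data["success"]:
    print(data["result"])  # text services return an AIMessage-style payload
else:
    print(data["error"], data["metadata"].get("error_code"))
```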
```diff
@@ -61,11 +210,16 @@ async def unified_info():
         "version": "1.0.0"
     }
 
-@router.post("/invoke"
-async def unified_invoke(request: UnifiedRequest) -> UnifiedResponse:
+@router.post("/invoke")
+@rate_limit_standard()
+async def unified_invoke(request: Request, user: Dict = Depends(require_read_access)):
     """
     **Unified API endpoint for all AI services**
 
+    Supports both JSON and multipart/form-data requests:
+    - JSON: Standard API request with UnifiedRequest body
+    - Form: File upload with form parameters
+
     This single endpoint handles:
     - Vision: image analysis, OCR, UI detection
     - Text: chat, generation, translation
```
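The multipart path added here takes the upload as form fields; below is a sketch under the same URL assumption, matching the field names the handler in the next hunk reads (`task`, `service_type`, `model`, `provider`, `parameters` as a JSON string, and `file`). The `language` parameter is purely illustrative.

```python
# Hypothetical multipart upload to the unified endpoint; URL is assumed.
import json
import httpx

with open("speech.mp3", "rb") as f:
    resp = httpx.post(
        "http://localhost:8000/unified/invoke",
        data={
            "task": "transcribe",
            "service_type": "audio",
            "model": "whisper-1",
            "provider": "openai",
            # extra parameters travel as a JSON string; json.loads()'d server-side
            "parameters": json.dumps({"language": "en"}),  # illustrative parameter
        },
        files={"file": ("speech.mp3", f, "audio/mpeg")},
    )

body = resp.json()
# the handler echoes filename/content_type/file_size back in metadata
print(body["metadata"].get("filename"), body["metadata"].get("file_size"))
```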
```diff
@@ -76,185 +230,314 @@ async def unified_invoke(request: UnifiedRequest) -> UnifiedResponse:
     **Uses ISAModelClient in local mode - all the complex logic is in client.py**
     """
     try:
-        # Get ISA client instance (
+        # Get ISA client instance (service mode)
         client = get_isa_client()
 
-        #
-
-        result = await client._invoke_local(
-            input_data=request.input_data,
-            task=request.task,
-            service_type=request.service_type,
-            model_hint=request.model_hint,
-            provider_hint=request.provider_hint,
-            **request.parameters
-        )
-
-        # Return the result in our API format
-        return UnifiedResponse(
-            success=result["success"],
-            result=result.get("result"),
-            error=result.get("error"),
-            metadata=result["metadata"]
-        )
+        # Check content type to determine request format
+        content_type = request.headers.get("content-type", "")
 
- (… 28 lines not captured by the diff viewer)
+        if content_type.startswith("multipart/form-data"):
+            # Handle form data with file upload
+            form = await request.form()
+
+            # Extract required fields
+            task = form.get("task")
+            service_type = form.get("service_type")
+            model = form.get("model")
+            provider = form.get("provider")
+            parameters = form.get("parameters")
+            file = form.get("file")
+
+            if not task or not service_type:
+                raise HTTPException(status_code=400, detail="task and service_type are required")
+
+            if file is None:
+                raise HTTPException(status_code=400, detail="file is required for multipart requests")
+
+            # Read file data
+            file_data = await file.read()
+
+            # Parse parameters if provided as JSON string
+            parsed_params = {}
+            if parameters:
+                try:
+                    parsed_params = json.loads(parameters)
+                except json.JSONDecodeError:
+                    parsed_params = {}
+
+            result = await client._invoke_service(
+                input_data=file_data,
+                task=task,
+                service_type=service_type,
+                model_hint=model,
+                provider_hint=provider,
+                filename=file.filename,
+                content_type=file.content_type,
+                file_size=len(file_data),
+                **parsed_params
+            )
+
+            # Return the result in our API format
+            return UnifiedResponse(
+                success=result["success"],
+                result=result.get("result"),
+                error=result.get("error"),
+                metadata={
+                    **result["metadata"],
+                    "filename": file.filename,
+                    "content_type": file.content_type,
+                    "file_size": len(file_data)
+                }
+            )
 
- (… 2 lines not captured by the diff viewer)
+        else:
+            # Handle JSON request
             try:
- (… 2 lines not captured by the diff viewer)
-                input_data=request.input_data,
-                task=request.task,
-                service_type=request.service_type,
-                model_hint=request.model_hint,
-                provider_hint=request.provider_hint,
-                stream=True,
-                **request.parameters
-            )
+                json_body = await request.json()
+                unified_request = UnifiedRequest(**json_body)
 
-                #
- (… 2 lines not captured by the diff viewer)
-                token_data = {
-                    "token": token,
-                    "type": "token"
-                }
-                yield f"data: {json.dumps(token_data)}\n\n"
-
-                # Send completion signal
-                completion_data = {
-                    "type": "completion",
-                    "status": "finished"
-                }
-                yield f"data: {json.dumps(completion_data)}\n\n"
+                # Sanitize string inputs to prevent XSS and injection attacks
+                if isinstance(unified_request.input_data, str):
+                    unified_request.input_data = sanitize_input(unified_request.input_data)
 
             except Exception as e:
- (… 17 lines not captured by the diff viewer)
+                from ..error_handlers import handle_validation_error, create_http_exception, ErrorCode
+                if hasattr(e, 'errors'):  # Pydantic validation error
+                    error_response = handle_validation_error(e)
+                    raise HTTPException(status_code=400, detail=error_response)
+                else:
+                    raise create_http_exception(
+                        f"Invalid request JSON: {str(e)}",
+                        400,
+                        ErrorCode.INVALID_INPUT,
+                        {"suggestion": "Check the JSON format and required fields"}
+                    )
+
+            # Prepare parameters, ensuring tools isn't duplicated
+            params = dict(unified_request.parameters) if unified_request.parameters else {}
+            if unified_request.tools:
+                params.pop("tools", None)  # Remove tools from parameters if present
+                params["tools"] = unified_request.tools
+
+            # Add JSON output formatting parameters
+            if unified_request.output_format:
+                params["output_format"] = unified_request.output_format
+            if unified_request.json_schema:
+                params["json_schema"] = unified_request.json_schema
+            if unified_request.repair_attempts is not None:
+                params["repair_attempts"] = unified_request.repair_attempts
+
+            # Check if this should be a streaming response
+            # Default to streaming for text+chat unless explicitly disabled
+            is_text_chat = (unified_request.service_type == "text" and unified_request.task == "chat")
+            stream_setting = unified_request.stream if unified_request.stream is not None else is_text_chat
+
+            should_stream = (
+                is_text_chat and
+                not unified_request.tools and  # No tools
+                stream_setting  # Stream enabled by default for text+chat or explicitly
+            )
+
+
+            if should_stream:
+                # Return streaming response for text chat
+                async def generate_stream():
+                    try:
+                        # Use streaming invoke but track metadata manually
+                        collected_tokens = []
+                        selected_model = None
+                        service_info = None
+                        start_time = time.time()
+
+                        # Get model selection info first (lightweight operation)
+                        try:
+                            selected_model = await client._select_model(
+                                input_data=unified_request.input_data,
+                                task=unified_request.task,
+                                service_type=unified_request.service_type,
+                                model_hint=unified_request.model,
+                                provider_hint=unified_request.provider
+                            )
+                            service_info = {
+                                "model_used": selected_model["model_id"],
+                                "provider": selected_model["provider"],
+                                "task": unified_request.task,
+                                "service_type": unified_request.service_type,
+                                "selection_reason": selected_model.get("reason", "Default selection"),
+                                "streaming": True
+                            }
+                        except Exception:
+                            pass
+
+                        # Stream the tokens and get metadata
+                        processing_time = 0
+                        async for item in client.invoke_stream(
+                            input_data=unified_request.input_data,
+                            task=unified_request.task,
+                            service_type=unified_request.service_type,
+                            model=unified_request.model,
+                            provider=unified_request.provider,
+                            return_metadata=True,  # Request metadata with billing info
+                            **params
+                        ):
+                            if isinstance(item, tuple) and item[0] == 'metadata':
+                                # This is the final metadata with billing info
+                                metadata = item[1]
+                                processing_time = time.time() - start_time
+                                metadata["processing_time"] = processing_time
+                                yield f"data: {json.dumps({'metadata': metadata})}\n\n"
+                            else:
+                                # This is a token
+                                collected_tokens.append(item)
+                                yield f"data: {json.dumps({'token': item})}\n\n"
+
+                    except Exception as e:
+                        from ..error_handlers import create_error_response, ErrorCode
+                        # Create detailed error response for streaming
+                        error_response = create_error_response(
+                            error=e,
+                            error_code=ErrorCode.INFERENCE_FAILED,
+                            details={
+                                "service_type": unified_request.service_type,
+                                "model": unified_request.model,
+                                "provider": unified_request.provider,
+                                "streaming": True
+                            }
+                        )
+                        # Send structured error as final event
+                        yield f"data: {json.dumps({'error': error_response})}\n\n"
+                    finally:
+                        # Send end-of-stream marker
+                        yield f"data: {json.dumps({'done': True})}\n\n"
+
+                return StreamingResponse(
+                    generate_stream(),
+                    media_type="text/event-stream",
+                    headers={
+                        "Cache-Control": "no-cache",
+                        "Connection": "keep-alive",
+                        "X-Accel-Buffering": "no"  # Disable nginx buffering
+                    }
+                )
+            else:
+                # Non-streaming response (original behavior)
+                result = await client._invoke_service(
+                    input_data=unified_request.input_data,
+                    task=unified_request.task,
+                    service_type=unified_request.service_type,
+                    model_hint=unified_request.model,
+                    provider_hint=unified_request.provider,
+                    **params
+                )
+
+                # Return the result in our API format
+                return UnifiedResponse(
+                    success=result["success"],
+                    result=result.get("result"),
+                    error=result.get("error"),
+                    metadata=result["metadata"]
+                )
 
+    except HTTPException:
+        raise
     except Exception as e:
- (… 3 lines not captured by the diff viewer)
-@router.post("/invoke-file", response_model=UnifiedResponse)
-async def unified_invoke_file(
-    task: str = Form(...),
-    service_type: str = Form(...),
-    model_hint: Optional[str] = Form(None),
-    provider_hint: Optional[str] = Form(None),
-    file: UploadFile = File(...)
-) -> UnifiedResponse:
-    """
-    Unified file upload endpoint
-
-    For tasks that require file input (images, audio, documents)
-    """
-    try:
-        # Read file data
-        file_data = await file.read()
-
-        # Get ISA client instance (local mode)
-        client = get_isa_client()
-
-        # Use client's local invoke method with binary data
-        result = await client._invoke_local(
-            input_data=file_data,  # Binary data
-            task=task,
-            service_type=service_type,
-            model_hint=model_hint,
-            provider_hint=provider_hint,
-            filename=file.filename,
-            content_type=file.content_type,
-            file_size=len(file_data)
-        )
+        from ..error_handlers import create_error_response, ErrorCode
+        logger.error(f"Unified invoke failed: {e}")
 
-        #
- (… 6 lines not captured by the diff viewer)
-            "
-            "
-            "
+        # Create detailed error response
+        error_response = create_error_response(
+            error=e,
+            status_code=500,
+            error_code=ErrorCode.INFERENCE_FAILED,
+            details={
+                "service_type": getattr(unified_request, 'service_type', 'unknown'),
+                "model": getattr(unified_request, 'model', 'unknown'),
+                "provider": getattr(unified_request, 'provider', 'unknown'),
+                "task": getattr(unified_request, 'task', 'unknown')
             }
         )
 
-    except Exception as e:
-        logger.error(f"File invoke failed: {e}")
         return UnifiedResponse(
             success=False,
-            error=
+            error=error_response.get("error"),
             metadata={
-                "
-                "
-                "
+                "error_code": error_response.get("error_code"),
+                "user_message": error_response.get("user_message"),
+                "details": error_response.get("details", {})
             }
         )
 
+
+
 @router.get("/models")
 async def get_available_models(service_type: Optional[str] = None):
     """Get available models (optional filter by service type)"""
     try:
- (… 2 lines not captured by the diff viewer)
+        from ..cache_manager import cached, model_list_cache_key
+
+        @cached(ttl=600.0, cache_key_func=lambda st=service_type: model_list_cache_key(st))  # 10 minutes cache
+        async def _get_models(service_type_param):
+            client = get_isa_client()
+            return await client.get_available_models(service_type_param)
+
+        models_list = await _get_models(service_type)
+
+        # Ensure we return the expected format
+        if isinstance(models_list, list):
+            return {
+                "success": True,
+                "models": models_list,
+                "total_count": len(models_list),
+                "service_type_filter": service_type
+            }
+        elif isinstance(models_list, dict) and "models" in models_list:
+            # Already in correct format
+            return models_list
+        else:
+            # Unknown format, convert to expected format
+            return {
+                "success": True,
+                "models": models_list if isinstance(models_list, list) else [],
+                "total_count": len(models_list) if isinstance(models_list, list) else 0,
+                "service_type_filter": service_type
+            }
     except Exception as e:
         logger.error(f"Failed to get available models: {e}")
         # Fallback static model list
+        # Load custom models
+        custom_models = []
+        try:
+            from isa_model.inference.services.custom_model_manager import get_custom_model_manager
+            custom_model_manager = get_custom_model_manager()
+            custom_models = custom_model_manager.get_models_for_api()
+            logger.debug(f"Loaded {len(custom_models)} custom models")
+        except Exception as e:
+            logger.warning(f"Failed to load custom models: {e}")
+
+        # Base fallback models
+        base_models = [
+            {"service_type": "vision", "provider": "openai", "model_id": "gpt-4o-mini"},
+            {"service_type": "text", "provider": "openai", "model_id": "gpt-4o-mini"},
+            {"service_type": "audio", "provider": "openai", "model_id": "whisper-1"},
+            {"service_type": "audio", "provider": "openai", "model_id": "tts-1"},
+            {"service_type": "embedding", "provider": "openai", "model_id": "text-embedding-3-small"},
+            {"service_type": "image", "provider": "replicate", "model_id": "black-forest-labs/flux-schnell"}
+        ]
+
+        # Combine base models with custom models
+        fallback_models = base_models + custom_models
+
+        # Filter by service_type if provided
+        if service_type:
+            fallback_models = [m for m in fallback_models if m["service_type"] == service_type]
+
         return {
-            "
- (… 4 lines not captured by the diff viewer)
-            {"service_type": "image", "provider": "replicate", "model_id": "black-forest-labs/flux-schnell"}
-            ]
+            "success": False,
+            "error": f"Failed to get models: {str(e)}",
+            "models": fallback_models,
+            "total_count": len(fallback_models),
+            "service_type_filter": service_type,
+            "fallback": True
         }
 
 @router.get("/health")
```
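The event shapes emitted by `generate_stream()` above (`{'token': …}`, then a final `{'metadata': …}`, `{'error': …}` on failure, and a closing `{'done': true}`) make the stream straightforward to consume. A sketch follows, with the URL again an assumption:

```python
# Consuming the SSE stream; event payloads mirror generate_stream() above.
import json
import httpx

# text + chat streams by default, so no explicit "stream": True is needed
payload = {"input_data": "Tell me a joke", "task": "chat", "service_type": "text"}

with httpx.stream("POST", "http://localhost:8000/unified/invoke",
                  json=payload, timeout=None) as r:
    for line in r.iter_lines():
        if not line.startswith("data: "):
            continue  # skip blank separator lines
        event = json.loads(line[len("data: "):])
        if "token" in event:
            print(event["token"], end="", flush=True)
        elif "metadata" in event:
            final_meta = event["metadata"]  # billing info + processing_time
        elif "error" in event:
            raise RuntimeError(event["error"])
        elif event.get("done"):
            break  # end-of-stream marker
```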
```diff
@@ -271,4 +554,542 @@ async def health_check():
         return {
             "api": "error",
             "error": str(e)
-        }
+        }
+
+# Enhanced Model Management API Endpoints
+
+@router.get("/models/search")
+async def search_models(
+    query: str = Query(..., description="Search query"),
+    model_type: Optional[str] = Query(None, description="Filter by model type"),
+    provider: Optional[str] = Query(None, description="Filter by provider"),
+    capabilities: Optional[List[str]] = Query(None, description="Filter by capabilities"),
+    limit: int = Query(50, ge=1, le=200, description="Maximum number of results"),
+    user = Depends(optional_auth)
+):
+    """Search models by query and filters"""
+    try:
+        # Try database search first
+        try:
+            from isa_model.core.models.model_repo import ModelRepo
+
+            repo = ModelRepo()
+
+            # Convert capabilities from query parameter
+            capability_list = None
+            if capabilities:
+                capability_list = [cap.strip() for cap in capabilities if cap.strip()]
+
+            results = repo.search_models(
+                query=query,
+                model_type=model_type,
+                provider=provider,
+                capabilities=capability_list,
+                limit=limit
+            )
+
+            # If we got results from the database, return them
+            if results:
+                return {
+                    "success": True,
+                    "query": query,
+                    "filters": {
+                        "model_type": model_type,
+                        "provider": provider,
+                        "capabilities": capability_list
+                    },
+                    "results": [
+                        {
+                            "model_id": model.model_id,
+                            "model_type": model.model_type,
+                            "provider": model.provider,
+                            "description": model.metadata.get("description", ""),
+                            "capabilities": model.capabilities,
+                            "updated_at": model.updated_at.isoformat() if model.updated_at else None
+                        }
+                        for model in results
+                    ],
+                    "total_results": len(results)
+                }
+
+        except Exception as db_error:
+            logger.warning(f"Database search failed, using fallback: {db_error}")
+
+        # Fallback: search in our hardcoded model list + custom models
+        # Load custom models
+        custom_models_for_search = []
+        try:
+            from isa_model.inference.services.custom_model_manager import get_custom_model_manager
+            custom_model_manager = get_custom_model_manager()
+            custom_models_for_search = custom_model_manager.get_models_for_api()
+            # Convert format for search
+            for model in custom_models_for_search:
+                model["model_type"] = model.get("service_type", "text")
+        except Exception as e:
+            logger.warning(f"Failed to load custom models for search: {e}")
+
+        fallback_models = [
+            {
+                "model_id": "gpt-4o-mini",
+                "model_type": "text",
+                "provider": "openai",
+                "description": "Small, fast GPT-4 model optimized for efficiency",
+                "capabilities": ["chat", "text_generation", "reasoning"],
+                "service_type": "text"
+            },
+            {
+                "model_id": "gpt-4o",
+                "model_type": "text",
+                "provider": "openai",
+                "description": "Large GPT-4 model with enhanced capabilities",
+                "capabilities": ["chat", "text_generation", "reasoning", "image_understanding"],
+                "service_type": "text"
+            },
+            {
+                "model_id": "text-embedding-3-small",
+                "model_type": "embedding",
+                "provider": "openai",
+                "description": "Small embedding model for text vectorization",
+                "capabilities": ["embedding", "similarity"],
+                "service_type": "embedding"
+            },
+            {
+                "model_id": "whisper-1",
+                "model_type": "audio",
+                "provider": "openai",
+                "description": "Speech recognition and transcription model",
+                "capabilities": ["speech_to_text", "audio_transcription"],
+                "service_type": "audio"
+            },
+            {
+                "model_id": "tts-1",
+                "model_type": "audio",
+                "provider": "openai",
+                "description": "Text-to-speech generation model",
+                "capabilities": ["text_to_speech"],
+                "service_type": "audio"
+            },
+            {
+                "model_id": "flux-schnell",
+                "model_type": "image",
+                "provider": "replicate",
+                "description": "Fast image generation model",
+                "capabilities": ["image_generation"],
+                "service_type": "image"
+            },
+            {
+                "model_id": "isa-llm-service",
+                "model_type": "text",
+                "provider": "isa",
+                "description": "ISA custom LLM service for trained models",
+                "capabilities": ["chat", "text_generation"],
+                "service_type": "text"
+            },
+            {
+                "model_id": "isa-omniparser-ui-detection",
+                "model_type": "vision",
+                "provider": "isa",
+                "description": "UI element detection and analysis",
+                "capabilities": ["ui_detection", "image_analysis"],
+                "service_type": "vision"
+            }
+        ]
+
+        # Add custom models to search list
+        fallback_models.extend(custom_models_for_search)
+
+        # Apply search filters
+        query_lower = query.lower()
+        filtered_models = []
+
+        for model in fallback_models:
+            # Check if query matches
+            query_match = (
+                query_lower in model["model_id"].lower() or
+                query_lower in model["provider"].lower() or
+                query_lower in model["description"].lower() or
+                any(query_lower in cap.lower() for cap in model["capabilities"])
+            )
+
+            if not query_match:
+                continue
+
+            # Apply type filter
+            if model_type and model["model_type"] != model_type:
+                continue
+
+            # Apply provider filter
+            if provider and model["provider"] != provider:
+                continue
+
+            # Apply capabilities filter
+            if capabilities:
+                if not any(cap in model["capabilities"] for cap in capabilities):
+                    continue
+
+            filtered_models.append({
+                "model_id": model["model_id"],
+                "model_type": model["model_type"],
+                "provider": model["provider"],
+                "description": model["description"],
+                "capabilities": model["capabilities"],
+                "updated_at": None
+            })
+
+        # Apply limit
+        limited_results = filtered_models[:limit]
+
+        return {
+            "success": True,
+            "query": query,
+            "filters": {
+                "model_type": model_type,
+                "provider": provider,
+                "capabilities": capabilities
+            },
+            "results": limited_results,
+            "total_results": len(limited_results),
+            "fallback": True,
+            "message": "Using fallback search - database search unavailable"
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to search models: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to search models: {str(e)}")
+
+@router.get("/models/providers")
+async def get_model_providers(user = Depends(optional_auth)):
+    """Get list of available model providers"""
+    try:
+        from ..cache_manager import cached, provider_list_cache_key
+
+        @cached(ttl=600.0, cache_key_func=lambda: provider_list_cache_key())  # 10 minutes cache
+        async def _get_providers():
+            try:
+                from isa_model.core.models.model_repo import ModelRepo
+                repo = ModelRepo()
+                return repo.get_providers_summary()
+            except Exception as e:
+                logger.warning(f"ModelRepo failed, using fallback: {e}")
+                # Fallback to basic provider list
+                return [
+                    {
+                        "provider": "openai",
+                        "model_count": 4,
+                        "model_types": ["text", "vision", "audio", "embedding"],
+                        "capabilities": ["chat", "completion", "embedding", "vision", "audio"]
+                    },
+                    {
+                        "provider": "isa",
+                        "model_count": 3,
+                        "model_types": ["text", "vision", "embedding"],
+                        "capabilities": ["chat", "completion", "ui_detection", "ocr"]
+                    },
+                    {
+                        "provider": "replicate",
+                        "model_count": 2,
+                        "model_types": ["image", "video"],
+                        "capabilities": ["image_generation", "video_generation"]
+                    }
+                ]
+
+        providers = await _get_providers()
+
+        return {
+            "success": True,
+            "providers": providers,
+            "total_count": len(providers),
+            "cached": True
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to get model providers: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get model providers: {str(e)}")
+
+@router.get("/models/custom")
+async def get_custom_models(
+    model_type: Optional[str] = Query(None, description="Filter by model type"),
+    provider: Optional[str] = Query(None, description="Filter by provider"),
+    user = Depends(optional_auth)
+):
+    """Get list of custom trained models"""
+    try:
+        from ..cache_manager import cached, custom_models_cache_key
+        from isa_model.inference.services.custom_model_manager import get_custom_model_manager
+
+        @cached(ttl=300.0, cache_key_func=lambda mt=model_type, p=provider: custom_models_cache_key(mt, p))  # 5 minutes cache
+        async def _get_custom_models(model_type_param, provider_param):
+            custom_model_manager = get_custom_model_manager()
+            return custom_model_manager.list_models(model_type=model_type_param, provider=provider_param)
+
+        models = await _get_custom_models(model_type, provider)
+
+        # Convert to API format
+        api_models = []
+        for model in models:
+            api_model = {
+                "model_id": model.model_id,
+                "model_name": model.model_name,
+                "model_type": model.model_type,
+                "provider": model.provider,
+                "base_model": model.base_model,
+                "training_date": model.training_date,
+                "description": model.metadata.get("description", ""),
+                "capabilities": model.capabilities,
+                "custom": True
+            }
+
+            if model.performance_metrics:
+                api_model["performance_metrics"] = model.performance_metrics
+
+            if model.deployment_config:
+                api_model["deployment_status"] = "configured"
+
+            api_models.append(api_model)
+
+        return {
+            "success": True,
+            "custom_models": api_models,
+            "total_count": len(api_models),
+            "filters": {
+                "model_type": model_type,
+                "provider": provider
+            },
+            "stats": custom_model_manager.get_stats()
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to get custom models: {e}")
+        return {
+            "success": False,
+            "error": str(e),
+            "custom_models": [],
+            "total_count": 0
+        }
+
+@router.get("/models/capabilities")
+async def get_model_capabilities(user = Depends(optional_auth)):
+    """Get list of all available model capabilities"""
+    try:
+        from ..cache_manager import cached
+
+        @cached(ttl=3600.0, cache_key_func=lambda: "model_capabilities")  # 1 hour cache (static data)
+        async def _get_capabilities():
+            from isa_model.core.models.model_repo import ModelCapability
+
+            return [
+                {
+                    "capability": cap.value,
+                    "description": cap.value.replace("_", " ").title()
+                }
+                for cap in ModelCapability
+            ]
+
+        capabilities = await _get_capabilities()
+
+        return {
+            "success": True,
+            "capabilities": capabilities
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to get model capabilities: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get model capabilities: {str(e)}")
+
+@router.get("/models/{model_id}")
+async def get_model_details(model_id: str, user = Depends(optional_auth)):
+    """Get detailed information about a specific model"""
+    try:
+        from ..cache_manager import cached
+        from isa_model.core.models.model_repo import ModelRepo
+
+        @cached(ttl=900.0, cache_key_func=lambda mid=model_id: f"model_details_{mid}")  # 15 minutes cache
+        async def _get_model_details(model_id_param):
+            repo = ModelRepo()
+            return repo.get_model_by_id(model_id_param)
+
+        model = await _get_model_details(model_id)
+
+        if not model:
+            raise HTTPException(status_code=404, detail=f"Model not found: {model_id}")
+
+        return {
+            "success": True,
+            "model": {
+                "model_id": model.model_id,
+                "model_type": model.model_type,
+                "provider": model.provider,
+                "metadata": model.metadata,
+                "capabilities": model.capabilities,
+                "created_at": model.created_at.isoformat() if model.created_at else None,
+                "updated_at": model.updated_at.isoformat() if model.updated_at else None
+            }
+        }
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Failed to get model details for {model_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get model details: {str(e)}")
+
+@router.get("/models/{model_id}/versions")
+async def get_model_versions(model_id: str, user = Depends(optional_auth)):
+    """Get version history for a specific model"""
+    try:
+        from isa_model.core.models.model_version_manager import ModelVersionManager
+
+        version_manager = ModelVersionManager()
+        versions = version_manager.get_model_versions(model_id)
+
+        return {
+            "success": True,
+            "model_id": model_id,
+            "versions": [
+                {
+                    "version": v.version,
+                    "created_at": v.created_at.isoformat(),
+                    "metadata": v.metadata,
+                    "is_active": v.is_active
+                }
+                for v in versions
+            ],
+            "total_versions": len(versions)
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to get model versions for {model_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get model versions: {str(e)}")
+
+@router.post("/models/{model_id}/versions")
+async def create_model_version(
+    model_id: str,
+    version_data: Dict[str, Any],
+    user = Depends(require_write_access)
+):
+    """Create a new version for a model"""
+    try:
+        from isa_model.core.models.model_version_manager import ModelVersionManager
+
+        version_manager = ModelVersionManager()
+        new_version = version_manager.create_version(
+            model_id=model_id,
+            metadata=version_data.get("metadata", {}),
+            user_id=user.get("user_id") if user else None
+        )
+
+        return {
+            "success": True,
+            "message": f"New version created for model {model_id}",
+            "version": {
+                "version": new_version.version,
+                "created_at": new_version.created_at.isoformat(),
+                "metadata": new_version.metadata
+            }
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to create model version for {model_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to create model version: {str(e)}")
+
+@router.get("/models/{model_id}/billing")
+async def get_model_billing_info(
+    model_id: str,
+    start_date: Optional[str] = Query(None, description="Start date (ISO format)"),
+    end_date: Optional[str] = Query(None, description="End date (ISO format)"),
+    user = Depends(optional_auth)
+):
+    """Get billing information for a specific model"""
+    try:
+        from isa_model.core.models.model_billing_tracker import ModelBillingTracker
+        from datetime import datetime, timedelta
+
+        # Parse dates
+        if start_date:
+            start_dt = datetime.fromisoformat(start_date.replace('Z', '+00:00'))
+        else:
+            start_dt = datetime.now() - timedelta(days=30)
+
+        if end_date:
+            end_dt = datetime.fromisoformat(end_date.replace('Z', '+00:00'))
+        else:
+            end_dt = datetime.now()
+
+        billing_tracker = ModelBillingTracker()
+        billing_info = billing_tracker.get_model_billing_summary(
+            model_id=model_id,
+            start_date=start_dt,
+            end_date=end_dt
+        )
+
+        return {
+            "success": True,
+            "model_id": model_id,
+            "billing_period": {
+                "start_date": start_dt.isoformat(),
+                "end_date": end_dt.isoformat()
+            },
+            "billing_summary": billing_info
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to get billing info for {model_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get billing info: {str(e)}")
+
+@router.put("/models/{model_id}/metadata")
+async def update_model_metadata(
+    model_id: str,
+    metadata_update: Dict[str, Any],
+    user = Depends(require_write_access)
+):
+    """Update metadata for a specific model"""
+    try:
+        from isa_model.core.models.model_repo import ModelRepo
+
+        repo = ModelRepo()
+        success = repo.update_model_metadata(
+            model_id=model_id,
+            metadata_updates=metadata_update,
+            updated_by=user.get("user_id") if user else None
+        )
+
+        if not success:
+            raise HTTPException(status_code=404, detail=f"Model not found: {model_id}")
+
+        return {
+            "success": True,
+            "message": f"Metadata updated for model {model_id}",
+            "updated_fields": list(metadata_update.keys())
+        }
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Failed to update metadata for {model_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to update metadata: {str(e)}")
+
+@router.get("/models/{model_id}/statistics")
+async def get_model_statistics(
+    model_id: str,
+    days: int = Query(30, ge=1, le=365, description="Number of days for statistics"),
+    user = Depends(optional_auth)
+):
+    """Get usage statistics for a specific model"""
+    try:
+        from isa_model.core.models.model_statistics_tracker import ModelStatisticsTracker
+
+        stats_tracker = ModelStatisticsTracker()
+        statistics = stats_tracker.get_model_statistics(
+            model_id=model_id,
+            days=days
+        )
+
+        return {
+            "success": True,
+            "model_id": model_id,
+            "period_days": days,
+            "statistics": statistics
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to get statistics for {model_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get model statistics: {str(e)}")
```