isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +1166 -584
- isa_model/core/cache/redis_cache.py +410 -0
- isa_model/core/config/config_manager.py +282 -12
- isa_model/core/config.py +91 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +297 -0
- isa_model/core/database/supabase_client.py +258 -0
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +46 -0
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +66 -25
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +217 -55
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +479 -370
- isa_model/core/storage/hf_storage.py +2 -2
- isa_model/core/types.py +8 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -368
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/modal/deployer.py +894 -0
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +179 -16
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +53 -11
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +361 -26
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +11 -3
- isa_model/inference/services/llm/openai_llm_service.py +670 -56
- isa_model/inference/services/llm/yyds_llm_service.py +10 -3
- isa_model/inference/services/vision/__init__.py +27 -6
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +19 -10
- isa_model/inference/services/vision/isa_vision_service.py +634 -0
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +240 -18
- isa_model/serving/api/middleware/auth.py +317 -0
- isa_model/serving/api/middleware/security.py +268 -0
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +489 -0
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +475 -0
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +992 -171
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +318 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks.py +0 -469
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -18
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/factory.py +0 -531
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/metrics.py +0 -798
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model/training/__init__.py +0 -74
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -23
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/factory.py +0 -424
- isa_model-0.3.91.dist-info/RECORD +0 -138
- /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
isa_model/client.py
CHANGED
@@ -2,17 +2,84 @@
 # -*- coding: utf-8 -*-

 """
-ISA Model Client - Unified
-
+ISA Model Client - Unified AI Service Interface
+===============================================
+
+Overview:
+Unified client interface for the ISA Model platform, providing intelligent model selection and a simplified API.
+
+Main features:
+- Unified interface for multimodal AI services: text, vision, audio, image generation, embeddings
+- Automatic intelligent model selection: chooses the most suitable model based on task type and input data
+- Streaming support: real-time streaming text generation for a better user experience
+- Remote/local modes: supports both local service calls and remote API calls
+- Cost tracking: automatically calculates and tracks API call costs
+- Tool support: LangChain tool integration to extend model capabilities
+- Caching: service instance caching for better performance
+
+Input interface:
+- input_data: input data of multiple types (text, image paths, audio files, byte data, etc.)
+- task: task type (chat, analyze, generate_speech, transcribe, etc.)
+- service_type: service type (text, vision, audio, image, embedding)
+- model: optional model name (intelligently selected if not specified)
+- provider: optional provider name (openai, ollama, replicate, etc.)
+
+Output format:
+- Unified response dictionary containing result and metadata
+- Streaming responses: include a "stream" async generator
+- Non-streaming responses: include the "result" data
+- metadata: includes model info, billing info, selection reason, etc.
+
+Core dependencies:
+- isa_model.inference.ai_factory: AI service factory
+- isa_model.core.services.intelligent_model_selector: intelligent model selector
+- aiohttp: HTTP client (remote API mode)
+- asyncio: async programming support
+
+Usage examples:
+```python
+# Create a client
+client = ISAModelClient()
+
+# Streaming text generation
+result = await client.invoke("Write a story", "chat", "text")
+async for token in result["stream"]:
+    print(token, end="", flush=True)
+
+# Image analysis
+result = await client.invoke("image.jpg", "analyze", "vision")
+print(result["result"])
+
+# Speech synthesis
+result = await client.invoke("Hello world", "generate_speech", "audio")
+print(result["result"])
+```
+
+Architecture notes:
+- Singleton pattern: ensures configuration consistency
+- Async support: all operations are asynchronous
+- Error handling: unified error handling and response format
+- Extensibility: supports new service providers and models
+
+Suggested improvements:
+1. Add request retries: handle unstable networks
+2. Add request rate limiting: avoid exceeding API limits
+3. Improve the caching strategy: support LRU caching and TTL expiry
+4. Add monitoring metrics: record latency, success rate, and similar indicators
+5. Support batching: improve throughput for large request volumes
+6. Add configuration validation: validate API keys and configuration at startup
 """

 import logging
 import asyncio
+import time
+import uuid
 from typing import Any, Dict, Optional, List, Union
 from pathlib import Path
-import
+from datetime import datetime, timezone

 from isa_model.inference.ai_factory import AIFactory
+from isa_model.core.logging import get_inference_logger, generate_request_id

 try:
     from isa_model.core.services.intelligent_model_selector import IntelligentModelSelector, get_model_selector
@@ -36,41 +103,104 @@ class ISAModelClient:
         response = await client.invoke("audio.mp3", "transcribe", "audio")
     """

+    # Consolidated task mappings for all service types
+    TASK_MAPPINGS = {
+        "vision": {
+            # Core tasks (direct mapping)
+            "analyze": "analyze",
+            "describe": "describe",
+            "extract": "extract",
+            "detect": "detect",
+            "classify": "classify",
+            "compare": "compare",
+
+            # Common aliases (backward compatibility)
+            "analyze_image": "analyze",
+            "describe_image": "describe",
+            "extract_text": "extract",
+            "extract_table": "extract",
+            "detect_objects": "detect",
+            "detect_ui": "detect",
+            "detect_ui_elements": "detect",
+            "get_coordinates": "detect",
+            "ocr": "extract",
+            "ui_analysis": "analyze",
+            "navigation": "analyze"
+        },
+        "audio": {
+            "generate_speech": "synthesize",
+            "text_to_speech": "synthesize",
+            "tts": "synthesize",
+            "transcribe": "transcribe",
+            "speech_to_text": "transcribe",
+            "stt": "transcribe",
+            "translate": "translate",
+            "detect_language": "detect_language"
+        },
+        "text": {
+            "chat": "chat",
+            "generate": "generate",
+            "complete": "complete",
+            "translate": "translate",
+            "summarize": "summarize",
+            "analyze": "analyze",
+            "extract": "extract",
+            "classify": "classify"
+        },
+        "image": {
+            "generate_image": "generate",
+            "generate": "generate",
+            "img2img": "img2img",
+            "image_to_image": "img2img",
+            "generate_batch": "generate_batch"
+        },
+        "embedding": {
+            "create_embedding": "embed",
+            "embed": "embed",
+            "embed_batch": "embed_batch",
+            "chunk_and_embed": "chunk_and_embed",
+            "similarity": "similarity",
+            "find_similar": "find_similar",
+            "rerank": "rerank",
+            "rerank_documents": "rerank_documents",
+            "document_ranking": "document_ranking"
+        }
+    }
+
+    # Service type configuration
+    SUPPORTED_SERVICE_TYPES = {"vision", "audio", "text", "image", "embedding"}
+
     def __init__(self,
                  config: Optional[Dict[str, Any]] = None,
-
-                 api_url: Optional[str] = None,
+                 service_endpoint: Optional[str] = None,
                  api_key: Optional[str] = None):
         """Initialize ISA Model Client

         Args:
             config: Optional configuration override
-
-
-            api_key: API key for authentication (optional)
+            service_endpoint: Optional service endpoint URL (if None, uses local AI Factory)
+            api_key: Optional API key for authentication (can also be set via ISA_API_KEY env var)
         """
         self.config = config or {}
-        self.
-
-
-
-
-        if self.
-
-
-
-        self.headers = {
-            "Content-Type": "application/json",
-            "User-Agent": "ISA-Model-Client/1.0.0"
-        }
-        if self.api_key:
-            self.headers["Authorization"] = f"Bearer {self.api_key}"
+        self.service_endpoint = service_endpoint
+
+        # Handle API key authentication
+        import os
+        self.api_key = api_key or os.getenv("ISA_API_KEY")
+        if self.api_key:
+            logger.info("API key provided for authentication")
+        else:
+            logger.debug("No API key provided - using anonymous access")

-        # Initialize AI Factory for
-        if self.
+        # Initialize AI Factory for direct service access (when service_endpoint is None)
+        if not self.service_endpoint:
             self.ai_factory = AIFactory.get_instance()
         else:
             self.ai_factory = None
+            logger.info(f"Using remote service endpoint: {self.service_endpoint}")
+
+        # HTTP client for remote API calls
+        self._http_session = None

         # Initialize intelligent model selector
         self.model_selector = None
@@ -87,169 +217,474 @@ class ISAModelClient:
         # Cache for frequently used services
         self._service_cache: Dict[str, Any] = {}

+        # Initialize inference logger
+        self.inference_logger = get_inference_logger()
+
         logger.info("ISA Model Client initialized")

-    async def
+    async def _get_http_session(self):
+        """Get or create HTTP session for remote API calls"""
+        if self._http_session is None:
+            import aiohttp
+            headers = {}
+
+            # Add API key authentication if available
+            if self.api_key:
+                headers["Authorization"] = f"Bearer {self.api_key}"
+                headers["X-API-Key"] = self.api_key
+
+            self._http_session = aiohttp.ClientSession(headers=headers)
+
+        return self._http_session
+
+    async def _make_api_request(self, endpoint: str, data: Dict[str, Any]) -> Dict[str, Any]:
+        """Make HTTP request to remote API endpoint"""
+        if not self.service_endpoint:
+            raise ValueError("Service endpoint not configured for remote API calls")
+
+        session = await self._get_http_session()
+        url = f"{self.service_endpoint.rstrip('/')}/{endpoint.lstrip('/')}"
+
+        try:
+            async with session.post(url, json=data) as response:
+                if response.status == 401:
+                    raise Exception("Authentication required or invalid API key")
+                elif response.status == 403:
+                    raise Exception("Insufficient permissions")
+                elif not response.ok:
+                    error_detail = await response.text()
+                    raise Exception(f"API request failed ({response.status}): {error_detail}")
+
+                return await response.json()
+
+        except Exception as e:
+            logger.error(f"Remote API request failed: {e}")
+            raise
+
+    async def close(self):
+        """Close HTTP session and cleanup resources"""
+        if self._http_session:
+            await self._http_session.close()
+            self._http_session = None
+
+    async def _invoke_remote_api(
         self,
-        input_data: Union[str, bytes, Path, Dict[str, Any]],
-        task: str,
+        input_data: Union[str, bytes, Path, Dict[str, Any], List[Any]],
+        task: str,
         service_type: str,
-
-
+        model: Optional[str] = None,
+        provider: Optional[str] = None,
+        stream: Optional[bool] = None,
         **kwargs
-    ):
-        """
-        Streaming invoke method that yields tokens in real-time
-
-        Args:
-            input_data: Input data (text for LLM streaming)
-            task: Task to perform
-            service_type: Type of service (only "text" supports streaming)
-            model_hint: Optional model preference
-            provider_hint: Optional provider preference
-            **kwargs: Additional parameters
-
-        Yields:
-            Individual tokens as they arrive from the model
-
-        Example:
-            async for token in client.stream("Hello world", "chat", "text"):
-                print(token, end="", flush=True)
-        """
-        if service_type != "text":
-            raise ValueError("Streaming is only supported for text/LLM services")
-
+    ) -> Dict[str, Any]:
+        """Invoke remote API endpoint"""
         try:
-
-
-
+            # Prepare request data for unified API
+            request_data = {
+                "task": task,
+                "service_type": service_type,
+                **kwargs
+            }
+
+            # Add model and provider if specified
+            if model:
+                request_data["model"] = model
+            if provider:
+                request_data["provider"] = provider
+            # For remote API, disable streaming to get JSON response
+            request_data["stream"] = False
+
+            # Handle different input data types
+            if isinstance(input_data, (str, Path)):
+                request_data["input_data"] = str(input_data)
+            elif isinstance(input_data, (dict, list)):
+                request_data["input_data"] = input_data
             else:
-
-
+                # For binary data, convert to base64
+                import base64
+                if isinstance(input_data, bytes):
+                    request_data["input_data"] = base64.b64encode(input_data).decode()
+                    request_data["data_type"] = "base64"
+                else:
+                    request_data["input_data"] = str(input_data)
+
+            # Make API request
+            response = await self._make_api_request("api/v1/invoke", request_data)
+
+            return response
+
         except Exception as e:
-            logger.error(f"
-
-
+            logger.error(f"Remote API invocation failed: {e}")
+            return {
+                "success": False,
+                "error": str(e),
+                "metadata": {
+                    "task": task,
+                    "service_type": service_type,
+                    "endpoint": "remote"
+                }
+            }
+
     async def invoke(
         self,
-        input_data: Union[str, bytes, Path, Dict[str, Any]],
+        input_data: Union[str, bytes, Path, Dict[str, Any], List[Any]],
         task: str,
         service_type: str,
-
-
-        stream: bool =
-
+        model: Optional[str] = None,
+        provider: Optional[str] = None,
+        stream: Optional[bool] = None,
+        show_reasoning: Optional[bool] = False,
+        output_format: Optional[str] = None,
+        json_schema: Optional[Dict] = None,
+        repair_attempts: Optional[int] = 3,
         **kwargs
-    ) ->
+    ) -> Dict[str, Any]:
         """
         Unified invoke method with intelligent model selection

         Args:
-            input_data: Input data (image path,
-            task: Task to perform (analyze_image, generate_speech, transcribe, etc.)
-            service_type: Type of service (vision, audio,
-
-
-            stream: Enable streaming for text
-
-            **kwargs: Additional task-specific parameters
+            input_data: Input data (str, LangChain messages, image path, audio, etc.)
+            task: Task to perform (chat, analyze_image, generate_speech, transcribe, etc.)
+            service_type: Type of service (text, vision, audio, image, embedding)
+            model: Model name (if None, uses intelligent selection)
+            provider: Provider name (if None, uses intelligent selection)
+            stream: Enable streaming for text tasks (default True for chat/generate tasks, supports tools)
+            show_reasoning: Show reasoning process for O4 models (uses Responses API)
+            **kwargs: Additional task-specific parameters (including tools for LangChain)

         Returns:
-
-
+            Unified response dictionary with result and metadata
+            For streaming: result["stream"] contains async generator
+            For non-streaming: result["result"] contains the response

         Examples:
-            #
-            await client.invoke("
-
-
-
-
-
-            await client.invoke("audio.mp3", "transcribe", "audio")
-
-            # Text tasks
-            await client.invoke("Translate this text", "translate", "text")
-            await client.invoke("What is AI?", "chat", "text")
+            # Text tasks with streaming (default for chat)
+            result = await client.invoke("Write a story", "chat", "text")
+            if "stream" in result:
+                async for chunk in result["stream"]:
+                    print(chunk, end="", flush=True)
+            else:
+                print(result["result"])

-            #
-
-
+            # Text tasks with tools (also supports streaming)
+            result = await client.invoke("What's the weather?", "chat", "text", tools=[get_weather])
+            if "stream" in result:
+                async for chunk in result["stream"]:
+                    print(chunk, end="", flush=True)
+            else:
+                print(result["result"])

-            #
-            await client.invoke("
+            # Vision tasks (always non-streaming)
+            result = await client.invoke("image.jpg", "analyze", "vision")
+            print(result["result"])

-            #
-
-
+            # Audio tasks
+            result = await client.invoke("Hello world", "generate_speech", "audio")
+            print(result["result"])

             # Image generation
-            await client.invoke("A beautiful sunset", "generate_image", "image")
+            result = await client.invoke("A beautiful sunset", "generate_image", "image")
+            print(result["result"])

             # Embedding
-            await client.invoke("Text to embed", "create_embedding", "embedding")
+            result = await client.invoke("Text to embed", "create_embedding", "embedding")
+            print(result["result"])
         """
         try:
-            #
-            if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            # If using remote service endpoint, make API call
+            if self.service_endpoint:
+                return await self._invoke_remote_api(
+                    input_data=input_data,
+                    task=task,
+                    service_type=service_type,
+                    model=model,
+                    provider=provider,
+                    stream=stream,
+                    **kwargs
+                )
+
+            # Set default streaming for text tasks
+            if stream is None and service_type == "text":
+                if task in ["chat", "generate"]:
+                    stream = True  # Enable streaming for chat and generate tasks
                 else:
-
-                    input_data=input_data,
-                    task=task,
-                    service_type=service_type,
-                    model_hint=model_hint,
-                    provider_hint=provider_hint,
-                    tools=tools,
-                    **kwargs
-                )
+                    stream = False  # Disable for other text tasks

-            #
-            if
-                return await self.
+            # If streaming is enabled for text tasks, return streaming response
+            if stream and service_type == "text":
+                return await self._invoke_service_streaming(
                    input_data=input_data,
                    task=task,
                    service_type=service_type,
-                    model_hint=
-                    provider_hint=
-
+                    model_hint=model,
+                    provider_hint=provider,
+                    show_reasoning=show_reasoning,  # Explicitly pass show_reasoning
+                    output_format=output_format,
+                    json_schema=json_schema,
+                    repair_attempts=repair_attempts,
                    **kwargs
                )
             else:
-
+                # Use regular non-streaming service
+                return await self._invoke_service(
                    input_data=input_data,
                    task=task,
                    service_type=service_type,
-                    model_hint=
-                    provider_hint=
-
+                    model_hint=model,
+                    provider_hint=provider,
+                    stream=False,  # Force non-streaming
+                    output_format=output_format,
+                    json_schema=json_schema,
+                    repair_attempts=repair_attempts,
                    **kwargs
                )

         except Exception as e:
-
-
-            "
-            "
-            "
-
-
-
-
-
+            return self._handle_error(e, {
+                "operation": "invoke",
+                "task": task,
+                "service_type": service_type,
+                "input_type": type(input_data).__name__
+            })
+
+    async def invoke_stream(
+        self,
+        input_data: Union[str, bytes, Path, Dict[str, Any], List[Any]],
+        task: str,
+        service_type: str,
+        model: Optional[str] = None,
+        provider: Optional[str] = None,
+        return_metadata: bool = False,
+        **kwargs
+    ):
+        """
+        Unified streaming invoke method - returns async generator for real-time token streaming
+
+        Args:
+            input_data: Input data (str, LangChain messages, image path, audio, etc.)
+            task: Task to perform (chat, analyze_image, generate_speech, transcribe, etc.)
+            service_type: Type of service (text, vision, audio, image, embedding)
+            model: Model name (if None, uses intelligent selection)
+            provider: Provider name (if None, uses intelligent selection)
+            return_metadata: If True, yields ('metadata', metadata_dict) as final item
+            **kwargs: Additional task-specific parameters (including tools for LangChain)
+
+        Returns:
+            For text services: AsyncGenerator[Union[str, Tuple[str, Dict]], None] - yields tokens as they arrive
+            - Normal items: token strings
+            - Final item (if return_metadata=True): ('metadata', metadata_dict) with billing info
+            For other services: Raises ValueError (streaming not supported)
+
+        Examples:
+            # Simple streaming
+            async for token in client.invoke_stream("Hello!", "chat", "text"):
+                print(token, end='', flush=True)
+
+            # Streaming with metadata
+            async for item in client.invoke_stream("Hello!", "chat", "text", return_metadata=True):
+                if isinstance(item, tuple) and item[0] == 'metadata':
+                    print(f"\nBilling: {item[1]['billing']}")
+                else:
+                    print(item, end='', flush=True)
+        """
+        try:
+            # Only text services support streaming
+            if service_type != "text":
+                raise ValueError(f"Streaming not supported for service type: {service_type}")
+
+            # Tools are supported with streaming
+
+            # Step 1: Select best model for this task
+            selected_model = await self._select_model(
+                input_data=input_data,
+                task=task,
+                service_type=service_type,
+                model_hint=model,
+                provider_hint=provider
+            )
+
+            # Step 2: Get appropriate service
+            service, _ = await self._get_service(
+                service_type=service_type,
+                model_name=selected_model["model_id"],
+                provider=selected_model["provider"],
+                task=task,
+                use_cache=False  # Don't cache for streaming to avoid state issues
+            )
+
+            # Step 3: Ensure service supports streaming
+            if not hasattr(service, 'astream'):
+                raise ValueError(f"Service {selected_model['provider']}/{selected_model['model_id']} does not support streaming")
+
+            # Step 4: Enable streaming on the service
+            if hasattr(service, 'streaming'):
+                service.streaming = True
+
+            # Step 5: Stream tokens and collect for billing
+            content_chunks = []
+            async for token in service.astream(input_data):
+                content_chunks.append(token)
+                # Only yield string tokens for streaming (filter out dict/objects)
+                if isinstance(token, str):
+                    yield token
+
+            # Step 6: After streaming is complete, calculate billing info and optionally return metadata
+            try:
+                await asyncio.sleep(0.01)  # Small delay to ensure billing tracking completes
+
+                # Get billing info (similar to _invoke_service)
+                billing_info = self._get_billing_info(service, selected_model["model_id"])
+
+                # Log billing info for tracking
+                logger.info(f"Streaming completed - Model: {selected_model['model_id']}, "
+                            f"Tokens: {billing_info.get('total_tokens', 'N/A')}, "
+                            f"Cost: ${billing_info.get('cost_usd', 0):.4f}")
+
+                # Return metadata if requested
+                if return_metadata:
+                    metadata = {
+                        "model_used": selected_model["model_id"],
+                        "provider": selected_model["provider"],
+                        "task": task,
+                        "service_type": service_type,
+                        "selection_reason": selected_model.get("reason", "Default selection"),
+                        "billing": billing_info,
+                        "streaming": True,
+                        "tokens_streamed": len(content_chunks),
+                        "content_length": len("".join(str(chunk) if isinstance(chunk, str) else "" for chunk in content_chunks))
+                    }
+                    yield ('metadata', metadata)
+
+            except Exception as billing_error:
+                logger.warning(f"Failed to track billing for streaming: {billing_error}")
+                if return_metadata:
+                    # Return fallback metadata even if billing fails
+                    fallback_metadata = {
+                        "model_used": selected_model["model_id"],
+                        "provider": selected_model["provider"],
+                        "task": task,
+                        "service_type": service_type,
+                        "selection_reason": selected_model.get("reason", "Default selection"),
+                        "billing": {
+                            "cost_usd": 0.0,
+                            "error": str(billing_error),
+                            "currency": "USD"
+                        },
+                        "streaming": True,
+                        "tokens_streamed": len(content_chunks),
+                        "content_length": len("".join(str(chunk) if isinstance(chunk, str) else "" for chunk in content_chunks))
+                    }
+                    yield ('metadata', fallback_metadata)
+
+        except Exception as e:
+            logger.error(f"Streaming invoke failed: {e}")
+            raise
+
+    def _is_rate_limit_error(self, error: Exception) -> bool:
+        """Check if an error is due to rate limiting"""
+        error_str = str(error).lower()
+
+        # Check for common rate limit indicators
+        rate_limit_indicators = [
+            'rate limit',
+            'rate_limit',
+            'ratelimit',
+            'too many requests',
+            'quota exceeded',
+            'limit exceeded',
+            'throttled',
+            '429'
+        ]
+
+        return any(indicator in error_str for indicator in rate_limit_indicators)
+
+    async def _invoke_with_fallback(
+        self,
+        service_type: str,
+        task: str,
+        input_data: Any,
+        selected_model: Dict[str, Any],
+        **kwargs
+    ) -> Any:
+        """Invoke service with automatic fallback on rate limit"""
+        try:
+            # First attempt with selected model
+            return await self._invoke_service_direct(service_type, task, input_data, selected_model, **kwargs)
+        except Exception as e:
+            # Check if this is a rate limit error
+            if self._is_rate_limit_error(e):
+                logger.warning(f"Rate limit detected for {selected_model['provider']}: {e}")
+
+                # Try to get fallback model using intelligent model selector
+                if INTELLIGENT_SELECTOR_AVAILABLE and self.model_selector:
+                    try:
+                        fallback_selection = self.model_selector.get_rate_limit_fallback(
+                            service_type,
+                            selected_model['provider']
+                        )
+
+                        if fallback_selection.get('success') and fallback_selection.get('is_fallback'):
+                            fallback_model = fallback_selection['selected_model']
+                            logger.info(f"Switching to fallback: {fallback_model['provider']}/{fallback_model['model_id']}")
+
+                            # Retry with fallback model
+                            return await self._invoke_service_direct(service_type, task, input_data, fallback_model, **kwargs)
+                    except Exception as fallback_error:
+                        logger.error(f"Fallback also failed: {fallback_error}")
+                        raise e  # Raise original rate limit error
+
+            # Re-raise the original error if not rate limit or fallback failed
+            raise

+    async def _invoke_service_direct(
+        self,
+        service_type: str,
+        task: str,
+        input_data: Any,
+        model_config: Dict[str, Any],
+        **kwargs
+    ) -> Any:
+        """Direct service invocation without fallback logic"""
+        # Get appropriate service
+        factory = AIFactory.get_instance()
+
+        # Create service with the specified model
+        if service_type == "text":
+            service = factory.get_llm(model_config["model_id"], model_config["provider"])
+        elif service_type == "vision":
+            service = factory.get_vision(model_config["model_id"], model_config["provider"])
+        elif service_type == "audio":
+            service = factory.get_audio(model_config["model_id"], model_config["provider"])
+        elif service_type == "image":
+            service = factory.get_image(model_config["model_id"], model_config["provider"])
+        elif service_type == "embedding":
+            service = factory.get_embed(model_config["model_id"], model_config["provider"])
+        else:
+            raise ValueError(f"Unsupported service type: {service_type}")
+
+        # Invoke the service
+        if service_type == "text":
+            show_reasoning = kwargs.pop('show_reasoning', False)
+
+            # Check if service supports show_reasoning parameter (mainly OpenAI services)
+            if model_config["provider"] == "openai":
+                result = await service.invoke(
+                    input_data=input_data,
+                    task=task,
+                    show_reasoning=show_reasoning,
+                    **kwargs
+                )
+            else:
+                # For other providers like yyds, don't pass show_reasoning
+                result = await service.invoke(
+                    input_data=input_data,
+                    task=task,
+                    **kwargs
+                )
+            return result
+        else:
+            return await service.invoke(input_data=input_data, task=task, **kwargs)
+
     async def _select_model(
         self,
         input_data: Any,
@@ -268,8 +703,26 @@ class ISAModelClient:
                 "reason": "User specified"
             }

+        # If model_hint provided but no provider_hint, handle special cases
+        if model_hint:
+            # Special handling for hybrid service
+            if model_hint == "hybrid":
+                return {
+                    "model_id": model_hint,
+                    "provider": "hybrid",
+                    "reason": "Hybrid service requested"
+                }
+            # If only model_hint provided, use default provider for that service type
+            elif provider_hint is None:
+                default_provider = self._get_default_provider(service_type)
+                return {
+                    "model_id": model_hint,
+                    "provider": default_provider,
+                    "reason": "Model specified with default provider"
+                }
+
         # Use intelligent model selector if available
-        if INTELLIGENT_SELECTOR_AVAILABLE:
+        if INTELLIGENT_SELECTOR_AVAILABLE and get_model_selector:
             try:
                 # Initialize model selector if not already done
                 if self.model_selector is None:
@@ -304,6 +757,17 @@ class ISAModelClient:
         # Fallback to default model selection
         return self._get_default_model(service_type, task, provider_hint)

+    def _get_default_provider(self, service_type: str) -> str:
+        """Get default provider for service type"""
+        defaults = {
+            "vision": "openai",
+            "audio": "openai",
+            "text": "openai",
+            "image": "replicate",
+            "embedding": "openai"
+        }
+        return defaults.get(service_type, "openai")
+
     def _get_default_model(
         self,
         service_type: str,
@@ -314,16 +778,17 @@ class ISAModelClient:
 
         defaults = {
             "vision": {
-                "model_id": "gpt-
+                "model_id": "gpt-4.1-nano",
                 "provider": "openai"
             },
             "audio": {
                 "tts": {"model_id": "tts-1", "provider": "openai"},
                 "stt": {"model_id": "whisper-1", "provider": "openai"},
+                "realtime": {"model_id": "gpt-4o-realtime-preview-2024-10-01", "provider": "openai"},
                 "default": {"model_id": "whisper-1", "provider": "openai"}
             },
             "text": {
-                "model_id": "gpt-4.1-
+                "model_id": "gpt-4.1-nano",
                 "provider": "openai"
             },
             "image": {
@@ -331,19 +796,33 @@ class ISAModelClient:
                 "provider": "replicate"
             },
             "embedding": {
-                "model_id": "text-embedding-3-small",
-                "provider": "
+                "embed": {"model_id": "text-embedding-3-small", "provider": "openai"},
+                "rerank": {"model_id": "isa-jina-reranker-v2-service", "provider": "isa"},
+                "default": {"model_id": "text-embedding-3-small", "provider": "openai"}
             }
         }

         # Handle audio service type with task-specific models
         if service_type == "audio":
-
+            # Realtime audio tasks
+            if any(realtime_task in task for realtime_task in ["realtime", "audio_chat", "text_chat", "create_session", "connect", "send_audio", "send_text", "listen"]):
+                default = defaults["audio"]["realtime"]
+            # Traditional TTS tasks
+            elif "speech" in task or "tts" in task or task in ["synthesize", "text_to_speech", "generate_speech"]:
                 default = defaults["audio"]["tts"]
-
+            # Traditional STT tasks
+            elif "transcribe" in task or "stt" in task or task in ["speech_to_text", "transcription"]:
                 default = defaults["audio"]["stt"]
             else:
                 default = defaults["audio"]["default"]
+        # Handle embedding service type with task-specific models
+        elif service_type == "embedding":
+            if "rerank" in task:
+                default = defaults["embedding"]["rerank"]
+            elif "embed" in task:
+                default = defaults["embedding"]["embed"]
+            else:
+                default = defaults["embedding"]["default"]
         else:
             default = defaults.get(service_type, defaults["vision"])
 
@@ -363,59 +842,80 @@ class ISAModelClient:
         model_name: str,
         provider: str,
         task: str,
-
-    ) -> Any:
-        """Get appropriate service instance"""
+        use_cache: bool = True
+    ) -> tuple[Any, str]:
+        """Get appropriate service instance and return actual model used"""

-        cache_key = f"{service_type}_{provider}_{model_name}"
+        cache_key = f"{service_type}_{provider}_{model_name}_{task}"
+        actual_model_used = model_name  # Track the actual model used

-        # Check cache first
-        if cache_key in self._service_cache:
-
-
-            if tools and service_type == "text":
-                return service.bind_tools(tools)
-            return service
+        # Check cache first (if caching is enabled)
+        if use_cache and cache_key in self._service_cache:
+            cached_service, cached_model = self._service_cache[cache_key]
+            return cached_service, cached_model

         try:
+            # Validate service type
+            self._validate_service_type(service_type)
+
             # Route to appropriate AIFactory method
             if service_type == "vision":
                 service = self.ai_factory.get_vision(model_name, provider)
-
+                actual_model_used = model_name
             elif service_type == "audio":
-
-
-
-
+                # Realtime audio tasks
+                if any(realtime_task in task for realtime_task in ["realtime", "audio_chat", "text_chat", "create_session", "connect", "send_audio", "send_text", "listen"]):
+                    # Use realtime model
+                    realtime_model = "gpt-4o-realtime-preview-2024-10-01" if model_name == "tts-1" or model_name == "whisper-1" else model_name
+                    service = self.ai_factory.get_realtime(realtime_model, provider)
+                    actual_model_used = realtime_model
+                # Traditional TTS tasks
+                elif "speech" in task or "tts" in task or task in ["synthesize", "text_to_speech", "generate_speech"]:
+                    # Use TTS model
+                    tts_model = "tts-1" if model_name == "whisper-1" else model_name
+                    service = self.ai_factory.get_tts(tts_model, provider)
+                    actual_model_used = tts_model
+                # Traditional STT tasks
+                elif "transcribe" in task or "stt" in task or task in ["speech_to_text", "transcription"]:
+                    # Use STT model
+                    stt_model = "whisper-1" if model_name == "tts-1" else model_name
+                    service = self.ai_factory.get_stt(stt_model, provider)
+                    actual_model_used = stt_model
+                # Default to STT for backward compatibility
                 else:
-                    #
-
-
+                    # Use STT model by default
+                    stt_model = "whisper-1" if model_name == "tts-1" else model_name
+                    service = self.ai_factory.get_stt(stt_model, provider)
+                    actual_model_used = stt_model
             elif service_type == "text":
                 service = self.ai_factory.get_llm(model_name, provider)
-
+                actual_model_used = model_name
             elif service_type == "image":
                 service = self.ai_factory.get_img("t2i", model_name, provider)
-
+                actual_model_used = model_name
             elif service_type == "embedding":
                 service = self.ai_factory.get_embed(model_name, provider)
+                actual_model_used = model_name

-
-
-
-
-            self._service_cache[cache_key] = service
-
-            # If tools are needed, bind them to the service
-            if tools and service_type == "text":
-                return service.bind_tools(tools)
-
-            return service
+            # Cache the service and actual model (if caching is enabled)
+            if use_cache:
+                self._service_cache[cache_key] = (service, actual_model_used)
+            return service, actual_model_used

         except Exception as e:
             logger.error(f"Failed to get service {service_type}/{provider}/{model_name}: {e}")
             raise

+    def _validate_service_type(self, service_type: str) -> None:
+        """Validate service type is supported"""
+        if service_type not in self.SUPPORTED_SERVICE_TYPES:
+            raise ValueError(f"Unsupported service type: {service_type}")
+
+    def _map_task(self, task: str, service_type: str) -> str:
+        """Map common task names to unified task names"""
+        task_mapping = self.TASK_MAPPINGS.get(service_type, {})
+        return task_mapping.get(task, task)
+
     async def _execute_task(
         self,
         service: Any,
@@ -427,166 +927,119 @@ class ISAModelClient:
|
|
427
927
|
"""Execute the task using the appropriate service"""
|
428
928
|
|
429
929
|
try:
|
930
|
+
self._validate_service_type(service_type)
|
931
|
+
unified_task = self._map_task(task, service_type)
|
932
|
+
|
430
933
|
if service_type == "vision":
|
431
|
-
return await
|
934
|
+
return await service.invoke(
|
935
|
+
image=input_data,
|
936
|
+
task=unified_task,
|
937
|
+
**kwargs
|
938
|
+
)
|
432
939
|
|
433
940
|
elif service_type == "audio":
|
434
|
-
|
941
|
+
# Realtime audio tasks
|
942
|
+
if any(realtime_task in unified_task for realtime_task in ["realtime", "audio_chat", "text_chat", "create_session", "connect", "send_audio", "send_text", "listen"]):
|
943
|
+
# For realtime text_chat and audio_chat, pass text parameter
|
944
|
+
if unified_task in ["text_chat", "audio_chat"]:
|
945
|
+
if isinstance(input_data, str):
|
946
|
+
kwargs['text'] = input_data
|
947
|
+
elif isinstance(input_data, bytes):
|
948
|
+
kwargs['audio_data'] = input_data
|
949
|
+
return await service.invoke(
|
950
|
+
task=unified_task,
|
951
|
+
**kwargs
|
952
|
+
)
|
953
|
+
# Traditional TTS tasks
|
954
|
+
elif unified_task in ["synthesize", "text_to_speech", "tts", "generate_speech"]:
|
955
|
+
return await service.invoke(
|
956
|
+
text=input_data,
|
957
|
+
task=unified_task,
|
958
|
+
**kwargs
|
959
|
+
)
|
960
|
+
# Traditional STT tasks
|
961
|
+
else:
|
962
|
+
return await service.invoke(
|
963
|
+
audio_input=input_data,
|
964
|
+
task=unified_task,
|
965
|
+
**kwargs
|
966
|
+
)
|
435
967
|
|
436
968
|
elif service_type == "text":
|
437
|
-
|
969
|
+
# Extract show_reasoning from kwargs if present
|
970
|
+
show_reasoning = kwargs.pop('show_reasoning', False)
|
971
|
+
|
972
|
+
# Check if service provider supports show_reasoning
|
973
|
+
# Only OpenAI services support this parameter
|
974
|
+
if hasattr(service, 'provider_name') and service.provider_name == 'openai':
|
975
|
+
result = await service.invoke(
|
976
|
+
input_data=input_data,
|
977
|
+
task=unified_task,
|
978
|
+
show_reasoning=show_reasoning,
|
979
|
+
**kwargs
|
980
|
+
)
|
981
|
+
else:
|
982
|
+
# For other providers like yyds, don't pass show_reasoning
|
983
|
+
result = await service.invoke(
|
984
|
+
input_data=input_data,
|
985
|
+
task=unified_task,
|
986
|
+
**kwargs
|
987
|
+
)
|
988
|
+
|
989
|
+
logger.debug(f"Service result type: {type(result)}")
|
990
|
+
logger.debug(f"Service result: {result}")
|
991
|
+
|
992
|
+
# Check if this is a formatted result from invoke method
|
993
|
+
if isinstance(result, dict) and 'formatted' in result:
|
994
|
+
# This is a formatted result from the new invoke method
|
995
|
+
logger.debug(f"Returning formatted result: {result}")
|
996
|
+
return result
|
997
|
+
elif isinstance(result, dict) and 'message' in result:
|
998
|
+
# This is a traditional message result
|
999
|
+
message = result['message']
|
1000
|
+
logger.debug(f"Extracted message type: {type(message)}")
|
1001
|
+
logger.debug(f"Extracted message length: {len(str(message)) if message else 0}")
|
1002
|
+
|
1003
|
+
# Handle AIMessage objects from LangChain
|
1004
|
+
if hasattr(message, 'content'):
|
1005
|
+
# Check if there are tool_calls
|
1006
|
+
if hasattr(message, 'tool_calls') and message.tool_calls:
|
1007
|
+
logger.debug(f"AIMessage contains tool_calls: {len(message.tool_calls)}")
|
1008
|
+
# Return a dict with both content and tool_calls
|
1009
|
+
return {
|
1010
|
+
"content": message.content if message.content else "",
|
1011
|
+
"tool_calls": message.tool_calls
|
1012
|
+
}
|
1013
|
+
else:
|
1014
|
+
content = message.content
|
1015
|
+
logger.debug(f"Extracted content from AIMessage: {len(content) if content else 0} chars")
|
1016
|
+
return content
|
1017
|
+
else:
|
1018
|
+
# Direct string message
|
1019
|
+
logger.debug(f"Returning direct message: {len(str(message)) if message else 0} chars")
|
1020
|
+
return message
|
1021
|
+
else:
|
1022
|
+
logger.debug(f"Returning result directly: {result}")
|
1023
|
+
return result
|
438
1024
|
|
439
1025
|
elif service_type == "image":
|
440
|
-
return await
|
1026
|
+
return await service.invoke(
|
1027
|
+
prompt=input_data,
|
1028
|
+
task=unified_task,
|
1029
|
+
**kwargs
|
1030
|
+
)
|
441
1031
|
|
442
1032
|
elif service_type == "embedding":
|
443
|
-
return await
|
444
|
-
|
445
|
-
|
446
|
-
|
1033
|
+
return await service.invoke(
|
1034
|
+
input_data=input_data,
|
1035
|
+
task=unified_task,
|
1036
|
+
**kwargs
|
1037
|
+
)
|
447
1038
|
|
448
1039
|
except Exception as e:
|
449
1040
|
logger.error(f"Task execution failed: {e}")
|
450
1041
|
raise
|
451
1042
|
|
-    async def _execute_vision_task(self, service, input_data, task, **kwargs):
-        """Execute vision-related tasks using unified invoke method"""
-
-        # Map common task names to unified task names
-        task_mapping = {
-            "analyze_image": "analyze_image",
-            "detect_ui_elements": "detect_ui",
-            "extract_table": "extract_table",
-            "extract_text": "extract_text",
-            "ocr": "extract_text",
-            "describe": "analyze_image"
-        }
-
-        unified_task = task_mapping.get(task, task)
-
-        # Use unified invoke method with proper parameters
-        return await service.invoke(
-            image=input_data,
-            task=unified_task,
-            **kwargs
-        )
-
-    async def _execute_audio_task(self, service, input_data, task, **kwargs):
-        """Execute audio-related tasks using unified invoke method"""
-
-        # Map common task names to unified task names
-        task_mapping = {
-            "generate_speech": "synthesize",
-            "text_to_speech": "synthesize",
-            "tts": "synthesize",
-            "transcribe": "transcribe",
-            "speech_to_text": "transcribe",
-            "stt": "transcribe",
-            "translate": "translate",
-            "detect_language": "detect_language"
-        }
-
-        unified_task = task_mapping.get(task, task)
-
-        # Use unified invoke method with correct parameter name based on task type
-        if unified_task in ["synthesize", "text_to_speech", "tts"]:
-            # TTS services expect 'text' parameter
-            return await service.invoke(
-                text=input_data,
-                task=unified_task,
-                **kwargs
-            )
-        else:
-            # STT services expect 'audio_input' parameter
-            return await service.invoke(
-                audio_input=input_data,
-                task=unified_task,
-                **kwargs
-            )
-
-    async def _execute_text_task(self, service, input_data, task, **kwargs):
-        """Execute text-related tasks using unified invoke method"""
-
-        # Map common task names to unified task names
-        task_mapping = {
-            "chat": "chat",
-            "generate": "generate",
-            "complete": "complete",
-            "translate": "translate",
-            "summarize": "summarize",
-            "analyze": "analyze",
-            "extract": "extract",
-            "classify": "classify"
-        }
-
-        unified_task = task_mapping.get(task, task)
-
-        # Use unified invoke method
-        result = await service.invoke(
-            input_data=input_data,
-            task=unified_task,
-            **kwargs
-        )
-
-        # Handle the new response format from LLM services
-        # LLM services now return {"message": ..., "success": ..., "metadata": ...}
-        if isinstance(result, dict) and "message" in result:
-            # Extract the message content (convert AIMessage to string)
-            message = result["message"]
-            if hasattr(message, 'content'):
-                # Handle langchain AIMessage objects
-                return message.content
-            elif isinstance(message, str):
-                return message
-            else:
-                # Fallback: convert to string
-                return str(message)
-
-        # Fallback for other service types or legacy format
-        return result
-
-    async def _execute_image_task(self, service, input_data, task, **kwargs):
-        """Execute image generation tasks using unified invoke method"""
-
-        # Map common task names to unified task names
-        task_mapping = {
-            "generate_image": "generate",
-            "generate": "generate",
-            "img2img": "img2img",
-            "image_to_image": "img2img",
-            "generate_batch": "generate_batch"
-        }
-
-        unified_task = task_mapping.get(task, task)
-
-        # Use unified invoke method
-        return await service.invoke(
-            prompt=input_data,
-            task=unified_task,
-            **kwargs
-        )
-
-    async def _execute_embedding_task(self, service, input_data, task, **kwargs):
-        """Execute embedding tasks using unified invoke method"""
-
-        # Map common task names to unified task names
-        task_mapping = {
-            "create_embedding": "embed",
-            "embed": "embed",
-            "embed_batch": "embed_batch",
-            "chunk_and_embed": "chunk_and_embed",
-            "similarity": "similarity",
-            "find_similar": "find_similar"
-        }
-
-        unified_task = task_mapping.get(task, task)
-
-        # Use unified invoke method
-        return await service.invoke(
-            input_data=input_data,
-            task=unified_task,
-            **kwargs
-        )

     def clear_cache(self):
         """Clear service cache"""
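The hunk above folds the per-modality `_execute_*_task` helpers into inline branches of `_execute_task`, each calling the service's unified `invoke()`. For the text branch the return value can now take several shapes. A minimal, hypothetical sketch of normalizing it on the caller side (the helper name is illustrative and not part of the package):

```python
from typing import Any


def normalize_text_result(result: Any) -> str:
    # Hypothetical helper: the text branch above may return a plain string,
    # a dict carrying "content" plus "tool_calls", or a pre-formatted dict.
    if isinstance(result, dict) and result.get("tool_calls"):
        return f"{len(result['tool_calls'])} tool call(s) requested"
    if isinstance(result, dict) and "content" in result:
        return str(result["content"])
    return str(result)


print(normalize_text_result("plain answer"))
print(normalize_text_result({"content": "", "tool_calls": [{"name": "search"}]}))
```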
@@ -602,7 +1055,7 @@ class ISAModelClient:
         Returns:
             List of available models with metadata
         """
-        if INTELLIGENT_SELECTOR_AVAILABLE:
+        if INTELLIGENT_SELECTOR_AVAILABLE and get_model_selector:
             try:
                 if self.model_selector is None:
                     self.model_selector = await get_model_selector(self.config)
@@ -636,7 +1089,7 @@ class ISAModelClient:

         for service_type, provider, model in test_services:
             try:
-                await self._get_service(service_type, model, provider, "test")
+                service, _ = await self._get_service(service_type, model, provider, "test")
                 health_status["services"][f"{service_type}_{provider}"] = "healthy"
             except Exception as e:
                 health_status["services"][f"{service_type}_{provider}"] = f"error: {str(e)}"
@@ -649,17 +1102,35 @@ class ISAModelClient:
                 "error": str(e)
             }

-
+    def _handle_error(self, e: Exception, context: Dict[str, Any]) -> Dict[str, Any]:
+        """Handle errors consistently across methods"""
+        error_msg = f"Failed to {context.get('operation', 'execute')} {context.get('task', '')} on {context.get('service_type', '')}: {e}"
+        logger.error(error_msg)
+        return {
+            "success": False,
+            "error": str(e),
+            "metadata": context
+        }
+
+    async def _invoke_service_streaming(
         self,
-        input_data: Union[str, bytes, Path, Dict[str, Any]],
+        input_data: Union[str, bytes, Path, Dict[str, Any], List[Any]],
         task: str,
         service_type: str,
         model_hint: Optional[str] = None,
         provider_hint: Optional[str] = None,
-
+        output_format: Optional[str] = None,
+        json_schema: Optional[Dict] = None,
+        repair_attempts: Optional[int] = 3,
         **kwargs
     ) -> Dict[str, Any]:
-        """
+        """Service invoke that returns streaming response with async generator"""
+
+        # Generate unique request ID for logging
+        request_id = generate_request_id()
+        start_time = datetime.now(timezone.utc)
+        execution_start_time = time.time()
+
         try:
             # Step 1: Select best model for this task
             selected_model = await self._select_model(
@@ -671,310 +1142,421 @@ class ISAModelClient:
             )

             # Step 2: Get appropriate service
-            service = await self._get_service(
+            service, actual_model_used = await self._get_service(
                 service_type=service_type,
                 model_name=selected_model["model_id"],
                 provider=selected_model["provider"],
                 task=task,
-
+                use_cache=False  # Don't cache for streaming to avoid state issues
             )
+            # Update selected model with actual model used
+            selected_model["model_id"] = actual_model_used

-            # Step 3:
-
-
-
-
-
-
-
+            # Step 3: Handle tools for LLM services (bind tools if provided)
+            tools = kwargs.pop("tools", None)
+            if service_type == "text" and tools:
+                service, _ = await self._get_service(
+                    service_type=service_type,
+                    model_name=selected_model["model_id"],
+                    provider=selected_model["provider"],
+                    task=task,
+                    use_cache=False
+                )
+                service = service.bind_tools(tools)
+
+            # Step 4: Ensure service supports streaming
+            if not hasattr(service, 'astream'):
+                raise ValueError(f"Service {selected_model['provider']}/{selected_model['model_id']} does not support streaming")

-            # Step
+            # Step 5: Enable streaming on the service
+            if hasattr(service, 'streaming'):
+                service.streaming = True
+
+            # Step 6: Create async generator wrapper that yields tokens
+            async def stream_generator():
+                # Pass show_reasoning parameter if available for LLM services
+                if service_type == "text" and hasattr(service, 'astream'):
+                    show_reasoning = kwargs.get('show_reasoning', False)
+                    logger.debug(f"Stream generator: show_reasoning={show_reasoning}")
+                    # Only pass show_reasoning to OpenAI providers
+                    if 'show_reasoning' in kwargs and hasattr(service, 'provider_name') and service.provider_name == 'openai':
+                        async for token in service.astream(input_data, show_reasoning=show_reasoning):
+                            yield token
+                    else:
+                        async for token in service.astream(input_data):
+                            yield token
+                else:
+                    async for token in service.astream(input_data):
+                        yield token
+
+            # Return response with stream generator and metadata
             return {
                 "success": True,
-                "
+                "stream": stream_generator(),
                 "metadata": {
                     "model_used": selected_model["model_id"],
                     "provider": selected_model["provider"],
                     "task": task,
                     "service_type": service_type,
-                    "selection_reason": selected_model.get("reason", "Default selection")
+                    "selection_reason": selected_model.get("reason", "Default selection"),
+                    "streaming": True
                 }
             }
         except Exception as e:
-            logger.error(f"
+            logger.error(f"Streaming service invoke failed: {e}")
             raise
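`_invoke_service_streaming` above hands the caller the async generator under the `"stream"` key instead of a final string. A minimal sketch of draining such a response; the fake generator below is a stand-in for the real service stream and is not part of the package:

```python
import asyncio
from typing import Any, AsyncIterator, Dict


async def _fake_stream() -> AsyncIterator[str]:
    # Stand-in for the generator returned under response["stream"].
    for token in ["Hello", ", ", "world"]:
        yield token


async def drain(response: Dict[str, Any]) -> str:
    # Collect streamed tokens into a single string.
    chunks = []
    async for token in response["stream"]:
        chunks.append(token)
    return "".join(chunks)


fake_response = {"success": True, "stream": _fake_stream(), "metadata": {"streaming": True}}
print(asyncio.run(drain(fake_response)))
```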
-
-    async def
+
+    async def _invoke_service(
         self,
-        input_data: Union[str, bytes, Path, Dict[str, Any]],
+        input_data: Union[str, bytes, Path, Dict[str, Any], List[Any]],
         task: str,
         service_type: str,
         model_hint: Optional[str] = None,
         provider_hint: Optional[str] = None,
+        stream: Optional[bool] = None,
+        output_format: Optional[str] = None,
+        json_schema: Optional[Dict] = None,
+        repair_attempts: Optional[int] = 3,
         **kwargs
     ) -> Dict[str, Any]:
-        """
+        """Direct service invoke - passes LangChain objects and tools directly to services"""

-        #
-
-
-
-
+        # Generate unique request ID for logging
+        request_id = generate_request_id()
+        start_time = datetime.now(timezone.utc)
+        execution_start_time = time.time()
+
+        try:
+            # Step 1: Select best model for this task
+            selected_model = await self._select_model(
+                input_data=input_data,
+                task=task,
                 service_type=service_type,
                 model_hint=model_hint,
-                provider_hint=provider_hint
-                **kwargs
+                provider_hint=provider_hint
             )
-
-
-
-
-
+
+            # Step 1.5: Log inference start
+            self.inference_logger.log_inference_start(
+                request_id=request_id,
+                service_type=service_type,
                 task=task,
+                provider=selected_model["provider"],
+                model_name=selected_model["model_id"],
+                input_data=input_data if self.inference_logger.log_detailed_requests else None,
+                is_streaming=stream or False,
+                custom_metadata={
+                    "selection_reason": selected_model.get("reason", "Default selection"),
+                    "has_tools": "tools" in kwargs
+                }
+            )
+
+            # Step 2: Get appropriate service
+            service, actual_model_used = await self._get_service(
                 service_type=service_type,
-
-
-
+                model_name=selected_model["model_id"],
+                provider=selected_model["provider"],
+                task=task
             )
-
-
-
-
-            "
-
-
-
-
-
-
-
+            # Update selected model with actual model used
+            selected_model["model_id"] = actual_model_used
+
+            # Step 3: Handle tools for LLM services (bind tools if provided)
+            tools = kwargs.pop("tools", None)
+            if service_type == "text" and tools:
+                service, _ = await self._get_service(
+                    service_type=service_type,
+                    model_name=selected_model["model_id"],
+                    provider=selected_model["provider"],
+                    task=task,
+                    use_cache=False
+                )
+                service = service.bind_tools(tools)
+                # Note: streaming is still supported with tools
+
+            # Step 4: Set streaming for text services
+            if service_type == "text" and stream is not None:
+                if hasattr(service, 'streaming'):
+                    service.streaming = stream
+
+            # Step 5: Execute task with unified interface
+            # Pass JSON formatting parameters to the service
+            task_kwargs = kwargs.copy()
+            if service_type == "text":
+                if output_format:
+                    task_kwargs["output_format"] = output_format
+                if json_schema:
+                    task_kwargs["json_schema"] = json_schema
+                if repair_attempts is not None:
+                    task_kwargs["repair_attempts"] = repair_attempts
+
+            # Try to execute with rate limit detection
             try:
-
-
-
-
-
-
-
-                        return await response.json()
-                    else:
-                        error_data = await response.text()
-                        raise Exception(f"API error {response.status}: {error_data}")
-
+                result = await self._execute_task(
+                    service=service,
+                    input_data=input_data,
+                    task=task,
+                    service_type=service_type,
+                    **task_kwargs
+                )
            except Exception as e:
-
-
-
-
-
-        file_path: Path,
-        task: str,
-        service_type: str,
-        model_hint: Optional[str] = None,
-        provider_hint: Optional[str] = None,
-        **kwargs
-    ) -> Dict[str, Any]:
-        """API file upload"""
-
-        if not file_path.exists():
-            raise FileNotFoundError(f"File not found: {file_path}")
-
-        data = aiohttp.FormData()
-        data.add_field('task', task)
-        data.add_field('service_type', service_type)
-
-        if model_hint:
-            data.add_field('model_hint', model_hint)
-        if provider_hint:
-            data.add_field('provider_hint', provider_hint)
-
-        data.add_field('file',
-                       open(file_path, 'rb'),
-                       filename=file_path.name,
-                       content_type='application/octet-stream')
-
-        headers = {k: v for k, v in self.headers.items() if k != "Content-Type"}
-
-        async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=300)) as session:
-            try:
-                async with session.post(
-                    f"{self.api_url}/api/v1/invoke-file",
-                    data=data,
-                    headers=headers
-                ) as response:
+                # Check if this is a rate limit error and we can fallback
+                if self._is_rate_limit_error(e) and service_type == "text":
+                    # Ensure model selector is initialized
+                    if not self.model_selector:
+                        self.model_selector = await get_model_selector(self.config)

-
-
-
-
-
-
-            except Exception as e:
-                logger.error(f"API file upload failed: {e}")
-                raise
-
-    async def _invoke_api_binary(
-        self,
-        data: bytes,
-        task: str,
-        service_type: str,
-        model_hint: Optional[str] = None,
-        provider_hint: Optional[str] = None,
-        **kwargs
-    ) -> Dict[str, Any]:
-        """API binary upload"""
-
-        form_data = aiohttp.FormData()
-        form_data.add_field('task', task)
-        form_data.add_field('service_type', service_type)
-
-        if model_hint:
-            form_data.add_field('model_hint', model_hint)
-        if provider_hint:
-            form_data.add_field('provider_hint', provider_hint)
-
-        form_data.add_field('file',
-                            data,
-                            filename='data.bin',
-                            content_type='application/octet-stream')
-
-        headers = {k: v for k, v in self.headers.items() if k != "Content-Type"}
-
-        async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=300)) as session:
-            try:
-                async with session.post(
-                    f"{self.api_url}/api/v1/invoke-file",
-                    data=form_data,
-                    headers=headers
-                ) as response:
+                    # Get fallback model selection
+                    fallback_selection = self.model_selector.get_rate_limit_fallback(
+                        service_type=service_type,
+                        original_provider=selected_model["provider"]
+                    )

-                    if
-
-
-                        error_data = await response.text()
-                        raise Exception(f"API error {response.status}: {error_data}")
+                    if fallback_selection.get('success'):
+                        fallback_model = fallback_selection.get('selected_model', {})
+                        logger.info(f"Rate limit hit, switching to fallback: {fallback_model}")

-
-
-
-
-
-
-
-        task: str,
-        service_type: str,
-        model_hint: Optional[str] = None,
-        provider_hint: Optional[str] = None,
-        tools: Optional[List[Any]] = None,
-        **kwargs
-    ):
-        """Local streaming using AI Factory"""
-        # Step 1: Select best model for this task
-        selected_model = await self._select_model(
-            input_data=input_data,
-            task=task,
-            service_type=service_type,
-            model_hint=model_hint,
-            provider_hint=provider_hint
-        )
-
-        # Step 2: Get appropriate service
-        service = await self._get_service(
-            service_type=service_type,
-            model_name=selected_model["model_id"],
-            provider=selected_model["provider"],
-            task=task,
-            tools=tools
-        )
-
-        # Step 3: Yield tokens from the stream
-        async for token in service.astream(input_data):
-            yield token
-
-    async def _stream_api(
-        self,
-        input_data: Union[str, bytes, Path, Dict[str, Any]],
-        task: str,
-        service_type: str,
-        model_hint: Optional[str] = None,
-        provider_hint: Optional[str] = None,
-        **kwargs
-    ):
-        """API streaming using Server-Sent Events (SSE)"""
-
-        # Only support text streaming for now
-        if not isinstance(input_data, (str, dict)):
-            raise ValueError("API streaming only supports text input")
-
-        payload = {
-            "input_data": input_data,
-            "task": task,
-            "service_type": service_type,
-            "model_hint": model_hint,
-            "provider_hint": provider_hint,
-            "stream": True,
-            "parameters": kwargs
-        }
-
-        async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=300)) as session:
-            try:
-                async with session.post(
-                    f"{self.api_url}/api/v1/stream",
-                    json=payload,
-                    headers=self.headers
-                ) as response:
-
-                    if response.status == 200:
-                        # Parse SSE stream
-                        async for line in response.content:
-                            if line:
-                                line_str = line.decode().strip()
-                                if line_str.startswith("data: "):
-                                    try:
-                                        # Parse SSE data
-                                        import json
-                                        json_str = line_str[6:]  # Remove "data: " prefix
-                                        data = json.loads(json_str)
-
-                                        if data.get("type") == "token" and "token" in data:
-                                            yield data["token"]
-                                        elif data.get("type") == "completion":
-                                            # End of stream
-                                            break
-                                        elif data.get("type") == "error":
-                                            raise Exception(f"Server error: {data.get('error')}")
-
-                                    except json.JSONDecodeError:
-                                        # Skip malformed lines
-                                        continue
-                    else:
-                        error_data = await response.text()
-                        raise Exception(f"API streaming error {response.status}: {error_data}")
+                        # Get fallback service
+                        fallback_service, fallback_model_used = await self._get_service(
+                            service_type=service_type,
+                            model_name=fallback_model["model_id"],
+                            provider=fallback_model["provider"],
+                            task=task
+                        )

-
-
-
+                        # Update selected model for metadata
+                        selected_model = fallback_model
+                        selected_model["model_id"] = fallback_model_used
+                        selected_model["reason"] = "Rate limit fallback"
+
+                        # Retry with fallback service
+                        result = await self._execute_task(
+                            service=fallback_service,
+                            input_data=input_data,
+                            task=task,
+                            service_type=service_type,
+                            **task_kwargs
+                        )
+                    else:
+                        # No fallback available, re-raise original error
+                        raise
+                else:
+                    # Not a rate limit error or no fallback, re-raise
+                    raise
+
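The fallback path above depends on `self._is_rate_limit_error(e)`, which is defined elsewhere in `client.py` and is not shown in this hunk. Purely as an assumption, a heuristic of that kind might look like the following sketch (the real helper may differ):

```python
def is_rate_limit_error(error: Exception) -> bool:
    # Illustrative heuristic only: treat HTTP 429 / quota wording as a rate-limit error.
    text = str(error).lower()
    return "429" in text or "rate limit" in text or "quota" in text


print(is_rate_limit_error(Exception("Error code: 429 - rate limit exceeded")))  # True
```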
+            # Step 6: Wait for billing tracking to complete, then get billing information
+            await asyncio.sleep(0.01)  # Small delay to ensure billing tracking completes
+            billing_info = self._get_billing_info(service, selected_model["model_id"])
+
+            # Step 6.5: Calculate execution time and log completion
+            execution_time_ms = int((time.time() - execution_start_time) * 1000)
+
+            # Log inference completion
+            self.inference_logger.log_inference_complete(
+                request_id=request_id,
+                status="completed",
+                execution_time_ms=execution_time_ms,
+                input_tokens=billing_info.get("input_tokens"),
+                output_tokens=billing_info.get("output_tokens"),
+                estimated_cost_usd=billing_info.get("cost_usd"),
+                output_data=result if self.inference_logger.log_detailed_requests else None,
+                custom_metadata={
+                    "billing_operation": billing_info.get("operation"),
+                    "timestamp": billing_info.get("timestamp")
+                }
+            )
+
+            # Log detailed token usage if available
+            if billing_info.get("input_tokens") and billing_info.get("output_tokens"):
+                self.inference_logger.log_token_usage(
+                    request_id=request_id,
+                    provider=selected_model["provider"],
+                    model_name=selected_model["model_id"],
+                    prompt_tokens=billing_info.get("input_tokens"),
+                    completion_tokens=billing_info.get("output_tokens"),
+                    prompt_cost_usd=billing_info.get("cost_usd", 0) * 0.6 if billing_info.get("cost_usd") else None,  # Rough estimate
+                    completion_cost_usd=billing_info.get("cost_usd", 0) * 0.4 if billing_info.get("cost_usd") else None
+                )
+
+            # Handle formatting - check if result is already formatted
+            formatted_result = result
+            if service_type == "text" and output_format:
+                # Check if result is already formatted by the service
+                if isinstance(result, dict) and result.get("formatted"):
+                    # Result is already formatted by the service
+                    formatted_result = result.get("result", result)
+                    billing_info["formatting"] = {
+                        "output_format": output_format,
+                        "format_success": True,
+                        "format_method": "service_level",
+                        "format_errors": result.get("format_errors", []),
+                        "repaired": False,
+                        "pre_formatted": True
+                    }
+                else:
+                    # Apply formatting at client level (fallback)
+                    try:
+                        service, _ = await self._get_service(
+                            service_type=service_type,
+                            model_name=selected_model["model_id"],
+                            provider=selected_model["provider"],
+                            task=task
+                        )
+                        if hasattr(service, 'format_structured_output'):
+                            formatting_result = service.format_structured_output(
+                                response=result,
+                                output_format=output_format,
+                                schema=json_schema,
+                                repair_attempts=repair_attempts or 3
+                            )
+                            # Update result and add formatting metadata
+                            if formatting_result.get("success") and formatting_result.get("data") is not None:
+                                # Extract the actual formatted data
+                                formatted_data = formatting_result["data"]
+
+                                # For JSON output, ensure we return clean data
+                                if output_format == "json" and isinstance(formatted_data, dict):
+                                    formatted_result = formatted_data
+                                else:
+                                    formatted_result = formatted_data
+                            else:
+                                # Keep original result if formatting failed
+                                formatted_result = result
+
+                            # Add formatting info to metadata
+                            billing_info["formatting"] = {
+                                "output_format": output_format,
+                                "format_success": formatting_result.get("success", False),
+                                "format_method": formatting_result.get("method"),
+                                "format_errors": formatting_result.get("errors", []),
+                                "repaired": formatting_result.get("repaired", False),
+                                "pre_formatted": False
+                            }
+
+                    except Exception as format_error:
+                        logger.warning(f"Failed to apply output formatting: {format_error}")
+                        # Continue with unformatted result
+                        formatted_result = result
+                        billing_info["formatting"] = {
+                            "output_format": output_format,
+                            "format_success": False,
+                            "format_error": str(format_error)
+                        }
+
+            # Return unified response
+            response = {
+                "success": True,
+                "result": formatted_result,
+                "metadata": {
+                    "request_id": request_id,  # Include request ID for tracking
+                    "model_used": selected_model["model_id"],
+                    "provider": selected_model["provider"],
+                    "task": task,
+                    "service_type": service_type,
+                    "selection_reason": selected_model.get("reason", "Default selection"),
+                    "execution_time_ms": execution_time_ms,
+                    "billing": billing_info
+                }
+            }
+
+            return response
+        except Exception as e:
+            # Calculate execution time even for errors
+            execution_time_ms = int((time.time() - execution_start_time) * 1000)
+
+            # Log inference error
+            error_type = type(e).__name__
+            error_message = str(e)
+
+            self.inference_logger.log_inference_complete(
+                request_id=request_id,
+                status="failed",
+                execution_time_ms=execution_time_ms,
+                error_message=error_message,
+                error_code=error_type,
+                custom_metadata={
+                    "error_location": "client._invoke_service"
+                }
+            )
+
+            # Also log to the error table
+            self.inference_logger.log_error(
+                request_id=request_id,
+                error_type=error_type,
+                error_message=error_message,
+                provider=model_hint or "unknown",
+                model_name=provider_hint or "unknown"
+            )
+
+            logger.error(f"Service invoke failed: {e}")
+            raise
+
+    def _get_billing_info(self, service: Any, model_id: str) -> Dict[str, Any]:
+        """Extract billing information from service after task execution"""
+        try:
+            # Check if service has model_manager with billing_tracker
+            if hasattr(service, 'model_manager') and hasattr(service.model_manager, 'billing_tracker'):
+                billing_tracker = service.model_manager.billing_tracker
+
+                # Get the latest usage record for this model
+                model_records = [
+                    record for record in billing_tracker.usage_records
+                    if record.model_id == model_id
+                ]
+
+                if model_records:
+                    # Get the most recent record
+                    latest_record = max(model_records, key=lambda r: r.timestamp)
+
+                    return {
+                        "cost_usd": latest_record.cost_usd,
+                        "input_tokens": latest_record.input_tokens,
+                        "output_tokens": latest_record.output_tokens,
+                        "total_tokens": latest_record.total_tokens,
+                        "operation": latest_record.operation,
+                        "timestamp": latest_record.timestamp,
+                        "currency": "USD"
+                    }
+
+            # Fallback: no billing info available
+            return {
+                "cost_usd": 0.0,
+                "input_tokens": None,
+                "output_tokens": None,
+                "total_tokens": None,
+                "operation": None,
+                "timestamp": None,
+                "currency": "USD",
+                "note": "Billing information not available"
+            }
+
+        except Exception as e:
+            logger.warning(f"Failed to get billing info: {e}")
+            return {
+                "cost_usd": 0.0,
+                "error": str(e),
+                "currency": "USD"
+            }
+
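`_invoke_service` attaches the `_get_billing_info` dict under `metadata["billing"]` in the response it builds above. A small sketch of reading that block; the example response uses the fallback shape returned when no usage record is found:

```python
from typing import Any, Dict


def summarize_billing(response: Dict[str, Any]) -> str:
    # Pull the billing block attached by _invoke_service.
    billing = response.get("metadata", {}).get("billing", {})
    cost = billing.get("cost_usd") or 0.0
    return f"cost={cost:.6f} USD, total_tokens={billing.get('total_tokens')}"


example = {
    "success": True,
    "result": "ok",
    "metadata": {"billing": {"cost_usd": 0.0, "total_tokens": None, "currency": "USD"}},
}
print(summarize_billing(example))
```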


 # Convenience function for quick access
 def create_client(
     config: Optional[Dict[str, Any]] = None,
-
-    api_url: Optional[str] = None,
+    service_endpoint: Optional[str] = None,
     api_key: Optional[str] = None
 ) -> ISAModelClient:
     """Create ISA Model Client instance

     Args:
         config: Optional configuration
-
-
-        api_key: API key for authentication (optional)
+        service_endpoint: Optional service endpoint URL (if None, uses local AI Factory)
+        api_key: Optional API key for authentication (can also be set via ISA_API_KEY env var)

     Returns:
         ISAModelClient instance
     """
-    return ISAModelClient(config=config,
+    return ISAModelClient(config=config, service_endpoint=service_endpoint, api_key=api_key)


 # Export for easy import
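`create_client` now takes `service_endpoint` in place of `api_url`. A hypothetical usage sketch: the `client.invoke(...)` call below assumes a public wrapper around `_invoke_service` with the same parameter names, which is not shown in this hunk.

```python
import asyncio

from isa_model.client import create_client


async def main() -> None:
    # With service_endpoint=None the client runs against the local AI Factory.
    client = create_client(config=None, service_endpoint=None, api_key=None)

    # Assumed public entry point mirroring _invoke_service's parameters (hypothetical).
    response = await client.invoke(
        input_data="Summarize the release notes in one sentence.",
        task="chat",
        service_type="text",
    )
    print(response["result"])


asyncio.run(main())
```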