isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +466 -43
- isa_model/core/cache/redis_cache.py +12 -3
- isa_model/core/config/config_manager.py +230 -3
- isa_model/core/config.py +90 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +21 -1
- isa_model/core/database/supabase_client.py +154 -19
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +27 -18
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_manager.py +35 -80
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +174 -18
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/services/intelligent_model_selector.py +399 -21
- isa_model/core/types.py +1 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -370
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +137 -10
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/openai_stt_service.py +22 -6
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +335 -24
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/ollama_llm_service.py +9 -2
- isa_model/inference/services/llm/openai_llm_service.py +33 -16
- isa_model/inference/services/llm/yyds_llm_service.py +8 -2
- isa_model/inference/services/vision/__init__.py +22 -1
- isa_model/inference/services/vision/helpers/image_utils.py +8 -5
- isa_model/inference/services/vision/isa_vision_service.py +65 -4
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +172 -22
- isa_model/serving/api/middleware/auth.py +8 -2
- isa_model/serving/api/middleware/security.py +23 -33
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +4 -1
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +138 -2
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +680 -18
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +68 -54
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
- isa_model-0.4.4.dist-info/RECORD +180 -0
- isa_model/core/security/secrets.py +0 -358
- isa_model/core/storage/hf_storage.py +0 -419
- isa_model/core/storage/minio_storage.py +0 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks/__init__.py +0 -27
- isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
- isa_model/eval/benchmarks.py +0 -701
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -24
- isa_model/eval/evaluators/audio_evaluator.py +0 -727
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/embedding_evaluator.py +0 -742
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/evaluators/vision_evaluator.py +0 -564
- isa_model/eval/example_evaluation.py +0 -395
- isa_model/eval/factory.py +0 -798
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/isa_benchmarks.py +0 -700
- isa_model/eval/isa_integration.py +0 -582
- isa_model/eval/metrics.py +0 -951
- isa_model/eval/tests/unit/test_basic.py +0 -396
- isa_model/serving/api/routes/evaluations.py +0 -579
- isa_model/training/__init__.py +0 -168
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -26
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/examples/intelligent_training_example.py +0 -281
- isa_model/training/factory.py +0 -424
- isa_model/training/intelligent/__init__.py +0 -25
- isa_model/training/intelligent/decision_engine.py +0 -643
- isa_model/training/intelligent/intelligent_factory.py +0 -888
- isa_model/training/intelligent/knowledge_base.py +0 -751
- isa_model/training/intelligent/resource_optimizer.py +0 -839
- isa_model/training/intelligent/task_classifier.py +0 -576
- isa_model/training/storage/__init__.py +0 -24
- isa_model/training/storage/core_integration.py +0 -439
- isa_model/training/storage/training_repository.py +0 -552
- isa_model/training/storage/training_storage.py +0 -628
- isa_model-0.4.0.dist-info/RECORD +0 -182
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
@@ -1,582 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
ISA Model Client Integration for Evaluation Framework.
|
3
|
-
|
4
|
-
Provides interfaces between the evaluation framework and ISA Model services.
|
5
|
-
Supports all ISA services: LLM, Vision, Audio, Embedding, Image Generation.
|
6
|
-
"""
|
7
|
-
|
8
|
-
import asyncio
|
9
|
-
import logging
|
10
|
-
import time
|
11
|
-
from typing import Dict, List, Any, Optional, Union
|
12
|
-
from pathlib import Path
|
13
|
-
import base64
|
14
|
-
from io import BytesIO
|
15
|
-
from PIL import Image
|
16
|
-
|
17
|
-
# The module logger is created before the guarded import so the fallback
# warning goes through it. The module-level ``logging.warning()`` helper
# would log on the root logger and implicitly call ``basicConfig()`` at
# import time, which a library module should not do.
logger = logging.getLogger(__name__)

try:
    from ..client import ISAModelClient
    ISA_CLIENT_AVAILABLE = True
except ImportError:
    # Without the client the interface class falls back to mock responses.
    ISA_CLIENT_AVAILABLE = False
    logger.warning("ISA Model Client not available. Using mock interface.")
|
25
|
-
|
26
|
-
|
27
|
-
class ISAModelInterface:
    """
    Interface adapter for ISA Model services in evaluation framework.

    Provides unified interfaces for:
    - LLM services (OpenAI, Ollama, YYDS)
    - Vision services (OCR, Table, UI, Document analysis)
    - Audio services (STT, TTS, Emotion, Diarization)
    - Embedding services (Text embedding, Reranking)

    When the ISA client could not be imported, ``self.client`` is ``None``
    and every public coroutine returns a canned mock payload instead of
    calling a service. Each coroutine also updates the request, latency and
    error counters reported by ``get_performance_stats``.
    """

    # Vision task_type -> (client task, dedicated service model). Any other
    # task_type goes through the generic "analyze" call with the caller's model.
    _VISION_ROUTES = {
        "ocr": ("extract_text", "isa-surya-ocr-service"),
        "table": ("extract_table", "isa_vision_table"),
        "ui": ("detect_ui", "isa-omniparser-ui-detection"),
    }

    # Audio task_type -> (client task, dedicated service model). "stt" is
    # handled separately because its model depends on the requested name.
    _AUDIO_ROUTES = {
        "emotion": ("detect_emotion", "isa_audio_sota_service"),
        "diarization": ("diarize_speakers", "isa_audio_sota_service"),
    }

    # Leading magic bytes used to pick the MIME type of raw image data.
    _IMAGE_SIGNATURES = (
        (b"\x89PNG\r\n\x1a\n", "image/png"),
        (b"\xff\xd8\xff", "image/jpeg"),
        (b"GIF87a", "image/gif"),
        (b"GIF89a", "image/gif"),
        (b"BM", "image/bmp"),
    )

    def __init__(self, service_config: Optional[Dict[str, Any]] = None):
        """
        Initialize ISA Model interface.

        Args:
            service_config: Configuration for ISA services; stored on
                ``self.config`` (an empty dict when omitted).
        """
        self.config = service_config or {}

        if ISA_CLIENT_AVAILABLE:
            self.client = ISAModelClient()
        else:
            self.client = None
            logger.warning("ISA Model Client not available, using mock client")

        # Performance tracking
        self.request_count = 0
        self.total_latency = 0.0
        self.error_count = 0

    async def llm_completion(self,
                             prompt: str,
                             model_name: str = "gpt-4.1-nano",
                             provider: str = "openai",
                             **kwargs) -> Dict[str, Any]:
        """
        Generate text completion using ISA LLM services.

        Args:
            prompt: Input text prompt
            model_name: Model name (e.g., gpt-4.1-nano, llama3.2:3b-instruct-fp16)
            provider: Provider (openai, ollama, yyds)
            **kwargs: Additional parameters forwarded to the client

        Returns:
            Dict with ``text``, ``model``, ``provider``, ``latency``,
            ``tokens_used`` and ``cost_usd``.

        Raises:
            Exception: Whatever the client raises; the error is counted,
                logged and re-raised unchanged.
        """
        started = time.perf_counter()
        self.request_count += 1

        try:
            if self.client:
                result = await self.client.invoke(
                    input_data=prompt,
                    task="generate",
                    service_type="text",
                    provider=provider,
                    model_name=model_name,
                    **kwargs
                )

                if isinstance(result, dict):
                    text = result.get("result", str(result))
                else:
                    text = str(result)

                # The payload under "result" is not guaranteed to be a str,
                # so coerce it before using it for the size-based estimates.
                billed_text = f"{prompt}{text}"
                completion_result = {
                    "text": text,
                    "model": model_name,
                    "provider": provider,
                    "latency": time.perf_counter() - started,
                    "tokens_used": self._estimate_tokens(billed_text),
                    "cost_usd": self._estimate_cost(billed_text, provider)
                }
            else:
                # Mock response
                completion_result = {
                    "text": f"Mock response for: {prompt[:50]}...",
                    "model": model_name,
                    "provider": provider,
                    "latency": 0.5,
                    "tokens_used": len(prompt.split()) + 10,
                    "cost_usd": 0.001
                }

            self.total_latency += completion_result["latency"]
            return completion_result

        except Exception as e:
            self.error_count += 1
            logger.error("LLM completion error: %s", e)
            raise

    async def vision_analysis(self,
                              image: Union[str, bytes, "Image.Image", Path],
                              prompt: str = "",
                              task_type: str = "ocr",
                              model_name: str = "gpt-4.1-mini",
                              **kwargs) -> Dict[str, Any]:
        """
        Analyze image using ISA Vision services.

        Args:
            image: Image data (path, bytes, PIL Image, or base64)
            prompt: Analysis prompt (only sent for the generic analysis call)
            task_type: Vision task (ocr, table, ui, document, caption)
            model_name: Vision model name; used for the generic analysis
                call. The ocr/table/ui tasks always call their dedicated
                service, while the returned ``model`` field still echoes
                this argument.
            **kwargs: Additional parameters forwarded to the client

        Returns:
            Dict with ``text``, ``task_type``, ``model``, ``latency`` and
            ``cost_usd``.

        Raises:
            Exception: Errors from image preparation or the client are
                counted, logged and re-raised unchanged.
        """
        started = time.perf_counter()
        self.request_count += 1

        try:
            # Convert image to format expected by ISA client
            image_data = self._prepare_image_data(image)

            if self.client:
                route = self._VISION_ROUTES.get(task_type)
                if route is not None:
                    client_task, service_model = route
                    result = await self.client.invoke(
                        input_data=image_data,
                        task=client_task,
                        service_type="vision",
                        model_name=service_model,
                        **kwargs
                    )
                else:
                    # Generic vision analysis
                    result = await self.client.invoke(
                        input_data={"image": image_data, "prompt": prompt},
                        task="analyze",
                        service_type="vision",
                        model_name=model_name,
                        **kwargs
                    )

                if isinstance(result, dict):
                    text = result.get("result", result.get("text", str(result)))
                else:
                    text = str(result)

                vision_result = {
                    "text": text,
                    "task_type": task_type,
                    "model": model_name,
                    "latency": time.perf_counter() - started,
                    "cost_usd": self._estimate_vision_cost(task_type)
                }
            else:
                # Mock response
                vision_result = {
                    "text": f"Mock {task_type} result for image analysis",
                    "task_type": task_type,
                    "model": model_name,
                    "latency": 1.0,
                    "cost_usd": 0.01
                }

            self.total_latency += vision_result["latency"]
            return vision_result

        except Exception as e:
            self.error_count += 1
            logger.error("Vision analysis error: %s", e)
            raise

    async def audio_processing(self,
                               audio: Union[str, bytes, Path],
                               task_type: str = "stt",
                               model_name: str = "whisper-1",
                               **kwargs) -> Dict[str, Any]:
        """
        Process audio using ISA Audio services.

        Args:
            audio: Audio data (path, bytes). Bytes are written to a
                temporary ``.wav`` file for the duration of the call.
            task_type: Audio task (stt, tts, emotion, diarization)
            model_name: Audio model name
            **kwargs: Additional parameters forwarded to the client

        Returns:
            Dict with ``result``, ``task_type``, ``model``, ``latency`` and
            ``cost_usd``. For ``stt`` the ``result`` is the extracted text;
            for other tasks a dict response is passed through whole.

        Raises:
            Exception: Errors from audio preparation or the client are
                counted, logged and re-raised unchanged.
        """
        started = time.perf_counter()
        self.request_count += 1
        temp_path = None

        try:
            # Prepare audio data
            audio_data = self._prepare_audio_data(audio)
            if isinstance(audio, bytes):
                # Only files created here are removed afterwards; paths
                # supplied by the caller are never touched.
                temp_path = audio_data

            if self.client:
                if task_type == "stt":
                    client_task = "transcribe"
                    service_model = "isa_audio_sota_service" if "isa" in model_name else model_name
                elif task_type in self._AUDIO_ROUTES:
                    client_task, service_model = self._AUDIO_ROUTES[task_type]
                else:
                    # Generic audio processing
                    client_task, service_model = task_type, model_name

                result = await self.client.invoke(
                    input_data=audio_data,
                    task=client_task,
                    service_type="audio",
                    model_name=service_model,
                    **kwargs
                )

                # Extract result
                if isinstance(result, dict):
                    if task_type == "stt":
                        text = result.get("result", result.get("text", str(result)))
                    else:
                        text = result
                else:
                    text = str(result)

                audio_result = {
                    "result": text,
                    "task_type": task_type,
                    "model": model_name,
                    "latency": time.perf_counter() - started,
                    "cost_usd": self._estimate_audio_cost(task_type)
                }
            else:
                # Mock response
                audio_result = {
                    "result": f"Mock {task_type} result for audio processing",
                    "task_type": task_type,
                    "model": model_name,
                    "latency": 2.0,
                    "cost_usd": 0.005
                }

            self.total_latency += audio_result["latency"]
            return audio_result

        except Exception as e:
            self.error_count += 1
            logger.error("Audio processing error: %s", e)
            raise

        finally:
            if temp_path is not None:
                try:
                    Path(temp_path).unlink()
                except OSError:
                    # Best-effort cleanup: a leftover temp file must not
                    # mask the real result or error of the call.
                    pass

    async def embedding_generation(self,
                                   text: str,
                                   model_name: str = "text-embedding-3-small",
                                   **kwargs) -> Dict[str, Any]:
        """
        Generate embeddings using ISA Embedding services.

        Args:
            text: Input text
            model_name: Embedding model name
            **kwargs: Additional parameters forwarded to the client

        Returns:
            Dict with ``embedding``, ``model``, ``dimension``, ``latency``
            and ``cost_usd``. In mock mode the embedding is a random
            1536-element list of floats.

        Raises:
            Exception: Client errors are counted, logged and re-raised
                unchanged.
        """
        started = time.perf_counter()
        self.request_count += 1

        try:
            if self.client:
                result = await self.client.invoke(
                    input_data=text,
                    task="embed",
                    service_type="embedding",
                    model_name=model_name,
                    **kwargs
                )

                # Extract embedding vector
                if isinstance(result, dict):
                    embedding = result.get("result", result.get("embedding", []))
                else:
                    embedding = result if isinstance(result, list) else []

                embedding_result = {
                    "embedding": embedding,
                    "model": model_name,
                    "dimension": len(embedding) if embedding else 0,
                    "latency": time.perf_counter() - started,
                    "cost_usd": self._estimate_embedding_cost(text)
                }
            else:
                # Mock embedding (1536 dimensions like OpenAI). Drawn with
                # the standard library so the fallback works without numpy.
                import random
                embedding = [random.gauss(0.0, 1.0) for _ in range(1536)]

                embedding_result = {
                    "embedding": embedding,
                    "model": model_name,
                    "dimension": 1536,
                    "latency": 0.3,
                    "cost_usd": 0.0001
                }

            self.total_latency += embedding_result["latency"]
            return embedding_result

        except Exception as e:
            self.error_count += 1
            logger.error("Embedding generation error: %s", e)
            raise

    async def reranking(self,
                        query: str,
                        documents: List[str],
                        model_name: str = "isa-jina-reranker-v2-service",
                        **kwargs) -> Dict[str, Any]:
        """
        Rerank documents using ISA Reranking services.

        Args:
            query: Search query
            documents: List of documents to rerank
            model_name: Reranking model name
            **kwargs: Additional parameters forwarded to the client

        Returns:
            Dict with ``rankings``, ``model``, ``query``, ``num_documents``,
            ``latency`` and ``cost_usd``. In mock mode the rankings are the
            document indices in random order with random scores.

        Raises:
            Exception: Client errors are counted, logged and re-raised
                unchanged.
        """
        started = time.perf_counter()
        self.request_count += 1

        try:
            if self.client:
                result = await self.client.invoke(
                    input_data={
                        "query": query,
                        "documents": documents
                    },
                    task="rerank",
                    service_type="embedding",
                    model_name=model_name,
                    **kwargs
                )

                # Extract reranked results
                if isinstance(result, dict):
                    reranked = result.get("result", result.get("rankings", []))
                else:
                    reranked = result if isinstance(result, list) else []

                reranking_result = {
                    "rankings": reranked,
                    "model": model_name,
                    "query": query,
                    "num_documents": len(documents),
                    "latency": time.perf_counter() - started,
                    "cost_usd": self._estimate_reranking_cost(len(documents))
                }
            else:
                # Mock reranking (random shuffle)
                import random
                indices = list(range(len(documents)))
                random.shuffle(indices)

                reranking_result = {
                    "rankings": [{"index": i, "score": random.random()} for i in indices],
                    "model": model_name,
                    "query": query,
                    "num_documents": len(documents),
                    "latency": 0.5,
                    "cost_usd": 0.001
                }

            self.total_latency += reranking_result["latency"]
            return reranking_result

        except Exception as e:
            self.error_count += 1
            logger.error("Reranking error: %s", e)
            raise

    @classmethod
    def _sniff_image_mime(cls, data: bytes) -> str:
        """Return the MIME type implied by the leading bytes of *data*.

        Falls back to ``image/jpeg`` when no known signature matches.
        """
        for magic, mime in cls._IMAGE_SIGNATURES:
            if data.startswith(magic):
                return mime
        if data[:4] == b"RIFF" and data[8:12] == b"WEBP":
            return "image/webp"
        return "image/jpeg"

    def _prepare_image_data(self, image: Union[str, bytes, "Image.Image", Path]) -> str:
        """Convert image to a base64 data URL for the ISA client.

        Args:
            image: A ``data:`` URL (returned as-is), a path to an existing
                file, a bare base64 string, raw bytes, a ``Path`` or a PIL
                image (encoded as PNG).

        Returns:
            A ``data:<mime>;base64,...`` string. The MIME type is taken
            from the image bytes; a bare base64 string is not decoded and
            is labelled ``image/jpeg``.

        Raises:
            ValueError: If *image* is of an unsupported type.
            OSError: If a file path cannot be read.
        """
        try:
            if isinstance(image, str):
                if image.startswith("data:"):
                    return image  # Already base64 data URL

                try:
                    is_existing_path = Path(image).exists()
                except (OSError, ValueError):
                    # A long base64 payload is not a usable file name; the
                    # OS may reject it (e.g. name too long) rather than
                    # report that it does not exist.
                    is_existing_path = False

                if not is_existing_path:
                    # Assume base64 string
                    return f"data:image/jpeg;base64,{image}"

                with open(image, "rb") as f:
                    image_bytes = f.read()

            elif isinstance(image, bytes):
                image_bytes = image

            elif isinstance(image, Path):
                with open(image, "rb") as f:
                    image_bytes = f.read()

            elif isinstance(image, Image.Image):
                buffer = BytesIO()
                image.save(buffer, format="PNG")
                image_bytes = buffer.getvalue()

            else:
                raise ValueError(f"Unsupported image type: {type(image)}")

            # Convert to base64 data URL, labelled with the real image type
            mime = self._sniff_image_mime(image_bytes)
            base64_str = base64.b64encode(image_bytes).decode()
            return f"data:{mime};base64,{base64_str}"

        except Exception as e:
            logger.error("Error preparing image data: %s", e)
            raise

    def _prepare_audio_data(self, audio: Union[str, bytes, Path]) -> str:
        """Convert audio to a file path string for the ISA client.

        Args:
            audio: An existing path (``str`` or ``Path``) or raw bytes.

        Returns:
            The path as a string. For bytes input this is a newly created
            temporary ``.wav`` file that is NOT deleted automatically; the
            caller owns it (``audio_processing`` removes it after use).

        Raises:
            ValueError: If *audio* is of an unsupported type.
        """
        try:
            if isinstance(audio, (str, Path)):
                # Return file path for ISA client
                return str(audio)
            elif isinstance(audio, bytes):
                # Save to temporary file
                import tempfile
                with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
                    tmp_file.write(audio)
                    return tmp_file.name
            else:
                raise ValueError(f"Unsupported audio type: {type(audio)}")

        except Exception as e:
            logger.error("Error preparing audio data: %s", e)
            raise

    def _estimate_tokens(self, text: str) -> int:
        """Estimate token count as 1.3 tokens per whitespace-separated word.

        The product is rounded so the result really is an ``int``.
        """
        return round(len(text.split()) * 1.3)  # Rough estimate

    def _estimate_cost(self, text: str, provider: str) -> float:
        """Estimate API cost in USD for *text* with the given provider.

        Unknown providers are charged 0.001 per 1k estimated tokens.
        """
        tokens = self._estimate_tokens(text)

        # Rough cost estimates (per 1k tokens)
        cost_per_1k = {
            "openai": 0.002,  # GPT-4 turbo
            "ollama": 0.0,    # Local model
            "yyds": 0.01      # Claude
        }

        return (tokens / 1000) * cost_per_1k.get(provider, 0.001)

    def _estimate_vision_cost(self, task_type: str) -> float:
        """Estimate vision processing cost in USD (0.01 for unknown tasks)."""
        costs = {
            "ocr": 0.01,
            "table": 0.02,
            "ui": 0.015,
            "document": 0.03,
            "caption": 0.02
        }
        return costs.get(task_type, 0.01)

    def _estimate_audio_cost(self, task_type: str) -> float:
        """Estimate audio processing cost in USD (0.01 for unknown tasks)."""
        costs = {
            "stt": 0.006,  # Whisper pricing
            "tts": 0.015,
            "emotion": 0.01,
            "diarization": 0.02
        }
        return costs.get(task_type, 0.01)

    def _estimate_embedding_cost(self, text: str) -> float:
        """Estimate embedding cost in USD from the estimated token count."""
        tokens = self._estimate_tokens(text)
        return (tokens / 1000) * 0.0001  # text-embedding-3-small pricing

    def _estimate_reranking_cost(self, num_docs: int) -> float:
        """Estimate reranking cost in USD as a flat rate per document."""
        return num_docs * 0.0001  # Rough estimate per document

    def get_performance_stats(self) -> Dict[str, Any]:
        """Get performance statistics.

        Returns:
            Dict with request and error totals, error/success rates and
            average/total latency. All rates and the average are 0 when no
            request has been made yet. Failed requests count towards
            ``total_requests`` but add no latency.
        """
        total = self.request_count
        if total > 0:
            avg_latency = self.total_latency / total
            error_rate = self.error_count / total
            success_rate = 1 - error_rate
        else:
            avg_latency = error_rate = success_rate = 0

        return {
            "total_requests": total,
            "total_errors": self.error_count,
            "error_rate": error_rate,
            "avg_latency_seconds": avg_latency,
            "total_latency_seconds": self.total_latency,
            "success_rate": success_rate
        }
|
562
|
-
|
563
|
-
|
564
|
-
# Convenience functions for creating service interfaces
|
565
|
-
def create_llm_interface(config: Optional[Dict[str, Any]] = None) -> ISAModelInterface:
    """Build an ``ISAModelInterface`` for LLM evaluation.

    Args:
        config: Optional service configuration, forwarded unchanged.

    Returns:
        A freshly constructed ``ISAModelInterface``.
    """
    llm_interface = ISAModelInterface(config)
    return llm_interface
|
568
|
-
|
569
|
-
|
570
|
-
def create_vision_interface(config: Optional[Dict[str, Any]] = None) -> ISAModelInterface:
    """Build an ``ISAModelInterface`` for vision evaluation.

    Args:
        config: Optional service configuration, forwarded unchanged.

    Returns:
        A freshly constructed ``ISAModelInterface``.
    """
    vision_interface = ISAModelInterface(config)
    return vision_interface
|
573
|
-
|
574
|
-
|
575
|
-
def create_audio_interface(config: Optional[Dict[str, Any]] = None) -> ISAModelInterface:
    """Build an ``ISAModelInterface`` for audio evaluation.

    Args:
        config: Optional service configuration, forwarded unchanged.

    Returns:
        A freshly constructed ``ISAModelInterface``.
    """
    audio_interface = ISAModelInterface(config)
    return audio_interface
|
578
|
-
|
579
|
-
|
580
|
-
def create_embedding_interface(config: Optional[Dict[str, Any]] = None) -> ISAModelInterface:
    """Build an ``ISAModelInterface`` for embedding evaluation.

    Args:
        config: Optional service configuration, forwarded unchanged.

    Returns:
        A freshly constructed ``ISAModelInterface``.
    """
    embedding_interface = ISAModelInterface(config)
    return embedding_interface
|