isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +35 -80
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/types.py +1 -0
  26. isa_model/deployment/__init__.py +5 -48
  27. isa_model/deployment/core/__init__.py +2 -31
  28. isa_model/deployment/core/deployment_manager.py +1278 -370
  29. isa_model/deployment/modal/__init__.py +8 -0
  30. isa_model/deployment/modal/config.py +136 -0
  31. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  32. isa_model/deployment/modal/services/__init__.py +3 -0
  33. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  34. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  35. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  36. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  37. isa_model/deployment/modal/services/video/__init__.py +1 -0
  38. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  39. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  40. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  41. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  42. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  43. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  44. isa_model/deployment/storage/__init__.py +5 -0
  45. isa_model/deployment/storage/deployment_repository.py +824 -0
  46. isa_model/deployment/triton/__init__.py +10 -0
  47. isa_model/deployment/triton/config.py +196 -0
  48. isa_model/deployment/triton/configs/__init__.py +1 -0
  49. isa_model/deployment/triton/provider.py +512 -0
  50. isa_model/deployment/triton/scripts/__init__.py +1 -0
  51. isa_model/deployment/triton/templates/__init__.py +1 -0
  52. isa_model/inference/__init__.py +47 -1
  53. isa_model/inference/ai_factory.py +137 -10
  54. isa_model/inference/legacy_services/__init__.py +21 -0
  55. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  56. isa_model/inference/legacy_services/model_service.py +573 -0
  57. isa_model/inference/legacy_services/model_serving.py +717 -0
  58. isa_model/inference/legacy_services/model_training.py +561 -0
  59. isa_model/inference/models/__init__.py +21 -0
  60. isa_model/inference/models/inference_config.py +551 -0
  61. isa_model/inference/models/inference_record.py +675 -0
  62. isa_model/inference/models/performance_models.py +714 -0
  63. isa_model/inference/repositories/__init__.py +9 -0
  64. isa_model/inference/repositories/inference_repository.py +828 -0
  65. isa_model/inference/services/audio/base_stt_service.py +184 -11
  66. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  67. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  68. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  69. isa_model/inference/services/llm/__init__.py +10 -2
  70. isa_model/inference/services/llm/base_llm_service.py +335 -24
  71. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  72. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  73. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  74. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  75. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  76. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  77. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  78. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  79. isa_model/inference/services/vision/__init__.py +22 -1
  80. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  81. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  82. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  83. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  84. isa_model/serving/api/cache_manager.py +245 -0
  85. isa_model/serving/api/dependencies/__init__.py +1 -0
  86. isa_model/serving/api/dependencies/auth.py +194 -0
  87. isa_model/serving/api/dependencies/database.py +139 -0
  88. isa_model/serving/api/error_handlers.py +284 -0
  89. isa_model/serving/api/fastapi_server.py +172 -22
  90. isa_model/serving/api/middleware/auth.py +8 -2
  91. isa_model/serving/api/middleware/security.py +23 -33
  92. isa_model/serving/api/middleware/tenant_context.py +414 -0
  93. isa_model/serving/api/routes/analytics.py +4 -1
  94. isa_model/serving/api/routes/config.py +645 -0
  95. isa_model/serving/api/routes/deployment_billing.py +315 -0
  96. isa_model/serving/api/routes/deployments.py +138 -2
  97. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  98. isa_model/serving/api/routes/health.py +32 -12
  99. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  100. isa_model/serving/api/routes/local_deployments.py +448 -0
  101. isa_model/serving/api/routes/tenants.py +575 -0
  102. isa_model/serving/api/routes/unified.py +680 -18
  103. isa_model/serving/api/routes/webhooks.py +479 -0
  104. isa_model/serving/api/startup.py +68 -54
  105. isa_model/utils/gpu_utils.py +311 -0
  106. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
  107. isa_model-0.4.4.dist-info/RECORD +180 -0
  108. isa_model/core/security/secrets.py +0 -358
  109. isa_model/core/storage/hf_storage.py +0 -419
  110. isa_model/core/storage/minio_storage.py +0 -0
  111. isa_model/deployment/cloud/__init__.py +0 -9
  112. isa_model/deployment/cloud/modal/__init__.py +0 -10
  113. isa_model/deployment/core/deployment_config.py +0 -356
  114. isa_model/deployment/core/isa_deployment_service.py +0 -401
  115. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  116. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  117. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  118. isa_model/deployment/runtime/deployed_service.py +0 -338
  119. isa_model/deployment/services/__init__.py +0 -9
  120. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  121. isa_model/deployment/services/model_service.py +0 -332
  122. isa_model/deployment/services/service_monitor.py +0 -356
  123. isa_model/deployment/services/service_registry.py +0 -527
  124. isa_model/eval/__init__.py +0 -92
  125. isa_model/eval/benchmarks/__init__.py +0 -27
  126. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  127. isa_model/eval/benchmarks.py +0 -701
  128. isa_model/eval/config/__init__.py +0 -10
  129. isa_model/eval/config/evaluation_config.py +0 -108
  130. isa_model/eval/evaluators/__init__.py +0 -24
  131. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  132. isa_model/eval/evaluators/base_evaluator.py +0 -503
  133. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  134. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  135. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  136. isa_model/eval/example_evaluation.py +0 -395
  137. isa_model/eval/factory.py +0 -798
  138. isa_model/eval/infrastructure/__init__.py +0 -24
  139. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  140. isa_model/eval/isa_benchmarks.py +0 -700
  141. isa_model/eval/isa_integration.py +0 -582
  142. isa_model/eval/metrics.py +0 -951
  143. isa_model/eval/tests/unit/test_basic.py +0 -396
  144. isa_model/serving/api/routes/evaluations.py +0 -579
  145. isa_model/training/__init__.py +0 -168
  146. isa_model/training/annotation/annotation_schema.py +0 -47
  147. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  148. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  149. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  150. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  151. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  152. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  153. isa_model/training/annotation/views/annotation_controller.py +0 -158
  154. isa_model/training/cloud/__init__.py +0 -22
  155. isa_model/training/cloud/job_orchestrator.py +0 -402
  156. isa_model/training/cloud/runpod_trainer.py +0 -454
  157. isa_model/training/cloud/storage_manager.py +0 -482
  158. isa_model/training/core/__init__.py +0 -26
  159. isa_model/training/core/config.py +0 -181
  160. isa_model/training/core/dataset.py +0 -222
  161. isa_model/training/core/trainer.py +0 -720
  162. isa_model/training/core/utils.py +0 -213
  163. isa_model/training/examples/intelligent_training_example.py +0 -281
  164. isa_model/training/factory.py +0 -424
  165. isa_model/training/intelligent/__init__.py +0 -25
  166. isa_model/training/intelligent/decision_engine.py +0 -643
  167. isa_model/training/intelligent/intelligent_factory.py +0 -888
  168. isa_model/training/intelligent/knowledge_base.py +0 -751
  169. isa_model/training/intelligent/resource_optimizer.py +0 -839
  170. isa_model/training/intelligent/task_classifier.py +0 -576
  171. isa_model/training/storage/__init__.py +0 -24
  172. isa_model/training/storage/core_integration.py +0 -439
  173. isa_model/training/storage/training_repository.py +0 -552
  174. isa_model/training/storage/training_storage.py +0 -628
  175. isa_model-0.4.0.dist-info/RECORD +0 -182
  176. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  177. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  178. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  179. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  180. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  181. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  182. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  183. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  184. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  185. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  186. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  187. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  188. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
  189. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
@@ -1,582 +0,0 @@
1
- """
2
- ISA Model Client Integration for Evaluation Framework.
3
-
4
- Provides interfaces between the evaluation framework and ISA Model services.
5
- Supports all ISA services: LLM, Vision, Audio, Embedding, Image Generation.
6
- """
7
-
8
- import asyncio
9
- import logging
10
- import time
11
- from typing import Dict, List, Any, Optional, Union
12
- from pathlib import Path
13
- import base64
14
- from io import BytesIO
15
- from PIL import Image
16
-
17
- try:
18
- from ..client import ISAModelClient
19
- ISA_CLIENT_AVAILABLE = True
20
- except ImportError:
21
- ISA_CLIENT_AVAILABLE = False
22
- logging.warning("ISA Model Client not available. Using mock interface.")
23
-
24
- logger = logging.getLogger(__name__)
25
-
26
-
27
- class ISAModelInterface:
28
- """
29
- Interface adapter for ISA Model services in evaluation framework.
30
-
31
- Provides unified interfaces for:
32
- - LLM services (OpenAI, Ollama, YYDS)
33
- - Vision services (OCR, Table, UI, Document analysis)
34
- - Audio services (STT, TTS, Emotion, Diarization)
35
- - Embedding services (Text embedding, Reranking)
36
- - Image generation services
37
- """
38
-
39
- def __init__(self, service_config: Optional[Dict[str, Any]] = None):
40
- """
41
- Initialize ISA Model interface.
42
-
43
- Args:
44
- service_config: Configuration for ISA services
45
- """
46
- self.config = service_config or {}
47
-
48
- if ISA_CLIENT_AVAILABLE:
49
- self.client = ISAModelClient()
50
- else:
51
- self.client = None
52
- logger.warning("ISA Model Client not available, using mock client")
53
-
54
- # Performance tracking
55
- self.request_count = 0
56
- self.total_latency = 0.0
57
- self.error_count = 0
58
-
59
- async def llm_completion(self,
60
- prompt: str,
61
- model_name: str = "gpt-4.1-nano",
62
- provider: str = "openai",
63
- **kwargs) -> Dict[str, Any]:
64
- """
65
- Generate text completion using ISA LLM services.
66
-
67
- Args:
68
- prompt: Input text prompt
69
- model_name: Model name (e.g., gpt-4.1-nano, llama3.2:3b-instruct-fp16)
70
- provider: Provider (openai, ollama, yyds)
71
- **kwargs: Additional parameters
72
-
73
- Returns:
74
- LLM completion result
75
- """
76
- start_time = time.time()
77
- self.request_count += 1
78
-
79
- try:
80
- if self.client:
81
- # Use real ISA client
82
- result = await self.client.invoke(
83
- input_data=prompt,
84
- task="generate",
85
- service_type="text",
86
- provider=provider,
87
- model_name=model_name,
88
- **kwargs
89
- )
90
-
91
- # Extract text from result
92
- if isinstance(result, dict):
93
- text = result.get("result", str(result))
94
- else:
95
- text = str(result)
96
-
97
- completion_result = {
98
- "text": text,
99
- "model": model_name,
100
- "provider": provider,
101
- "latency": time.time() - start_time,
102
- "tokens_used": self._estimate_tokens(prompt + text),
103
- "cost_usd": self._estimate_cost(prompt + text, provider)
104
- }
105
-
106
- else:
107
- # Mock response
108
- completion_result = {
109
- "text": f"Mock response for: {prompt[:50]}...",
110
- "model": model_name,
111
- "provider": provider,
112
- "latency": 0.5,
113
- "tokens_used": len(prompt.split()) + 10,
114
- "cost_usd": 0.001
115
- }
116
-
117
- self.total_latency += completion_result["latency"]
118
- return completion_result
119
-
120
- except Exception as e:
121
- self.error_count += 1
122
- logger.error(f"LLM completion error: {e}")
123
- raise
124
-
125
- async def vision_analysis(self,
126
- image: Union[str, bytes, Image.Image, Path],
127
- prompt: str = "",
128
- task_type: str = "ocr",
129
- model_name: str = "gpt-4.1-mini",
130
- **kwargs) -> Dict[str, Any]:
131
- """
132
- Analyze image using ISA Vision services.
133
-
134
- Args:
135
- image: Image data (path, bytes, PIL Image, or base64)
136
- prompt: Analysis prompt
137
- task_type: Vision task (ocr, table, ui, document, caption)
138
- model_name: Vision model name
139
- **kwargs: Additional parameters
140
-
141
- Returns:
142
- Vision analysis result
143
- """
144
- start_time = time.time()
145
- self.request_count += 1
146
-
147
- try:
148
- # Convert image to format expected by ISA client
149
- image_data = self._prepare_image_data(image)
150
-
151
- if self.client:
152
- # Map task types to ISA service calls
153
- if task_type == "ocr":
154
- result = await self.client.invoke(
155
- input_data=image_data,
156
- task="extract_text",
157
- service_type="vision",
158
- model_name="isa-surya-ocr-service",
159
- **kwargs
160
- )
161
- elif task_type == "table":
162
- result = await self.client.invoke(
163
- input_data=image_data,
164
- task="extract_table",
165
- service_type="vision",
166
- model_name="isa_vision_table",
167
- **kwargs
168
- )
169
- elif task_type == "ui":
170
- result = await self.client.invoke(
171
- input_data=image_data,
172
- task="detect_ui",
173
- service_type="vision",
174
- model_name="isa-omniparser-ui-detection",
175
- **kwargs
176
- )
177
- else:
178
- # Generic vision analysis
179
- result = await self.client.invoke(
180
- input_data={"image": image_data, "prompt": prompt},
181
- task="analyze",
182
- service_type="vision",
183
- model_name=model_name,
184
- **kwargs
185
- )
186
-
187
- # Extract text from result
188
- if isinstance(result, dict):
189
- text = result.get("result", result.get("text", str(result)))
190
- else:
191
- text = str(result)
192
-
193
- vision_result = {
194
- "text": text,
195
- "task_type": task_type,
196
- "model": model_name,
197
- "latency": time.time() - start_time,
198
- "cost_usd": self._estimate_vision_cost(task_type)
199
- }
200
-
201
- else:
202
- # Mock response
203
- vision_result = {
204
- "text": f"Mock {task_type} result for image analysis",
205
- "task_type": task_type,
206
- "model": model_name,
207
- "latency": 1.0,
208
- "cost_usd": 0.01
209
- }
210
-
211
- self.total_latency += vision_result["latency"]
212
- return vision_result
213
-
214
- except Exception as e:
215
- self.error_count += 1
216
- logger.error(f"Vision analysis error: {e}")
217
- raise
218
-
219
- async def audio_processing(self,
220
- audio: Union[str, bytes, Path],
221
- task_type: str = "stt",
222
- model_name: str = "whisper-1",
223
- **kwargs) -> Dict[str, Any]:
224
- """
225
- Process audio using ISA Audio services.
226
-
227
- Args:
228
- audio: Audio data (path, bytes)
229
- task_type: Audio task (stt, tts, emotion, diarization)
230
- model_name: Audio model name
231
- **kwargs: Additional parameters
232
-
233
- Returns:
234
- Audio processing result
235
- """
236
- start_time = time.time()
237
- self.request_count += 1
238
-
239
- try:
240
- # Prepare audio data
241
- audio_data = self._prepare_audio_data(audio)
242
-
243
- if self.client:
244
- if task_type == "stt":
245
- result = await self.client.invoke(
246
- input_data=audio_data,
247
- task="transcribe",
248
- service_type="audio",
249
- model_name="isa_audio_sota_service" if "isa" in model_name else model_name,
250
- **kwargs
251
- )
252
- elif task_type == "emotion":
253
- result = await self.client.invoke(
254
- input_data=audio_data,
255
- task="detect_emotion",
256
- service_type="audio",
257
- model_name="isa_audio_sota_service",
258
- **kwargs
259
- )
260
- elif task_type == "diarization":
261
- result = await self.client.invoke(
262
- input_data=audio_data,
263
- task="diarize_speakers",
264
- service_type="audio",
265
- model_name="isa_audio_sota_service",
266
- **kwargs
267
- )
268
- else:
269
- # Generic audio processing
270
- result = await self.client.invoke(
271
- input_data=audio_data,
272
- task=task_type,
273
- service_type="audio",
274
- model_name=model_name,
275
- **kwargs
276
- )
277
-
278
- # Extract result
279
- if isinstance(result, dict):
280
- if task_type == "stt":
281
- text = result.get("result", result.get("text", str(result)))
282
- else:
283
- text = result
284
- else:
285
- text = str(result)
286
-
287
- audio_result = {
288
- "result": text,
289
- "task_type": task_type,
290
- "model": model_name,
291
- "latency": time.time() - start_time,
292
- "cost_usd": self._estimate_audio_cost(task_type)
293
- }
294
-
295
- else:
296
- # Mock response
297
- audio_result = {
298
- "result": f"Mock {task_type} result for audio processing",
299
- "task_type": task_type,
300
- "model": model_name,
301
- "latency": 2.0,
302
- "cost_usd": 0.005
303
- }
304
-
305
- self.total_latency += audio_result["latency"]
306
- return audio_result
307
-
308
- except Exception as e:
309
- self.error_count += 1
310
- logger.error(f"Audio processing error: {e}")
311
- raise
312
-
313
- async def embedding_generation(self,
314
- text: str,
315
- model_name: str = "text-embedding-3-small",
316
- **kwargs) -> Dict[str, Any]:
317
- """
318
- Generate embeddings using ISA Embedding services.
319
-
320
- Args:
321
- text: Input text
322
- model_name: Embedding model name
323
- **kwargs: Additional parameters
324
-
325
- Returns:
326
- Embedding result
327
- """
328
- start_time = time.time()
329
- self.request_count += 1
330
-
331
- try:
332
- if self.client:
333
- result = await self.client.invoke(
334
- input_data=text,
335
- task="embed",
336
- service_type="embedding",
337
- model_name=model_name,
338
- **kwargs
339
- )
340
-
341
- # Extract embedding vector
342
- if isinstance(result, dict):
343
- embedding = result.get("result", result.get("embedding", []))
344
- else:
345
- embedding = result if isinstance(result, list) else []
346
-
347
- embedding_result = {
348
- "embedding": embedding,
349
- "model": model_name,
350
- "dimension": len(embedding) if embedding else 0,
351
- "latency": time.time() - start_time,
352
- "cost_usd": self._estimate_embedding_cost(text)
353
- }
354
-
355
- else:
356
- # Mock embedding (1536 dimensions like OpenAI)
357
- import numpy as np
358
- embedding = np.random.randn(1536).tolist()
359
-
360
- embedding_result = {
361
- "embedding": embedding,
362
- "model": model_name,
363
- "dimension": 1536,
364
- "latency": 0.3,
365
- "cost_usd": 0.0001
366
- }
367
-
368
- self.total_latency += embedding_result["latency"]
369
- return embedding_result
370
-
371
- except Exception as e:
372
- self.error_count += 1
373
- logger.error(f"Embedding generation error: {e}")
374
- raise
375
-
376
- async def reranking(self,
377
- query: str,
378
- documents: List[str],
379
- model_name: str = "isa-jina-reranker-v2-service",
380
- **kwargs) -> Dict[str, Any]:
381
- """
382
- Rerank documents using ISA Reranking services.
383
-
384
- Args:
385
- query: Search query
386
- documents: List of documents to rerank
387
- model_name: Reranking model name
388
- **kwargs: Additional parameters
389
-
390
- Returns:
391
- Reranking result
392
- """
393
- start_time = time.time()
394
- self.request_count += 1
395
-
396
- try:
397
- if self.client:
398
- result = await self.client.invoke(
399
- input_data={
400
- "query": query,
401
- "documents": documents
402
- },
403
- task="rerank",
404
- service_type="embedding",
405
- model_name=model_name,
406
- **kwargs
407
- )
408
-
409
- # Extract reranked results
410
- if isinstance(result, dict):
411
- reranked = result.get("result", result.get("rankings", []))
412
- else:
413
- reranked = result if isinstance(result, list) else []
414
-
415
- reranking_result = {
416
- "rankings": reranked,
417
- "model": model_name,
418
- "query": query,
419
- "num_documents": len(documents),
420
- "latency": time.time() - start_time,
421
- "cost_usd": self._estimate_reranking_cost(len(documents))
422
- }
423
-
424
- else:
425
- # Mock reranking (random shuffle)
426
- import random
427
- indices = list(range(len(documents)))
428
- random.shuffle(indices)
429
-
430
- reranking_result = {
431
- "rankings": [{"index": i, "score": random.random()} for i in indices],
432
- "model": model_name,
433
- "query": query,
434
- "num_documents": len(documents),
435
- "latency": 0.5,
436
- "cost_usd": 0.001
437
- }
438
-
439
- self.total_latency += reranking_result["latency"]
440
- return reranking_result
441
-
442
- except Exception as e:
443
- self.error_count += 1
444
- logger.error(f"Reranking error: {e}")
445
- raise
446
-
447
- def _prepare_image_data(self, image: Union[str, bytes, Image.Image, Path]) -> str:
448
- """Convert image to base64 string for ISA client."""
449
- try:
450
- if isinstance(image, str):
451
- if image.startswith("data:"):
452
- return image # Already base64 data URL
453
- elif Path(image).exists():
454
- # File path
455
- with open(image, "rb") as f:
456
- image_bytes = f.read()
457
- else:
458
- # Assume base64 string
459
- return f"data:image/jpeg;base64,{image}"
460
-
461
- elif isinstance(image, bytes):
462
- image_bytes = image
463
-
464
- elif isinstance(image, Path):
465
- with open(image, "rb") as f:
466
- image_bytes = f.read()
467
-
468
- elif isinstance(image, Image.Image):
469
- buffer = BytesIO()
470
- image.save(buffer, format="PNG")
471
- image_bytes = buffer.getvalue()
472
-
473
- else:
474
- raise ValueError(f"Unsupported image type: {type(image)}")
475
-
476
- # Convert to base64 data URL
477
- base64_str = base64.b64encode(image_bytes).decode()
478
- return f"data:image/jpeg;base64,{base64_str}"
479
-
480
- except Exception as e:
481
- logger.error(f"Error preparing image data: {e}")
482
- raise
483
-
484
- def _prepare_audio_data(self, audio: Union[str, bytes, Path]) -> str:
485
- """Convert audio to format for ISA client."""
486
- try:
487
- if isinstance(audio, (str, Path)):
488
- # Return file path for ISA client
489
- return str(audio)
490
- elif isinstance(audio, bytes):
491
- # Save to temporary file
492
- import tempfile
493
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
494
- tmp_file.write(audio)
495
- return tmp_file.name
496
- else:
497
- raise ValueError(f"Unsupported audio type: {type(audio)}")
498
-
499
- except Exception as e:
500
- logger.error(f"Error preparing audio data: {e}")
501
- raise
502
-
503
- def _estimate_tokens(self, text: str) -> int:
504
- """Estimate token count (rough approximation)."""
505
- return len(text.split()) * 1.3 # Rough estimate
506
-
507
- def _estimate_cost(self, text: str, provider: str) -> float:
508
- """Estimate API cost."""
509
- tokens = self._estimate_tokens(text)
510
-
511
- # Rough cost estimates (per 1k tokens)
512
- cost_per_1k = {
513
- "openai": 0.002, # GPT-4 turbo
514
- "ollama": 0.0, # Local model
515
- "yyds": 0.01 # Claude
516
- }
517
-
518
- return (tokens / 1000) * cost_per_1k.get(provider, 0.001)
519
-
520
- def _estimate_vision_cost(self, task_type: str) -> float:
521
- """Estimate vision processing cost."""
522
- costs = {
523
- "ocr": 0.01,
524
- "table": 0.02,
525
- "ui": 0.015,
526
- "document": 0.03,
527
- "caption": 0.02
528
- }
529
- return costs.get(task_type, 0.01)
530
-
531
- def _estimate_audio_cost(self, task_type: str) -> float:
532
- """Estimate audio processing cost."""
533
- costs = {
534
- "stt": 0.006, # Whisper pricing
535
- "tts": 0.015,
536
- "emotion": 0.01,
537
- "diarization": 0.02
538
- }
539
- return costs.get(task_type, 0.01)
540
-
541
- def _estimate_embedding_cost(self, text: str) -> float:
542
- """Estimate embedding cost."""
543
- tokens = self._estimate_tokens(text)
544
- return (tokens / 1000) * 0.0001 # text-embedding-3-small pricing
545
-
546
- def _estimate_reranking_cost(self, num_docs: int) -> float:
547
- """Estimate reranking cost."""
548
- return num_docs * 0.0001 # Rough estimate per document
549
-
550
- def get_performance_stats(self) -> Dict[str, Any]:
551
- """Get performance statistics."""
552
- avg_latency = self.total_latency / self.request_count if self.request_count > 0 else 0
553
-
554
- return {
555
- "total_requests": self.request_count,
556
- "total_errors": self.error_count,
557
- "error_rate": self.error_count / self.request_count if self.request_count > 0 else 0,
558
- "avg_latency_seconds": avg_latency,
559
- "total_latency_seconds": self.total_latency,
560
- "success_rate": 1 - (self.error_count / self.request_count) if self.request_count > 0 else 0
561
- }
562
-
563
-
564
- # Convenience functions for creating service interfaces
565
- def create_llm_interface(config: Optional[Dict[str, Any]] = None) -> ISAModelInterface:
566
- """Create LLM service interface."""
567
- return ISAModelInterface(config)
568
-
569
-
570
- def create_vision_interface(config: Optional[Dict[str, Any]] = None) -> ISAModelInterface:
571
- """Create Vision service interface."""
572
- return ISAModelInterface(config)
573
-
574
-
575
- def create_audio_interface(config: Optional[Dict[str, Any]] = None) -> ISAModelInterface:
576
- """Create Audio service interface."""
577
- return ISAModelInterface(config)
578
-
579
-
580
- def create_embedding_interface(config: Optional[Dict[str, Any]] = None) -> ISAModelInterface:
581
- """Create Embedding service interface."""
582
- return ISAModelInterface(config)