isa-model 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +40 -17
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/storage/hf_storage.py +1 -1
  26. isa_model/core/types.py +1 -0
  27. isa_model/deployment/__init__.py +5 -48
  28. isa_model/deployment/core/__init__.py +2 -31
  29. isa_model/deployment/core/deployment_manager.py +1278 -370
  30. isa_model/deployment/local/__init__.py +31 -0
  31. isa_model/deployment/local/config.py +248 -0
  32. isa_model/deployment/local/gpu_gateway.py +607 -0
  33. isa_model/deployment/local/health_checker.py +428 -0
  34. isa_model/deployment/local/provider.py +586 -0
  35. isa_model/deployment/local/tensorrt_service.py +621 -0
  36. isa_model/deployment/local/transformers_service.py +644 -0
  37. isa_model/deployment/local/vllm_service.py +527 -0
  38. isa_model/deployment/modal/__init__.py +8 -0
  39. isa_model/deployment/modal/config.py +136 -0
  40. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  41. isa_model/deployment/modal/services/__init__.py +3 -0
  42. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  43. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  44. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  45. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  46. isa_model/deployment/modal/services/video/__init__.py +1 -0
  47. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  48. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  49. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  50. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  51. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  52. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  53. isa_model/deployment/storage/__init__.py +5 -0
  54. isa_model/deployment/storage/deployment_repository.py +824 -0
  55. isa_model/deployment/triton/__init__.py +10 -0
  56. isa_model/deployment/triton/config.py +196 -0
  57. isa_model/deployment/triton/configs/__init__.py +1 -0
  58. isa_model/deployment/triton/provider.py +512 -0
  59. isa_model/deployment/triton/scripts/__init__.py +1 -0
  60. isa_model/deployment/triton/templates/__init__.py +1 -0
  61. isa_model/inference/__init__.py +47 -1
  62. isa_model/inference/ai_factory.py +137 -10
  63. isa_model/inference/legacy_services/__init__.py +21 -0
  64. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  65. isa_model/inference/legacy_services/model_service.py +573 -0
  66. isa_model/inference/legacy_services/model_serving.py +717 -0
  67. isa_model/inference/legacy_services/model_training.py +561 -0
  68. isa_model/inference/models/__init__.py +21 -0
  69. isa_model/inference/models/inference_config.py +551 -0
  70. isa_model/inference/models/inference_record.py +675 -0
  71. isa_model/inference/models/performance_models.py +714 -0
  72. isa_model/inference/repositories/__init__.py +9 -0
  73. isa_model/inference/repositories/inference_repository.py +828 -0
  74. isa_model/inference/services/audio/base_stt_service.py +184 -11
  75. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  76. isa_model/inference/services/custom_model_manager.py +277 -0
  77. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  78. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  79. isa_model/inference/services/llm/__init__.py +10 -2
  80. isa_model/inference/services/llm/base_llm_service.py +335 -24
  81. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  82. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  83. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  84. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  85. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  86. isa_model/inference/services/llm/local_llm_service.py +747 -0
  87. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  88. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  89. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  90. isa_model/inference/services/vision/__init__.py +22 -1
  91. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  92. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  93. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  94. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  95. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  96. isa_model/serving/api/cache_manager.py +245 -0
  97. isa_model/serving/api/dependencies/__init__.py +1 -0
  98. isa_model/serving/api/dependencies/auth.py +194 -0
  99. isa_model/serving/api/dependencies/database.py +139 -0
  100. isa_model/serving/api/error_handlers.py +284 -0
  101. isa_model/serving/api/fastapi_server.py +172 -22
  102. isa_model/serving/api/middleware/auth.py +8 -2
  103. isa_model/serving/api/middleware/security.py +23 -33
  104. isa_model/serving/api/middleware/tenant_context.py +414 -0
  105. isa_model/serving/api/routes/analytics.py +4 -1
  106. isa_model/serving/api/routes/config.py +645 -0
  107. isa_model/serving/api/routes/deployment_billing.py +315 -0
  108. isa_model/serving/api/routes/deployments.py +138 -2
  109. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  110. isa_model/serving/api/routes/health.py +32 -12
  111. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  112. isa_model/serving/api/routes/local_deployments.py +448 -0
  113. isa_model/serving/api/routes/tenants.py +575 -0
  114. isa_model/serving/api/routes/unified.py +680 -18
  115. isa_model/serving/api/routes/webhooks.py +479 -0
  116. isa_model/serving/api/startup.py +68 -54
  117. isa_model/utils/gpu_utils.py +311 -0
  118. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/METADATA +66 -24
  119. isa_model-0.4.3.dist-info/RECORD +193 -0
  120. isa_model/core/storage/minio_storage.py +0 -0
  121. isa_model/deployment/cloud/__init__.py +0 -9
  122. isa_model/deployment/cloud/modal/__init__.py +0 -10
  123. isa_model/deployment/core/deployment_config.py +0 -356
  124. isa_model/deployment/core/isa_deployment_service.py +0 -401
  125. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  126. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  127. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  128. isa_model/deployment/runtime/deployed_service.py +0 -338
  129. isa_model/deployment/services/__init__.py +0 -9
  130. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  131. isa_model/deployment/services/model_service.py +0 -332
  132. isa_model/deployment/services/service_monitor.py +0 -356
  133. isa_model/deployment/services/service_registry.py +0 -527
  134. isa_model/eval/__init__.py +0 -92
  135. isa_model/eval/benchmarks/__init__.py +0 -27
  136. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  137. isa_model/eval/benchmarks.py +0 -701
  138. isa_model/eval/config/__init__.py +0 -10
  139. isa_model/eval/config/evaluation_config.py +0 -108
  140. isa_model/eval/evaluators/__init__.py +0 -24
  141. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  142. isa_model/eval/evaluators/base_evaluator.py +0 -503
  143. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  144. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  145. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  146. isa_model/eval/example_evaluation.py +0 -395
  147. isa_model/eval/factory.py +0 -798
  148. isa_model/eval/infrastructure/__init__.py +0 -24
  149. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  150. isa_model/eval/isa_benchmarks.py +0 -700
  151. isa_model/eval/isa_integration.py +0 -582
  152. isa_model/eval/metrics.py +0 -951
  153. isa_model/eval/tests/unit/test_basic.py +0 -396
  154. isa_model/serving/api/routes/evaluations.py +0 -579
  155. isa_model/training/__init__.py +0 -168
  156. isa_model/training/annotation/annotation_schema.py +0 -47
  157. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  158. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  159. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  160. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  161. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  162. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  163. isa_model/training/annotation/views/annotation_controller.py +0 -158
  164. isa_model/training/cloud/__init__.py +0 -22
  165. isa_model/training/cloud/job_orchestrator.py +0 -402
  166. isa_model/training/cloud/runpod_trainer.py +0 -454
  167. isa_model/training/cloud/storage_manager.py +0 -482
  168. isa_model/training/core/__init__.py +0 -26
  169. isa_model/training/core/config.py +0 -181
  170. isa_model/training/core/dataset.py +0 -222
  171. isa_model/training/core/trainer.py +0 -720
  172. isa_model/training/core/utils.py +0 -213
  173. isa_model/training/examples/intelligent_training_example.py +0 -281
  174. isa_model/training/factory.py +0 -424
  175. isa_model/training/intelligent/__init__.py +0 -25
  176. isa_model/training/intelligent/decision_engine.py +0 -643
  177. isa_model/training/intelligent/intelligent_factory.py +0 -888
  178. isa_model/training/intelligent/knowledge_base.py +0 -751
  179. isa_model/training/intelligent/resource_optimizer.py +0 -839
  180. isa_model/training/intelligent/task_classifier.py +0 -576
  181. isa_model/training/storage/__init__.py +0 -24
  182. isa_model/training/storage/core_integration.py +0 -439
  183. isa_model/training/storage/training_repository.py +0 -552
  184. isa_model/training/storage/training_storage.py +0 -628
  185. isa_model-0.4.0.dist-info/RECORD +0 -182
  186. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  187. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  188. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  189. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  190. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  191. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  192. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  193. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  194. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  195. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  196. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  197. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  198. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  199. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -1,582 +0,0 @@
1
- """
2
- ISA Model Client Integration for Evaluation Framework.
3
-
4
- Provides interfaces between the evaluation framework and ISA Model services.
5
- Supports all ISA services: LLM, Vision, Audio, Embedding, Image Generation.
6
- """
7
-
8
- import asyncio
9
- import logging
10
- import time
11
- from typing import Dict, List, Any, Optional, Union
12
- from pathlib import Path
13
- import base64
14
- from io import BytesIO
15
- from PIL import Image
16
-
17
- try:
18
- from ..client import ISAModelClient
19
- ISA_CLIENT_AVAILABLE = True
20
- except ImportError:
21
- ISA_CLIENT_AVAILABLE = False
22
- logging.warning("ISA Model Client not available. Using mock interface.")
23
-
24
- logger = logging.getLogger(__name__)
25
-
26
-
27
- class ISAModelInterface:
28
- """
29
- Interface adapter for ISA Model services in evaluation framework.
30
-
31
- Provides unified interfaces for:
32
- - LLM services (OpenAI, Ollama, YYDS)
33
- - Vision services (OCR, Table, UI, Document analysis)
34
- - Audio services (STT, TTS, Emotion, Diarization)
35
- - Embedding services (Text embedding, Reranking)
36
- - Image generation services
37
- """
38
-
39
- def __init__(self, service_config: Optional[Dict[str, Any]] = None):
40
- """
41
- Initialize ISA Model interface.
42
-
43
- Args:
44
- service_config: Configuration for ISA services
45
- """
46
- self.config = service_config or {}
47
-
48
- if ISA_CLIENT_AVAILABLE:
49
- self.client = ISAModelClient()
50
- else:
51
- self.client = None
52
- logger.warning("ISA Model Client not available, using mock client")
53
-
54
- # Performance tracking
55
- self.request_count = 0
56
- self.total_latency = 0.0
57
- self.error_count = 0
58
-
59
- async def llm_completion(self,
60
- prompt: str,
61
- model_name: str = "gpt-4.1-nano",
62
- provider: str = "openai",
63
- **kwargs) -> Dict[str, Any]:
64
- """
65
- Generate text completion using ISA LLM services.
66
-
67
- Args:
68
- prompt: Input text prompt
69
- model_name: Model name (e.g., gpt-4.1-nano, llama3.2:3b-instruct-fp16)
70
- provider: Provider (openai, ollama, yyds)
71
- **kwargs: Additional parameters
72
-
73
- Returns:
74
- LLM completion result
75
- """
76
- start_time = time.time()
77
- self.request_count += 1
78
-
79
- try:
80
- if self.client:
81
- # Use real ISA client
82
- result = await self.client.invoke(
83
- input_data=prompt,
84
- task="generate",
85
- service_type="text",
86
- provider=provider,
87
- model_name=model_name,
88
- **kwargs
89
- )
90
-
91
- # Extract text from result
92
- if isinstance(result, dict):
93
- text = result.get("result", str(result))
94
- else:
95
- text = str(result)
96
-
97
- completion_result = {
98
- "text": text,
99
- "model": model_name,
100
- "provider": provider,
101
- "latency": time.time() - start_time,
102
- "tokens_used": self._estimate_tokens(prompt + text),
103
- "cost_usd": self._estimate_cost(prompt + text, provider)
104
- }
105
-
106
- else:
107
- # Mock response
108
- completion_result = {
109
- "text": f"Mock response for: {prompt[:50]}...",
110
- "model": model_name,
111
- "provider": provider,
112
- "latency": 0.5,
113
- "tokens_used": len(prompt.split()) + 10,
114
- "cost_usd": 0.001
115
- }
116
-
117
- self.total_latency += completion_result["latency"]
118
- return completion_result
119
-
120
- except Exception as e:
121
- self.error_count += 1
122
- logger.error(f"LLM completion error: {e}")
123
- raise
124
-
125
- async def vision_analysis(self,
126
- image: Union[str, bytes, Image.Image, Path],
127
- prompt: str = "",
128
- task_type: str = "ocr",
129
- model_name: str = "gpt-4.1-mini",
130
- **kwargs) -> Dict[str, Any]:
131
- """
132
- Analyze image using ISA Vision services.
133
-
134
- Args:
135
- image: Image data (path, bytes, PIL Image, or base64)
136
- prompt: Analysis prompt
137
- task_type: Vision task (ocr, table, ui, document, caption)
138
- model_name: Vision model name
139
- **kwargs: Additional parameters
140
-
141
- Returns:
142
- Vision analysis result
143
- """
144
- start_time = time.time()
145
- self.request_count += 1
146
-
147
- try:
148
- # Convert image to format expected by ISA client
149
- image_data = self._prepare_image_data(image)
150
-
151
- if self.client:
152
- # Map task types to ISA service calls
153
- if task_type == "ocr":
154
- result = await self.client.invoke(
155
- input_data=image_data,
156
- task="extract_text",
157
- service_type="vision",
158
- model_name="isa-surya-ocr-service",
159
- **kwargs
160
- )
161
- elif task_type == "table":
162
- result = await self.client.invoke(
163
- input_data=image_data,
164
- task="extract_table",
165
- service_type="vision",
166
- model_name="isa_vision_table",
167
- **kwargs
168
- )
169
- elif task_type == "ui":
170
- result = await self.client.invoke(
171
- input_data=image_data,
172
- task="detect_ui",
173
- service_type="vision",
174
- model_name="isa-omniparser-ui-detection",
175
- **kwargs
176
- )
177
- else:
178
- # Generic vision analysis
179
- result = await self.client.invoke(
180
- input_data={"image": image_data, "prompt": prompt},
181
- task="analyze",
182
- service_type="vision",
183
- model_name=model_name,
184
- **kwargs
185
- )
186
-
187
- # Extract text from result
188
- if isinstance(result, dict):
189
- text = result.get("result", result.get("text", str(result)))
190
- else:
191
- text = str(result)
192
-
193
- vision_result = {
194
- "text": text,
195
- "task_type": task_type,
196
- "model": model_name,
197
- "latency": time.time() - start_time,
198
- "cost_usd": self._estimate_vision_cost(task_type)
199
- }
200
-
201
- else:
202
- # Mock response
203
- vision_result = {
204
- "text": f"Mock {task_type} result for image analysis",
205
- "task_type": task_type,
206
- "model": model_name,
207
- "latency": 1.0,
208
- "cost_usd": 0.01
209
- }
210
-
211
- self.total_latency += vision_result["latency"]
212
- return vision_result
213
-
214
- except Exception as e:
215
- self.error_count += 1
216
- logger.error(f"Vision analysis error: {e}")
217
- raise
218
-
219
- async def audio_processing(self,
220
- audio: Union[str, bytes, Path],
221
- task_type: str = "stt",
222
- model_name: str = "whisper-1",
223
- **kwargs) -> Dict[str, Any]:
224
- """
225
- Process audio using ISA Audio services.
226
-
227
- Args:
228
- audio: Audio data (path, bytes)
229
- task_type: Audio task (stt, tts, emotion, diarization)
230
- model_name: Audio model name
231
- **kwargs: Additional parameters
232
-
233
- Returns:
234
- Audio processing result
235
- """
236
- start_time = time.time()
237
- self.request_count += 1
238
-
239
- try:
240
- # Prepare audio data
241
- audio_data = self._prepare_audio_data(audio)
242
-
243
- if self.client:
244
- if task_type == "stt":
245
- result = await self.client.invoke(
246
- input_data=audio_data,
247
- task="transcribe",
248
- service_type="audio",
249
- model_name="isa_audio_sota_service" if "isa" in model_name else model_name,
250
- **kwargs
251
- )
252
- elif task_type == "emotion":
253
- result = await self.client.invoke(
254
- input_data=audio_data,
255
- task="detect_emotion",
256
- service_type="audio",
257
- model_name="isa_audio_sota_service",
258
- **kwargs
259
- )
260
- elif task_type == "diarization":
261
- result = await self.client.invoke(
262
- input_data=audio_data,
263
- task="diarize_speakers",
264
- service_type="audio",
265
- model_name="isa_audio_sota_service",
266
- **kwargs
267
- )
268
- else:
269
- # Generic audio processing
270
- result = await self.client.invoke(
271
- input_data=audio_data,
272
- task=task_type,
273
- service_type="audio",
274
- model_name=model_name,
275
- **kwargs
276
- )
277
-
278
- # Extract result
279
- if isinstance(result, dict):
280
- if task_type == "stt":
281
- text = result.get("result", result.get("text", str(result)))
282
- else:
283
- text = result
284
- else:
285
- text = str(result)
286
-
287
- audio_result = {
288
- "result": text,
289
- "task_type": task_type,
290
- "model": model_name,
291
- "latency": time.time() - start_time,
292
- "cost_usd": self._estimate_audio_cost(task_type)
293
- }
294
-
295
- else:
296
- # Mock response
297
- audio_result = {
298
- "result": f"Mock {task_type} result for audio processing",
299
- "task_type": task_type,
300
- "model": model_name,
301
- "latency": 2.0,
302
- "cost_usd": 0.005
303
- }
304
-
305
- self.total_latency += audio_result["latency"]
306
- return audio_result
307
-
308
- except Exception as e:
309
- self.error_count += 1
310
- logger.error(f"Audio processing error: {e}")
311
- raise
312
-
313
- async def embedding_generation(self,
314
- text: str,
315
- model_name: str = "text-embedding-3-small",
316
- **kwargs) -> Dict[str, Any]:
317
- """
318
- Generate embeddings using ISA Embedding services.
319
-
320
- Args:
321
- text: Input text
322
- model_name: Embedding model name
323
- **kwargs: Additional parameters
324
-
325
- Returns:
326
- Embedding result
327
- """
328
- start_time = time.time()
329
- self.request_count += 1
330
-
331
- try:
332
- if self.client:
333
- result = await self.client.invoke(
334
- input_data=text,
335
- task="embed",
336
- service_type="embedding",
337
- model_name=model_name,
338
- **kwargs
339
- )
340
-
341
- # Extract embedding vector
342
- if isinstance(result, dict):
343
- embedding = result.get("result", result.get("embedding", []))
344
- else:
345
- embedding = result if isinstance(result, list) else []
346
-
347
- embedding_result = {
348
- "embedding": embedding,
349
- "model": model_name,
350
- "dimension": len(embedding) if embedding else 0,
351
- "latency": time.time() - start_time,
352
- "cost_usd": self._estimate_embedding_cost(text)
353
- }
354
-
355
- else:
356
- # Mock embedding (1536 dimensions like OpenAI)
357
- import numpy as np
358
- embedding = np.random.randn(1536).tolist()
359
-
360
- embedding_result = {
361
- "embedding": embedding,
362
- "model": model_name,
363
- "dimension": 1536,
364
- "latency": 0.3,
365
- "cost_usd": 0.0001
366
- }
367
-
368
- self.total_latency += embedding_result["latency"]
369
- return embedding_result
370
-
371
- except Exception as e:
372
- self.error_count += 1
373
- logger.error(f"Embedding generation error: {e}")
374
- raise
375
-
376
- async def reranking(self,
377
- query: str,
378
- documents: List[str],
379
- model_name: str = "isa-jina-reranker-v2-service",
380
- **kwargs) -> Dict[str, Any]:
381
- """
382
- Rerank documents using ISA Reranking services.
383
-
384
- Args:
385
- query: Search query
386
- documents: List of documents to rerank
387
- model_name: Reranking model name
388
- **kwargs: Additional parameters
389
-
390
- Returns:
391
- Reranking result
392
- """
393
- start_time = time.time()
394
- self.request_count += 1
395
-
396
- try:
397
- if self.client:
398
- result = await self.client.invoke(
399
- input_data={
400
- "query": query,
401
- "documents": documents
402
- },
403
- task="rerank",
404
- service_type="embedding",
405
- model_name=model_name,
406
- **kwargs
407
- )
408
-
409
- # Extract reranked results
410
- if isinstance(result, dict):
411
- reranked = result.get("result", result.get("rankings", []))
412
- else:
413
- reranked = result if isinstance(result, list) else []
414
-
415
- reranking_result = {
416
- "rankings": reranked,
417
- "model": model_name,
418
- "query": query,
419
- "num_documents": len(documents),
420
- "latency": time.time() - start_time,
421
- "cost_usd": self._estimate_reranking_cost(len(documents))
422
- }
423
-
424
- else:
425
- # Mock reranking (random shuffle)
426
- import random
427
- indices = list(range(len(documents)))
428
- random.shuffle(indices)
429
-
430
- reranking_result = {
431
- "rankings": [{"index": i, "score": random.random()} for i in indices],
432
- "model": model_name,
433
- "query": query,
434
- "num_documents": len(documents),
435
- "latency": 0.5,
436
- "cost_usd": 0.001
437
- }
438
-
439
- self.total_latency += reranking_result["latency"]
440
- return reranking_result
441
-
442
- except Exception as e:
443
- self.error_count += 1
444
- logger.error(f"Reranking error: {e}")
445
- raise
446
-
447
- def _prepare_image_data(self, image: Union[str, bytes, Image.Image, Path]) -> str:
448
- """Convert image to base64 string for ISA client."""
449
- try:
450
- if isinstance(image, str):
451
- if image.startswith("data:"):
452
- return image # Already base64 data URL
453
- elif Path(image).exists():
454
- # File path
455
- with open(image, "rb") as f:
456
- image_bytes = f.read()
457
- else:
458
- # Assume base64 string
459
- return f"data:image/jpeg;base64,{image}"
460
-
461
- elif isinstance(image, bytes):
462
- image_bytes = image
463
-
464
- elif isinstance(image, Path):
465
- with open(image, "rb") as f:
466
- image_bytes = f.read()
467
-
468
- elif isinstance(image, Image.Image):
469
- buffer = BytesIO()
470
- image.save(buffer, format="PNG")
471
- image_bytes = buffer.getvalue()
472
-
473
- else:
474
- raise ValueError(f"Unsupported image type: {type(image)}")
475
-
476
- # Convert to base64 data URL
477
- base64_str = base64.b64encode(image_bytes).decode()
478
- return f"data:image/jpeg;base64,{base64_str}"
479
-
480
- except Exception as e:
481
- logger.error(f"Error preparing image data: {e}")
482
- raise
483
-
484
- def _prepare_audio_data(self, audio: Union[str, bytes, Path]) -> str:
485
- """Convert audio to format for ISA client."""
486
- try:
487
- if isinstance(audio, (str, Path)):
488
- # Return file path for ISA client
489
- return str(audio)
490
- elif isinstance(audio, bytes):
491
- # Save to temporary file
492
- import tempfile
493
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
494
- tmp_file.write(audio)
495
- return tmp_file.name
496
- else:
497
- raise ValueError(f"Unsupported audio type: {type(audio)}")
498
-
499
- except Exception as e:
500
- logger.error(f"Error preparing audio data: {e}")
501
- raise
502
-
503
- def _estimate_tokens(self, text: str) -> int:
504
- """Estimate token count (rough approximation)."""
505
- return len(text.split()) * 1.3 # Rough estimate
506
-
507
- def _estimate_cost(self, text: str, provider: str) -> float:
508
- """Estimate API cost."""
509
- tokens = self._estimate_tokens(text)
510
-
511
- # Rough cost estimates (per 1k tokens)
512
- cost_per_1k = {
513
- "openai": 0.002, # GPT-4 turbo
514
- "ollama": 0.0, # Local model
515
- "yyds": 0.01 # Claude
516
- }
517
-
518
- return (tokens / 1000) * cost_per_1k.get(provider, 0.001)
519
-
520
- def _estimate_vision_cost(self, task_type: str) -> float:
521
- """Estimate vision processing cost."""
522
- costs = {
523
- "ocr": 0.01,
524
- "table": 0.02,
525
- "ui": 0.015,
526
- "document": 0.03,
527
- "caption": 0.02
528
- }
529
- return costs.get(task_type, 0.01)
530
-
531
- def _estimate_audio_cost(self, task_type: str) -> float:
532
- """Estimate audio processing cost."""
533
- costs = {
534
- "stt": 0.006, # Whisper pricing
535
- "tts": 0.015,
536
- "emotion": 0.01,
537
- "diarization": 0.02
538
- }
539
- return costs.get(task_type, 0.01)
540
-
541
- def _estimate_embedding_cost(self, text: str) -> float:
542
- """Estimate embedding cost."""
543
- tokens = self._estimate_tokens(text)
544
- return (tokens / 1000) * 0.0001 # text-embedding-3-small pricing
545
-
546
- def _estimate_reranking_cost(self, num_docs: int) -> float:
547
- """Estimate reranking cost."""
548
- return num_docs * 0.0001 # Rough estimate per document
549
-
550
- def get_performance_stats(self) -> Dict[str, Any]:
551
- """Get performance statistics."""
552
- avg_latency = self.total_latency / self.request_count if self.request_count > 0 else 0
553
-
554
- return {
555
- "total_requests": self.request_count,
556
- "total_errors": self.error_count,
557
- "error_rate": self.error_count / self.request_count if self.request_count > 0 else 0,
558
- "avg_latency_seconds": avg_latency,
559
- "total_latency_seconds": self.total_latency,
560
- "success_rate": 1 - (self.error_count / self.request_count) if self.request_count > 0 else 0
561
- }
562
-
563
-
564
- # Convenience functions for creating service interfaces
565
- def create_llm_interface(config: Optional[Dict[str, Any]] = None) -> ISAModelInterface:
566
- """Create LLM service interface."""
567
- return ISAModelInterface(config)
568
-
569
-
570
- def create_vision_interface(config: Optional[Dict[str, Any]] = None) -> ISAModelInterface:
571
- """Create Vision service interface."""
572
- return ISAModelInterface(config)
573
-
574
-
575
- def create_audio_interface(config: Optional[Dict[str, Any]] = None) -> ISAModelInterface:
576
- """Create Audio service interface."""
577
- return ISAModelInterface(config)
578
-
579
-
580
- def create_embedding_interface(config: Optional[Dict[str, Any]] = None) -> ISAModelInterface:
581
- """Create Embedding service interface."""
582
- return ISAModelInterface(config)