isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +35 -80
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/types.py +1 -0
  26. isa_model/deployment/__init__.py +5 -48
  27. isa_model/deployment/core/__init__.py +2 -31
  28. isa_model/deployment/core/deployment_manager.py +1278 -370
  29. isa_model/deployment/modal/__init__.py +8 -0
  30. isa_model/deployment/modal/config.py +136 -0
  31. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  32. isa_model/deployment/modal/services/__init__.py +3 -0
  33. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  34. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  35. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  36. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  37. isa_model/deployment/modal/services/video/__init__.py +1 -0
  38. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  39. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  40. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  41. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  42. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  43. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  44. isa_model/deployment/storage/__init__.py +5 -0
  45. isa_model/deployment/storage/deployment_repository.py +824 -0
  46. isa_model/deployment/triton/__init__.py +10 -0
  47. isa_model/deployment/triton/config.py +196 -0
  48. isa_model/deployment/triton/configs/__init__.py +1 -0
  49. isa_model/deployment/triton/provider.py +512 -0
  50. isa_model/deployment/triton/scripts/__init__.py +1 -0
  51. isa_model/deployment/triton/templates/__init__.py +1 -0
  52. isa_model/inference/__init__.py +47 -1
  53. isa_model/inference/ai_factory.py +137 -10
  54. isa_model/inference/legacy_services/__init__.py +21 -0
  55. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  56. isa_model/inference/legacy_services/model_service.py +573 -0
  57. isa_model/inference/legacy_services/model_serving.py +717 -0
  58. isa_model/inference/legacy_services/model_training.py +561 -0
  59. isa_model/inference/models/__init__.py +21 -0
  60. isa_model/inference/models/inference_config.py +551 -0
  61. isa_model/inference/models/inference_record.py +675 -0
  62. isa_model/inference/models/performance_models.py +714 -0
  63. isa_model/inference/repositories/__init__.py +9 -0
  64. isa_model/inference/repositories/inference_repository.py +828 -0
  65. isa_model/inference/services/audio/base_stt_service.py +184 -11
  66. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  67. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  68. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  69. isa_model/inference/services/llm/__init__.py +10 -2
  70. isa_model/inference/services/llm/base_llm_service.py +335 -24
  71. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  72. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  73. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  74. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  75. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  76. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  77. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  78. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  79. isa_model/inference/services/vision/__init__.py +22 -1
  80. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  81. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  82. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  83. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  84. isa_model/serving/api/cache_manager.py +245 -0
  85. isa_model/serving/api/dependencies/__init__.py +1 -0
  86. isa_model/serving/api/dependencies/auth.py +194 -0
  87. isa_model/serving/api/dependencies/database.py +139 -0
  88. isa_model/serving/api/error_handlers.py +284 -0
  89. isa_model/serving/api/fastapi_server.py +172 -22
  90. isa_model/serving/api/middleware/auth.py +8 -2
  91. isa_model/serving/api/middleware/security.py +23 -33
  92. isa_model/serving/api/middleware/tenant_context.py +414 -0
  93. isa_model/serving/api/routes/analytics.py +4 -1
  94. isa_model/serving/api/routes/config.py +645 -0
  95. isa_model/serving/api/routes/deployment_billing.py +315 -0
  96. isa_model/serving/api/routes/deployments.py +138 -2
  97. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  98. isa_model/serving/api/routes/health.py +32 -12
  99. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  100. isa_model/serving/api/routes/local_deployments.py +448 -0
  101. isa_model/serving/api/routes/tenants.py +575 -0
  102. isa_model/serving/api/routes/unified.py +680 -18
  103. isa_model/serving/api/routes/webhooks.py +479 -0
  104. isa_model/serving/api/startup.py +68 -54
  105. isa_model/utils/gpu_utils.py +311 -0
  106. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
  107. isa_model-0.4.4.dist-info/RECORD +180 -0
  108. isa_model/core/security/secrets.py +0 -358
  109. isa_model/core/storage/hf_storage.py +0 -419
  110. isa_model/core/storage/minio_storage.py +0 -0
  111. isa_model/deployment/cloud/__init__.py +0 -9
  112. isa_model/deployment/cloud/modal/__init__.py +0 -10
  113. isa_model/deployment/core/deployment_config.py +0 -356
  114. isa_model/deployment/core/isa_deployment_service.py +0 -401
  115. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  116. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  117. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  118. isa_model/deployment/runtime/deployed_service.py +0 -338
  119. isa_model/deployment/services/__init__.py +0 -9
  120. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  121. isa_model/deployment/services/model_service.py +0 -332
  122. isa_model/deployment/services/service_monitor.py +0 -356
  123. isa_model/deployment/services/service_registry.py +0 -527
  124. isa_model/eval/__init__.py +0 -92
  125. isa_model/eval/benchmarks/__init__.py +0 -27
  126. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  127. isa_model/eval/benchmarks.py +0 -701
  128. isa_model/eval/config/__init__.py +0 -10
  129. isa_model/eval/config/evaluation_config.py +0 -108
  130. isa_model/eval/evaluators/__init__.py +0 -24
  131. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  132. isa_model/eval/evaluators/base_evaluator.py +0 -503
  133. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  134. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  135. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  136. isa_model/eval/example_evaluation.py +0 -395
  137. isa_model/eval/factory.py +0 -798
  138. isa_model/eval/infrastructure/__init__.py +0 -24
  139. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  140. isa_model/eval/isa_benchmarks.py +0 -700
  141. isa_model/eval/isa_integration.py +0 -582
  142. isa_model/eval/metrics.py +0 -951
  143. isa_model/eval/tests/unit/test_basic.py +0 -396
  144. isa_model/serving/api/routes/evaluations.py +0 -579
  145. isa_model/training/__init__.py +0 -168
  146. isa_model/training/annotation/annotation_schema.py +0 -47
  147. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  148. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  149. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  150. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  151. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  152. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  153. isa_model/training/annotation/views/annotation_controller.py +0 -158
  154. isa_model/training/cloud/__init__.py +0 -22
  155. isa_model/training/cloud/job_orchestrator.py +0 -402
  156. isa_model/training/cloud/runpod_trainer.py +0 -454
  157. isa_model/training/cloud/storage_manager.py +0 -482
  158. isa_model/training/core/__init__.py +0 -26
  159. isa_model/training/core/config.py +0 -181
  160. isa_model/training/core/dataset.py +0 -222
  161. isa_model/training/core/trainer.py +0 -720
  162. isa_model/training/core/utils.py +0 -213
  163. isa_model/training/examples/intelligent_training_example.py +0 -281
  164. isa_model/training/factory.py +0 -424
  165. isa_model/training/intelligent/__init__.py +0 -25
  166. isa_model/training/intelligent/decision_engine.py +0 -643
  167. isa_model/training/intelligent/intelligent_factory.py +0 -888
  168. isa_model/training/intelligent/knowledge_base.py +0 -751
  169. isa_model/training/intelligent/resource_optimizer.py +0 -839
  170. isa_model/training/intelligent/task_classifier.py +0 -576
  171. isa_model/training/storage/__init__.py +0 -24
  172. isa_model/training/storage/core_integration.py +0 -439
  173. isa_model/training/storage/training_repository.py +0 -552
  174. isa_model/training/storage/training_storage.py +0 -628
  175. isa_model-0.4.0.dist-info/RECORD +0 -182
  176. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  177. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  178. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  179. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  180. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  181. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  182. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  183. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  184. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  185. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  186. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  187. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  188. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
  189. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
isa_model/inference/services/audio/base_stt_service.py

@@ -1,13 +1,172 @@
  from abc import ABC, abstractmethod
  from typing import Dict, Any, List, Union, Optional, BinaryIO
+ import aiohttp
+ import asyncio
+ import tempfile
+ import os
+ import logging
+ from io import BytesIO
  from isa_model.inference.services.base_service import BaseService

+ logger = logging.getLogger(__name__)
+
  class BaseSTTService(BaseService):
-     """Base class for Speech-to-Text services with unified task dispatch"""
+     """Base class for Speech-to-Text services with unified task dispatch and URL support"""
+
+     async def _prepare_audio_input(self, audio_input: Union[str, BinaryIO, bytes]) -> Union[str, BinaryIO]:
+         """
+         Prepare audio input by handling URLs, file paths, bytes data, and file objects
+
+         Args:
+             audio_input: Audio input (URL, file path, bytes data, or file object)
+
+         Returns:
+             Prepared audio input (local file path or file object)
+         """
+         if isinstance(audio_input, bytes):
+             # Handle bytes data from API uploads
+             logger.info(f"Converting bytes data to temporary file ({len(audio_input)} bytes)")
+             return await self._save_bytes_to_temp_file(audio_input)
+         elif isinstance(audio_input, str):
+             # Check if it's a URL
+             if audio_input.startswith(('http://', 'https://')):
+                 logger.info(f"Downloading audio from URL: {audio_input}")
+                 return await self._download_audio_url(audio_input)
+             else:
+                 # Regular file path or base64 string
+                 return audio_input
+         else:
+             # Already a file object
+             return audio_input
+
+     async def _prepare_audio_input_with_context(self, audio_input: Union[str, BinaryIO, bytes], context: Dict[str, Any]) -> Union[str, BinaryIO]:
+         """
+         Prepare audio input with additional context from kwargs
+
+         Args:
+             audio_input: Audio input (URL, file path, bytes data, or file object)
+             context: Additional context including filename, content_type
+
+         Returns:
+             Prepared audio input (local file path or file object)
+         """
+         if isinstance(audio_input, bytes):
+             # Handle bytes data from API uploads
+             filename = context.get('filename')
+             content_type = context.get('content_type')
+             logger.info(f"Converting bytes data to temporary file ({len(audio_input)} bytes), filename={filename}, content_type={content_type}")
+             return await self._save_bytes_to_temp_file(audio_input, filename, content_type)
+         else:
+             return await self._prepare_audio_input(audio_input)
+
+     async def _download_audio_url(self, url: str) -> str:
+         """
+         Download audio file from URL to temporary file
+
+         Args:
+             url: HTTP/HTTPS URL to audio file
+
+         Returns:
+             Path to downloaded temporary file
+
+         Raises:
+             Exception: If download fails
+         """
+         try:
+             async with aiohttp.ClientSession() as session:
+                 async with session.get(url) as response:
+                     if response.status != 200:
+                         raise Exception(f"Failed to download audio: HTTP {response.status}")
+
+                     # Get content type to determine file extension
+                     content_type = response.headers.get('Content-Type', '')
+                     file_ext = self._get_file_extension_from_content_type(content_type)
+
+                     # Create temporary file
+                     temp_file = tempfile.NamedTemporaryFile(
+                         delete=False,
+                         suffix=file_ext,
+                         prefix='audio_download_'
+                     )
+
+                     # Download and save
+                     async for chunk in response.content.iter_chunked(8192):
+                         temp_file.write(chunk)
+
+                     temp_file.close()
+                     logger.info(f"Downloaded audio to temporary file: {temp_file.name}")
+                     return temp_file.name
+
+         except Exception as e:
+             logger.error(f"Failed to download audio from URL {url}: {e}")
+             raise Exception(f"Audio URL download failed: {e}") from e
+
+     def _get_file_extension_from_content_type(self, content_type: str) -> str:
+         """Get appropriate file extension from Content-Type header"""
+         content_type_map = {
+             'audio/mpeg': '.mp3',
+             'audio/mp3': '.mp3',
+             'audio/wav': '.wav',
+             'audio/wave': '.wav',
+             'audio/x-wav': '.wav',
+             'audio/flac': '.flac',
+             'audio/ogg': '.ogg',
+             'audio/m4a': '.m4a',
+             'audio/mp4': '.mp4',
+             'audio/webm': '.webm'
+         }
+         return content_type_map.get(content_type.lower(), '.audio')
+
+     async def _save_bytes_to_temp_file(self, audio_bytes: bytes, filename: Optional[str] = None, content_type: Optional[str] = None) -> str:
+         """
+         Save audio bytes data to temporary file
+
+         Args:
+             audio_bytes: Audio data as bytes
+             filename: Optional filename to determine extension
+             content_type: Optional content type to determine extension
+
+         Returns:
+             Path to temporary file containing audio data
+         """
+         try:
+             # Determine file extension from filename or content type
+             suffix = '.mp3'  # Default
+             if filename and '.' in filename:
+                 suffix = '.' + filename.split('.')[-1]
+             elif content_type:
+                 suffix = self._get_file_extension_from_content_type(content_type)
+
+             # Create temporary file with proper audio extension
+             temp_file = tempfile.NamedTemporaryFile(
+                 delete=False,
+                 suffix=suffix,
+                 prefix='audio_bytes_'
+             )
+
+             # Write bytes data
+             temp_file.write(audio_bytes)
+             temp_file.close()
+
+             logger.info(f"Saved {len(audio_bytes)} bytes to temporary file: {temp_file.name}")
+             return temp_file.name
+
+         except Exception as e:
+             logger.error(f"Failed to save audio bytes to temporary file: {e}")
+             raise Exception(f"Audio bytes save failed: {e}") from e
+
+     def _cleanup_temp_file(self, file_path: str):
+         """Clean up temporary downloaded file"""
+         try:
+             if file_path and file_path.startswith(tempfile.gettempdir()):
+                 os.unlink(file_path)
+                 logger.debug(f"Cleaned up temporary file: {file_path}")
+         except Exception as e:
+             logger.warning(f"Failed to cleanup temporary file {file_path}: {e}")

      async def invoke(
          self,
-         audio_input: Union[str, BinaryIO, List[Union[str, BinaryIO]]],
+         audio_input: Union[str, BinaryIO, bytes, List[Union[str, BinaryIO, bytes]]],
          task: Optional[str] = None,
          **kwargs
      ) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
@@ -30,33 +189,47 @@ class BaseSTTService(BaseService):
          # ==================== Speech-to-Text Tasks ====================
          if task == "transcribe":
              if isinstance(audio_input, list):
+                 # Prepare all audio inputs (handle URLs)
+                 prepared_inputs = []
+                 for audio in audio_input:
+                     prepared_input = await self._prepare_audio_input_with_context(audio, kwargs)
+                     prepared_inputs.append(prepared_input)
                  return await self.transcribe_batch(
-                     audio_input,
+                     prepared_inputs,
                      kwargs.get("language"),
                      kwargs.get("prompt")
                  )
              else:
+                 # Prepare single audio input (handle URLs)
+                 prepared_input = await self._prepare_audio_input_with_context(audio_input, kwargs)
                  return await self.transcribe(
-                     audio_input,
+                     prepared_input,
                      kwargs.get("language"),
                      kwargs.get("prompt")
                  )
          elif task == "translate":
              if isinstance(audio_input, list):
                  raise ValueError("translate task requires single audio input")
-             return await self.translate(audio_input)
+             prepared_input = await self._prepare_audio_input_with_context(audio_input, kwargs)
+             return await self.translate(prepared_input)
          elif task == "batch_transcribe":
              if not isinstance(audio_input, list):
                  audio_input = [audio_input]
+             # Prepare all audio inputs (handle URLs)
+             prepared_inputs = []
+             for audio in audio_input:
+                 prepared_input = await self._prepare_audio_input_with_context(audio, kwargs)
+                 prepared_inputs.append(prepared_input)
              return await self.transcribe_batch(
-                 audio_input,
+                 prepared_inputs,
                  kwargs.get("language"),
                  kwargs.get("prompt")
              )
          elif task == "detect_language":
              if isinstance(audio_input, list):
                  raise ValueError("detect_language task requires single audio input")
-             return await self.detect_language(audio_input)
+             prepared_input = await self._prepare_audio_input_with_context(audio_input, kwargs)
+             return await self.detect_language(prepared_input)
          else:
              raise NotImplementedError(f"{self.__class__.__name__} does not support task: {task}")

@@ -72,7 +245,7 @@ class BaseSTTService(BaseService):
      @abstractmethod
      async def transcribe(
          self,
-         audio_file: Union[str, BinaryIO],
+         audio_file: Union[str, BinaryIO, bytes],
          language: Optional[str] = None,
          prompt: Optional[str] = None
      ) -> Dict[str, Any]:
@@ -96,7 +269,7 @@ class BaseSTTService(BaseService):
      @abstractmethod
      async def translate(
          self,
-         audio_file: Union[str, BinaryIO]
+         audio_file: Union[str, BinaryIO, bytes]
      ) -> Dict[str, Any]:
          """
          Translate audio file to English text
@@ -115,7 +288,7 @@ class BaseSTTService(BaseService):
      @abstractmethod
      async def transcribe_batch(
          self,
-         audio_files: List[Union[str, BinaryIO]],
+         audio_files: List[Union[str, BinaryIO, bytes]],
          language: Optional[str] = None,
          prompt: Optional[str] = None
      ) -> List[Dict[str, Any]]:
@@ -133,7 +306,7 @@ class BaseSTTService(BaseService):
          pass

      @abstractmethod
-     async def detect_language(self, audio_file: Union[str, BinaryIO]) -> Dict[str, Any]:
+     async def detect_language(self, audio_file: Union[str, BinaryIO, bytes]) -> Dict[str, Any]:
          """
          Detect language of audio file

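Taken together, these hunks let any STT subclass accept a URL or raw upload bytes through invoke(), with the base class normalizing the input to a temporary file first. A minimal usage sketch under assumptions not shown in this diff (the constructor arguments and the audio URL are placeholders, not part of the package):

import asyncio
from isa_model.inference.services.audio.openai_stt_service import OpenAISTTService

async def main():
    # Hypothetical construction; real initialization likely needs provider config / API keys.
    stt = OpenAISTTService(provider_name="openai", model_name="whisper-1")

    # URL input: _prepare_audio_input downloads it to a temp file before transcribe() runs.
    by_url = await stt.invoke("https://example.com/sample.mp3", task="transcribe", language="en")

    # Bytes input: filename/content_type hints in kwargs guide the temp-file suffix.
    with open("clip.wav", "rb") as f:
        by_bytes = await stt.invoke(f.read(), task="transcribe", filename="clip.wav")

    print(by_url, by_bytes)

asyncio.run(main())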
isa_model/inference/services/audio/openai_stt_service.py

@@ -47,7 +47,7 @@ class OpenAISTTService(BaseSTTService):
          wait=wait_exponential(multiplier=1, min=4, max=10),
          reraise=True
      )
-     async def transcribe(self, audio_file: Union[str, BinaryIO], language: Optional[str] = None, prompt: Optional[str] = None) -> Dict[str, Any]:
+     async def transcribe(self, audio_file: Union[str, BinaryIO, bytes], language: Optional[str] = None, prompt: Optional[str] = None, **kwargs) -> Dict[str, Any]:
          """
          Transcribe audio file to text using OpenAI's Whisper model.

@@ -73,8 +73,24 @@
          if prompt:
              transcription_params["prompt"] = prompt

-         # Handle file input - support base64 strings, file paths, and file objects
-         if isinstance(audio_file, str):
+         # Handle file input - support bytes, base64 strings, file paths, and file objects
+         if isinstance(audio_file, bytes):
+             # Handle bytes data directly
+             logger.info(f"Processing bytes audio data ({len(audio_file)} bytes)")
+             from io import BytesIO
+             audio_buffer = BytesIO(audio_file)
+
+             # Use filename from kwargs if provided, otherwise default to .mp3
+             filename = kwargs.get('filename', 'audio.mp3')
+             if filename and not filename.endswith(('.mp3', '.wav', '.m4a', '.flac', '.ogg', '.webm', '.mp4')):
+                 filename += '.mp3'  # Add extension if missing
+             audio_buffer.name = filename
+             logger.info(f"Using filename: {filename}")
+             transcription = await self.client.audio.transcriptions.create(
+                 file=audio_buffer,
+                 **transcription_params
+             )
+         elif isinstance(audio_file, str):
              # Check if it's a base64 string or file path
              if len(audio_file) > 100 and not os.path.exists(audio_file):
                  # Likely a base64 string
@@ -147,7 +163,7 @@
          wait=wait_exponential(multiplier=1, min=4, max=10),
          reraise=True
      )
-     async def translate(self, audio_file: Union[str, BinaryIO]) -> Dict[str, Any]:
+     async def translate(self, audio_file: Union[str, BinaryIO, bytes]) -> Dict[str, Any]:
          """
          Translate audio file to English text using OpenAI's Whisper model.

@@ -211,7 +227,7 @@
              logger.error(f"Translation failed: {e}")
              raise

-     async def transcribe_batch(self, audio_files: List[Union[str, BinaryIO]], language: Optional[str] = None, prompt: Optional[str] = None) -> List[Dict[str, Any]]:
+     async def transcribe_batch(self, audio_files: List[Union[str, BinaryIO, bytes]], language: Optional[str] = None, prompt: Optional[str] = None) -> List[Dict[str, Any]]:
          """
          Transcribe multiple audio files in batch.

@@ -238,7 +254,7 @@

          return results

-     async def detect_language(self, audio_file: Union[str, BinaryIO]) -> Dict[str, Any]:
+     async def detect_language(self, audio_file: Union[str, BinaryIO, bytes]) -> Dict[str, Any]:
          """
          Detect the language of an audio file.

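The bytes branch above relies on a small detail of the OpenAI SDK upload path: a BytesIO object is given a .name with a recognized audio extension so the multipart upload carries a filename the API can use to detect the format. The same trick in isolation (variable names are local to this sketch, not part of the package):

from io import BytesIO

with open("clip.wav", "rb") as f:
    audio_bytes = f.read()

buffer = BytesIO(audio_bytes)
buffer.name = "clip.wav"  # filename hint so the transcription endpoint can infer the audio format
# transcription = await self.client.audio.transcriptions.create(file=buffer, **transcription_params)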
isa_model/inference/services/embedding/ollama_embed_service.py

@@ -4,6 +4,7 @@ import asyncio
  from typing import List, Dict, Any, Optional

  from isa_model.inference.services.embedding.base_embed_service import BaseEmbedService
+ from isa_model.core.config.config_manager import ConfigManager

  logger = logging.getLogger(__name__)

@@ -21,9 +22,20 @@ class OllamaEmbedService(BaseEmbedService):

          # Initialize HTTP client with provider configuration
          try:
-             host = provider_config.get("host", "localhost")
-             port = provider_config.get("port", 11434)
-             base_url = f"http://{host}:{port}"
+             config_manager = ConfigManager()
+             # Use Consul discovery with fallback
+             default_base_url = config_manager.get_ollama_url()
+
+             if "base_url" in provider_config:
+                 base_url = provider_config["base_url"]
+             else:
+                 host = provider_config.get("host", "localhost")
+                 port = provider_config.get("port", 11434)
+                 base_url = provider_config.get("base_url", f"http://{host}:{port}")
+
+             # Use config manager default (Consul discovery) if still not set
+             if base_url == f"http://localhost:11434":
+                 base_url = default_base_url

              self.client = httpx.AsyncClient(base_url=base_url, timeout=30.0)

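The rewritten initializer resolves the Ollama endpoint in a fixed order: an explicit base_url in the provider config wins, then host/port, and only a still-default http://localhost:11434 is swapped for the ConfigManager's Consul-discovered URL. A standalone sketch of that resolution order (the function name is ours, for illustration only):

def resolve_ollama_base_url(provider_config: dict, discovered_default: str) -> str:
    # Explicit base_url always wins.
    if "base_url" in provider_config:
        return provider_config["base_url"]
    # Otherwise build one from host/port.
    host = provider_config.get("host", "localhost")
    port = provider_config.get("port", 11434)
    base_url = f"http://{host}:{port}"
    # If nothing overrode the localhost default, defer to service discovery.
    if base_url == "http://localhost:11434":
        return discovered_default
    return base_url

# An empty provider config falls back to the discovered endpoint.
print(resolve_ollama_base_url({}, "http://ollama.service.consul:11434"))
print(resolve_ollama_base_url({"host": "10.0.0.5"}, "http://ollama.service.consul:11434"))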
isa_model/inference/services/embedding/resilient_embed_service.py (new file)

@@ -0,0 +1,285 @@
+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*-
+
+ """
+ Resilient Embedding Service - Provides fallback mechanisms for embedding operations
+ Automatically handles OpenAI API failures with local embedding alternatives
+ """
+
+ import logging
+ import random
+ import numpy as np
+ from typing import List, Dict, Any, Optional, Union
+ from openai import APIConnectionError, APITimeoutError, RateLimitError, AuthenticationError
+
+ from isa_model.inference.services.embedding.openai_embed_service import OpenAIEmbedService
+ from isa_model.inference.services.embedding.base_embed_service import BaseEmbedService
+
+ logger = logging.getLogger(__name__)
+
+ class ResilientEmbedService(BaseEmbedService):
+     """
+     Resilient embedding service with automatic fallback mechanisms
+
+     When OpenAI service fails, automatically falls back to:
+     1. Simple TF-IDF based embeddings
+     2. Random embeddings (for testing/demo purposes)
+     """
+
+     def __init__(self, provider_name: str = "openai", model_name: str = "text-embedding-3-small", **kwargs):
+         super().__init__(provider_name, model_name, **kwargs)
+
+         # Try to initialize OpenAI service
+         self.primary_service = None
+         self.fallback_mode = False
+
+         try:
+             self.primary_service = OpenAIEmbedService(provider_name, model_name, **kwargs)
+             logger.info("✅ Primary OpenAI embedding service initialized")
+         except Exception as e:
+             logger.warning(f"Failed to initialize OpenAI service, starting in fallback mode: {e}")
+             self.fallback_mode = True
+
+         # Initialize TF-IDF vectorizer for fallback
+         self._init_fallback_vectorizer()
+
+     def _init_fallback_vectorizer(self):
+         """Initialize TF-IDF vectorizer for fallback embeddings"""
+         try:
+             from sklearn.feature_extraction.text import TfidfVectorizer
+
+             # Use a simple TF-IDF vectorizer with limited features
+             self.tfidf_vectorizer = TfidfVectorizer(
+                 max_features=1536,  # Match OpenAI dimensions
+                 stop_words='english',
+                 ngram_range=(1, 2),
+                 lowercase=True,
+                 strip_accents='unicode'
+             )
+
+             # Pre-fit with some common words to ensure consistency
+             common_words = [
+                 "hello world", "machine learning", "artificial intelligence",
+                 "data science", "natural language processing", "computer vision",
+                 "deep learning", "neural networks", "text analysis",
+                 "information retrieval", "semantic search", "embeddings"
+             ]
+             self.tfidf_vectorizer.fit(common_words)
+             self.tfidf_available = True
+             logger.info("✅ TF-IDF fallback vectorizer initialized")
+
+         except ImportError:
+             logger.warning("scikit-learn not available, using random embeddings as fallback")
+             self.tfidf_available = False
+
+     def _generate_fallback_embedding(self, text: str, dimension: int = 1536) -> List[float]:
+         """Generate fallback embedding for a single text"""
+
+         if self.tfidf_available and hasattr(self, 'tfidf_vectorizer'):
+             try:
+                 # Use TF-IDF for more meaningful embeddings
+                 tfidf_vector = self.tfidf_vectorizer.transform([text]).toarray()[0]
+
+                 # Pad or truncate to desired dimension
+                 if len(tfidf_vector) < dimension:
+                     padding = [0.0] * (dimension - len(tfidf_vector))
+                     tfidf_vector = np.concatenate([tfidf_vector, padding])
+                 elif len(tfidf_vector) > dimension:
+                     tfidf_vector = tfidf_vector[:dimension]
+
+                 # Normalize to unit vector
+                 norm = np.linalg.norm(tfidf_vector)
+                 if norm > 0:
+                     tfidf_vector = tfidf_vector / norm
+
+                 return tfidf_vector.tolist()
+
+             except Exception as e:
+                 logger.warning(f"TF-IDF fallback failed: {e}, using random embedding")
+
+         # Random embedding as last resort (normalized)
+         random.seed(hash(text) % (2**32))  # Deterministic based on text
+         embedding = [random.gauss(0, 1) for _ in range(dimension)]
+
+         # Normalize to unit vector
+         norm = np.sqrt(sum(x*x for x in embedding))
+         if norm > 0:
+             embedding = [x/norm for x in embedding]
+
+         return embedding
+
+     async def create_text_embedding(self, text: str) -> List[float]:
+         """Create embedding for single text with fallback"""
+
+         # Try primary service first if available
+         if not self.fallback_mode and self.primary_service:
+             try:
+                 result = await self.primary_service.create_text_embedding(text)
+                 logger.debug("✅ Used primary OpenAI service")
+                 return result
+
+             except (APIConnectionError, APITimeoutError) as e:
+                 logger.warning(f"OpenAI connection issue, switching to fallback: {e}")
+                 self.fallback_mode = True
+             except RateLimitError as e:
+                 logger.warning(f"OpenAI rate limit hit, using fallback: {e}")
+             except AuthenticationError as e:
+                 logger.error(f"OpenAI authentication failed, switching to fallback: {e}")
+                 self.fallback_mode = True
+             except Exception as e:
+                 logger.warning(f"OpenAI service error, using fallback: {e}")
+
+         # Use fallback embedding
+         logger.info(f"Using fallback embedding for text: {text[:50]}...")
+         return self._generate_fallback_embedding(text)
+
+     async def create_text_embeddings(self, texts: List[str]) -> List[List[float]]:
+         """Create embeddings for multiple texts with fallback"""
+         if not texts:
+             return []
+
+         # Try primary service first if available
+         if not self.fallback_mode and self.primary_service:
+             try:
+                 result = await self.primary_service.create_text_embeddings(texts)
+                 logger.debug(f"✅ Used primary OpenAI service for {len(texts)} texts")
+                 return result
+
+             except (APIConnectionError, APITimeoutError) as e:
+                 logger.warning(f"OpenAI connection issue, switching to fallback: {e}")
+                 self.fallback_mode = True
+             except RateLimitError as e:
+                 logger.warning(f"OpenAI rate limit hit, using fallback: {e}")
+             except AuthenticationError as e:
+                 logger.error(f"OpenAI authentication failed, switching to fallback: {e}")
+                 self.fallback_mode = True
+             except Exception as e:
+                 logger.warning(f"OpenAI service error, using fallback: {e}")
+
+         # Use fallback embeddings
+         logger.info(f"Using fallback embeddings for {len(texts)} texts")
+         return [self._generate_fallback_embedding(text) for text in texts]
+
+     async def create_chunks(self, text: str, metadata: Optional[Dict] = None,
+                             chunk_size: int = 400, overlap: int = 50, **kwargs) -> List[Dict]:
+         """Create text chunks with embeddings (with fallback)"""
+         words = text.split()
+         if not words:
+             return []
+
+         chunks = []
+         chunk_texts = []
+
+         for i in range(0, len(words), chunk_size - overlap):
+             chunk_words = words[i:i + chunk_size]
+             chunk_text = " ".join(chunk_words)
+             chunk_texts.append(chunk_text)
+
+             chunks.append({
+                 "text": chunk_text,
+                 "start_index": i,
+                 "end_index": min(i + chunk_size, len(words)),
+                 "metadata": metadata or {}
+             })
+
+         # Get embeddings for all chunks
+         embeddings = await self.create_text_embeddings(chunk_texts)
+
+         # Add embeddings to chunks
+         for chunk, embedding in zip(chunks, embeddings):
+             chunk["embedding"] = embedding
+             chunk["fallback_used"] = self.fallback_mode
+
+         return chunks
+
+     async def compute_similarity(self, embedding1: List[float], embedding2: List[float]) -> float:
+         """Compute cosine similarity between two embeddings"""
+         import math
+
+         try:
+             dot_product = sum(a * b for a, b in zip(embedding1, embedding2))
+             norm1 = math.sqrt(sum(a * a for a in embedding1))
+             norm2 = math.sqrt(sum(b * b for b in embedding2))
+
+             if norm1 * norm2 == 0:
+                 return 0.0
+
+             return dot_product / (norm1 * norm2)
+         except Exception as e:
+             logger.error(f"Error computing similarity: {e}")
+             return 0.0
+
+     async def find_similar_texts(
+         self,
+         query_embedding: List[float],
+         candidate_embeddings: List[List[float]],
+         top_k: int = 5
+     ) -> List[Dict[str, Any]]:
+         """Find most similar texts based on embeddings"""
+         try:
+             similarities = []
+
+             for i, candidate in enumerate(candidate_embeddings):
+                 similarity = await self.compute_similarity(query_embedding, candidate)
+                 similarities.append({
+                     "index": i,
+                     "similarity": similarity
+                 })
+
+             # Sort by similarity in descending order and return top_k
+             similarities.sort(key=lambda x: x["similarity"], reverse=True)
+             return similarities[:top_k]
+
+         except Exception as e:
+             logger.error(f"Error finding similar texts: {e}")
+             return []
+
+     def get_embedding_dimension(self) -> int:
+         """Get the dimension of embeddings produced by this service"""
+         return 1536  # Standard dimension for consistency
+
+     def get_max_input_length(self) -> int:
+         """Get maximum input text length supported"""
+         return 8192
+
+     def is_fallback_mode(self) -> bool:
+         """Check if service is running in fallback mode"""
+         return self.fallback_mode
+
+     def get_service_status(self) -> Dict[str, Any]:
+         """Get current service status and capabilities"""
+         return {
+             "primary_service_available": not self.fallback_mode and self.primary_service is not None,
+             "fallback_mode": self.fallback_mode,
+             "tfidf_available": self.tfidf_available,
+             "provider": self.provider_name,
+             "model": self.model_name,
+             "embedding_dimension": self.get_embedding_dimension(),
+             "max_input_length": self.get_max_input_length()
+         }
+
+     async def health_check(self) -> Dict[str, Any]:
+         """Health check with detailed status"""
+         status = self.get_service_status()
+
+         # Test embedding generation
+         try:
+             test_embedding = await self.create_text_embedding("test")
+             status["embedding_test"] = {
+                 "success": True,
+                 "dimension": len(test_embedding),
+                 "fallback_used": self.fallback_mode
+             }
+         except Exception as e:
+             status["embedding_test"] = {
+                 "success": False,
+                 "error": str(e)
+             }
+
+         return status
+
+     async def close(self):
+         """Cleanup resources"""
+         if self.primary_service:
+             await self.primary_service.close()
+         logger.info("ResilientEmbedService has been closed.")
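ResilientEmbedService keeps the OpenAI embedding path as the primary route and degrades to TF-IDF (or deterministic random vectors) when the API is unavailable, always returning 1536-dimensional vectors. A usage sketch, assuming the constructor defaults above and OpenAI credentials supplied via the environment:

import asyncio
from isa_model.inference.services.embedding.resilient_embed_service import ResilientEmbedService

async def main():
    service = ResilientEmbedService(provider_name="openai", model_name="text-embedding-3-small")

    vectors = await service.create_text_embeddings(["hello world", "semantic search"])
    print(len(vectors), len(vectors[0]))   # 2 vectors, 1536 dimensions each
    print(service.is_fallback_mode())      # True if the OpenAI call failed and the TF-IDF/random path was used

    score = await service.compute_similarity(vectors[0], vectors[1])
    print(f"cosine similarity: {score:.3f}")

    await service.close()

asyncio.run(main())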
isa_model/inference/services/llm/__init__.py

@@ -6,9 +6,17 @@ LLM Services - Business logic services for Language Models
  from .ollama_llm_service import OllamaLLMService
  from .openai_llm_service import OpenAILLMService
  from .yyds_llm_service import YydsLLMService
+ from .huggingface_llm_service import ISALLMService, HuggingFaceLLMService, HuggingFaceInferenceService
+ # LocalLLMService requires torch (local mode only) - import explicitly when needed
+ # from .local_llm_service import LocalLLMService, create_local_llm_service

  __all__ = [
      "OllamaLLMService",
-     "OpenAILLMService",
-     "YydsLLMService"
+     "OpenAILLMService",
+     "YydsLLMService",
+     "ISALLMService",
+     "HuggingFaceLLMService",
+     "HuggingFaceInferenceService",
+     # "LocalLLMService",  # Requires isa_model[local]
+     # "create_local_llm_service"
  ]