isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. isa_model/client.py +1166 -584
  2. isa_model/core/cache/redis_cache.py +410 -0
  3. isa_model/core/config/config_manager.py +282 -12
  4. isa_model/core/config.py +91 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/direct_db_client.py +114 -0
  7. isa_model/core/database/migration_manager.py +563 -0
  8. isa_model/core/database/migrations.py +297 -0
  9. isa_model/core/database/supabase_client.py +258 -0
  10. isa_model/core/dependencies.py +316 -0
  11. isa_model/core/discovery/__init__.py +19 -0
  12. isa_model/core/discovery/consul_discovery.py +190 -0
  13. isa_model/core/logging/__init__.py +54 -0
  14. isa_model/core/logging/influx_logger.py +523 -0
  15. isa_model/core/logging/loki_logger.py +160 -0
  16. isa_model/core/models/__init__.py +46 -0
  17. isa_model/core/models/config_models.py +625 -0
  18. isa_model/core/models/deployment_billing_tracker.py +430 -0
  19. isa_model/core/models/model_billing_tracker.py +60 -88
  20. isa_model/core/models/model_manager.py +66 -25
  21. isa_model/core/models/model_metadata.py +690 -0
  22. isa_model/core/models/model_repo.py +217 -55
  23. isa_model/core/models/model_statistics_tracker.py +234 -0
  24. isa_model/core/models/model_storage.py +0 -1
  25. isa_model/core/models/model_version_manager.py +959 -0
  26. isa_model/core/models/system_models.py +857 -0
  27. isa_model/core/pricing_manager.py +2 -249
  28. isa_model/core/repositories/__init__.py +9 -0
  29. isa_model/core/repositories/config_repository.py +912 -0
  30. isa_model/core/resilience/circuit_breaker.py +366 -0
  31. isa_model/core/security/secrets.py +358 -0
  32. isa_model/core/services/__init__.py +2 -4
  33. isa_model/core/services/intelligent_model_selector.py +479 -370
  34. isa_model/core/storage/hf_storage.py +2 -2
  35. isa_model/core/types.py +8 -0
  36. isa_model/deployment/__init__.py +5 -48
  37. isa_model/deployment/core/__init__.py +2 -31
  38. isa_model/deployment/core/deployment_manager.py +1278 -368
  39. isa_model/deployment/local/__init__.py +31 -0
  40. isa_model/deployment/local/config.py +248 -0
  41. isa_model/deployment/local/gpu_gateway.py +607 -0
  42. isa_model/deployment/local/health_checker.py +428 -0
  43. isa_model/deployment/local/provider.py +586 -0
  44. isa_model/deployment/local/tensorrt_service.py +621 -0
  45. isa_model/deployment/local/transformers_service.py +644 -0
  46. isa_model/deployment/local/vllm_service.py +527 -0
  47. isa_model/deployment/modal/__init__.py +8 -0
  48. isa_model/deployment/modal/config.py +136 -0
  49. isa_model/deployment/modal/deployer.py +894 -0
  50. isa_model/deployment/modal/services/__init__.py +3 -0
  51. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  52. isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
  53. isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
  54. isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
  55. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  56. isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
  57. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  58. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  59. isa_model/deployment/modal/services/video/__init__.py +1 -0
  60. isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
  61. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  62. isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
  63. isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
  64. isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
  65. isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
  66. isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
  67. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  68. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  69. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  70. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  71. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  72. isa_model/deployment/storage/__init__.py +5 -0
  73. isa_model/deployment/storage/deployment_repository.py +824 -0
  74. isa_model/deployment/triton/__init__.py +10 -0
  75. isa_model/deployment/triton/config.py +196 -0
  76. isa_model/deployment/triton/configs/__init__.py +1 -0
  77. isa_model/deployment/triton/provider.py +512 -0
  78. isa_model/deployment/triton/scripts/__init__.py +1 -0
  79. isa_model/deployment/triton/templates/__init__.py +1 -0
  80. isa_model/inference/__init__.py +47 -1
  81. isa_model/inference/ai_factory.py +179 -16
  82. isa_model/inference/legacy_services/__init__.py +21 -0
  83. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  84. isa_model/inference/legacy_services/model_service.py +573 -0
  85. isa_model/inference/legacy_services/model_serving.py +717 -0
  86. isa_model/inference/legacy_services/model_training.py +561 -0
  87. isa_model/inference/models/__init__.py +21 -0
  88. isa_model/inference/models/inference_config.py +551 -0
  89. isa_model/inference/models/inference_record.py +675 -0
  90. isa_model/inference/models/performance_models.py +714 -0
  91. isa_model/inference/repositories/__init__.py +9 -0
  92. isa_model/inference/repositories/inference_repository.py +828 -0
  93. isa_model/inference/services/audio/__init__.py +21 -0
  94. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  95. isa_model/inference/services/audio/base_stt_service.py +184 -11
  96. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  97. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  98. isa_model/inference/services/audio/openai_stt_service.py +53 -11
  99. isa_model/inference/services/base_service.py +17 -1
  100. isa_model/inference/services/custom_model_manager.py +277 -0
  101. isa_model/inference/services/embedding/__init__.py +13 -0
  102. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  103. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  104. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  105. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  106. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  107. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  108. isa_model/inference/services/img/__init__.py +2 -2
  109. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  110. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  111. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  112. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  113. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  114. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  115. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  116. isa_model/inference/services/llm/__init__.py +10 -2
  117. isa_model/inference/services/llm/base_llm_service.py +361 -26
  118. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  119. isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
  120. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  121. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  122. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  123. isa_model/inference/services/llm/local_llm_service.py +747 -0
  124. isa_model/inference/services/llm/ollama_llm_service.py +11 -3
  125. isa_model/inference/services/llm/openai_llm_service.py +670 -56
  126. isa_model/inference/services/llm/yyds_llm_service.py +10 -3
  127. isa_model/inference/services/vision/__init__.py +27 -6
  128. isa_model/inference/services/vision/base_vision_service.py +118 -185
  129. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  130. isa_model/inference/services/vision/helpers/image_utils.py +19 -10
  131. isa_model/inference/services/vision/isa_vision_service.py +634 -0
  132. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  133. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  134. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  135. isa_model/serving/api/cache_manager.py +245 -0
  136. isa_model/serving/api/dependencies/__init__.py +1 -0
  137. isa_model/serving/api/dependencies/auth.py +194 -0
  138. isa_model/serving/api/dependencies/database.py +139 -0
  139. isa_model/serving/api/error_handlers.py +284 -0
  140. isa_model/serving/api/fastapi_server.py +240 -18
  141. isa_model/serving/api/middleware/auth.py +317 -0
  142. isa_model/serving/api/middleware/security.py +268 -0
  143. isa_model/serving/api/middleware/tenant_context.py +414 -0
  144. isa_model/serving/api/routes/analytics.py +489 -0
  145. isa_model/serving/api/routes/config.py +645 -0
  146. isa_model/serving/api/routes/deployment_billing.py +315 -0
  147. isa_model/serving/api/routes/deployments.py +475 -0
  148. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  149. isa_model/serving/api/routes/health.py +32 -12
  150. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  151. isa_model/serving/api/routes/local_deployments.py +448 -0
  152. isa_model/serving/api/routes/logs.py +430 -0
  153. isa_model/serving/api/routes/settings.py +582 -0
  154. isa_model/serving/api/routes/tenants.py +575 -0
  155. isa_model/serving/api/routes/unified.py +992 -171
  156. isa_model/serving/api/routes/webhooks.py +479 -0
  157. isa_model/serving/api/startup.py +318 -0
  158. isa_model/serving/modal_proxy_server.py +249 -0
  159. isa_model/utils/gpu_utils.py +311 -0
  160. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
  161. isa_model-0.4.3.dist-info/RECORD +193 -0
  162. isa_model/deployment/cloud/__init__.py +0 -9
  163. isa_model/deployment/cloud/modal/__init__.py +0 -10
  164. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  165. isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
  166. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
  167. isa_model/deployment/cloud/modal/register_models.py +0 -321
  168. isa_model/deployment/core/deployment_config.py +0 -356
  169. isa_model/deployment/core/isa_deployment_service.py +0 -401
  170. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  171. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  172. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  173. isa_model/deployment/runtime/deployed_service.py +0 -338
  174. isa_model/deployment/services/__init__.py +0 -9
  175. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  176. isa_model/deployment/services/model_service.py +0 -332
  177. isa_model/deployment/services/service_monitor.py +0 -356
  178. isa_model/deployment/services/service_registry.py +0 -527
  179. isa_model/eval/__init__.py +0 -92
  180. isa_model/eval/benchmarks.py +0 -469
  181. isa_model/eval/config/__init__.py +0 -10
  182. isa_model/eval/config/evaluation_config.py +0 -108
  183. isa_model/eval/evaluators/__init__.py +0 -18
  184. isa_model/eval/evaluators/base_evaluator.py +0 -503
  185. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  186. isa_model/eval/factory.py +0 -531
  187. isa_model/eval/infrastructure/__init__.py +0 -24
  188. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  189. isa_model/eval/metrics.py +0 -798
  190. isa_model/inference/adapter/unified_api.py +0 -248
  191. isa_model/inference/services/helpers/stacked_config.py +0 -148
  192. isa_model/inference/services/img/flux_professional_service.py +0 -603
  193. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  194. isa_model/inference/services/others/table_transformer_service.py +0 -61
  195. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  196. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  197. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  198. isa_model/scripts/inference_tracker.py +0 -283
  199. isa_model/scripts/mlflow_manager.py +0 -379
  200. isa_model/scripts/model_registry.py +0 -465
  201. isa_model/scripts/register_models.py +0 -370
  202. isa_model/scripts/register_models_with_embeddings.py +0 -510
  203. isa_model/scripts/start_mlflow.py +0 -95
  204. isa_model/scripts/training_tracker.py +0 -257
  205. isa_model/training/__init__.py +0 -74
  206. isa_model/training/annotation/annotation_schema.py +0 -47
  207. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  208. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  209. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  210. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  211. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  212. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  213. isa_model/training/annotation/views/annotation_controller.py +0 -158
  214. isa_model/training/cloud/__init__.py +0 -22
  215. isa_model/training/cloud/job_orchestrator.py +0 -402
  216. isa_model/training/cloud/runpod_trainer.py +0 -454
  217. isa_model/training/cloud/storage_manager.py +0 -482
  218. isa_model/training/core/__init__.py +0 -23
  219. isa_model/training/core/config.py +0 -181
  220. isa_model/training/core/dataset.py +0 -222
  221. isa_model/training/core/trainer.py +0 -720
  222. isa_model/training/core/utils.py +0 -213
  223. isa_model/training/factory.py +0 -424
  224. isa_model-0.3.91.dist-info/RECORD +0 -138
  225. /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
  226. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  227. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  228. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,21 @@
1
+ """
2
+ Audio Services - Speech, TTS, and Audio Processing Services
3
+ """
4
+
5
+ from .base_stt_service import BaseSTTService
6
+ from .base_tts_service import BaseTTSService
7
+ from .base_realtime_service import BaseRealtimeService
8
+ from .openai_stt_service import OpenAISTTService
9
+ from .openai_tts_service import OpenAITTSService
10
+ from .openai_realtime_service import OpenAIRealtimeService
11
+ from .replicate_tts_service import ReplicateTTSService
12
+
13
# Public API of the audio services package.
# Every entry must match an imported name exactly: ``from <package> import *``
# raises AttributeError for any string here that is not a module attribute.
__all__ = [
    'BaseSTTService',
    'BaseTTSService',
    'BaseRealtimeService',
    'OpenAISTTService',
    'OpenAITTSService',
    'OpenAIRealtimeService',
    'ReplicateTTSService',
]
@@ -0,0 +1,225 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Dict, Any, List, Union, Optional, Callable, AsyncGenerator
3
+ from enum import Enum
4
+ import asyncio
5
+ from isa_model.inference.services.base_service import BaseService
6
+
7
+
8
class RealtimeEventType(Enum):
    """Wire-level ``type`` strings used by Realtime API events.

    Each member's value is the exact string carried in an event's ``type``
    field, so ``member.value`` can be used directly as an event-handler key.
    """

    # --- session lifecycle ---
    SESSION_CREATED = "session.created"
    SESSION_UPDATED = "session.updated"

    # --- input audio buffer ---
    INPUT_AUDIO_BUFFER_APPEND = "input_audio_buffer.append"
    INPUT_AUDIO_BUFFER_COMMIT = "input_audio_buffer.commit"
    INPUT_AUDIO_BUFFER_CLEAR = "input_audio_buffer.clear"
    INPUT_AUDIO_BUFFER_COMMITTED = "input_audio_buffer.committed"
    INPUT_AUDIO_BUFFER_SPEECH_STARTED = "input_audio_buffer.speech_started"
    INPUT_AUDIO_BUFFER_SPEECH_STOPPED = "input_audio_buffer.speech_stopped"

    # --- conversation items ---
    CONVERSATION_ITEM_CREATE = "conversation.item.create"
    CONVERSATION_ITEM_CREATED = "conversation.item.created"
    CONVERSATION_ITEM_DELETE = "conversation.item.delete"
    CONVERSATION_ITEM_DELETED = "conversation.item.deleted"
    CONVERSATION_ITEM_TRUNCATE = "conversation.item.truncate"
    CONVERSATION_ITEM_TRUNCATED = "conversation.item.truncated"

    # --- responses ---
    RESPONSE_CREATE = "response.create"
    RESPONSE_CREATED = "response.created"
    RESPONSE_DONE = "response.done"
    RESPONSE_OUTPUT_ITEM_ADDED = "response.output_item.added"
    RESPONSE_OUTPUT_ITEM_DONE = "response.output_item.done"
    RESPONSE_CONTENT_PART_ADDED = "response.content_part.added"
    RESPONSE_CONTENT_PART_DONE = "response.content_part.done"
    RESPONSE_TEXT_DELTA = "response.text.delta"
    RESPONSE_TEXT_DONE = "response.text.done"
    RESPONSE_AUDIO_TRANSCRIPT_DELTA = "response.audio_transcript.delta"
    RESPONSE_AUDIO_TRANSCRIPT_DONE = "response.audio_transcript.done"
    RESPONSE_AUDIO_DELTA = "response.audio.delta"
    RESPONSE_AUDIO_DONE = "response.audio.done"
    RESPONSE_FUNCTION_CALL_ARGUMENTS_DELTA = "response.function_call_arguments.delta"
    RESPONSE_FUNCTION_CALL_ARGUMENTS_DONE = "response.function_call_arguments.done"

    # --- rate limiting ---
    RATE_LIMITS_UPDATED = "rate_limits.updated"

    # --- errors ---
    ERROR = "error"
52
+
53
+
54
class BaseRealtimeService(BaseService):
    """Base class for Realtime API services.

    Keeps the per-connection state shared by realtime providers (session id,
    websocket handle, registered event handlers, connection flag) and routes
    string task names to provider-specific coroutines through ``invoke``.
    """

    def __init__(self, provider_name: str, model_name: str, **kwargs):
        super().__init__(provider_name, model_name, **kwargs)
        self.session_id: Optional[str] = None
        self.websocket = None
        # Maps an event ``type`` string to the callbacks registered for it.
        self.event_handlers: Dict[str, List[Callable]] = {}
        self.is_connected = False

    async def invoke(
        self,
        task: str,
        **kwargs
    ) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
        """
        Unified task dispatcher for realtime conversation tasks.

        Args:
            task: Task name; one of the values returned by
                ``get_supported_tasks``.
            **kwargs: Task-specific arguments. ``send_audio`` requires
                ``audio_data`` and ``send_text`` requires ``text``; everything
                else is forwarded unchanged to the underlying method.

        Returns:
            Whatever the dispatched method returns.

        Raises:
            ValueError: If a required task argument is missing or empty.
            NotImplementedError: If ``task`` is not a supported task name.
        """
        if task == "create_session":
            return await self.create_session(**kwargs)
        elif task == "connect":
            return await self.connect_websocket(**kwargs)
        elif task == "send_audio":
            # Pop the payload so it is not passed twice (positionally and via
            # **kwargs), which would raise "multiple values for argument".
            audio_data = kwargs.pop("audio_data", None)
            if not audio_data:
                raise ValueError("audio_data is required for send_audio task")
            return await self.send_audio_message(audio_data, **kwargs)
        elif task == "send_text":
            # Same duplicate-argument hazard as send_audio.
            text = kwargs.pop("text", None)
            if not text:
                raise ValueError("text is required for send_text task")
            return await self.send_text_message(text, **kwargs)
        elif task == "listen":
            import inspect

            # listen_for_responses is annotated as an async generator. Calling
            # an async generator function returns an object that cannot be
            # awaited, so only await when the implementation is a coroutine.
            responses = self.listen_for_responses(**kwargs)
            if inspect.isawaitable(responses):
                return await responses
            return responses
        elif task == "audio_chat":
            return await self.simple_audio_chat(**kwargs)
        elif task == "text_chat":
            return await self.simple_text_chat(**kwargs)
        else:
            raise NotImplementedError(f"{self.__class__.__name__} does not support task: {task}")

    def get_supported_tasks(self) -> List[str]:
        """Return the task names accepted by ``invoke``."""
        return [
            "create_session", "connect", "send_audio", "send_text",
            "listen", "audio_chat", "text_chat"
        ]

    @abstractmethod
    async def create_session(
        self,
        instructions: str = "You are a helpful assistant.",
        modalities: Optional[List[str]] = None,
        voice: str = "alloy",
        **kwargs
    ) -> Dict[str, Any]:
        """Create a new realtime session"""
        pass

    @abstractmethod
    async def connect_websocket(self, **kwargs) -> bool:
        """Connect to the realtime WebSocket"""
        pass

    @abstractmethod
    async def send_audio_message(
        self,
        audio_data: bytes,
        format: str = "pcm16",
        **kwargs
    ) -> Dict[str, Any]:
        """Send audio data to the realtime session"""
        pass

    @abstractmethod
    async def send_text_message(
        self,
        text: str,
        **kwargs
    ) -> Dict[str, Any]:
        """Send text message to the realtime session"""
        pass

    @abstractmethod
    async def listen_for_responses(
        self,
        message_handler: Optional[Callable] = None,
        **kwargs
    ) -> AsyncGenerator[Dict[str, Any], None]:
        """Listen for responses from the realtime session"""
        pass

    @abstractmethod
    async def simple_audio_chat(
        self,
        audio_data: bytes,
        instructions: str = "You are a helpful assistant. Respond in audio.",
        voice: str = "alloy",
        **kwargs
    ) -> Dict[str, Any]:
        """Simple audio chat - send audio, get audio response"""
        pass

    @abstractmethod
    async def simple_text_chat(
        self,
        text: str,
        instructions: str = "You are a helpful assistant.",
        voice: str = "alloy",
        **kwargs
    ) -> Dict[str, Any]:
        """Simple text chat - send text, get audio/text response"""
        pass

    def add_event_handler(self, event_type: Union[str, RealtimeEventType], handler: Callable):
        """Register ``handler`` for an event type.

        Args:
            event_type: A ``RealtimeEventType`` member or the raw event
                ``type`` string.
            handler: Callable invoked with the event dict; may be a plain
                function or a coroutine function.
        """
        event_name = event_type.value if isinstance(event_type, RealtimeEventType) else event_type
        self.event_handlers.setdefault(event_name, []).append(handler)

    def remove_event_handler(self, event_type: Union[str, RealtimeEventType], handler: Callable):
        """Unregister ``handler`` for an event type.

        Does nothing when no handlers were ever registered for the event type.

        Raises:
            ValueError: If the event type has handlers but ``handler`` is not
                one of them.
        """
        event_name = event_type.value if isinstance(event_type, RealtimeEventType) else event_type
        if event_name in self.event_handlers:
            self.event_handlers[event_name].remove(handler)

    async def _handle_event(self, event: Dict[str, Any]):
        """Dispatch an incoming event dict to the handlers for its ``type``.

        A failing handler is logged and does not stop the remaining handlers.
        """
        import inspect

        event_type = event.get("type")
        if event_type in self.event_handlers:
            # Iterate over a snapshot so a handler may unregister itself.
            for handler in list(self.event_handlers[event_type]):
                try:
                    outcome = handler(event)
                    # Plain (non-async) callbacks return a non-awaitable value;
                    # awaiting it would raise TypeError after the callback ran.
                    if inspect.isawaitable(outcome):
                        await outcome
                except Exception as e:
                    import logging
                    logging.getLogger(__name__).error(f"Error in event handler for {event_type}: {e}")

    @abstractmethod
    def get_supported_voices(self) -> List[str]:
        """Get list of supported voice options"""
        pass

    @abstractmethod
    def get_supported_formats(self) -> List[str]:
        """Get list of supported audio formats"""
        pass

    @abstractmethod
    def get_session_limits(self) -> Dict[str, Any]:
        """Get session limits and constraints"""
        pass

    @abstractmethod
    async def update_session(self, **kwargs) -> Dict[str, Any]:
        """Update session configuration"""
        pass

    @abstractmethod
    async def disconnect(self):
        """Disconnect from the realtime session"""
        pass

    @abstractmethod
    async def close(self):
        """Cleanup resources"""
        pass
@@ -1,13 +1,172 @@
1
1
  from abc import ABC, abstractmethod
2
2
  from typing import Dict, Any, List, Union, Optional, BinaryIO
3
+ import aiohttp
4
+ import asyncio
5
+ import tempfile
6
+ import os
7
+ import logging
8
+ from io import BytesIO
3
9
  from isa_model.inference.services.base_service import BaseService
4
10
 
11
+ logger = logging.getLogger(__name__)
12
+
5
13
  class BaseSTTService(BaseService):
6
- """Base class for Speech-to-Text services with unified task dispatch"""
14
+ """Base class for Speech-to-Text services with unified task dispatch and URL support"""
15
+
16
+ async def _prepare_audio_input(self, audio_input: Union[str, BinaryIO, bytes]) -> Union[str, BinaryIO]:
17
+ """
18
+ Prepare audio input by handling URLs, file paths, bytes data, and file objects
19
+
20
+ Args:
21
+ audio_input: Audio input (URL, file path, bytes data, or file object)
22
+
23
+ Returns:
24
+ Prepared audio input (local file path or file object)
25
+ """
26
+ if isinstance(audio_input, bytes):
27
+ # Handle bytes data from API uploads
28
+ logger.info(f"Converting bytes data to temporary file ({len(audio_input)} bytes)")
29
+ return await self._save_bytes_to_temp_file(audio_input)
30
+ elif isinstance(audio_input, str):
31
+ # Check if it's a URL
32
+ if audio_input.startswith(('http://', 'https://')):
33
+ logger.info(f"Downloading audio from URL: {audio_input}")
34
+ return await self._download_audio_url(audio_input)
35
+ else:
36
+ # Regular file path or base64 string
37
+ return audio_input
38
+ else:
39
+ # Already a file object
40
+ return audio_input
41
+
42
+ async def _prepare_audio_input_with_context(self, audio_input: Union[str, BinaryIO, bytes], context: Dict[str, Any]) -> Union[str, BinaryIO]:
43
+ """
44
+ Prepare audio input with additional context from kwargs
45
+
46
+ Args:
47
+ audio_input: Audio input (URL, file path, bytes data, or file object)
48
+ context: Additional context including filename, content_type
49
+
50
+ Returns:
51
+ Prepared audio input (local file path or file object)
52
+ """
53
+ if isinstance(audio_input, bytes):
54
+ # Handle bytes data from API uploads
55
+ filename = context.get('filename')
56
+ content_type = context.get('content_type')
57
+ logger.info(f"Converting bytes data to temporary file ({len(audio_input)} bytes), filename={filename}, content_type={content_type}")
58
+ return await self._save_bytes_to_temp_file(audio_input, filename, content_type)
59
+ else:
60
+ return await self._prepare_audio_input(audio_input)
61
+
62
async def _download_audio_url(self, url: str) -> str:
    """
    Download an audio file from a URL into a temporary file.

    The temporary file is created with ``delete=False`` so it outlives this
    call; the caller is responsible for removing it. If the download fails
    or is cancelled part-way, the partially written file is removed here so
    it does not accumulate in the temp directory.

    Args:
        url: HTTP/HTTPS URL to audio file

    Returns:
        Path to downloaded temporary file

    Raises:
        Exception: If the response status is not 200 or the download fails
    """
    temp_path: Optional[str] = None
    completed = False
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as response:
                if response.status != 200:
                    raise Exception(f"Failed to download audio: HTTP {response.status}")

                # The file suffix is derived from the response Content-Type.
                content_type = response.headers.get('Content-Type', '')
                file_ext = self._get_file_extension_from_content_type(content_type)

                # The context manager closes the handle even if a chunk
                # read or write raises mid-stream.
                with tempfile.NamedTemporaryFile(
                    delete=False,
                    suffix=file_ext,
                    prefix='audio_download_'
                ) as temp_file:
                    temp_path = temp_file.name
                    async for chunk in response.content.iter_chunked(8192):
                        temp_file.write(chunk)

        logger.info(f"Downloaded audio to temporary file: {temp_path}")
        completed = True
        return temp_path

    except Exception as e:
        logger.error(f"Failed to download audio from URL {url}: {e}")
        raise Exception(f"Audio URL download failed: {e}") from e
    finally:
        # Runs for errors and for task cancellation alike: never leave a
        # half-written download behind.
        if not completed and temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                pass
103
+
104
+ def _get_file_extension_from_content_type(self, content_type: str) -> str:
105
+ """Get appropriate file extension from Content-Type header"""
106
+ content_type_map = {
107
+ 'audio/mpeg': '.mp3',
108
+ 'audio/mp3': '.mp3',
109
+ 'audio/wav': '.wav',
110
+ 'audio/wave': '.wav',
111
+ 'audio/x-wav': '.wav',
112
+ 'audio/flac': '.flac',
113
+ 'audio/ogg': '.ogg',
114
+ 'audio/m4a': '.m4a',
115
+ 'audio/mp4': '.mp4',
116
+ 'audio/webm': '.webm'
117
+ }
118
+ return content_type_map.get(content_type.lower(), '.audio')
119
+
120
async def _save_bytes_to_temp_file(self, audio_bytes: bytes, filename: Optional[str] = None, content_type: Optional[str] = None) -> str:
    """
    Save audio bytes data to a temporary file.

    The file suffix is chosen, in order of preference, from the extension of
    ``filename``, from ``content_type``, and finally the '.mp3' default. The
    file is created with ``delete=False``; the caller removes it when done.

    Args:
        audio_bytes: Audio data as bytes
        filename: Optional (client-supplied) filename to determine extension
        content_type: Optional content type to determine extension

    Returns:
        Path to temporary file containing audio data

    Raises:
        Exception: If the temporary file cannot be created or written
    """
    temp_path: Optional[str] = None
    saved = False
    try:
        suffix = '.mp3'  # Default

        # Look only at the final path component so a dotted directory
        # ("v1.2/clip") cannot leak a path separator into the suffix.
        # Backslashes are normalised because upload names may come from
        # Windows clients.
        extension = ''
        if filename:
            base_name = os.path.basename(filename.replace('\\', '/'))
            extension = os.path.splitext(base_name)[1]

        # A bare "." (from a name like "clip.") is not a usable extension.
        if len(extension) > 1:
            suffix = extension
        elif content_type:
            suffix = self._get_file_extension_from_content_type(content_type)

        # The context manager closes the handle even when the write fails.
        with tempfile.NamedTemporaryFile(
            delete=False,
            suffix=suffix,
            prefix='audio_bytes_'
        ) as temp_file:
            temp_path = temp_file.name
            temp_file.write(audio_bytes)

        logger.info(f"Saved {len(audio_bytes)} bytes to temporary file: {temp_path}")
        saved = True
        return temp_path

    except Exception as e:
        logger.error(f"Failed to save audio bytes to temporary file: {e}")
        raise Exception(f"Audio bytes save failed: {e}") from e
    finally:
        # Do not leave a partially written file behind on failure.
        if not saved and temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                pass
157
+
158
+ def _cleanup_temp_file(self, file_path: str):
159
+ """Clean up temporary downloaded file"""
160
+ try:
161
+ if file_path and file_path.startswith(tempfile.gettempdir()):
162
+ os.unlink(file_path)
163
+ logger.debug(f"Cleaned up temporary file: {file_path}")
164
+ except Exception as e:
165
+ logger.warning(f"Failed to cleanup temporary file {file_path}: {e}")
7
166
 
8
167
  async def invoke(
9
168
  self,
10
- audio_input: Union[str, BinaryIO, List[Union[str, BinaryIO]]],
169
+ audio_input: Union[str, BinaryIO, bytes, List[Union[str, BinaryIO, bytes]]],
11
170
  task: Optional[str] = None,
12
171
  **kwargs
13
172
  ) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
@@ -30,33 +189,47 @@ class BaseSTTService(BaseService):
30
189
  # ==================== 语音转文本类任务 ====================
31
190
  if task == "transcribe":
32
191
  if isinstance(audio_input, list):
192
+ # Prepare all audio inputs (handle URLs)
193
+ prepared_inputs = []
194
+ for audio in audio_input:
195
+ prepared_input = await self._prepare_audio_input_with_context(audio, kwargs)
196
+ prepared_inputs.append(prepared_input)
33
197
  return await self.transcribe_batch(
34
- audio_input,
198
+ prepared_inputs,
35
199
  kwargs.get("language"),
36
200
  kwargs.get("prompt")
37
201
  )
38
202
  else:
203
+ # Prepare single audio input (handle URLs)
204
+ prepared_input = await self._prepare_audio_input_with_context(audio_input, kwargs)
39
205
  return await self.transcribe(
40
- audio_input,
206
+ prepared_input,
41
207
  kwargs.get("language"),
42
208
  kwargs.get("prompt")
43
209
  )
44
210
  elif task == "translate":
45
211
  if isinstance(audio_input, list):
46
212
  raise ValueError("translate task requires single audio input")
47
- return await self.translate(audio_input)
213
+ prepared_input = await self._prepare_audio_input_with_context(audio_input, kwargs)
214
+ return await self.translate(prepared_input)
48
215
  elif task == "batch_transcribe":
49
216
  if not isinstance(audio_input, list):
50
217
  audio_input = [audio_input]
218
+ # Prepare all audio inputs (handle URLs)
219
+ prepared_inputs = []
220
+ for audio in audio_input:
221
+ prepared_input = await self._prepare_audio_input_with_context(audio, kwargs)
222
+ prepared_inputs.append(prepared_input)
51
223
  return await self.transcribe_batch(
52
- audio_input,
224
+ prepared_inputs,
53
225
  kwargs.get("language"),
54
226
  kwargs.get("prompt")
55
227
  )
56
228
  elif task == "detect_language":
57
229
  if isinstance(audio_input, list):
58
230
  raise ValueError("detect_language task requires single audio input")
59
- return await self.detect_language(audio_input)
231
+ prepared_input = await self._prepare_audio_input_with_context(audio_input, kwargs)
232
+ return await self.detect_language(prepared_input)
60
233
  else:
61
234
  raise NotImplementedError(f"{self.__class__.__name__} does not support task: {task}")
62
235
 
@@ -72,7 +245,7 @@ class BaseSTTService(BaseService):
72
245
  @abstractmethod
73
246
  async def transcribe(
74
247
  self,
75
- audio_file: Union[str, BinaryIO],
248
+ audio_file: Union[str, BinaryIO, bytes],
76
249
  language: Optional[str] = None,
77
250
  prompt: Optional[str] = None
78
251
  ) -> Dict[str, Any]:
@@ -96,7 +269,7 @@ class BaseSTTService(BaseService):
96
269
  @abstractmethod
97
270
  async def translate(
98
271
  self,
99
- audio_file: Union[str, BinaryIO]
272
+ audio_file: Union[str, BinaryIO, bytes]
100
273
  ) -> Dict[str, Any]:
101
274
  """
102
275
  Translate audio file to English text
@@ -115,7 +288,7 @@ class BaseSTTService(BaseService):
115
288
  @abstractmethod
116
289
  async def transcribe_batch(
117
290
  self,
118
- audio_files: List[Union[str, BinaryIO]],
291
+ audio_files: List[Union[str, BinaryIO, bytes]],
119
292
  language: Optional[str] = None,
120
293
  prompt: Optional[str] = None
121
294
  ) -> List[Dict[str, Any]]:
@@ -133,7 +306,7 @@ class BaseSTTService(BaseService):
133
306
  pass
134
307
 
135
308
  @abstractmethod
136
- async def detect_language(self, audio_file: Union[str, BinaryIO]) -> Dict[str, Any]:
309
+ async def detect_language(self, audio_file: Union[str, BinaryIO, bytes]) -> Dict[str, Any]:
137
310
  """
138
311
  Detect language of audio file
139
312
 
File without changes