isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +1166 -584
- isa_model/core/cache/redis_cache.py +410 -0
- isa_model/core/config/config_manager.py +282 -12
- isa_model/core/config.py +91 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +297 -0
- isa_model/core/database/supabase_client.py +258 -0
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +46 -0
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +66 -25
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +217 -55
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +479 -370
- isa_model/core/storage/hf_storage.py +2 -2
- isa_model/core/types.py +8 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -368
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/modal/deployer.py +894 -0
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +179 -16
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +53 -11
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +361 -26
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +11 -3
- isa_model/inference/services/llm/openai_llm_service.py +670 -56
- isa_model/inference/services/llm/yyds_llm_service.py +10 -3
- isa_model/inference/services/vision/__init__.py +27 -6
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +19 -10
- isa_model/inference/services/vision/isa_vision_service.py +634 -0
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +240 -18
- isa_model/serving/api/middleware/auth.py +317 -0
- isa_model/serving/api/middleware/security.py +268 -0
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +489 -0
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +475 -0
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +992 -171
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +318 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks.py +0 -469
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -18
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/factory.py +0 -531
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/metrics.py +0 -798
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model/training/__init__.py +0 -74
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -23
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/factory.py +0 -424
- isa_model-0.3.91.dist-info/RECORD +0 -138
- /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,21 @@
|
|
1
|
+
"""
|
2
|
+
Audio Services - Speech, TTS, and Audio Processing Services
|
3
|
+
"""
|
4
|
+
|
5
|
+
from .base_stt_service import BaseSTTService
|
6
|
+
from .base_tts_service import BaseTTSService
|
7
|
+
from .base_realtime_service import BaseRealtimeService
|
8
|
+
from .openai_stt_service import OpenAISTTService
|
9
|
+
from .openai_tts_service import OpenAITTSService
|
10
|
+
from .openai_realtime_service import OpenAIRealtimeService
|
11
|
+
from .replicate_tts_service import ReplicateTTSService
|
12
|
+
|
13
|
+
# Public API of the audio services package. Every entry must match a name
# imported by this module; a misspelled entry makes `from <package> import *`
# fail with AttributeError.
__all__ = [
    'BaseSTTService',
    'BaseTTSService',
    'BaseRealtimeService',
    'OpenAISTTService',
    'OpenAITTSService',
    'OpenAIRealtimeService',
    'ReplicateTTSService',
]
|
@@ -0,0 +1,225 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
from typing import Dict, Any, List, Union, Optional, Callable, AsyncGenerator
|
3
|
+
from enum import Enum
|
4
|
+
import asyncio
|
5
|
+
from isa_model.inference.services.base_service import BaseService
|
6
|
+
|
7
|
+
|
8
|
+
class RealtimeEventType(Enum):
    """Names of the events exchanged over a realtime session.

    Each member's value is the event's wire name, i.e. the string found in
    the ``type`` field of an event payload.
    """

    # --- Session lifecycle ---
    SESSION_CREATED = "session.created"
    SESSION_UPDATED = "session.updated"

    # --- Input audio buffer ---
    INPUT_AUDIO_BUFFER_APPEND = "input_audio_buffer.append"
    INPUT_AUDIO_BUFFER_COMMIT = "input_audio_buffer.commit"
    INPUT_AUDIO_BUFFER_CLEAR = "input_audio_buffer.clear"
    INPUT_AUDIO_BUFFER_COMMITTED = "input_audio_buffer.committed"
    INPUT_AUDIO_BUFFER_SPEECH_STARTED = "input_audio_buffer.speech_started"
    INPUT_AUDIO_BUFFER_SPEECH_STOPPED = "input_audio_buffer.speech_stopped"

    # --- Conversation items ---
    CONVERSATION_ITEM_CREATE = "conversation.item.create"
    CONVERSATION_ITEM_CREATED = "conversation.item.created"
    CONVERSATION_ITEM_DELETE = "conversation.item.delete"
    CONVERSATION_ITEM_DELETED = "conversation.item.deleted"
    CONVERSATION_ITEM_TRUNCATE = "conversation.item.truncate"
    CONVERSATION_ITEM_TRUNCATED = "conversation.item.truncated"

    # --- Responses ---
    RESPONSE_CREATE = "response.create"
    RESPONSE_CREATED = "response.created"
    RESPONSE_DONE = "response.done"
    RESPONSE_OUTPUT_ITEM_ADDED = "response.output_item.added"
    RESPONSE_OUTPUT_ITEM_DONE = "response.output_item.done"
    RESPONSE_CONTENT_PART_ADDED = "response.content_part.added"
    RESPONSE_CONTENT_PART_DONE = "response.content_part.done"
    RESPONSE_TEXT_DELTA = "response.text.delta"
    RESPONSE_TEXT_DONE = "response.text.done"
    RESPONSE_AUDIO_TRANSCRIPT_DELTA = "response.audio_transcript.delta"
    RESPONSE_AUDIO_TRANSCRIPT_DONE = "response.audio_transcript.done"
    RESPONSE_AUDIO_DELTA = "response.audio.delta"
    RESPONSE_AUDIO_DONE = "response.audio.done"
    RESPONSE_FUNCTION_CALL_ARGUMENTS_DELTA = "response.function_call_arguments.delta"
    RESPONSE_FUNCTION_CALL_ARGUMENTS_DONE = "response.function_call_arguments.done"

    # --- Rate limiting ---
    RATE_LIMITS_UPDATED = "rate_limits.updated"

    # --- Errors ---
    ERROR = "error"
|
52
|
+
|
53
|
+
|
54
|
+
class BaseRealtimeService(BaseService):
    """Base class for Realtime API services.

    Provides task dispatch (``invoke``) and event-handler bookkeeping on top
    of the abstract session operations that concrete providers implement.
    """

    def __init__(self, provider_name: str, model_name: str, **kwargs):
        """Initialise the base service and reset the realtime session state."""
        super().__init__(provider_name, model_name, **kwargs)
        self.session_id: Optional[str] = None
        self.websocket = None
        # Maps an event name (e.g. "response.done") to its registered handlers.
        self.event_handlers: Dict[str, List[Callable]] = {}
        self.is_connected = False

    async def invoke(
        self,
        task: str,
        **kwargs
    ) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
        """
        Unified task dispatch for realtime conversation tasks.

        Args:
            task: Task name; one of the values returned by
                ``get_supported_tasks``.
            **kwargs: Task-specific arguments, forwarded to the target
                method. ``send_audio`` requires ``audio_data`` and
                ``send_text`` requires ``text``.

        Returns:
            Whatever the dispatched method returns.

        Raises:
            ValueError: If a required task argument is missing or empty.
            NotImplementedError: If ``task`` is not a supported task name.
        """
        if task == "create_session":
            return await self.create_session(**kwargs)
        if task == "connect":
            return await self.connect_websocket(**kwargs)
        if task == "send_audio":
            # Pop the payload so it is not passed twice (positionally and
            # again through **kwargs), which would raise TypeError.
            audio_data = kwargs.pop("audio_data", None)
            if not audio_data:
                raise ValueError("audio_data is required for send_audio task")
            return await self.send_audio_message(audio_data, **kwargs)
        if task == "send_text":
            # Same as above: avoid "multiple values for argument 'text'".
            text = kwargs.pop("text", None)
            if not text:
                raise ValueError("text is required for send_text task")
            return await self.send_text_message(text, **kwargs)
        if task == "listen":
            # NOTE(review): listen_for_responses is annotated as returning an
            # AsyncGenerator; if a subclass implements it as an async
            # generator function, awaiting it here will fail - confirm.
            return await self.listen_for_responses(**kwargs)
        if task == "audio_chat":
            return await self.simple_audio_chat(**kwargs)
        if task == "text_chat":
            return await self.simple_text_chat(**kwargs)
        raise NotImplementedError(f"{self.__class__.__name__} does not support task: {task}")

    def get_supported_tasks(self) -> List[str]:
        """Return the task names accepted by ``invoke``."""
        return [
            "create_session", "connect", "send_audio", "send_text",
            "listen", "audio_chat", "text_chat"
        ]

    @abstractmethod
    async def create_session(
        self,
        instructions: str = "You are a helpful assistant.",
        modalities: Optional[List[str]] = None,
        voice: str = "alloy",
        **kwargs
    ) -> Dict[str, Any]:
        """Create a new realtime session"""

    @abstractmethod
    async def connect_websocket(self, **kwargs) -> bool:
        """Connect to the realtime WebSocket"""

    @abstractmethod
    async def send_audio_message(
        self,
        audio_data: bytes,
        format: str = "pcm16",
        **kwargs
    ) -> Dict[str, Any]:
        """Send audio data to the realtime session"""

    @abstractmethod
    async def send_text_message(
        self,
        text: str,
        **kwargs
    ) -> Dict[str, Any]:
        """Send text message to the realtime session"""

    @abstractmethod
    async def listen_for_responses(
        self,
        message_handler: Optional[Callable] = None,
        **kwargs
    ) -> AsyncGenerator[Dict[str, Any], None]:
        """Listen for responses from the realtime session"""

    @abstractmethod
    async def simple_audio_chat(
        self,
        audio_data: bytes,
        instructions: str = "You are a helpful assistant. Respond in audio.",
        voice: str = "alloy",
        **kwargs
    ) -> Dict[str, Any]:
        """Simple audio chat - send audio, get audio response"""

    @abstractmethod
    async def simple_text_chat(
        self,
        text: str,
        instructions: str = "You are a helpful assistant.",
        voice: str = "alloy",
        **kwargs
    ) -> Dict[str, Any]:
        """Simple text chat - send text, get audio/text response"""

    def add_event_handler(self, event_type: Union[str, RealtimeEventType], handler: Callable):
        """Register ``handler`` for ``event_type`` (enum member or raw event name)."""
        event_name = event_type.value if isinstance(event_type, RealtimeEventType) else event_type
        self.event_handlers.setdefault(event_name, []).append(handler)

    def remove_event_handler(self, event_type: Union[str, RealtimeEventType], handler: Callable):
        """Unregister ``handler`` for ``event_type``.

        A no-op when the event type or the handler is not registered, so
        both "unknown" cases behave the same way.
        """
        event_name = event_type.value if isinstance(event_type, RealtimeEventType) else event_type
        handlers = self.event_handlers.get(event_name)
        if handlers and handler in handlers:
            handlers.remove(handler)

    async def _handle_event(self, event: Dict[str, Any]):
        """Invoke every handler registered for the event's ``type``.

        Handlers may be plain callables or coroutine functions. An exception
        raised by one handler is logged and does not stop the others.
        """
        import inspect
        import logging

        event_type = event.get("type")
        # Iterate over a snapshot so a handler may add or remove handlers
        # while the event is being dispatched.
        for handler in list(self.event_handlers.get(event_type, ())):
            try:
                outcome = handler(event)
                # Only await when the handler actually returned an awaitable;
                # a synchronous handler returns a plain value.
                if inspect.isawaitable(outcome):
                    await outcome
            except Exception as e:
                logging.getLogger(__name__).error(f"Error in event handler for {event_type}: {e}")

    @abstractmethod
    def get_supported_voices(self) -> List[str]:
        """Get list of supported voice options"""

    @abstractmethod
    def get_supported_formats(self) -> List[str]:
        """Get list of supported audio formats"""

    @abstractmethod
    def get_session_limits(self) -> Dict[str, Any]:
        """Get session limits and constraints"""

    @abstractmethod
    async def update_session(self, **kwargs) -> Dict[str, Any]:
        """Update session configuration"""

    @abstractmethod
    async def disconnect(self):
        """Disconnect from the realtime session"""

    @abstractmethod
    async def close(self):
        """Cleanup resources"""
|
@@ -1,13 +1,172 @@
|
|
1
1
|
from abc import ABC, abstractmethod
|
2
2
|
from typing import Dict, Any, List, Union, Optional, BinaryIO
|
3
|
+
import aiohttp
|
4
|
+
import asyncio
|
5
|
+
import tempfile
|
6
|
+
import os
|
7
|
+
import logging
|
8
|
+
from io import BytesIO
|
3
9
|
from isa_model.inference.services.base_service import BaseService
|
4
10
|
|
11
|
+
logger = logging.getLogger(__name__)
|
12
|
+
|
5
13
|
class BaseSTTService(BaseService):
|
6
|
-
"""Base class for Speech-to-Text services with unified task dispatch"""
|
14
|
+
"""Base class for Speech-to-Text services with unified task dispatch and URL support"""
|
15
|
+
|
16
|
+
async def _prepare_audio_input(self, audio_input: Union[str, BinaryIO, bytes]) -> Union[str, BinaryIO]:
|
17
|
+
"""
|
18
|
+
Prepare audio input by handling URLs, file paths, bytes data, and file objects
|
19
|
+
|
20
|
+
Args:
|
21
|
+
audio_input: Audio input (URL, file path, bytes data, or file object)
|
22
|
+
|
23
|
+
Returns:
|
24
|
+
Prepared audio input (local file path or file object)
|
25
|
+
"""
|
26
|
+
if isinstance(audio_input, bytes):
|
27
|
+
# Handle bytes data from API uploads
|
28
|
+
logger.info(f"Converting bytes data to temporary file ({len(audio_input)} bytes)")
|
29
|
+
return await self._save_bytes_to_temp_file(audio_input)
|
30
|
+
elif isinstance(audio_input, str):
|
31
|
+
# Check if it's a URL
|
32
|
+
if audio_input.startswith(('http://', 'https://')):
|
33
|
+
logger.info(f"Downloading audio from URL: {audio_input}")
|
34
|
+
return await self._download_audio_url(audio_input)
|
35
|
+
else:
|
36
|
+
# Regular file path or base64 string
|
37
|
+
return audio_input
|
38
|
+
else:
|
39
|
+
# Already a file object
|
40
|
+
return audio_input
|
41
|
+
|
42
|
+
async def _prepare_audio_input_with_context(self, audio_input: Union[str, BinaryIO, bytes], context: Dict[str, Any]) -> Union[str, BinaryIO]:
|
43
|
+
"""
|
44
|
+
Prepare audio input with additional context from kwargs
|
45
|
+
|
46
|
+
Args:
|
47
|
+
audio_input: Audio input (URL, file path, bytes data, or file object)
|
48
|
+
context: Additional context including filename, content_type
|
49
|
+
|
50
|
+
Returns:
|
51
|
+
Prepared audio input (local file path or file object)
|
52
|
+
"""
|
53
|
+
if isinstance(audio_input, bytes):
|
54
|
+
# Handle bytes data from API uploads
|
55
|
+
filename = context.get('filename')
|
56
|
+
content_type = context.get('content_type')
|
57
|
+
logger.info(f"Converting bytes data to temporary file ({len(audio_input)} bytes), filename={filename}, content_type={content_type}")
|
58
|
+
return await self._save_bytes_to_temp_file(audio_input, filename, content_type)
|
59
|
+
else:
|
60
|
+
return await self._prepare_audio_input(audio_input)
|
61
|
+
|
62
|
+
async def _download_audio_url(self, url: str) -> str:
    """
    Download an audio file from a URL into a temporary file.

    The temporary file is created with ``delete=False``, so the caller is
    responsible for removing it. If the download fails after the file has
    been created, the partial file is removed before the error is raised.

    Args:
        url: HTTP/HTTPS URL of the audio file.

    Returns:
        Path to the downloaded temporary file.

    Raises:
        Exception: If the server does not answer with HTTP 200 or the
            download fails for any other reason.
    """
    temp_path = None
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as response:
                if response.status != 200:
                    raise Exception(f"Failed to download audio: HTTP {response.status}")

                # Use only the media type for the extension lookup; the
                # header may carry parameters such as "; charset=binary".
                content_type = response.headers.get('Content-Type', '')
                media_type = content_type.split(';', 1)[0].strip()
                file_ext = self._get_file_extension_from_content_type(media_type)

                # The context manager closes the handle even when the stream
                # breaks midway; delete=False keeps the file on disk.
                with tempfile.NamedTemporaryFile(
                    delete=False,
                    suffix=file_ext,
                    prefix='audio_download_'
                ) as temp_file:
                    temp_path = temp_file.name
                    async for chunk in response.content.iter_chunked(8192):
                        temp_file.write(chunk)

                logger.info(f"Downloaded audio to temporary file: {temp_path}")
                return temp_path

    except Exception as e:
        if temp_path is not None:
            # Best effort: do not leave a truncated download behind.
            try:
                os.unlink(temp_path)
            except OSError as cleanup_error:
                logger.warning(f"Failed to cleanup temporary file {temp_path}: {cleanup_error}")
        logger.error(f"Failed to download audio from URL {url}: {e}")
        raise Exception(f"Audio URL download failed: {e}") from e
|
103
|
+
|
104
|
+
def _get_file_extension_from_content_type(self, content_type: str) -> str:
|
105
|
+
"""Get appropriate file extension from Content-Type header"""
|
106
|
+
content_type_map = {
|
107
|
+
'audio/mpeg': '.mp3',
|
108
|
+
'audio/mp3': '.mp3',
|
109
|
+
'audio/wav': '.wav',
|
110
|
+
'audio/wave': '.wav',
|
111
|
+
'audio/x-wav': '.wav',
|
112
|
+
'audio/flac': '.flac',
|
113
|
+
'audio/ogg': '.ogg',
|
114
|
+
'audio/m4a': '.m4a',
|
115
|
+
'audio/mp4': '.mp4',
|
116
|
+
'audio/webm': '.webm'
|
117
|
+
}
|
118
|
+
return content_type_map.get(content_type.lower(), '.audio')
|
119
|
+
|
120
|
+
async def _save_bytes_to_temp_file(self, audio_bytes: bytes, filename: Optional[str] = None, content_type: Optional[str] = None) -> str:
    """
    Save audio bytes to a temporary file.

    The extension is taken from ``filename`` when it has one, otherwise
    from ``content_type``, otherwise it defaults to ``.mp3``. The file is
    created with ``delete=False``, so the caller is responsible for
    removing it.

    Args:
        audio_bytes: Audio data as bytes.
        filename: Optional filename used to determine the extension.
        content_type: Optional content type used to determine the extension.

    Returns:
        Path to the temporary file containing the audio data.

    Raises:
        Exception: If the file cannot be created or written.
    """
    temp_path = None
    try:
        suffix = '.mp3'  # Default
        extension = ''
        if filename:
            # Look only at the final path component so a dot in a directory
            # name ("dir.v2/audio") cannot leak a path separator into the
            # suffix. Backslashes are normalised for Windows-style names.
            base_name = os.path.basename(filename.replace('\\', '/'))
            extension = os.path.splitext(base_name)[1]
        if len(extension) > 1:
            suffix = extension
        elif content_type:
            suffix = self._get_file_extension_from_content_type(content_type)

        # The context manager closes the handle even if the write fails;
        # delete=False keeps the file on disk for the caller.
        with tempfile.NamedTemporaryFile(
            delete=False,
            suffix=suffix,
            prefix='audio_bytes_'
        ) as temp_file:
            temp_path = temp_file.name
            temp_file.write(audio_bytes)

        logger.info(f"Saved {len(audio_bytes)} bytes to temporary file: {temp_path}")
        return temp_path

    except Exception as e:
        if temp_path is not None:
            # Best effort: do not leave a partially written file behind.
            try:
                os.unlink(temp_path)
            except OSError as cleanup_error:
                logger.warning(f"Failed to cleanup temporary file {temp_path}: {cleanup_error}")
        logger.error(f"Failed to save audio bytes to temporary file: {e}")
        raise Exception(f"Audio bytes save failed: {e}") from e
|
157
|
+
|
158
|
+
def _cleanup_temp_file(self, file_path: str):
|
159
|
+
"""Clean up temporary downloaded file"""
|
160
|
+
try:
|
161
|
+
if file_path and file_path.startswith(tempfile.gettempdir()):
|
162
|
+
os.unlink(file_path)
|
163
|
+
logger.debug(f"Cleaned up temporary file: {file_path}")
|
164
|
+
except Exception as e:
|
165
|
+
logger.warning(f"Failed to cleanup temporary file {file_path}: {e}")
|
7
166
|
|
8
167
|
async def invoke(
|
9
168
|
self,
|
10
|
-
audio_input: Union[str, BinaryIO, List[Union[str, BinaryIO]]],
|
169
|
+
audio_input: Union[str, BinaryIO, bytes, List[Union[str, BinaryIO, bytes]]],
|
11
170
|
task: Optional[str] = None,
|
12
171
|
**kwargs
|
13
172
|
) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
|
@@ -30,33 +189,47 @@ class BaseSTTService(BaseService):
|
|
30
189
|
# ==================== 语音转文本类任务 ====================
|
31
190
|
if task == "transcribe":
|
32
191
|
if isinstance(audio_input, list):
|
192
|
+
# Prepare all audio inputs (handle URLs)
|
193
|
+
prepared_inputs = []
|
194
|
+
for audio in audio_input:
|
195
|
+
prepared_input = await self._prepare_audio_input_with_context(audio, kwargs)
|
196
|
+
prepared_inputs.append(prepared_input)
|
33
197
|
return await self.transcribe_batch(
|
34
|
-
|
198
|
+
prepared_inputs,
|
35
199
|
kwargs.get("language"),
|
36
200
|
kwargs.get("prompt")
|
37
201
|
)
|
38
202
|
else:
|
203
|
+
# Prepare single audio input (handle URLs)
|
204
|
+
prepared_input = await self._prepare_audio_input_with_context(audio_input, kwargs)
|
39
205
|
return await self.transcribe(
|
40
|
-
|
206
|
+
prepared_input,
|
41
207
|
kwargs.get("language"),
|
42
208
|
kwargs.get("prompt")
|
43
209
|
)
|
44
210
|
elif task == "translate":
|
45
211
|
if isinstance(audio_input, list):
|
46
212
|
raise ValueError("translate task requires single audio input")
|
47
|
-
|
213
|
+
prepared_input = await self._prepare_audio_input_with_context(audio_input, kwargs)
|
214
|
+
return await self.translate(prepared_input)
|
48
215
|
elif task == "batch_transcribe":
|
49
216
|
if not isinstance(audio_input, list):
|
50
217
|
audio_input = [audio_input]
|
218
|
+
# Prepare all audio inputs (handle URLs)
|
219
|
+
prepared_inputs = []
|
220
|
+
for audio in audio_input:
|
221
|
+
prepared_input = await self._prepare_audio_input_with_context(audio, kwargs)
|
222
|
+
prepared_inputs.append(prepared_input)
|
51
223
|
return await self.transcribe_batch(
|
52
|
-
|
224
|
+
prepared_inputs,
|
53
225
|
kwargs.get("language"),
|
54
226
|
kwargs.get("prompt")
|
55
227
|
)
|
56
228
|
elif task == "detect_language":
|
57
229
|
if isinstance(audio_input, list):
|
58
230
|
raise ValueError("detect_language task requires single audio input")
|
59
|
-
|
231
|
+
prepared_input = await self._prepare_audio_input_with_context(audio_input, kwargs)
|
232
|
+
return await self.detect_language(prepared_input)
|
60
233
|
else:
|
61
234
|
raise NotImplementedError(f"{self.__class__.__name__} does not support task: {task}")
|
62
235
|
|
@@ -72,7 +245,7 @@ class BaseSTTService(BaseService):
|
|
72
245
|
@abstractmethod
|
73
246
|
async def transcribe(
|
74
247
|
self,
|
75
|
-
audio_file: Union[str, BinaryIO],
|
248
|
+
audio_file: Union[str, BinaryIO, bytes],
|
76
249
|
language: Optional[str] = None,
|
77
250
|
prompt: Optional[str] = None
|
78
251
|
) -> Dict[str, Any]:
|
@@ -96,7 +269,7 @@ class BaseSTTService(BaseService):
|
|
96
269
|
@abstractmethod
|
97
270
|
async def translate(
|
98
271
|
self,
|
99
|
-
audio_file: Union[str, BinaryIO]
|
272
|
+
audio_file: Union[str, BinaryIO, bytes]
|
100
273
|
) -> Dict[str, Any]:
|
101
274
|
"""
|
102
275
|
Translate audio file to English text
|
@@ -115,7 +288,7 @@ class BaseSTTService(BaseService):
|
|
115
288
|
@abstractmethod
|
116
289
|
async def transcribe_batch(
|
117
290
|
self,
|
118
|
-
audio_files: List[Union[str, BinaryIO]],
|
291
|
+
audio_files: List[Union[str, BinaryIO, bytes]],
|
119
292
|
language: Optional[str] = None,
|
120
293
|
prompt: Optional[str] = None
|
121
294
|
) -> List[Dict[str, Any]]:
|
@@ -133,7 +306,7 @@ class BaseSTTService(BaseService):
|
|
133
306
|
pass
|
134
307
|
|
135
308
|
@abstractmethod
|
136
|
-
async def detect_language(self, audio_file: Union[str, BinaryIO]) -> Dict[str, Any]:
|
309
|
+
async def detect_language(self, audio_file: Union[str, BinaryIO, bytes]) -> Dict[str, Any]:
|
137
310
|
"""
|
138
311
|
Detect language of audio file
|
139
312
|
|
File without changes
|