agentscope-runtime 0.2.0b2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. agentscope_runtime/adapters/__init__.py +0 -0
  2. agentscope_runtime/adapters/agentscope/__init__.py +0 -0
  3. agentscope_runtime/adapters/agentscope/long_term_memory/__init__.py +6 -0
  4. agentscope_runtime/adapters/agentscope/long_term_memory/_long_term_memory_adapter.py +258 -0
  5. agentscope_runtime/adapters/agentscope/memory/__init__.py +6 -0
  6. agentscope_runtime/adapters/agentscope/memory/_memory_adapter.py +152 -0
  7. agentscope_runtime/adapters/agentscope/message.py +535 -0
  8. agentscope_runtime/adapters/agentscope/stream.py +506 -0
  9. agentscope_runtime/adapters/agentscope/tool/__init__.py +9 -0
  10. agentscope_runtime/adapters/agentscope/tool/sandbox_tool.py +69 -0
  11. agentscope_runtime/adapters/agentscope/tool/tool.py +233 -0
  12. agentscope_runtime/adapters/autogen/__init__.py +0 -0
  13. agentscope_runtime/adapters/autogen/tool/__init__.py +7 -0
  14. agentscope_runtime/adapters/autogen/tool/tool.py +211 -0
  15. agentscope_runtime/adapters/text/__init__.py +0 -0
  16. agentscope_runtime/adapters/text/stream.py +29 -0
  17. agentscope_runtime/common/collections/redis_mapping.py +4 -1
  18. agentscope_runtime/common/container_clients/fc_client.py +855 -0
  19. agentscope_runtime/common/utils/__init__.py +0 -0
  20. agentscope_runtime/common/utils/lazy_loader.py +57 -0
  21. agentscope_runtime/engine/__init__.py +25 -18
  22. agentscope_runtime/engine/app/agent_app.py +161 -91
  23. agentscope_runtime/engine/app/base_app.py +4 -118
  24. agentscope_runtime/engine/constant.py +8 -0
  25. agentscope_runtime/engine/deployers/__init__.py +8 -0
  26. agentscope_runtime/engine/deployers/adapter/__init__.py +2 -0
  27. agentscope_runtime/engine/deployers/adapter/a2a/a2a_adapter_utils.py +0 -21
  28. agentscope_runtime/engine/deployers/adapter/a2a/a2a_protocol_adapter.py +28 -9
  29. agentscope_runtime/engine/deployers/adapter/responses/__init__.py +2 -0
  30. agentscope_runtime/engine/deployers/adapter/responses/response_api_adapter_utils.py +5 -2
  31. agentscope_runtime/engine/deployers/adapter/responses/response_api_protocol_adapter.py +1 -1
  32. agentscope_runtime/engine/deployers/agentrun_deployer.py +2541 -0
  33. agentscope_runtime/engine/deployers/cli_fc_deploy.py +1 -1
  34. agentscope_runtime/engine/deployers/kubernetes_deployer.py +9 -21
  35. agentscope_runtime/engine/deployers/local_deployer.py +47 -74
  36. agentscope_runtime/engine/deployers/modelstudio_deployer.py +216 -50
  37. agentscope_runtime/engine/deployers/utils/app_runner_utils.py +29 -0
  38. agentscope_runtime/engine/deployers/utils/detached_app.py +510 -0
  39. agentscope_runtime/engine/deployers/utils/docker_image_utils/__init__.py +1 -1
  40. agentscope_runtime/engine/deployers/utils/docker_image_utils/dockerfile_generator.py +1 -1
  41. agentscope_runtime/engine/deployers/utils/docker_image_utils/{runner_image_factory.py → image_factory.py} +121 -61
  42. agentscope_runtime/engine/deployers/utils/package.py +693 -0
  43. agentscope_runtime/engine/deployers/utils/service_utils/__init__.py +0 -5
  44. agentscope_runtime/engine/deployers/utils/service_utils/fastapi_factory.py +301 -282
  45. agentscope_runtime/engine/deployers/utils/service_utils/fastapi_templates.py +2 -4
  46. agentscope_runtime/engine/deployers/utils/service_utils/process_manager.py +23 -1
  47. agentscope_runtime/engine/deployers/utils/templates/app_main.py.j2 +84 -0
  48. agentscope_runtime/engine/deployers/utils/templates/runner_main.py.j2 +95 -0
  49. agentscope_runtime/engine/deployers/utils/{service_utils → templates}/standalone_main.py.j2 +0 -45
  50. agentscope_runtime/engine/deployers/utils/wheel_packager.py +119 -18
  51. agentscope_runtime/engine/helpers/runner.py +40 -0
  52. agentscope_runtime/engine/runner.py +171 -130
  53. agentscope_runtime/engine/schemas/agent_schemas.py +114 -3
  54. agentscope_runtime/engine/schemas/modelstudio_llm.py +4 -2
  55. agentscope_runtime/engine/schemas/oai_llm.py +23 -23
  56. agentscope_runtime/engine/schemas/response_api.py +65 -0
  57. agentscope_runtime/engine/schemas/session.py +24 -0
  58. agentscope_runtime/engine/services/__init__.py +0 -9
  59. agentscope_runtime/engine/services/agent_state/__init__.py +16 -0
  60. agentscope_runtime/engine/services/agent_state/redis_state_service.py +113 -0
  61. agentscope_runtime/engine/services/agent_state/state_service.py +179 -0
  62. agentscope_runtime/engine/services/memory/__init__.py +24 -0
  63. agentscope_runtime/engine/services/{mem0_memory_service.py → memory/mem0_memory_service.py} +17 -13
  64. agentscope_runtime/engine/services/{memory_service.py → memory/memory_service.py} +28 -7
  65. agentscope_runtime/engine/services/{redis_memory_service.py → memory/redis_memory_service.py} +1 -1
  66. agentscope_runtime/engine/services/{reme_personal_memory_service.py → memory/reme_personal_memory_service.py} +9 -6
  67. agentscope_runtime/engine/services/{reme_task_memory_service.py → memory/reme_task_memory_service.py} +2 -2
  68. agentscope_runtime/engine/services/{tablestore_memory_service.py → memory/tablestore_memory_service.py} +12 -18
  69. agentscope_runtime/engine/services/sandbox/__init__.py +13 -0
  70. agentscope_runtime/engine/services/{sandbox_service.py → sandbox/sandbox_service.py} +86 -71
  71. agentscope_runtime/engine/services/session_history/__init__.py +23 -0
  72. agentscope_runtime/engine/services/{redis_session_history_service.py → session_history/redis_session_history_service.py} +3 -2
  73. agentscope_runtime/engine/services/{session_history_service.py → session_history/session_history_service.py} +44 -34
  74. agentscope_runtime/engine/services/{tablestore_session_history_service.py → session_history/tablestore_session_history_service.py} +14 -19
  75. agentscope_runtime/engine/services/utils/tablestore_service_utils.py +2 -2
  76. agentscope_runtime/engine/tracing/base.py +10 -9
  77. agentscope_runtime/engine/tracing/message_util.py +1 -1
  78. agentscope_runtime/engine/tracing/tracing_util.py +7 -2
  79. agentscope_runtime/engine/tracing/wrapper.py +49 -31
  80. agentscope_runtime/sandbox/__init__.py +10 -2
  81. agentscope_runtime/sandbox/box/agentbay/__init__.py +4 -0
  82. agentscope_runtime/sandbox/box/agentbay/agentbay_sandbox.py +559 -0
  83. agentscope_runtime/sandbox/box/base/base_sandbox.py +12 -0
  84. agentscope_runtime/sandbox/box/browser/browser_sandbox.py +115 -11
  85. agentscope_runtime/sandbox/box/cloud/__init__.py +4 -0
  86. agentscope_runtime/sandbox/box/cloud/cloud_sandbox.py +254 -0
  87. agentscope_runtime/sandbox/box/filesystem/filesystem_sandbox.py +66 -0
  88. agentscope_runtime/sandbox/box/gui/gui_sandbox.py +42 -0
  89. agentscope_runtime/sandbox/box/mobile/__init__.py +4 -0
  90. agentscope_runtime/sandbox/box/mobile/box/__init__.py +0 -0
  91. agentscope_runtime/sandbox/box/mobile/mobile_sandbox.py +216 -0
  92. agentscope_runtime/sandbox/box/training_box/training_box.py +2 -2
  93. agentscope_runtime/sandbox/client/http_client.py +1 -0
  94. agentscope_runtime/sandbox/enums.py +2 -0
  95. agentscope_runtime/sandbox/manager/sandbox_manager.py +15 -2
  96. agentscope_runtime/sandbox/manager/server/app.py +12 -0
  97. agentscope_runtime/sandbox/manager/server/config.py +19 -0
  98. agentscope_runtime/sandbox/model/manager_config.py +79 -2
  99. agentscope_runtime/sandbox/utils.py +0 -18
  100. agentscope_runtime/tools/RAGs/__init__.py +0 -0
  101. agentscope_runtime/tools/RAGs/modelstudio_rag.py +377 -0
  102. agentscope_runtime/tools/RAGs/modelstudio_rag_lite.py +219 -0
  103. agentscope_runtime/tools/__init__.py +119 -0
  104. agentscope_runtime/tools/_constants.py +18 -0
  105. agentscope_runtime/tools/alipay/__init__.py +4 -0
  106. agentscope_runtime/tools/alipay/base.py +334 -0
  107. agentscope_runtime/tools/alipay/payment.py +835 -0
  108. agentscope_runtime/tools/alipay/subscribe.py +551 -0
  109. agentscope_runtime/tools/base.py +264 -0
  110. agentscope_runtime/tools/cli/__init__.py +0 -0
  111. agentscope_runtime/tools/cli/modelstudio_mcp_server.py +78 -0
  112. agentscope_runtime/tools/generations/__init__.py +75 -0
  113. agentscope_runtime/tools/generations/async_image_to_video.py +350 -0
  114. agentscope_runtime/tools/generations/async_image_to_video_wan25.py +366 -0
  115. agentscope_runtime/tools/generations/async_speech_to_video.py +422 -0
  116. agentscope_runtime/tools/generations/async_text_to_video.py +320 -0
  117. agentscope_runtime/tools/generations/async_text_to_video_wan25.py +334 -0
  118. agentscope_runtime/tools/generations/image_edit.py +208 -0
  119. agentscope_runtime/tools/generations/image_edit_wan25.py +193 -0
  120. agentscope_runtime/tools/generations/image_generation.py +202 -0
  121. agentscope_runtime/tools/generations/image_generation_wan25.py +201 -0
  122. agentscope_runtime/tools/generations/image_style_repaint.py +208 -0
  123. agentscope_runtime/tools/generations/image_to_video.py +233 -0
  124. agentscope_runtime/tools/generations/qwen_image_edit.py +205 -0
  125. agentscope_runtime/tools/generations/qwen_image_generation.py +214 -0
  126. agentscope_runtime/tools/generations/qwen_text_to_speech.py +154 -0
  127. agentscope_runtime/tools/generations/speech_to_text.py +260 -0
  128. agentscope_runtime/tools/generations/speech_to_video.py +314 -0
  129. agentscope_runtime/tools/generations/text_to_video.py +221 -0
  130. agentscope_runtime/tools/mcp_wrapper.py +215 -0
  131. agentscope_runtime/tools/realtime_clients/__init__.py +13 -0
  132. agentscope_runtime/tools/realtime_clients/asr_client.py +27 -0
  133. agentscope_runtime/tools/realtime_clients/azure_asr_client.py +195 -0
  134. agentscope_runtime/tools/realtime_clients/azure_tts_client.py +383 -0
  135. agentscope_runtime/tools/realtime_clients/modelstudio_asr_client.py +151 -0
  136. agentscope_runtime/tools/realtime_clients/modelstudio_tts_client.py +199 -0
  137. agentscope_runtime/tools/realtime_clients/realtime_tool.py +55 -0
  138. agentscope_runtime/tools/realtime_clients/tts_client.py +33 -0
  139. agentscope_runtime/tools/searches/__init__.py +3 -0
  140. agentscope_runtime/tools/searches/modelstudio_search.py +877 -0
  141. agentscope_runtime/tools/searches/modelstudio_search_lite.py +310 -0
  142. agentscope_runtime/tools/utils/__init__.py +0 -0
  143. agentscope_runtime/tools/utils/api_key_util.py +45 -0
  144. agentscope_runtime/tools/utils/crypto_utils.py +99 -0
  145. agentscope_runtime/tools/utils/mcp_util.py +35 -0
  146. agentscope_runtime/version.py +1 -1
  147. {agentscope_runtime-0.2.0b2.dist-info → agentscope_runtime-1.0.0.dist-info}/METADATA +240 -168
  148. agentscope_runtime-1.0.0.dist-info/RECORD +240 -0
  149. {agentscope_runtime-0.2.0b2.dist-info → agentscope_runtime-1.0.0.dist-info}/entry_points.txt +1 -0
  150. agentscope_runtime/engine/agents/__init__.py +0 -2
  151. agentscope_runtime/engine/agents/agentscope_agent.py +0 -488
  152. agentscope_runtime/engine/agents/agno_agent.py +0 -220
  153. agentscope_runtime/engine/agents/autogen_agent.py +0 -250
  154. agentscope_runtime/engine/agents/base_agent.py +0 -29
  155. agentscope_runtime/engine/agents/langgraph_agent.py +0 -59
  156. agentscope_runtime/engine/agents/utils.py +0 -53
  157. agentscope_runtime/engine/deployers/utils/package_project_utils.py +0 -1163
  158. agentscope_runtime/engine/deployers/utils/service_utils/service_config.py +0 -75
  159. agentscope_runtime/engine/deployers/utils/service_utils/service_factory.py +0 -220
  160. agentscope_runtime/engine/helpers/helper.py +0 -179
  161. agentscope_runtime/engine/schemas/context.py +0 -54
  162. agentscope_runtime/engine/services/context_manager.py +0 -164
  163. agentscope_runtime/engine/services/environment_manager.py +0 -50
  164. agentscope_runtime/engine/services/manager.py +0 -174
  165. agentscope_runtime/engine/services/rag_service.py +0 -195
  166. agentscope_runtime/engine/services/tablestore_rag_service.py +0 -143
  167. agentscope_runtime/sandbox/tools/__init__.py +0 -12
  168. agentscope_runtime/sandbox/tools/base/__init__.py +0 -8
  169. agentscope_runtime/sandbox/tools/base/tool.py +0 -52
  170. agentscope_runtime/sandbox/tools/browser/__init__.py +0 -57
  171. agentscope_runtime/sandbox/tools/browser/tool.py +0 -597
  172. agentscope_runtime/sandbox/tools/filesystem/__init__.py +0 -32
  173. agentscope_runtime/sandbox/tools/filesystem/tool.py +0 -319
  174. agentscope_runtime/sandbox/tools/function_tool.py +0 -321
  175. agentscope_runtime/sandbox/tools/gui/__init__.py +0 -7
  176. agentscope_runtime/sandbox/tools/gui/tool.py +0 -77
  177. agentscope_runtime/sandbox/tools/mcp_tool.py +0 -195
  178. agentscope_runtime/sandbox/tools/sandbox_tool.py +0 -104
  179. agentscope_runtime/sandbox/tools/tool.py +0 -238
  180. agentscope_runtime/sandbox/tools/utils.py +0 -68
  181. agentscope_runtime-0.2.0b2.dist-info/RECORD +0 -183
  182. {agentscope_runtime-0.2.0b2.dist-info → agentscope_runtime-1.0.0.dist-info}/WHEEL +0 -0
  183. {agentscope_runtime-0.2.0b2.dist-info → agentscope_runtime-1.0.0.dist-info}/licenses/LICENSE +0 -0
  184. {agentscope_runtime-0.2.0b2.dist-info → agentscope_runtime-1.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,383 @@
1
+ # -*- coding: utf-8 -*-
2
+ # pylint:disable=line-too-long, arguments-renamed, unknown-option-value
3
+
4
+ import json
5
+ import logging
6
+ import os
7
+ import time
8
+ from typing import Optional, Callable, Any
9
+
10
+ import azure.cognitiveservices.speech as speech_sdk
11
+ from azure.cognitiveservices.speech import (
12
+ SpeechSynthesisEventArgs,
13
+ SpeechSynthesisVisemeEventArgs,
14
+ SpeechSynthesisWordBoundaryEventArgs,
15
+ )
16
+ from azure.cognitiveservices.speech.enums import PropertyId
17
+ from pydantic import BaseModel
18
+
19
+ from .realtime_tool import (
20
+ RealtimeState,
21
+ )
22
+ from .tts_client import TtsClient
23
+ from ...engine.schemas.realtime import AzureTtsConfig
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
class AzureTtsCallbacks(BaseModel):
    """Optional hooks that ``AzureTtsClient`` invokes for synthesizer events.

    Every hook defaults to ``None``, in which case the client skips it.
    ``on_viseme_received`` and ``on_word_boundary`` are declared here because
    ``AzureTtsClient`` reads them in its viseme and word-boundary handlers;
    reading an undeclared attribute on a pydantic model raises
    ``AttributeError``.
    """

    # Called with no arguments when synthesis starts.
    on_started: Optional[Callable] = None
    # Called with the chat id when synthesis completes.
    on_complete: Optional[Callable] = None
    # Called with no arguments when synthesis is canceled.
    on_canceled: Optional[Callable] = None
    # Called as on_data(data: bytes, chat_id, data_index) per audio chunk.
    on_data: Optional[Callable] = None
    # Called with the SDK event while synthesis is in progress.
    on_synthesizing: Optional[Callable] = None
    # Called with the SDK viseme event.
    on_viseme_received: Optional[Callable] = None
    # Called with the SDK word-boundary event.
    on_word_boundary: Optional[Callable] = None
34
+
35
+
36
class PushStreamCallback(speech_sdk.audio.PushAudioOutputStreamCallback):
    """Push-stream sink that forwards synthesized audio to a callable."""

    def __init__(self, on_data: Callable):
        """Remember ``on_data``; it is invoked once per ``write`` call."""
        super().__init__()
        self.on_data = on_data

    def write(self, audio_buffer: memoryview) -> int:
        """Copy the buffer to ``bytes``, hand it to ``on_data``, and return
        the number of bytes that were forwarded."""
        chunk = audio_buffer.tobytes()
        self.on_data(chunk)
        return len(chunk)

    def close(self) -> None:
        """Nothing to release when the stream is closed."""
51
+
52
+
53
class AzureTtsClient(TtsClient):
    """Text-stream TTS client built on the Azure Speech SDK synthesizer.

    Text is written into a ``SpeechSynthesisRequest`` input stream and the
    resulting audio is delivered chunk by chunk through ``on_data``.
    Synthesizer events are forwarded to the optional hooks in ``callbacks``.
    """

    def __init__(
        self,
        config: AzureTtsConfig,
        callbacks: AzureTtsCallbacks,
    ):
        """Build the synthesizer and wire its events to this instance.

        Args:
            config: TTS settings. ``region`` and ``key`` fall back to the
                ``AZURE_REGION`` / ``AZURE_KEY`` environment variables when
                they are empty.
            callbacks: Optional event hooks.

        Raises:
            ValueError: If ``config.format`` is set to anything but ``pcm``
                (raised by ``config_to_format``).
        """
        super().__init__(config, callbacks)
        self.tts_request_id = None
        self.first_request_time = None
        self.is_first_audio_data = True
        self.data_index = 0
        self.pre_warmed = False

        region = config.region if config.region else os.getenv("AZURE_REGION")
        if not region:
            # Without a region the endpoint below becomes "wss://None...";
            # surface the misconfiguration instead of hiding it.
            logger.warning(
                "Azure TTS region is not configured (config.region / "
                "AZURE_REGION); the service endpoint will be invalid",
            )
        speech_config = speech_sdk.SpeechConfig(
            subscription=config.key if config.key else os.getenv("AZURE_KEY"),
            endpoint=f"wss://{region}.tts.speech.microsoft.com/cognitiveservices/websocket/v2",  # noqa
        )
        speech_config.speech_synthesis_voice_name = config.voice
        speech_config.set_speech_synthesis_output_format(
            AzureTtsClient.config_to_format(config),
        )
        speech_config.set_property(
            speech_sdk.PropertyId.SpeechSynthesis_FrameTimeoutInterval,
            "100000000",
        )
        speech_config.set_property(
            speech_sdk.PropertyId.SpeechSynthesis_RtfTimeoutThreshold,
            "100000000",
        )

        push_stream = speech_sdk.audio.PushAudioOutputStream(
            PushStreamCallback(self.on_data),
        )
        audio_config = speech_sdk.audio.AudioOutputConfig(stream=push_stream)

        self.synthesizer = speech_sdk.SpeechSynthesizer(
            speech_config=speech_config,
            audio_config=audio_config,
        )

        self.synthesizer.synthesis_started.connect(self.on_started)
        self.synthesizer.synthesis_completed.connect(self.on_complete)
        self.synthesizer.synthesis_canceled.connect(self.on_canceled)
        self.synthesizer.synthesizing.connect(self.on_synthesizing)
        self.synthesizer.viseme_received.connect(self.on_viseme_received)
        self.synthesizer.synthesis_word_boundary.connect(self.on_word_boundary)

        self.tts_request = None
        self.tts_task = None

        # Never write the subscription key into the logs.
        loggable_config = self.config.model_dump(exclude={"key"})
        logger.info(
            f"azure_tts_config: {json.dumps(loggable_config)}",
        )

        self.state = RealtimeState.IDLE

    def start(self, **kwargs: Any) -> None:
        """Open the connection and start a new text-stream synthesis request.

        Does nothing when the client is already ``RUNNING``.
        """
        if self.state == RealtimeState.RUNNING:
            return

        begin_time = int(time.time() * 1000)
        logger.info(
            f"tts_start begin: chat_id={self.config.chat_id},"
            f" object={id(self)}",
        )
        self.tts_request_id = None
        self.first_request_time = None
        self.is_first_audio_data = True
        self.data_index = 0

        connection = speech_sdk.Connection.from_speech_synthesizer(
            self.synthesizer,
        )
        connection.open(True)

        self.tts_request = speech_sdk.SpeechSynthesisRequest(
            input_type=speech_sdk.SpeechSynthesisRequestInputType.TextStream,
        )
        self.tts_task = self.synthesizer.speak_async(self.tts_request)

        self.state = RealtimeState.RUNNING

        logger.info(
            f"tts_start end: chat_id={self.config.chat_id},"
            f" cost={int(time.time() * 1000) - begin_time}, object={id(self)}",
        )

    def _close_input_stream(self) -> None:
        """Close the request's text input stream, logging any failure."""
        try:
            self.tts_request.input_stream.close()
        except Exception as e:
            logger.warning(f"Error closing TTS input stream: {e}")

    def stop(self, **kwargs: Any) -> None:
        """Close the text stream and block until the synthesis task ends.

        Does nothing when the client is ``IDLE``. Errors are logged, not
        raised.
        """
        if self.state == RealtimeState.IDLE:
            return

        logger.info(
            f"tts_stop begin: chat_id={self.config.chat_id},"
            f" tts_request_id={self.tts_request_id}, object={id(self)}",
        )

        self._close_input_stream()

        # Waiting can fail (e.g. after a cancellation); keep stop() best-effort.
        try:
            self.wait_all_tasks_completed()
        except Exception as e:
            logger.warning(
                f"Error waiting for TTS tasks: {e}",
            )

        logger.info(
            f"tts_stop end: chat_id={self.config.chat_id},"
            f" tts_request_id={self.tts_request_id}, object={id(self)}",
        )

    def async_stop(self, **kwargs: Any) -> None:
        """Close the text stream without waiting for synthesis to finish."""
        logger.info(
            f"tts_async_stop begin: chat_id={self.config.chat_id},"
            f" tts_request_id={self.tts_request_id}, object={id(self)}",
        )

        if self.state == RealtimeState.IDLE:
            return

        self._close_input_stream()

        # Don't wait for tasks in a new thread to avoid Azure SDK
        # thread-safety issues. The synthesizer will handle cleanup.

        logger.info(
            f"tts_async_stop end: chat_id={self.config.chat_id},"
            f" tts_request_id={self.tts_request_id}, object={id(self)}",
        )

    def close(self, **kwargs: Any) -> None:
        """Close the text stream and ask the synthesizer to stop speaking.

        Does nothing beyond logging when the client is ``IDLE``.
        """
        logger.info(
            f"tts_close begin: chat_id={self.config.chat_id},"
            f" tts_request_id={self.tts_request_id}, object={id(self)}",
        )
        if self.state == RealtimeState.IDLE:
            return

        self._close_input_stream()

        try:
            self.synthesizer.stop_speaking_async()
        except Exception as e:
            logger.warning(f"Error stopping TTS synthesizer: {e}")

        logger.info(
            f"tts_close end: chat_id={self.config.chat_id},"
            f" tts_request_id={self.tts_request_id}, object={id(self)}",
        )

    def send_text_data(self, text: str) -> None:
        """Write ``text`` to the request's input stream; empty text is
        ignored. The time of the first write is recorded so that ``on_data``
        can log the first-audio delay."""
        if not text:
            return

        logger.info(f"send_text_data: {text}")

        if self.first_request_time is None:
            self.first_request_time = int(round(time.time() * 1000))

        self.tts_request.input_stream.write(text)

    def wait_all_tasks_completed(self) -> None:
        """Wait for the pending synthesis task and log its latency stats.

        Returns immediately when no task has been started.
        """
        if self.tts_task is None:
            return

        result = self.tts_task.get()
        properties = result.properties
        latency_stats = (
            (
                "first_byte_client_latency",
                PropertyId.SpeechServiceResponse_SynthesisFirstByteLatencyMs,
            ),
            (
                "finished_client_latency",
                PropertyId.SpeechServiceResponse_SynthesisFinishLatencyMs,
            ),
            (
                "network_latency",
                PropertyId.SpeechServiceResponse_SynthesisNetworkLatencyMs,
            ),
            (
                "first_byte_service_latency",
                PropertyId.SpeechServiceResponse_SynthesisServiceLatencyMs,
            ),
        )
        for label, prop_id in latency_stats:
            raw_value = properties.get_property(prop_id)
            # A value that is not an integer string must not abort the
            # remaining stats; log it as-is in that case.
            try:
                value = int(raw_value)
            except (TypeError, ValueError):
                value = raw_value
            logger.info(f"tts stats: {label}: {value}")

    def on_started(self, event: SpeechSynthesisEventArgs) -> None:
        """Mark the client ``RUNNING``, record the result id and notify
        ``callbacks.on_started``."""
        self.state = RealtimeState.RUNNING
        logger.info(
            f"tts_on_started: chat_id={self.config.chat_id},"
            f" object={id(self)}, event={event} ",
        )

        self.tts_request_id = event.result.result_id

        if self.callbacks and self.callbacks.on_started:
            self.callbacks.on_started()

    def on_complete(self, event: SpeechSynthesisEventArgs) -> None:
        """Mark the client ``IDLE`` and notify ``callbacks.on_complete`` with
        the chat id."""
        self.state = RealtimeState.IDLE
        logger.info(
            f"tts_on_complete: chat_id={self.config.chat_id},"
            f" object={id(self)}, event={event} ",
        )

        if self.callbacks and self.callbacks.on_complete:
            self.callbacks.on_complete(self.config.chat_id)

    def on_canceled(self, event: SpeechSynthesisEventArgs) -> None:
        """Mark the client ``IDLE``, log the cancellation details and notify
        ``callbacks.on_canceled``."""
        self.state = RealtimeState.IDLE
        logger.info(
            f"tts_on_canceled: chat_id={self.config.chat_id},"
            f" object={id(self)}, event={event} ",
        )
        details = speech_sdk.SpeechSynthesisCancellationDetails(event.result)
        logger.info(
            f"tts_cancellation_details: reason={details.reason},"
            f" error_code={details.error_code},"
            f" error_details={details.error_details}, ",
        )
        if self.callbacks and self.callbacks.on_canceled:
            self.callbacks.on_canceled()

    def on_synthesizing(self, event: SpeechSynthesisEventArgs) -> None:
        """Forward the event to ``callbacks.on_synthesizing`` when set."""
        if self.callbacks and self.callbacks.on_synthesizing:
            self.callbacks.on_synthesizing(event)

    def on_viseme_received(
        self,
        event: SpeechSynthesisVisemeEventArgs,
    ) -> None:
        """Forward the viseme event to ``callbacks.on_viseme_received``.

        The hook is looked up with ``getattr`` because the callbacks model
        may not declare it; plain attribute access would then raise
        ``AttributeError`` inside the SDK event handler.
        """
        handler = getattr(self.callbacks, "on_viseme_received", None)
        if handler:
            handler(event)

    def on_word_boundary(
        self,
        event: SpeechSynthesisWordBoundaryEventArgs,
    ) -> None:
        """Forward the word-boundary event to ``callbacks.on_word_boundary``.

        Looked up with ``getattr`` for the same reason as the viseme hook.
        """
        handler = getattr(self.callbacks, "on_word_boundary", None)
        if handler:
            handler(event)

    def on_data(self, data: bytes) -> None:
        """Deliver one audio chunk to ``callbacks.on_data`` and advance the
        chunk index. The delay since the first text write is logged for the
        first chunk."""
        if (
            self.is_first_audio_data is True
            and self.first_request_time is not None
        ):
            logger.info(
                f"tts_first_delay: "
                f"chat_id={self.config.chat_id}, object={id(self)},"
                f" delay="
                f"{int(round(time.time() * 1000)) - self.first_request_time}",
            )
            self.is_first_audio_data = False

        if self.callbacks and self.callbacks.on_data:
            self.callbacks.on_data(data, self.config.chat_id, self.data_index)

        self.data_index += 1

    @staticmethod
    def config_to_format(
        config: AzureTtsConfig,
    ) -> speech_sdk.SpeechSynthesisOutputFormat:
        """
        Convert custom TTS configuration to Azure Speech SDK's
        SpeechSynthesisOutputFormat

        Only 16-bit mono raw PCM at 8, 16, 24 or 48 kHz is mapped. Any other
        combination falls back to 16 kHz and logs a warning.

        Raises:
            ValueError: If ``config.format`` is set and is not ``pcm``.
        """

        if config.format and config.format.lower() != "pcm":
            raise ValueError(
                f"Unsupported format: {config.format}."
                f" Only 'pcm' is supported in raw mode.",
            )

        formats = speech_sdk.SpeechSynthesisOutputFormat
        if config.bits_per_sample == 16 and config.nb_channels == 1:
            by_sample_rate = {
                8000: formats.Raw8Khz16BitMonoPcm,
                16000: formats.Raw16Khz16BitMonoPcm,
                24000: formats.Raw24Khz16BitMonoPcm,
                48000: formats.Raw48Khz16BitMonoPcm,
            }
            matched = by_sample_rate.get(config.sample_rate)
            if matched is not None:
                return matched

        # The fallback is kept for compatibility, but the produced audio will
        # not match the requested parameters, so make that visible.
        logger.warning(
            "Unsupported Azure TTS audio parameters (sample_rate=%s, "
            "bits_per_sample=%s, nb_channels=%s); falling back to "
            "16 kHz 16-bit mono PCM",
            config.sample_rate,
            config.bits_per_sample,
            config.nb_channels,
        )
        return formats.Raw16Khz16BitMonoPcm
@@ -0,0 +1,151 @@
1
+ # -*- coding: utf-8 -*-
2
+ # pylint:disable=logging-not-lazy, f-string-without-interpolation
3
+ # pylint:disable=consider-using-f-string, unused-argument
4
+
5
+ import json
6
+ import logging
7
+ from typing import Optional, Callable, Any
8
+
9
+ from dashscope.audio.asr import (
10
+ TranslationRecognizerCallback,
11
+ TranslationRecognizerRealtime,
12
+ TranscriptionResult,
13
+ TranslationResult,
14
+ )
15
+ from pydantic import BaseModel
16
+
17
+ from .asr_client import AsrClient
18
+ from .realtime_tool import (
19
+ RealtimeState,
20
+ )
21
+ from ...engine.schemas.realtime import ModelstudioAsrConfig
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
class ModelstudioAsrCallbacks(BaseModel):
    """Optional hooks that ``ModelstudioAsrClient`` invokes for recognizer
    events. A hook left as ``None`` is skipped."""

    # Called with no arguments when the recognizer connection opens.
    on_open: Optional[Callable] = None
    # Called with no arguments when recognition completes.
    on_complete: Optional[Callable] = None
    # Called with the error message reported by the recognizer.
    on_error: Optional[Callable] = None
    # Called with no arguments when the connection closes.
    on_close: Optional[Callable] = None
    # Called as on_event(sentence_end, sentence_text) per transcription.
    on_event: Optional[Callable] = None
32
+
33
+
34
class ModelstudioAsrClient(AsrClient, TranslationRecognizerCallback):
    """Realtime ASR client backed by DashScope's
    ``TranslationRecognizerRealtime``.

    The instance registers itself as the recognizer callback and relays
    recognizer events to the optional hooks in ``callbacks``.
    """

    def __init__(
        self,
        config: ModelstudioAsrConfig,
        callbacks: ModelstudioAsrCallbacks,
    ):
        """Create the recognizer from ``config`` (model, format, sample
        rate); the client starts in the ``IDLE`` state."""
        super().__init__(config, callbacks)
        self.asr_request_id = None
        self.is_first_audio_data = True
        self.recognition = TranslationRecognizerRealtime(
            model=config.model,
            format=config.format,
            sample_rate=config.sample_rate,
            callback=self,
        )
        self.callbacks = callbacks
        self.state = RealtimeState.IDLE

        logger.info(
            f"modelstudio_asr_config: {json.dumps(self.config.model_dump())}",
        )

    def start(self, **kwargs: Any) -> None:
        """Start recognition.

        ``max_end_silence`` is ``config.fast_vad_min_duration`` when that is
        set, otherwise ``config.max_end_silence``.
        """
        # Resolve the effective value before logging: formatting a None
        # fast_vad_min_duration with "%d" would raise TypeError.
        max_end_silence = (
            self.config.fast_vad_min_duration
            if self.config.fast_vad_min_duration is not None
            else self.config.max_end_silence
        )
        logger.info("asr_start: max_end_silence=%s", max_end_silence)
        self.recognition.start(max_end_silence=max_end_silence)

    def stop(self, **kwargs: Any) -> None:
        """Stop recognition; does nothing when already ``IDLE``."""
        if self.state == RealtimeState.IDLE:
            return

        self.state = RealtimeState.IDLE

        logger.info(f"asr_stop: asr_request_id={self.asr_request_id}")

        self.recognition.stop()

    def close(self, **kwargs: Any) -> None:
        """Detach the callbacks, then stop recognition unless already
        ``IDLE``."""
        # Dropping the hooks first means later recognizer events are ignored.
        self.callbacks = None
        if self.state == RealtimeState.IDLE:
            return

        self.state = RealtimeState.IDLE

        logger.info(f"asr_close: asr_request_id={self.asr_request_id}")

        self.recognition.stop()

    def send_audio_data(self, data: bytes) -> None:
        """Send one audio frame to the recognizer; frames are dropped while
        the client is ``IDLE``."""
        if self.state == RealtimeState.IDLE:
            return

        self.recognition.send_audio_frame(data)

    def on_open(self) -> None:
        """Mark the client ``RUNNING`` and notify ``callbacks.on_open``."""
        self.state = RealtimeState.RUNNING

        logger.info("asr_on_open")

        if self.callbacks and self.callbacks.on_open:
            self.callbacks.on_open()

    def on_complete(self) -> None:
        """Mark the client ``IDLE`` and notify ``callbacks.on_complete``."""
        self.state = RealtimeState.IDLE

        logger.info(f"asr_on_complete: asr_request_id={self.asr_request_id}")

        if self.callbacks and self.callbacks.on_complete:
            self.callbacks.on_complete()

    def on_error(self, message: Any) -> None:
        """Mark the client ``IDLE``, log the error and notify
        ``callbacks.on_error`` with the message."""
        self.state = RealtimeState.IDLE

        logger.error(
            f"asr_on_on_error: asr_request_id={self.asr_request_id}, "
            f"message={message}",
        )

        if self.callbacks and self.callbacks.on_error:
            self.callbacks.on_error(message)

    def on_close(self) -> None:
        """Mark the client ``IDLE`` and notify ``callbacks.on_close``."""
        self.state = RealtimeState.IDLE

        logger.info(f"asr_on_close: asr_request_id={self.asr_request_id}")

        if self.callbacks and self.callbacks.on_close:
            self.callbacks.on_close()

    def on_event(
        self,
        request_id: str,
        transcription_result: TranscriptionResult,
        translation_result: TranslationResult,
        usage: Any,
    ) -> None:
        """Record the request id and pass the transcription on to
        ``callbacks.on_event(sentence_end, sentence_text)``.

        ``translation_result`` and ``usage`` are not used.
        """
        if self.asr_request_id != request_id:
            self.asr_request_id = request_id

        # NOTE(review): the recognizer may presumably deliver an event with
        # no transcription payload; skip it rather than fail on None.
        if transcription_result is None:
            return

        sentence_end = transcription_result.is_sentence_end
        sentence_text = transcription_result.text

        if self.callbacks and self.callbacks.on_event:
            self.callbacks.on_event(sentence_end, sentence_text)