agentscope-runtime 0.2.0b1__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. agentscope_runtime/adapters/__init__.py +0 -0
  2. agentscope_runtime/adapters/agentscope/__init__.py +0 -0
  3. agentscope_runtime/adapters/agentscope/long_term_memory/__init__.py +6 -0
  4. agentscope_runtime/adapters/agentscope/long_term_memory/_long_term_memory_adapter.py +258 -0
  5. agentscope_runtime/adapters/agentscope/memory/__init__.py +6 -0
  6. agentscope_runtime/adapters/agentscope/memory/_memory_adapter.py +152 -0
  7. agentscope_runtime/adapters/agentscope/message.py +535 -0
  8. agentscope_runtime/adapters/agentscope/stream.py +506 -0
  9. agentscope_runtime/adapters/agentscope/tool/__init__.py +9 -0
  10. agentscope_runtime/adapters/agentscope/tool/sandbox_tool.py +69 -0
  11. agentscope_runtime/adapters/agentscope/tool/tool.py +233 -0
  12. agentscope_runtime/adapters/autogen/__init__.py +0 -0
  13. agentscope_runtime/adapters/autogen/tool/__init__.py +7 -0
  14. agentscope_runtime/adapters/autogen/tool/tool.py +211 -0
  15. agentscope_runtime/adapters/text/__init__.py +0 -0
  16. agentscope_runtime/adapters/text/stream.py +29 -0
  17. agentscope_runtime/common/collections/redis_mapping.py +4 -1
  18. agentscope_runtime/common/container_clients/fc_client.py +855 -0
  19. agentscope_runtime/common/container_clients/kubernetes_client.py +6 -13
  20. agentscope_runtime/common/utils/__init__.py +0 -0
  21. agentscope_runtime/common/utils/lazy_loader.py +57 -0
  22. agentscope_runtime/engine/__init__.py +25 -18
  23. agentscope_runtime/engine/app/agent_app.py +161 -91
  24. agentscope_runtime/engine/app/base_app.py +4 -118
  25. agentscope_runtime/engine/constant.py +8 -0
  26. agentscope_runtime/engine/deployers/__init__.py +8 -0
  27. agentscope_runtime/engine/deployers/adapter/__init__.py +2 -0
  28. agentscope_runtime/engine/deployers/adapter/a2a/a2a_adapter_utils.py +0 -21
  29. agentscope_runtime/engine/deployers/adapter/a2a/a2a_protocol_adapter.py +28 -9
  30. agentscope_runtime/engine/deployers/adapter/responses/__init__.py +2 -0
  31. agentscope_runtime/engine/deployers/adapter/responses/response_api_adapter_utils.py +5 -2
  32. agentscope_runtime/engine/deployers/adapter/responses/response_api_protocol_adapter.py +1 -1
  33. agentscope_runtime/engine/deployers/agentrun_deployer.py +2541 -0
  34. agentscope_runtime/engine/deployers/cli_fc_deploy.py +1 -1
  35. agentscope_runtime/engine/deployers/kubernetes_deployer.py +9 -21
  36. agentscope_runtime/engine/deployers/local_deployer.py +47 -74
  37. agentscope_runtime/engine/deployers/modelstudio_deployer.py +216 -50
  38. agentscope_runtime/engine/deployers/utils/app_runner_utils.py +29 -0
  39. agentscope_runtime/engine/deployers/utils/detached_app.py +510 -0
  40. agentscope_runtime/engine/deployers/utils/docker_image_utils/__init__.py +1 -1
  41. agentscope_runtime/engine/deployers/utils/docker_image_utils/dockerfile_generator.py +1 -1
  42. agentscope_runtime/engine/deployers/utils/docker_image_utils/{runner_image_factory.py → image_factory.py} +121 -61
  43. agentscope_runtime/engine/deployers/utils/package.py +693 -0
  44. agentscope_runtime/engine/deployers/utils/service_utils/__init__.py +0 -5
  45. agentscope_runtime/engine/deployers/utils/service_utils/fastapi_factory.py +301 -282
  46. agentscope_runtime/engine/deployers/utils/service_utils/fastapi_templates.py +2 -4
  47. agentscope_runtime/engine/deployers/utils/service_utils/process_manager.py +23 -1
  48. agentscope_runtime/engine/deployers/utils/templates/app_main.py.j2 +84 -0
  49. agentscope_runtime/engine/deployers/utils/templates/runner_main.py.j2 +95 -0
  50. agentscope_runtime/engine/deployers/utils/{service_utils → templates}/standalone_main.py.j2 +0 -45
  51. agentscope_runtime/engine/deployers/utils/wheel_packager.py +119 -18
  52. agentscope_runtime/engine/helpers/runner.py +40 -0
  53. agentscope_runtime/engine/runner.py +171 -130
  54. agentscope_runtime/engine/schemas/agent_schemas.py +114 -3
  55. agentscope_runtime/engine/schemas/modelstudio_llm.py +4 -2
  56. agentscope_runtime/engine/schemas/oai_llm.py +23 -23
  57. agentscope_runtime/engine/schemas/response_api.py +65 -0
  58. agentscope_runtime/engine/schemas/session.py +24 -0
  59. agentscope_runtime/engine/services/__init__.py +0 -9
  60. agentscope_runtime/engine/services/agent_state/__init__.py +16 -0
  61. agentscope_runtime/engine/services/agent_state/redis_state_service.py +113 -0
  62. agentscope_runtime/engine/services/agent_state/state_service.py +179 -0
  63. agentscope_runtime/engine/services/memory/__init__.py +24 -0
  64. agentscope_runtime/engine/services/{mem0_memory_service.py → memory/mem0_memory_service.py} +17 -13
  65. agentscope_runtime/engine/services/{memory_service.py → memory/memory_service.py} +28 -7
  66. agentscope_runtime/engine/services/{redis_memory_service.py → memory/redis_memory_service.py} +1 -1
  67. agentscope_runtime/engine/services/{reme_personal_memory_service.py → memory/reme_personal_memory_service.py} +9 -6
  68. agentscope_runtime/engine/services/{reme_task_memory_service.py → memory/reme_task_memory_service.py} +2 -2
  69. agentscope_runtime/engine/services/{tablestore_memory_service.py → memory/tablestore_memory_service.py} +16 -19
  70. agentscope_runtime/engine/services/sandbox/__init__.py +13 -0
  71. agentscope_runtime/engine/services/{sandbox_service.py → sandbox/sandbox_service.py} +86 -71
  72. agentscope_runtime/engine/services/session_history/__init__.py +23 -0
  73. agentscope_runtime/engine/services/{redis_session_history_service.py → session_history/redis_session_history_service.py} +3 -2
  74. agentscope_runtime/engine/services/{session_history_service.py → session_history/session_history_service.py} +44 -34
  75. agentscope_runtime/engine/services/{tablestore_session_history_service.py → session_history/tablestore_session_history_service.py} +14 -19
  76. agentscope_runtime/engine/services/utils/tablestore_service_utils.py +2 -2
  77. agentscope_runtime/engine/tracing/base.py +10 -9
  78. agentscope_runtime/engine/tracing/message_util.py +1 -1
  79. agentscope_runtime/engine/tracing/tracing_util.py +7 -2
  80. agentscope_runtime/engine/tracing/wrapper.py +49 -31
  81. agentscope_runtime/sandbox/__init__.py +10 -2
  82. agentscope_runtime/sandbox/box/agentbay/__init__.py +4 -0
  83. agentscope_runtime/sandbox/box/agentbay/agentbay_sandbox.py +559 -0
  84. agentscope_runtime/sandbox/box/base/base_sandbox.py +12 -0
  85. agentscope_runtime/sandbox/box/browser/browser_sandbox.py +115 -11
  86. agentscope_runtime/sandbox/box/cloud/__init__.py +4 -0
  87. agentscope_runtime/sandbox/box/cloud/cloud_sandbox.py +254 -0
  88. agentscope_runtime/sandbox/box/filesystem/filesystem_sandbox.py +66 -0
  89. agentscope_runtime/sandbox/box/gui/gui_sandbox.py +42 -0
  90. agentscope_runtime/sandbox/box/mobile/__init__.py +4 -0
  91. agentscope_runtime/sandbox/box/mobile/box/__init__.py +0 -0
  92. agentscope_runtime/sandbox/box/mobile/mobile_sandbox.py +216 -0
  93. agentscope_runtime/sandbox/box/training_box/training_box.py +2 -44
  94. agentscope_runtime/sandbox/client/http_client.py +1 -0
  95. agentscope_runtime/sandbox/enums.py +2 -1
  96. agentscope_runtime/sandbox/manager/sandbox_manager.py +15 -2
  97. agentscope_runtime/sandbox/manager/server/app.py +12 -0
  98. agentscope_runtime/sandbox/manager/server/config.py +19 -0
  99. agentscope_runtime/sandbox/model/manager_config.py +79 -2
  100. agentscope_runtime/sandbox/utils.py +0 -18
  101. agentscope_runtime/tools/RAGs/__init__.py +0 -0
  102. agentscope_runtime/tools/RAGs/modelstudio_rag.py +377 -0
  103. agentscope_runtime/tools/RAGs/modelstudio_rag_lite.py +219 -0
  104. agentscope_runtime/tools/__init__.py +119 -0
  105. agentscope_runtime/tools/_constants.py +18 -0
  106. agentscope_runtime/tools/alipay/__init__.py +4 -0
  107. agentscope_runtime/tools/alipay/base.py +334 -0
  108. agentscope_runtime/tools/alipay/payment.py +835 -0
  109. agentscope_runtime/tools/alipay/subscribe.py +551 -0
  110. agentscope_runtime/tools/base.py +264 -0
  111. agentscope_runtime/tools/cli/__init__.py +0 -0
  112. agentscope_runtime/tools/cli/modelstudio_mcp_server.py +78 -0
  113. agentscope_runtime/tools/generations/__init__.py +75 -0
  114. agentscope_runtime/tools/generations/async_image_to_video.py +350 -0
  115. agentscope_runtime/tools/generations/async_image_to_video_wan25.py +366 -0
  116. agentscope_runtime/tools/generations/async_speech_to_video.py +422 -0
  117. agentscope_runtime/tools/generations/async_text_to_video.py +320 -0
  118. agentscope_runtime/tools/generations/async_text_to_video_wan25.py +334 -0
  119. agentscope_runtime/tools/generations/image_edit.py +208 -0
  120. agentscope_runtime/tools/generations/image_edit_wan25.py +193 -0
  121. agentscope_runtime/tools/generations/image_generation.py +202 -0
  122. agentscope_runtime/tools/generations/image_generation_wan25.py +201 -0
  123. agentscope_runtime/tools/generations/image_style_repaint.py +208 -0
  124. agentscope_runtime/tools/generations/image_to_video.py +233 -0
  125. agentscope_runtime/tools/generations/qwen_image_edit.py +205 -0
  126. agentscope_runtime/tools/generations/qwen_image_generation.py +214 -0
  127. agentscope_runtime/tools/generations/qwen_text_to_speech.py +154 -0
  128. agentscope_runtime/tools/generations/speech_to_text.py +260 -0
  129. agentscope_runtime/tools/generations/speech_to_video.py +314 -0
  130. agentscope_runtime/tools/generations/text_to_video.py +221 -0
  131. agentscope_runtime/tools/mcp_wrapper.py +215 -0
  132. agentscope_runtime/tools/realtime_clients/__init__.py +13 -0
  133. agentscope_runtime/tools/realtime_clients/asr_client.py +27 -0
  134. agentscope_runtime/tools/realtime_clients/azure_asr_client.py +195 -0
  135. agentscope_runtime/tools/realtime_clients/azure_tts_client.py +383 -0
  136. agentscope_runtime/tools/realtime_clients/modelstudio_asr_client.py +151 -0
  137. agentscope_runtime/tools/realtime_clients/modelstudio_tts_client.py +199 -0
  138. agentscope_runtime/tools/realtime_clients/realtime_tool.py +55 -0
  139. agentscope_runtime/tools/realtime_clients/tts_client.py +33 -0
  140. agentscope_runtime/tools/searches/__init__.py +3 -0
  141. agentscope_runtime/tools/searches/modelstudio_search.py +877 -0
  142. agentscope_runtime/tools/searches/modelstudio_search_lite.py +310 -0
  143. agentscope_runtime/tools/utils/__init__.py +0 -0
  144. agentscope_runtime/tools/utils/api_key_util.py +45 -0
  145. agentscope_runtime/tools/utils/crypto_utils.py +99 -0
  146. agentscope_runtime/tools/utils/mcp_util.py +35 -0
  147. agentscope_runtime/version.py +1 -1
  148. {agentscope_runtime-0.2.0b1.dist-info → agentscope_runtime-1.0.0.dist-info}/METADATA +244 -168
  149. agentscope_runtime-1.0.0.dist-info/RECORD +240 -0
  150. {agentscope_runtime-0.2.0b1.dist-info → agentscope_runtime-1.0.0.dist-info}/entry_points.txt +1 -0
  151. agentscope_runtime/engine/agents/__init__.py +0 -2
  152. agentscope_runtime/engine/agents/agentscope_agent.py +0 -488
  153. agentscope_runtime/engine/agents/agno_agent.py +0 -222
  154. agentscope_runtime/engine/agents/autogen_agent.py +0 -250
  155. agentscope_runtime/engine/agents/base_agent.py +0 -29
  156. agentscope_runtime/engine/agents/langgraph_agent.py +0 -59
  157. agentscope_runtime/engine/agents/utils.py +0 -53
  158. agentscope_runtime/engine/deployers/utils/package_project_utils.py +0 -1163
  159. agentscope_runtime/engine/deployers/utils/service_utils/service_config.py +0 -75
  160. agentscope_runtime/engine/deployers/utils/service_utils/service_factory.py +0 -220
  161. agentscope_runtime/engine/helpers/helper.py +0 -179
  162. agentscope_runtime/engine/schemas/context.py +0 -54
  163. agentscope_runtime/engine/services/context_manager.py +0 -164
  164. agentscope_runtime/engine/services/environment_manager.py +0 -50
  165. agentscope_runtime/engine/services/manager.py +0 -174
  166. agentscope_runtime/engine/services/rag_service.py +0 -195
  167. agentscope_runtime/engine/services/tablestore_rag_service.py +0 -143
  168. agentscope_runtime/sandbox/tools/__init__.py +0 -12
  169. agentscope_runtime/sandbox/tools/base/__init__.py +0 -8
  170. agentscope_runtime/sandbox/tools/base/tool.py +0 -52
  171. agentscope_runtime/sandbox/tools/browser/__init__.py +0 -57
  172. agentscope_runtime/sandbox/tools/browser/tool.py +0 -597
  173. agentscope_runtime/sandbox/tools/filesystem/__init__.py +0 -32
  174. agentscope_runtime/sandbox/tools/filesystem/tool.py +0 -319
  175. agentscope_runtime/sandbox/tools/function_tool.py +0 -321
  176. agentscope_runtime/sandbox/tools/gui/__init__.py +0 -7
  177. agentscope_runtime/sandbox/tools/gui/tool.py +0 -77
  178. agentscope_runtime/sandbox/tools/mcp_tool.py +0 -195
  179. agentscope_runtime/sandbox/tools/sandbox_tool.py +0 -104
  180. agentscope_runtime/sandbox/tools/tool.py +0 -238
  181. agentscope_runtime/sandbox/tools/utils.py +0 -68
  182. agentscope_runtime-0.2.0b1.dist-info/RECORD +0 -183
  183. {agentscope_runtime-0.2.0b1.dist-info → agentscope_runtime-1.0.0.dist-info}/WHEEL +0 -0
  184. {agentscope_runtime-0.2.0b1.dist-info → agentscope_runtime-1.0.0.dist-info}/licenses/LICENSE +0 -0
  185. {agentscope_runtime-0.2.0b1.dist-info → agentscope_runtime-1.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,260 @@
1
+ # -*- coding: utf-8 -*-
2
+ # pylint:disable=abstract-method, too-many-branches
3
+ # pylint:disable=too-many-statements
4
+
5
+ import asyncio
6
+ import os
7
+ import time
8
+ import uuid
9
+ from http import HTTPStatus
10
+ from typing import Any, Optional
11
+
12
+ import requests
13
+ from dashscope.audio.asr import Transcription
14
+ from dashscope.common.constants import TaskStatus
15
+ from mcp.server.fastmcp import Context
16
+ from pydantic import BaseModel, Field
17
+
18
+ from ..base import Tool
19
+ from ..utils.api_key_util import get_api_key, ApiNames
20
+ from ...engine.tracing import trace, TracingUtil
21
+
22
+
class SpeechToTextInput(BaseModel):
    """
    Arguments accepted by the speech-to-text transcription tool.

    Attributes are documented through their ``Field`` descriptions, which
    are also exposed to callers as the tool's parameter schema.
    """

    # Required: one URL per audio file that should be transcribed.
    file_urls: list[str] = Field(
        ...,
        description=("音频文件的URL列表,支持公网可访问的HTTPS/HTTP链接"),
    )

    # Optional language codes forwarded to the ASR model when provided.
    language_hints: Optional[list[str]] = Field(
        description=(
            "指定待识别语音的语言代码。该参数仅适用于paraformer-v2模型。"
            "支持的语言代码:zh: 中文, en: 英文, ja: 日语, yue: 粤语, ko: 韩语,"
            " de:德语, fr:法语, ru:俄语。默认为['zh', 'en']"
        ),
        default=None,
    )

    # MCP request context; supplied by the server, never by the model.
    ctx: Optional[Context] = Field(
        description=(
            "HTTP request context containing headers for mcp only, "
            "don't generate it"
        ),
        default=None,
    )
43
+
44
+
class SpeechToTextOutput(BaseModel):
    """
    Result returned by the speech-to-text transcription tool.
    """

    # Transcribed text, one list element per processed audio file.
    results: list[str] = Field(
        description=(
            "识别出的文本内容列表,每个元素对应一个音频文件的识别结果"
        ),
        default_factory=list,
    )

    # Identifier of the request that produced these results.
    request_id: Optional[str] = Field(
        title="Request ID",
        description="请求ID",
        default=None,
    )
59
+
60
+
class SpeechToText(Tool[SpeechToTextInput, SpeechToTextOutput]):
    """
    Speech to text transcription service that converts audio files to text
    using DashScope's Paraformer ASR API.
    """

    name: str = "modelstudio_speech_to_text"
    description: str = (
        "录音文件识别(也称为录音文件转写)是指对音视频文件进行语音识别,将语音转换为文本。"
        "支持单个文件识别和批量文件识别,适用于处理不需要即时返回结果的场景。"
    )

    @staticmethod
    def _has_failed(response: Any) -> bool:
        """Return True when a DashScope response signals a failed task.

        A response counts as failed when the HTTP status is not OK, when it
        carries no output, or when the output reports a FAILED / CANCELED
        task status.
        """
        if response.status_code != HTTPStatus.OK or not response.output:
            return True
        return hasattr(
            response.output,
            "task_status",
        ) and response.output.task_status in (
            TaskStatus.FAILED,
            TaskStatus.CANCELED,
        )

    @staticmethod
    def _download_transcript_text(transcription_url: str) -> str:
        """Download one transcription document and join its text parts.

        This helper is synchronous (it uses ``requests``) and is meant to be
        run in a worker thread.

        Args:
            transcription_url: URL of the JSON transcription document.

        Returns:
            The concatenated ``text`` of every entry under ``transcripts``,
            or an empty string when there is none.

        Raises:
            RuntimeError: If the server does not answer with HTTP 200.
        """
        # A timeout keeps a stalled download from hanging the tool forever.
        response = requests.get(transcription_url, timeout=30)
        if response.status_code != HTTPStatus.OK:
            raise RuntimeError(
                f"unexpected HTTP status {response.status_code}",
            )
        transcription_data = response.json()
        if "transcripts" not in transcription_data:
            return ""
        return "".join(
            transcript["text"]
            for transcript in transcription_data["transcripts"]
            if "text" in transcript
        )

    @trace(trace_type="AIGC", trace_name="speech_to_text")
    async def arun(
        self,
        args: SpeechToTextInput,
        **kwargs: Any,
    ) -> SpeechToTextOutput:
        """
        Transcribe audio files to text using DashScope Paraformer ASR

        The task is submitted with ``Transcription.async_call`` and polled
        with ``Transcription.fetch`` until it leaves the PENDING / RUNNING
        states. The blocking SDK and HTTP calls run in worker threads so the
        event loop stays responsive while waiting.

        Args:
            args: SpeechToTextInput containing file URLs and parameters
            **kwargs: Additional keyword arguments including:
                - trace_event: Optional trace event used to log the result
                - model_name: Model name to use (defaults to the
                  SPEECH_TO_TEXT_MODEL_NAME environment variable, then
                  paraformer-v2)
                - api_key: DashScope API key for authentication

        Returns:
            SpeechToTextOutput containing one text entry per file result
            (an empty string when a file's transcription could not be
            retrieved) and the request ID

        Raises:
            ValueError: If DASHSCOPE_API_KEY is not set or invalid
            TimeoutError: If transcription takes too long
            RuntimeError: If transcription fails
        """
        # Local import: the module-level import block is outside this class.
        import logging

        logger = logging.getLogger(__name__)

        trace_event = kwargs.pop("trace_event", None)
        request_id = TracingUtil.get_request_id()

        try:
            api_key = get_api_key(ApiNames.dashscope_api_key, **kwargs)
        except AssertionError as e:
            raise ValueError("Please set valid DASHSCOPE_API_KEY!") from e

        model_name = kwargs.get(
            "model_name",
            os.getenv("SPEECH_TO_TEXT_MODEL_NAME", "paraformer-v2"),
        )

        # Prepare parameters
        parameters = {}
        if args.language_hints:
            parameters["language_hints"] = args.language_hints

        # Submit async transcription task (blocking SDK call -> thread)
        task = await asyncio.to_thread(
            Transcription.async_call,
            api_key=api_key,
            model=model_name,
            file_urls=args.file_urls,
            **parameters,
        )

        if self._has_failed(task):
            raise RuntimeError(f"Failed to submit task: {task}")

        # Poll for task completion
        max_wait_time = 300  # 5 minutes timeout for transcription
        poll_interval = 2  # 2 seconds polling interval
        # Monotonic clock: elapsed time is immune to wall-clock adjustments.
        start_time = time.monotonic()

        while True:
            # Pass the resolved key explicitly so a key supplied through
            # kwargs is also used while polling.
            results = await asyncio.to_thread(
                Transcription.fetch,
                task.output.task_id,
                api_key=api_key,
            )

            if self._has_failed(results):
                raise RuntimeError(f"Failed to fetch result: {results}")

            if results.output.task_status not in (
                TaskStatus.PENDING,
                TaskStatus.RUNNING,
            ):
                break

            # Wait before next poll
            await asyncio.sleep(poll_interval)

            # Check timeout
            if time.monotonic() - start_time > max_wait_time:
                raise TimeoutError(
                    f"Speech transcription timeout after"
                    f" {max_wait_time}s",
                )

        # FAILED / CANCELED were already rejected inside the loop; anything
        # other than SUCCEEDED here is an unexpected terminal state.
        if results.output.task_status != TaskStatus.SUCCEEDED:
            raise RuntimeError(
                f"Transcription task not completed successfully: "
                f"status={results.output.task_status}",
            )

        # Handle request ID
        if not request_id:
            request_id = (
                results.request_id if results.request_id else str(uuid.uuid4())
            )

        # Log trace event if provided
        if trace_event:
            trace_event.on_log(
                "",
                **{
                    "step_suffix": "results",
                    "payload": {
                        "request_id": request_id,
                        "speech_to_text_result": results,
                    },
                },
            )

        # Extract transcription results for each file
        file_results = []
        if hasattr(results.output, "results") and results.output.results:
            file_results = results.output.results

        text_results = []
        for result in file_results:
            # Always append exactly one entry per file result so the output
            # list stays aligned with the reported files; failures yield "".
            file_text = ""
            if isinstance(result, dict) and "transcription_url" in result:
                try:
                    file_text = await asyncio.to_thread(
                        self._download_transcript_text,
                        result["transcription_url"],
                    )
                except Exception as e:  # pylint: disable=broad-except
                    # Best effort: one bad file must not fail the batch.
                    logger.warning(
                        "Failed to fetch transcription from URL: %s",
                        e,
                    )
            else:
                logger.warning(
                    "No transcription_url in file result: %s",
                    result,
                )
            text_results.append(file_text)

        return SpeechToTextOutput(
            results=text_results,
            request_id=request_id,
        )
@@ -0,0 +1,314 @@
1
+ # -*- coding: utf-8 -*-
2
+ # pylint:disable=abstract-method, redefined-builtin, no-else-break
3
+ # pylint:disable=too-many-branches, too-many-statements
4
+
5
+ import asyncio
6
+ import os
7
+ import time
8
+ import uuid
9
+ from http import HTTPStatus
10
+ from typing import Any, Optional
11
+
12
+ from dashscope.client.base_api import BaseAsyncAioApi
13
+ from mcp.server.fastmcp import Context
14
+ from pydantic import BaseModel, Field
15
+
16
+ from ..base import Tool
17
+ from ..utils.api_key_util import get_api_key, ApiNames
18
+ from ...engine.tracing import trace, TracingUtil
19
+
20
+
class SpeechToVideoInput(BaseModel):
    """
    Arguments accepted by the speech-to-video generation tool.

    Attributes are documented through their ``Field`` descriptions, which
    are also exposed to callers as the tool's parameter schema.
    """

    # Required: URL of the still image that will be animated.
    image_url: str = Field(
        ...,
        description=(
            "上传的图片URL。图像格式:支持jpg,jpeg,png,bmp,webp。"
            "图像分辨率:图像的宽度和高度范围为[400, 7000]像素。"
            "上传图片仅支持公网可访问的HTTP/HTTPS链接。"
        ),
    )

    # Required: URL of the audio track that drives the animation.
    audio_url: str = Field(
        ...,
        description=(
            "上传的音频文件URL。音频格式:格式为wav、mp3。"
            "音频限制:文件<15M,时长<20s。"
            "音频内容:音频中需包含清晰、响亮的人声语音,并去除了环境噪音、"
            "背景音乐等声音干扰信息。上传音频仅支持公网可访问的HTTP/HTTPS链接。"
        ),
    )

    # Optional output resolution; omitted from the request when unset.
    resolution: Optional[str] = Field(
        description="视频分辨率,默认不设置",
        default=None,
    )

    # MCP request context; supplied by the server, never by the model.
    ctx: Optional[Context] = Field(
        description=(
            "HTTP request context containing headers for mcp only, "
            "don't generate it"
        ),
        default=None,
    )
48
+
49
+
class SpeechToVideoOutput(BaseModel):
    """
    Result returned by the speech-to-video generation tool.
    """

    # URL of the generated video file.
    video_url: str = Field(
        description="生成的视频文件URL",
        title="Video URL",
    )

    # Identifier of the request that produced the video.
    request_id: Optional[str] = Field(
        title="Request ID",
        description="请求ID",
        default=None,
    )

    # Duration of the generated video, taken from the response usage data.
    video_duration: Optional[float] = Field(
        title="Video Duration",
        description="视频时长(秒),用于计费",
        default=None,
    )
69
+
70
+
class SpeechToVideo(Tool[SpeechToVideoInput, SpeechToVideoOutput]):
    """
    Speech to video generation service that converts speech and image into
    videos using DashScope's wan2.2-s2v API.
    """

    name: str = "modelstudio_speech_to_video"
    description: str = (
        "数字人wan2.2-s2v模型能基于单张图片和音频,生成动作自然的说话、唱歌或表演视频。"
        "通过输入的人声音频,驱动静态图片中的人物实现口型、表情和动作与音频同步。"
        "支持说话、唱歌、表演三种对口型场景,支持真人及卡通人物,"
        "提供480P、720P两档分辨率选项。"
    )

    @staticmethod
    def _has_failed(response: Any) -> bool:
        """Return True when a DashScope response signals a failed task.

        A response counts as failed when the HTTP status is not OK, when it
        carries no output, or when its dict output reports a FAILED /
        CANCELED task status.
        """
        if response.status_code != HTTPStatus.OK or not response.output:
            return True
        return isinstance(response.output, dict) and response.output.get(
            "task_status",
            "UNKNOWN",
        ) in ("FAILED", "CANCELED")

    @staticmethod
    async def _async_call(
        model: str,
        api_key: str,
        image_url: str,
        audio_url: str,
        **parameters: Any,
    ) -> Any:
        """
        Submit async task for speech to video generation using BaseAsyncAioApi

        Args:
            model: Model name to use
            api_key: DashScope API key for authentication
            image_url: URL of the input image
            audio_url: URL of the input audio
            **parameters: Additional parameters like resolution

        Returns:
            Response containing task_id for polling
        """
        # Named ``payload`` rather than ``input`` to avoid shadowing the
        # builtin; the API keyword itself is still ``input``.
        payload = {
            "image_url": image_url,
            "audio_url": audio_url,
        }

        return await BaseAsyncAioApi.async_call(
            model=model,
            input=payload,
            task_group="aigc",
            task="image2video",
            function="video-synthesis",
            api_key=api_key,
            **parameters,
        )

    @staticmethod
    async def _fetch(
        api_key: str,
        task: Any,
    ) -> Any:
        """
        Fetch task result using BaseAsyncAioApi

        Args:
            api_key: DashScope API key for authentication
            task: Task response containing task_id

        Returns:
            Response containing task status and result
        """
        return await BaseAsyncAioApi.fetch(
            api_key=api_key,
            task=task,
        )

    @trace(trace_type="AIGC", trace_name="speech_to_video")
    async def arun(
        self,
        args: SpeechToVideoInput,
        **kwargs: Any,
    ) -> SpeechToVideoOutput:
        """
        Generate video from speech and image using DashScope wan2.2-s2v API

        The task is submitted asynchronously and then polled every few
        seconds until it reports SUCCEEDED, fails, or the overall timeout
        elapses.

        Args:
            args: SpeechToVideoInput containing image_url, audio_url and
                optional parameters
            **kwargs: Additional keyword arguments including:
                - trace_event: Optional trace event used to log the result
                - model_name: Model name to use (defaults to the
                  SPEECH_TO_VIDEO_MODEL_NAME environment variable, then
                  wan2.2-s2v)
                - api_key: DashScope API key for authentication

        Returns:
            SpeechToVideoOutput containing the generated video URL,
            request ID and video duration

        Raises:
            ValueError: If DASHSCOPE_API_KEY is not set or invalid
            TimeoutError: If video generation takes too long
            RuntimeError: If video generation fails
        """
        trace_event = kwargs.pop("trace_event", None)
        request_id = TracingUtil.get_request_id()

        try:
            api_key = get_api_key(ApiNames.dashscope_api_key, **kwargs)
        except AssertionError as e:
            raise ValueError("Please set valid DASHSCOPE_API_KEY!") from e

        model_name = kwargs.get(
            "model_name",
            os.getenv("SPEECH_TO_VIDEO_MODEL_NAME", "wan2.2-s2v"),
        )

        parameters = {}
        if args.resolution:
            parameters["resolution"] = args.resolution

        # Submit async task
        task_response = await self._async_call(
            model=model_name,
            api_key=api_key,
            image_url=args.image_url,
            audio_url=args.audio_url,
            **parameters,
        )

        if self._has_failed(task_response):
            raise RuntimeError(f"Failed to submit task: {task_response}")

        # Poll for task completion using async methods
        max_wait_time = 15 * 60  # 15 minutes timeout for video generation
        poll_interval = 5  # 5 seconds polling interval
        # Monotonic clock: elapsed time is immune to wall-clock adjustments.
        start_time = time.monotonic()

        while True:
            # Wait before polling
            await asyncio.sleep(poll_interval)

            # Fetch task result using async method
            res = await self._fetch(
                api_key=api_key,
                task=task_response,
            )

            # Covers non-OK status, empty output and FAILED / CANCELED.
            if self._has_failed(res):
                raise RuntimeError(f"Failed to fetch result: {res}")

            # res.output is a dict when using BaseAsyncAioApi. Without a
            # task_status field the task is assumed to be completed.
            if not (
                isinstance(res.output, dict) and "task_status" in res.output
            ):
                break

            if res.output["task_status"] == "SUCCEEDED":
                break
            # Continue polling for PENDING, RUNNING, etc.

            # Check timeout
            if time.monotonic() - start_time > max_wait_time:
                raise TimeoutError(
                    f"Video generation timeout after {max_wait_time}s",
                )

        # Handle request ID
        if not request_id:
            request_id = (
                res.request_id if res.request_id else str(uuid.uuid4())
            )

        # Log trace event if provided
        if trace_event:
            trace_event.on_log(
                "",
                **{
                    "step_suffix": "results",
                    "payload": {
                        "request_id": request_id,
                        "speech_to_video_result": res,
                    },
                },
            )

        # Extract video URL from response (BaseAsyncAioApi returns dicts)
        if not (isinstance(res.output, dict) and "results" in res.output):
            raise RuntimeError(
                f"No results found in response: {res.output}",
            )

        results = res.output["results"]
        if isinstance(results, dict):
            video_url = results.get("video_url")
        else:
            video_url = getattr(results, "video_url", None)

        if not video_url:
            raise RuntimeError(
                f"Failed to extract video URL from response: {res}",
            )

        # Extract video duration from usage
        video_duration = None
        if hasattr(res, "usage") and res.usage:
            if isinstance(res.usage, dict):
                video_duration = res.usage.get("duration")
            else:
                video_duration = getattr(res.usage, "duration", None)

        return SpeechToVideoOutput(
            video_url=video_url,
            request_id=request_id,
            video_duration=video_duration,
        )