dashscope 1.8.0__py3-none-any.whl → 1.25.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. dashscope/__init__.py +61 -14
  2. dashscope/aigc/__init__.py +10 -3
  3. dashscope/aigc/chat_completion.py +282 -0
  4. dashscope/aigc/code_generation.py +145 -0
  5. dashscope/aigc/conversation.py +71 -12
  6. dashscope/aigc/generation.py +288 -16
  7. dashscope/aigc/image_synthesis.py +473 -31
  8. dashscope/aigc/multimodal_conversation.py +299 -14
  9. dashscope/aigc/video_synthesis.py +610 -0
  10. dashscope/api_entities/aiohttp_request.py +8 -5
  11. dashscope/api_entities/api_request_data.py +4 -2
  12. dashscope/api_entities/api_request_factory.py +68 -20
  13. dashscope/api_entities/base_request.py +20 -3
  14. dashscope/api_entities/chat_completion_types.py +344 -0
  15. dashscope/api_entities/dashscope_response.py +243 -15
  16. dashscope/api_entities/encryption.py +179 -0
  17. dashscope/api_entities/http_request.py +216 -62
  18. dashscope/api_entities/websocket_request.py +43 -34
  19. dashscope/app/__init__.py +5 -0
  20. dashscope/app/application.py +203 -0
  21. dashscope/app/application_response.py +246 -0
  22. dashscope/assistants/__init__.py +16 -0
  23. dashscope/assistants/assistant_types.py +175 -0
  24. dashscope/assistants/assistants.py +311 -0
  25. dashscope/assistants/files.py +197 -0
  26. dashscope/audio/__init__.py +4 -2
  27. dashscope/audio/asr/__init__.py +17 -1
  28. dashscope/audio/asr/asr_phrase_manager.py +203 -0
  29. dashscope/audio/asr/recognition.py +167 -27
  30. dashscope/audio/asr/transcription.py +107 -14
  31. dashscope/audio/asr/translation_recognizer.py +1006 -0
  32. dashscope/audio/asr/vocabulary.py +177 -0
  33. dashscope/audio/qwen_asr/__init__.py +7 -0
  34. dashscope/audio/qwen_asr/qwen_transcription.py +189 -0
  35. dashscope/audio/qwen_omni/__init__.py +11 -0
  36. dashscope/audio/qwen_omni/omni_realtime.py +524 -0
  37. dashscope/audio/qwen_tts/__init__.py +5 -0
  38. dashscope/audio/qwen_tts/speech_synthesizer.py +77 -0
  39. dashscope/audio/qwen_tts_realtime/__init__.py +10 -0
  40. dashscope/audio/qwen_tts_realtime/qwen_tts_realtime.py +355 -0
  41. dashscope/audio/tts/__init__.py +2 -0
  42. dashscope/audio/tts/speech_synthesizer.py +5 -0
  43. dashscope/audio/tts_v2/__init__.py +12 -0
  44. dashscope/audio/tts_v2/enrollment.py +179 -0
  45. dashscope/audio/tts_v2/speech_synthesizer.py +886 -0
  46. dashscope/cli.py +157 -37
  47. dashscope/client/base_api.py +652 -87
  48. dashscope/common/api_key.py +2 -0
  49. dashscope/common/base_type.py +135 -0
  50. dashscope/common/constants.py +13 -16
  51. dashscope/common/env.py +2 -0
  52. dashscope/common/error.py +58 -22
  53. dashscope/common/logging.py +2 -0
  54. dashscope/common/message_manager.py +2 -0
  55. dashscope/common/utils.py +276 -46
  56. dashscope/customize/__init__.py +0 -0
  57. dashscope/customize/customize_types.py +192 -0
  58. dashscope/customize/deployments.py +146 -0
  59. dashscope/customize/finetunes.py +234 -0
  60. dashscope/embeddings/__init__.py +5 -1
  61. dashscope/embeddings/batch_text_embedding.py +208 -0
  62. dashscope/embeddings/batch_text_embedding_response.py +65 -0
  63. dashscope/embeddings/multimodal_embedding.py +118 -10
  64. dashscope/embeddings/text_embedding.py +13 -1
  65. dashscope/{file.py → files.py} +19 -4
  66. dashscope/io/input_output.py +2 -0
  67. dashscope/model.py +11 -2
  68. dashscope/models.py +43 -0
  69. dashscope/multimodal/__init__.py +20 -0
  70. dashscope/multimodal/dialog_state.py +56 -0
  71. dashscope/multimodal/multimodal_constants.py +28 -0
  72. dashscope/multimodal/multimodal_dialog.py +648 -0
  73. dashscope/multimodal/multimodal_request_params.py +313 -0
  74. dashscope/multimodal/tingwu/__init__.py +10 -0
  75. dashscope/multimodal/tingwu/tingwu.py +80 -0
  76. dashscope/multimodal/tingwu/tingwu_realtime.py +579 -0
  77. dashscope/nlp/__init__.py +0 -0
  78. dashscope/nlp/understanding.py +64 -0
  79. dashscope/protocol/websocket.py +3 -0
  80. dashscope/rerank/__init__.py +0 -0
  81. dashscope/rerank/text_rerank.py +69 -0
  82. dashscope/resources/qwen.tiktoken +151643 -0
  83. dashscope/threads/__init__.py +26 -0
  84. dashscope/threads/messages/__init__.py +0 -0
  85. dashscope/threads/messages/files.py +113 -0
  86. dashscope/threads/messages/messages.py +220 -0
  87. dashscope/threads/runs/__init__.py +0 -0
  88. dashscope/threads/runs/runs.py +501 -0
  89. dashscope/threads/runs/steps.py +112 -0
  90. dashscope/threads/thread_types.py +665 -0
  91. dashscope/threads/threads.py +212 -0
  92. dashscope/tokenizers/__init__.py +7 -0
  93. dashscope/tokenizers/qwen_tokenizer.py +111 -0
  94. dashscope/tokenizers/tokenization.py +125 -0
  95. dashscope/tokenizers/tokenizer.py +45 -0
  96. dashscope/tokenizers/tokenizer_base.py +32 -0
  97. dashscope/utils/__init__.py +0 -0
  98. dashscope/utils/message_utils.py +838 -0
  99. dashscope/utils/oss_utils.py +243 -0
  100. dashscope/utils/param_utils.py +29 -0
  101. dashscope/version.py +3 -1
  102. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/METADATA +53 -50
  103. dashscope-1.25.6.dist-info/RECORD +112 -0
  104. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/WHEEL +1 -1
  105. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/entry_points.txt +0 -1
  106. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info/licenses}/LICENSE +2 -4
  107. dashscope/deployment.py +0 -129
  108. dashscope/finetune.py +0 -149
  109. dashscope-1.8.0.dist-info/RECORD +0 -49
  110. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,648 @@
1
+ import json
2
+ import platform
3
+ import time
4
+ import threading
5
+ from abc import abstractmethod
6
+
7
+ import websocket
8
+
9
+ import dashscope
10
+ from dashscope.common.logging import logger
11
+ from dashscope.common.error import InputRequired
12
+ from dashscope.multimodal import dialog_state
13
+ from dashscope.multimodal.multimodal_constants import *
14
+ from dashscope.multimodal.multimodal_request_params import RequestParameters, get_random_uuid, DashHeader, \
15
+ RequestBodyInput, DashPayload, RequestToRespondParameters, RequestToRespondBodyInput
16
+ from dashscope.protocol.websocket import ActionType
17
+
18
+
19
class MultiModalCallback:
    """Event hooks for a voice-chat dialog.

    Every hook is a no-op returning ``None``; subclass this and override
    only the events you need.
    """

    def on_started(self, dialog_id: str) -> None:
        """Notify that the dialog has started.

        :param dialog_id: ID of the dialog taken from the server's started event.
        """

    def on_stopped(self) -> None:
        """Notify that the dialog has stopped."""

    def on_state_changed(self, state: 'dialog_state.DialogState') -> None:
        """Notify that the dialog state has changed.

        :param state: the new dialog state.
        """

    def on_speech_audio_data(self, data: bytes) -> None:
        """Deliver a chunk of synthesized audio.

        :param data: raw audio bytes received as a binary frame.
        """

    def on_error(self, error) -> None:
        """Called when an error occurs.

        :param error: error information (an exception or the raw server message).
        """

    def on_connected(self) -> None:
        """Called once the connection to the server has been opened."""

    def on_responding_started(self):
        """Called when the reply starts."""

    def on_responding_ended(self, payload):
        """Called when the reply ends.

        :param payload: the ``payload`` object of the server message.
        """

    def on_speech_started(self):
        """Called when the start of speech input is detected."""

    def on_speech_ended(self):
        """Called when the end of speech input is detected."""

    def on_speech_content(self, payload):
        """Deliver speech-recognition text.

        :param payload: the ``payload`` object of the server message.
        """

    def on_responding_content(self, payload):
        """Deliver the model's reply text.

        :param payload: the ``payload`` object of the server message.
        """

    def on_request_accepted(self):
        """Called when the interrupt request has been accepted."""

    def on_close(self, close_status_code, close_msg):
        """Called when the connection is closed.

        :param close_status_code: close status code.
        :param close_msg: close message.
        """
122
+
123
+
124
class MultiModalDialog:
    """Service class that manages a WebSocket connection for voice chat.

    The socket is driven by ``websocket.WebSocketApp`` on a daemon thread.
    Incoming frames are handed to ``_Response``, which reports events through
    the supplied ``MultiModalCallback``.
    """

    # Keep-alive settings passed to ``WebSocketApp.run_forever``.
    # websocket-client refuses to run when ping_interval <= ping_timeout,
    # so the timeout must stay strictly below the interval.
    _PING_INTERVAL_SECONDS = 10
    _PING_TIMEOUT_SECONDS = 5
    # How long ``_connect`` waits for the socket to report "connected".
    _CONNECT_TIMEOUT_SECONDS = 5

    def __init__(self,
                 app_id: str,
                 request_params: RequestParameters,
                 multimodal_callback: MultiModalCallback,
                 workspace_id: str = None,
                 url: str = None,
                 api_key: str = None,
                 dialog_id: str = None,
                 model: str = None
                 ):
        """
        Create a voice-dialog session.

        Only stores the parameters; no connection is opened until ``start``.

        :param app_id: application id created in the console.
        :param request_params: collection of request parameters (required).
        :param multimodal_callback: (MultiModalCallback) receives server events.
        :param workspace_id: workspace id, optional.
        :param url: (str) API URL; defaults to ``dashscope.base_websocket_api_url``.
        :param api_key: (str) API key; defaults to ``dashscope.api_key``.
        :param dialog_id: dialog id sent with follow-up directives.
        :param model: model name.
        :raises InputRequired: if ``request_params`` is None.
        """
        if request_params is None:
            raise InputRequired('request_params is required!')
        if url is None:
            url = dashscope.base_websocket_api_url
        if api_key is None:
            api_key = dashscope.api_key

        self.request_params = request_params
        self.model = model
        self._voice_detection = None
        self.thread = None
        self.ws = None
        self.request = _Request()
        self._callback = multimodal_callback
        self.url = url
        self.api_key = api_key
        self.workspace_id = workspace_id
        self.app_id = app_id
        self.dialog_id = dialog_id
        self.dialog_state = dialog_state.StateMachine()
        # self.close is handed over so the response handler can close the socket.
        self.response = _Response(self.dialog_state, self._callback, self.close)

    def _on_message(self, ws, message):
        """Route an incoming frame: text to the JSON handler, bytes to the audio handler."""
        logger.debug(f"<<<<<<< Received message: {message}")
        if isinstance(message, str):
            self.response.handle_text_response(message)
        elif isinstance(message, (bytes, bytearray)):
            self.response.handle_binary_response(message)

    def _on_error(self, ws, error):
        """Log a socket error and forward it to the callback."""
        logger.error(f"Error: {error}")
        if self._callback:
            self._callback.on_error(error)

    def _on_close(self, ws, close_status_code, close_msg):
        """Forward the close event, substituting defaults for a missing code/message."""
        try:
            logger.debug(
                "WebSocket connection closed with status {} and message {}".format(close_status_code, close_msg))
            if close_status_code is None:
                close_status_code = 1000
            if close_msg is None:
                close_msg = "websocket is closed"
            # cleanup() sets the callback to None; the close event may arrive after that.
            if self._callback:
                self._callback.on_close(close_status_code, close_msg)
        except Exception as e:
            logger.error(f"Error: {e}")

    def _on_open(self, ws):
        """Tell the callback that the connection is open."""
        if self._callback:
            self._callback.on_connected()

    def start(self, dialog_id, enable_voice_detection=False, task_id=None):
        """
        Open the WebSocket connection and send the 'Start' request.

        :param dialog_id: dialog id to continue; not needed for a new dialog.
            To inherit earlier history, pass the dialog_id recorded previously.
        :param enable_voice_detection: whether to enable voice detection, default False.
        :param task_id: request task id; generated automatically when omitted.
            Supply one to trace the request.
        """
        # NOTE(review): the dialog_id given here is used for the Start request only;
        # later directives send self.dialog_id from the constructor — confirm intended.
        self._voice_detection = enable_voice_detection
        self._connect(self.api_key)
        logger.debug("connected with server.")
        self._send_start_request(dialog_id, self.request_params, task_id=task_id)

    def start_speech(self):
        """Send the 'SendSpeech' directive (start uploading speech data)."""
        _send_speech_json = self.request.generate_common_direction_request("SendSpeech", self.dialog_id)
        self._send_text_frame(_send_speech_json)

    def send_audio_data(self, speech_data: bytes):
        """Send speech data as a binary frame."""
        self.__send_binary_frame(speech_data)

    def stop_speech(self):
        """Send the 'StopSpeech' directive (stop uploading speech data)."""
        _send_speech_json = self.request.generate_common_direction_request("StopSpeech", self.dialog_id)
        self._send_text_frame(_send_speech_json)

    def interrupt(self):
        """Send the 'RequestToSpeak' directive."""
        _send_speech_json = self.request.generate_common_direction_request("RequestToSpeak", self.dialog_id)
        self._send_text_frame(_send_speech_json)

    def request_to_respond(self,
                           request_type: str,
                           text: str,
                           parameters: RequestToRespondParameters = None):
        """Send the 'RequestToRespond' directive carrying ``text``.

        :param request_type: interaction type forwarded to the service.
        :param text: text to send.
        :param parameters: optional parameters for the directive.
        """
        _send_speech_json = self.request.generate_request_to_response_json(direction_name="RequestToRespond",
                                                                           dialog_id=self.dialog_id,
                                                                           request_type=request_type, text=text,
                                                                           parameters=parameters)
        self._send_text_frame(_send_speech_json)

    @abstractmethod
    def request_to_respond_prompt(self, text):
        """Ask the server for a text reply to a text request (no implementation here)."""
        return

    def local_responding_started(self):
        """Send the 'LocalRespondingStarted' directive (local TTS playback started)."""
        _send_speech_json = self.request.generate_common_direction_request("LocalRespondingStarted", self.dialog_id)
        self._send_text_frame(_send_speech_json)

    def local_responding_ended(self):
        """Send the 'LocalRespondingEnded' directive (local TTS playback ended)."""
        _send_speech_json = self.request.generate_common_direction_request("LocalRespondingEnded", self.dialog_id)
        self._send_text_frame(_send_speech_json)

    def send_heart_beat(self):
        """Send the 'HeartBeat' directive."""
        _send_speech_json = self.request.generate_common_direction_request("HeartBeat", self.dialog_id)
        self._send_text_frame(_send_speech_json)

    def update_info(self, parameters: RequestToRespondParameters = None):
        """Send the 'UpdateInfo' directive with the given parameters."""
        _send_speech_json = self.request.generate_update_info_json(direction_name="UpdateInfo",
                                                                   dialog_id=self.dialog_id,
                                                                   parameters=parameters)
        self._send_text_frame(_send_speech_json)

    def stop(self):
        """Send the 'Stop' request, or report a close if the socket is not connected."""
        if not self._is_connected():
            if self._callback:
                self._callback.on_close(1001, "websocket is not connected")
            return
        _send_speech_json = self.request.generate_stop_request("Stop", self.dialog_id)
        self._send_text_frame(_send_speech_json)

    def get_dialog_state(self) -> dialog_state.DialogState:
        """Return the current state of the dialog state machine."""
        return self.dialog_state.get_current_state()

    def get_conversation_mode(self) -> str:
        """get mode of conversation: support tap2talk/push2talk/duplex"""
        return self.request_params.upstream.mode

    # ---- internal helpers ----

    def _is_connected(self) -> bool:
        """Return True when a WebSocketApp exists and its socket reports connected."""
        return bool(self.ws is not None and self.ws.sock and self.ws.sock.connected)

    def _send_start_request(self, dialog_id: str, request_params: RequestParameters, task_id: str = None):
        """Build and send the 'Start' request."""
        _start_json = self.request.generate_start_request(
            workspace_id=self.workspace_id,
            direction_name="Start",
            dialog_id=dialog_id,
            app_id=self.app_id,
            request_params=request_params,
            model=self.model,
            task_id=task_id
        )
        # send start request
        self._send_text_frame(_start_json)

    def _run_forever(self):
        """Thread target: run the WebSocketApp event loop with keep-alive pings."""
        self.ws.run_forever(ping_interval=self._PING_INTERVAL_SECONDS,
                            ping_timeout=self._PING_TIMEOUT_SECONDS)

    def _connect(self, api_key: str):
        """Create the WebSocketApp, start it on a daemon thread and wait for the connection."""
        self.ws = websocket.WebSocketApp(self.url, header=self.request.get_websocket_header(api_key),
                                         on_open=self._on_open,
                                         on_message=self._on_message,
                                         on_error=self._on_error,
                                         on_close=self._on_close)
        self.thread = threading.Thread(target=self._run_forever)
        # NOTE(review): kept from the original; the interval passed to run_forever
        # presumably takes precedence over this attribute — confirm and remove.
        self.ws.ping_interval = 3
        self.thread.daemon = True
        self.thread.start()

        self._wait_for_connection()

    def close(self):
        """Close the WebSocket if it is currently connected."""
        if not self._is_connected():
            return
        self.ws.close()

    def _wait_for_connection(self):
        """Poll until the socket is connected or the connect timeout elapses."""
        deadline = time.monotonic() + self._CONNECT_TIMEOUT_SECONDS
        while not self._is_connected():
            if time.monotonic() >= deadline:
                # Make the timeout visible; the following send will fail on the closed socket.
                logger.error("WebSocket connection was not established within %s seconds",
                             self._CONNECT_TIMEOUT_SECONDS)
                return
            time.sleep(0.1)  # short sleep to avoid busy polling

    def _send_text_frame(self, text: str):
        """Send ``text`` as a text frame."""
        logger.info('>>>>>> send text frame : %s' % text)
        self.ws.send(text, websocket.ABNF.OPCODE_TEXT)

    def __send_binary_frame(self, binary: bytes):
        """Send ``binary`` as a binary frame."""
        self.ws.send(binary, websocket.ABNF.OPCODE_BINARY)

    def __del__(self):
        self.cleanup()

    def cleanup(self):
        """Close the socket, wait briefly for the worker thread and drop all references."""
        # getattr: __del__ can run on an instance whose __init__ raised before
        # these attributes were assigned.
        ws = getattr(self, "ws", None)
        worker = getattr(self, "thread", None)
        try:
            if ws is not None:
                ws.close()
            # Joining the current thread raises RuntimeError, so skip the join
            # when cleanup runs on the worker thread itself.
            if worker is not None and worker.is_alive() and worker is not threading.current_thread():
                worker.join(timeout=2)
        except Exception as e:
            logger.error(f"Error in cleanup: {e}")
        finally:
            # Always clear references, even if close/join failed.
            self.ws = None
            self.thread = None
            self._callback = None
            self.response = None
357
+
358
+
359
class _Request:
    """Builds the WebSocket headers and the JSON request frames for the dialog protocol."""

    def __init__(self):
        # websocket header
        self.ws_headers = None
        # request body for voice chat
        self.header = None
        self.payload = None
        # params remembered from the start request
        self.task_id = None
        self.app_id = None
        self.workspace_id = None

    def get_websocket_header(self, api_key):
        """Build, store and return the HTTP headers for the WebSocket handshake.

        :param api_key: API key placed in the ``Authorization`` header.
        :return: dict with ``User-Agent``, ``Authorization`` and ``Accept``.
        """
        ua = 'dashscope/%s; python/%s; platform/%s; processor/%s' % (
            '1.18.0',  # dashscope version  NOTE(review): hard-coded — confirm against the package version
            platform.python_version(),
            platform.platform(),
            platform.processor(),
        )
        self.ws_headers = {
            "User-Agent": ua,
            "Authorization": f"bearer {api_key}",
            "Accept": "application/json"
        }
        # Never write the API key to the log: log a copy with the credential masked.
        loggable_headers = dict(self.ws_headers, Authorization="bearer ******")
        logger.info('websocket header: {}'.format(loggable_headers))
        return self.ws_headers

    def generate_start_request(self, direction_name: str,
                               dialog_id: str,
                               app_id: str,
                               request_params: RequestParameters,
                               model: str = None,
                               workspace_id: str = None,
                               task_id: str = None
                               ) -> str:
        """
        Build the start request of the voice-chat service.

        :param direction_name: directive name.
        :param dialog_id: dialog id.
        :param app_id: console application id.
        :param request_params: ``parameters`` of the start request body.
        :param model: model name.
        :param workspace_id: console workspace id, optional.
        :param task_id: request task id; generated automatically when None.
        :return: the start request as a JSON string.
        """
        self.task_id = task_id
        # Remember the ids so follow-up directives (which read self.app_id)
        # carry the same application id instead of None.
        self.app_id = app_id
        self.workspace_id = workspace_id
        self._get_dash_request_header(ActionType.START)
        self._get_dash_request_payload(direction_name, dialog_id, app_id, workspace_id=workspace_id,
                                       request_params=request_params, model=model)
        return self._dump_command()

    def generate_common_direction_request(self, direction_name: str, dialog_id: str) -> str:
        """
        Build a plain directive request (continue action, no parameters).

        :param direction_name: directive name.
        :param dialog_id: dialog id.
        :return: the request as a JSON string.
        """
        self._get_dash_request_header(ActionType.CONTINUE)
        self._get_dash_request_payload(direction_name, dialog_id, self.app_id)
        return self._dump_command()

    def generate_stop_request(self, direction_name: str, dialog_id: str) -> str:
        """
        Build the stop request (finished action).

        :param direction_name: directive name.
        :param dialog_id: dialog id.
        :return: the request as a JSON string.
        """
        self._get_dash_request_header(ActionType.FINISHED)
        self._get_dash_request_payload(direction_name, dialog_id, self.app_id)
        return self._dump_command()

    def generate_request_to_response_json(self, direction_name: str, dialog_id: str, request_type: str, text: str,
                                          parameters: RequestToRespondParameters = None) -> str:
        """
        Build a directive request that carries text for the service to respond to.

        :param direction_name: directive name.
        :param dialog_id: dialog id.
        :param request_type: interaction type the service should use; per the original
            note, ``transcript`` converts the text to speech directly and ``prompt``
            sends the text to the model for an answer.
        :param text: the text.
        :param parameters: ``parameters`` of the request body.
        :return: the request as a JSON string.
        """
        self._get_dash_request_header(ActionType.CONTINUE)

        custom_input = RequestToRespondBodyInput(
            app_id=self.app_id,
            directive=direction_name,
            dialog_id=dialog_id,
            type_=request_type,
            text=text
        )

        self._get_dash_request_payload(direction_name, dialog_id, self.app_id, request_params=parameters,
                                       custom_input=custom_input)
        return self._dump_command()

    def generate_update_info_json(self, direction_name: str, dialog_id: str,
                                  parameters: RequestToRespondParameters = None) -> str:
        """
        Build a directive request that only carries parameters (no text).

        :param direction_name: directive name.
        :param dialog_id: dialog id.
        :param parameters: ``parameters`` of the request body.
        :return: the request as a JSON string.
        """
        self._get_dash_request_header(ActionType.CONTINUE)

        custom_input = RequestToRespondBodyInput(
            app_id=self.app_id,
            directive=direction_name,
            dialog_id=dialog_id,
        )

        self._get_dash_request_payload(direction_name, dialog_id, self.app_id, request_params=parameters,
                                       custom_input=custom_input)
        return self._dump_command()

    def _dump_command(self) -> str:
        """Serialize the current header and payload into the request JSON envelope."""
        return json.dumps({
            "header": self.header,
            "payload": self.payload
        })

    def _get_dash_request_header(self, action: str):
        """
        Build the protocol header of the request and store it in ``self.header``.

        :param action: ActionType protocol action; supports run-task, continue-task, finish-task.
        """
        # A task id is generated once and then reused by every later request.
        if self.task_id is None:
            self.task_id = get_random_uuid()
        self.header = DashHeader(action=action, task_id=self.task_id).to_dict()

    def _get_dash_request_payload(self, direction_name: str,
                                  dialog_id: str, app_id: str, workspace_id: str = None,
                                  request_params: RequestParameters = None, custom_input=None, model: str = None):
        """
        Build the protocol payload of the request and store it in ``self.payload``.

        :param direction_name: directive name inside the dialog protocol.
        :param dialog_id: dialog id.
        :param app_id: console application id.
        :param workspace_id: console workspace id.
        :param request_params: ``parameters`` of the request body.
        :param custom_input: pre-built input object; used instead of the default input when given.
        :param model: model name.
        """
        if custom_input is not None:
            body_input = custom_input
        else:
            body_input = RequestBodyInput(
                workspace_id=workspace_id,
                app_id=app_id,
                directive=direction_name,
                dialog_id=dialog_id
            )

        self.payload = DashPayload(
            model=model,
            input=body_input,
            parameters=request_params
        ).to_dict()
535
+
536
+
537
class _Response:
    """Parses server messages and dispatches them to the callback and the state machine."""

    def __init__(self, state: dialog_state.StateMachine, callback: MultiModalCallback, close_callback=None):
        """
        :param state: state machine updated on state-changed events.
        :param callback: receiver of the dialog events.
        :param close_callback: optional callable invoked after the stopped event.
        """
        super().__init__()
        self.dialog_id = None  # dialog id reported by the started event
        self.dialog_state = state
        self._callback = callback
        self._close_callback = close_callback

    def handle_text_response(self, response_json: str):
        """
        Handle a text frame from the voice-chat service.

        Messages whose header has a non-200 ``status_code`` or a ``task-failed``
        event are reported through ``on_error``; otherwise the output event is dispatched.

        :param response_json: raw JSON string received from the service.
        """
        logger.info("<<<<<< server response: %s" % response_json)
        try:
            message = json.loads(response_json)
        except json.JSONDecodeError:
            logger.error("Failed to parse message as JSON.")
            return

        header = message["header"]
        bad_status = "status_code" in header and header["status_code"] != 200
        task_failed = "event" in header and header["event"] == "task-failed"
        if bad_status or task_failed:
            logger.error("Server returned invalid message: %s" % response_json)
            if self._callback:
                self._callback.on_error(response_json)
            return

        payload = message["payload"]
        if "output" in payload and payload["output"] is not None:
            response_event = payload["output"]["event"]
            logger.info("Server response event: %s" % response_event)
            self._handle_text_response_in_conversation(response_event=response_event, response_json=message)

    def _handle_text_response_in_conversation(self, response_event: str, response_json: dict):
        """
        Dispatch one parsed server message according to its output event name.

        :param response_event: value of ``payload.output.event``.
        :param response_json: the whole parsed message.
        """
        payload = response_json["payload"]
        if response_event == RESPONSE_NAME_STARTED:
            self._handle_started(payload["output"])
        elif response_event == RESPONSE_NAME_STOPPED:
            self._handle_stopped()
        elif response_event == RESPONSE_NAME_STATE_CHANGED:
            self._handle_state_changed(payload["output"]["state"])
            logger.debug("service response change state: %s" % payload["output"]["state"])
        elif response_event == RESPONSE_NAME_REQUEST_ACCEPTED:
            self._handle_request_accepted()
        elif response_event == RESPONSE_NAME_SPEECH_STARTED:
            self._handle_speech_started()
        elif response_event == RESPONSE_NAME_SPEECH_ENDED:
            self._handle_speech_ended()
        elif response_event == RESPONSE_NAME_RESPONDING_STARTED:
            self._handle_responding_started()
        elif response_event == RESPONSE_NAME_RESPONDING_ENDED:
            self._handle_responding_ended(payload)
        elif response_event == RESPONSE_NAME_SPEECH_CONTENT:
            self._handle_speech_content(payload)
        elif response_event == RESPONSE_NAME_RESPONDING_CONTENT:
            self._handle_responding_content(payload)
        elif response_event == RESPONSE_NAME_ERROR:
            self._callback.on_error(json.dumps(response_json))
        elif response_event == RESPONSE_NAME_HEART_BEAT:
            logger.debug("Server response heart beat")
        else:
            # logging interpolates with %-style placeholders, not "{}".
            logger.error("Unknown response name: %s", response_event)

    def handle_binary_response(self, message: bytes):
        """Forward a binary frame to the callback as audio data."""
        self._callback.on_speech_audio_data(message)

    def _handle_request_accepted(self):
        self._callback.on_request_accepted()

    def _handle_started(self, payload: dict):
        """Record the dialog id from the started event and notify the callback."""
        self.dialog_id = payload["dialog_id"]
        self._callback.on_started(self.dialog_id)

    def _handle_stopped(self):
        """Notify the callback, then invoke the close callback if one was given."""
        self._callback.on_stopped()
        if self._close_callback is not None:
            self._close_callback()

    def _handle_state_changed(self, state: str):
        """
        Apply a state transition and report the resulting state.

        :param state: state name sent by the service.
        """
        self.dialog_state.change_state(state)
        self._callback.on_state_changed(self.dialog_state.get_current_state())

    def _handle_speech_started(self):
        self._callback.on_speech_started()

    def _handle_speech_ended(self):
        self._callback.on_speech_ended()

    def _handle_responding_started(self):
        self._callback.on_responding_started()

    def _handle_responding_ended(self, payload: dict):
        self._callback.on_responding_ended(payload)

    def _handle_speech_content(self, payload: dict):
        self._callback.on_speech_content(payload)

    def _handle_responding_content(self, payload: dict):
        self._callback.on_responding_content(payload)