dashscope 1.23.3__py3-none-any.whl → 1.23.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dashscope might be problematic. Click here for more details.
- dashscope/aigc/image_synthesis.py +1 -1
- dashscope/audio/asr/vocabulary.py +8 -7
- dashscope/audio/tts_v2/enrollment.py +13 -12
- dashscope/audio/tts_v2/speech_synthesizer.py +6 -6
- dashscope/multimodal/__init__.py +0 -0
- dashscope/multimodal/dialog_state.py +56 -0
- dashscope/multimodal/multimodal_constants.py +27 -0
- dashscope/multimodal/multimodal_dialog.py +608 -0
- dashscope/multimodal/multimodal_request_params.py +269 -0
- dashscope/version.py +1 -1
- {dashscope-1.23.3.dist-info → dashscope-1.23.5.dist-info}/METADATA +1 -1
- {dashscope-1.23.3.dist-info → dashscope-1.23.5.dist-info}/RECORD +16 -11
- {dashscope-1.23.3.dist-info → dashscope-1.23.5.dist-info}/LICENSE +0 -0
- {dashscope-1.23.3.dist-info → dashscope-1.23.5.dist-info}/WHEEL +0 -0
- {dashscope-1.23.3.dist-info → dashscope-1.23.5.dist-info}/entry_points.txt +0 -0
- {dashscope-1.23.3.dist-info → dashscope-1.23.5.dist-info}/top_level.txt +0 -0
|
@@ -171,7 +171,7 @@ class ImageSynthesis(BaseAsyncApi):
|
|
|
171
171
|
model, mask_image_url, api_key)
|
|
172
172
|
if is_upload:
|
|
173
173
|
has_upload = True
|
|
174
|
-
input['mask_image_url'] =
|
|
174
|
+
input['mask_image_url'] = res_mask_image_url
|
|
175
175
|
|
|
176
176
|
if base_image_url is not None and base_image_url:
|
|
177
177
|
is_upload, res_base_image_url = check_and_upload_local(
|
|
@@ -12,14 +12,15 @@ from dashscope.common.logging import logger
|
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class VocabularyServiceException(Exception):
|
|
15
|
-
def __init__(self, status_code: int, code: str,
|
|
15
|
+
def __init__(self, request_id: str, status_code: int, code: str,
|
|
16
16
|
error_message: str) -> None:
|
|
17
|
+
self._request_id = request_id
|
|
17
18
|
self._status_code = status_code
|
|
18
19
|
self._code = code
|
|
19
20
|
self._error_message = error_message
|
|
20
21
|
|
|
21
22
|
def __str__(self):
|
|
22
|
-
return f'Status Code: {self._status_code}, Code: {self._code}, Error Message: {self._error_message}'
|
|
23
|
+
return f'Request: {self._request_id}, Status Code: {self._status_code}, Code: {self._code}, Error Message: {self._error_message}'
|
|
23
24
|
|
|
24
25
|
|
|
25
26
|
class VocabularyService(BaseApi):
|
|
@@ -86,7 +87,7 @@ class VocabularyService(BaseApi):
|
|
|
86
87
|
self._last_request_id = response.request_id
|
|
87
88
|
return response.output['vocabulary_id']
|
|
88
89
|
else:
|
|
89
|
-
raise VocabularyServiceException(response.status_code,
|
|
90
|
+
raise VocabularyServiceException(response.request_id, response.status_code,
|
|
90
91
|
response.code, response.message)
|
|
91
92
|
|
|
92
93
|
def list_vocabularies(self,
|
|
@@ -117,7 +118,7 @@ class VocabularyService(BaseApi):
|
|
|
117
118
|
self._last_request_id = response.request_id
|
|
118
119
|
return response.output['vocabulary_list']
|
|
119
120
|
else:
|
|
120
|
-
raise VocabularyServiceException(response.status_code,
|
|
121
|
+
raise VocabularyServiceException(response.request_id, response.status_code,
|
|
121
122
|
response.code, response.message)
|
|
122
123
|
|
|
123
124
|
def query_vocabulary(self, vocabulary_id: str) -> List[dict]:
|
|
@@ -134,7 +135,7 @@ class VocabularyService(BaseApi):
|
|
|
134
135
|
self._last_request_id = response.request_id
|
|
135
136
|
return response.output
|
|
136
137
|
else:
|
|
137
|
-
raise VocabularyServiceException(response.status_code,
|
|
138
|
+
raise VocabularyServiceException(response.request_id, response.status_code,
|
|
138
139
|
response.code, response.message)
|
|
139
140
|
|
|
140
141
|
def update_vocabulary(self, vocabulary_id: str,
|
|
@@ -153,7 +154,7 @@ class VocabularyService(BaseApi):
|
|
|
153
154
|
self._last_request_id = response.request_id
|
|
154
155
|
return
|
|
155
156
|
else:
|
|
156
|
-
raise VocabularyServiceException(response.status_code,
|
|
157
|
+
raise VocabularyServiceException(response.request_id, response.status_code,
|
|
157
158
|
response.code, response.message)
|
|
158
159
|
|
|
159
160
|
def delete_vocabulary(self, vocabulary_id: str) -> None:
|
|
@@ -169,7 +170,7 @@ class VocabularyService(BaseApi):
|
|
|
169
170
|
self._last_request_id = response.request_id
|
|
170
171
|
return
|
|
171
172
|
else:
|
|
172
|
-
raise VocabularyServiceException(response.status_code,
|
|
173
|
+
raise VocabularyServiceException(response.request_id, response.status_code,
|
|
173
174
|
response.code, response.message)
|
|
174
175
|
|
|
175
176
|
def get_last_request_id(self):
|
|
@@ -12,14 +12,15 @@ from dashscope.common.logging import logger
|
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class VoiceEnrollmentException(Exception):
|
|
15
|
-
def __init__(self, status_code: int, code: str,
|
|
15
|
+
def __init__(self, request_id: str, status_code: int, code: str,
|
|
16
16
|
error_message: str) -> None:
|
|
17
|
+
self._request_id = request_id
|
|
17
18
|
self._status_code = status_code
|
|
18
19
|
self._code = code
|
|
19
20
|
self._error_message = error_message
|
|
20
21
|
|
|
21
22
|
def __str__(self):
|
|
22
|
-
return f'Status Code: {self._status_code}, Code: {self._code}, Error Message: {self._error_message}'
|
|
23
|
+
return f'Request: {self._request_id}, Status Code: {self._status_code}, Code: {self._code}, Error Message: {self._error_message}'
|
|
23
24
|
|
|
24
25
|
|
|
25
26
|
class VoiceEnrollmentService(BaseApi):
|
|
@@ -81,11 +82,11 @@ class VoiceEnrollmentService(BaseApi):
|
|
|
81
82
|
'prefix': prefix,
|
|
82
83
|
'url': url,
|
|
83
84
|
}, )
|
|
85
|
+
self._last_request_id = response.request_id
|
|
84
86
|
if response.status_code == 200:
|
|
85
|
-
self._last_request_id = response.request_id
|
|
86
87
|
return response.output['voice_id']
|
|
87
88
|
else:
|
|
88
|
-
raise VoiceEnrollmentException(response.status_code, response.code,
|
|
89
|
+
raise VoiceEnrollmentException(response.request_id, response.status_code, response.code,
|
|
89
90
|
response.message)
|
|
90
91
|
|
|
91
92
|
def list_voices(self,
|
|
@@ -111,11 +112,11 @@ class VoiceEnrollmentService(BaseApi):
|
|
|
111
112
|
'page_index': page_index,
|
|
112
113
|
'page_size': page_size,
|
|
113
114
|
}, )
|
|
115
|
+
self._last_request_id = response.request_id
|
|
114
116
|
if response.status_code == 200:
|
|
115
|
-
self._last_request_id = response.request_id
|
|
116
117
|
return response.output['voice_list']
|
|
117
118
|
else:
|
|
118
|
-
raise VoiceEnrollmentException(response.status_code, response.code,
|
|
119
|
+
raise VoiceEnrollmentException(response.request_id, response.status_code, response.code,
|
|
119
120
|
response.message)
|
|
120
121
|
|
|
121
122
|
def query_voice(self, voice_id: str) -> List[str]:
|
|
@@ -128,11 +129,11 @@ class VoiceEnrollmentService(BaseApi):
|
|
|
128
129
|
'action': 'query_voice',
|
|
129
130
|
'voice_id': voice_id,
|
|
130
131
|
}, )
|
|
132
|
+
self._last_request_id = response.request_id
|
|
131
133
|
if response.status_code == 200:
|
|
132
|
-
self._last_request_id = response.request_id
|
|
133
134
|
return response.output
|
|
134
135
|
else:
|
|
135
|
-
raise VoiceEnrollmentException(response.status_code, response.code,
|
|
136
|
+
raise VoiceEnrollmentException(response.request_id, response.status_code, response.code,
|
|
136
137
|
response.message)
|
|
137
138
|
|
|
138
139
|
def update_voice(self, voice_id: str, url: str) -> None:
|
|
@@ -146,11 +147,11 @@ class VoiceEnrollmentService(BaseApi):
|
|
|
146
147
|
'voice_id': voice_id,
|
|
147
148
|
'url': url,
|
|
148
149
|
}, )
|
|
150
|
+
self._last_request_id = response.request_id
|
|
149
151
|
if response.status_code == 200:
|
|
150
|
-
self._last_request_id = response.request_id
|
|
151
152
|
return
|
|
152
153
|
else:
|
|
153
|
-
raise VoiceEnrollmentException(response.status_code, response.code,
|
|
154
|
+
raise VoiceEnrollmentException(response.request_id, response.status_code, response.code,
|
|
154
155
|
response.message)
|
|
155
156
|
|
|
156
157
|
def delete_voice(self, voice_id: str) -> None:
|
|
@@ -162,11 +163,11 @@ class VoiceEnrollmentService(BaseApi):
|
|
|
162
163
|
'action': 'delete_voice',
|
|
163
164
|
'voice_id': voice_id,
|
|
164
165
|
}, )
|
|
166
|
+
self._last_request_id = response.request_id
|
|
165
167
|
if response.status_code == 200:
|
|
166
|
-
self._last_request_id = response.request_id
|
|
167
168
|
return
|
|
168
169
|
else:
|
|
169
|
-
raise VoiceEnrollmentException(response.status_code, response.code,
|
|
170
|
+
raise VoiceEnrollmentException(response.request_id, response.status_code, response.code,
|
|
170
171
|
response.message)
|
|
171
172
|
|
|
172
173
|
def get_last_request_id(self):
|
|
@@ -134,9 +134,7 @@ class Request:
|
|
|
134
134
|
'task_group': 'audio',
|
|
135
135
|
'task': 'tts',
|
|
136
136
|
'function': 'SpeechSynthesizer',
|
|
137
|
-
'input': {
|
|
138
|
-
'text': ''
|
|
139
|
-
},
|
|
137
|
+
'input': {},
|
|
140
138
|
'parameters': {
|
|
141
139
|
'voice': self.voice,
|
|
142
140
|
'volume': self.volume,
|
|
@@ -179,9 +177,7 @@ class Request:
|
|
|
179
177
|
'streaming': WebsocketStreamingMode.DUPLEX,
|
|
180
178
|
},
|
|
181
179
|
'payload': {
|
|
182
|
-
'input': {
|
|
183
|
-
'text': ''
|
|
184
|
-
},
|
|
180
|
+
'input': {},
|
|
185
181
|
},
|
|
186
182
|
}
|
|
187
183
|
return json.dumps(cmd)
|
|
@@ -514,6 +510,10 @@ class SpeechSynthesizer:
|
|
|
514
510
|
otherwise, it will wait indefinitely.
|
|
515
511
|
"""
|
|
516
512
|
# print('还不支持非流式语音合成sdk调用大模型,使用流式模拟')
|
|
513
|
+
if self.additional_params is None:
|
|
514
|
+
self.additional_params = {"enable_ssml":True}
|
|
515
|
+
else:
|
|
516
|
+
self.additional_params["enable_ssml"] = True
|
|
517
517
|
if not self.callback:
|
|
518
518
|
self.callback = ResultCallback()
|
|
519
519
|
self.__start_stream()
|
|
File without changes
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# dialog_state.py
|
|
2
|
+
|
|
3
|
+
from enum import Enum
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class DialogState(Enum):
    """States a dialog bot can be in.

    Attributes:
        IDLE: The bot is idle.
        LISTENING: The bot is listening to user input.
        THINKING: The bot is thinking.
        RESPONDING: The bot is generating or delivering a reply.
    """
    IDLE = 'Idle'
    LISTENING = 'Listening'
    THINKING = 'Thinking'
    RESPONDING = 'Responding'


class StateMachine:
    """Tracks the bot's current dialog state.

    Attributes:
        current_state (DialogState): The current state.
    """

    def __init__(self):
        # A freshly created state machine always starts out idle.
        self.current_state = DialogState.IDLE

    def change_state(self, new_state: str) -> None:
        """Switch the current state to ``new_state``.

        Args:
            new_state: The state to switch to, given as a state value
                string (for example ``'Listening'``). A ``DialogState``
                member is accepted as well.

        Raises:
            ValueError: If ``new_state`` is not a valid state. The current
                state is left unchanged in that case.
        """
        try:
            # Enum lookup by value; calling the enum with one of its own
            # members simply returns that member.
            self.current_state = DialogState(new_state)
        except ValueError:
            # Keep the original error message callers may already rely on.
            raise ValueError("无效的状态类型") from None

    def get_current_state(self) -> DialogState:
        """Return the current state.

        Returns:
            DialogState: The current state.
        """
        return self.current_state
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
|
|
4
|
+
# multimodal conversation request directive
|
|
5
|
+
|
|
6
|
+
class RequestToRespondType:
    """Values for the ``request_type`` of a RequestToRespond directive."""
    # 'transcript': the text is converted to speech directly.
    TRANSCRIPT = 'transcript'
    # 'prompt': the text is sent to the large model to be answered.
    PROMPT = 'prompt'
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# multimodal conversation response directive
|
|
12
|
+
RESPONSE_NAME_TASK_STARTED = "task-started"
RESPONSE_NAME_RESULT_GENERATED = "result-generated"
RESPONSE_NAME_TASK_FINISHED = "task-finished"

RESPONSE_NAME_TASK_FAILED = "TaskFailed"
RESPONSE_NAME_STARTED = "Started"
RESPONSE_NAME_STOPPED = "Stopped"
RESPONSE_NAME_STATE_CHANGED = "DialogStateChanged"
RESPONSE_NAME_REQUEST_ACCEPTED = "RequestAccepted"
RESPONSE_NAME_SPEECH_STARTED = "SpeechStarted"
RESPONSE_NAME_SPEECH_ENDED = "SpeechEnded"  # sent by the server when it detects the ASR end-of-speech point; optional event
RESPONSE_NAME_RESPONDING_STARTED = "RespondingStarted"  # AI voice response begins; the SDK should get ready to receive audio data sent by the server
RESPONSE_NAME_RESPONDING_ENDED = "RespondingEnded"  # AI voice response ended
RESPONSE_NAME_SPEECH_CONTENT = "SpeechContent"  # text recognized from the user's speech; streamed, full-text output
RESPONSE_NAME_RESPONDING_CONTENT = "RespondingContent"  # text the system outputs externally; streamed, full-text output
RESPONSE_NAME_ERROR = "Error"  # error reported by the server during the dialog
|
|
@@ -0,0 +1,608 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import platform
|
|
3
|
+
import time
|
|
4
|
+
import threading
|
|
5
|
+
from abc import abstractmethod
|
|
6
|
+
|
|
7
|
+
import websocket
|
|
8
|
+
|
|
9
|
+
import dashscope
|
|
10
|
+
from dashscope.common.logging import logger
|
|
11
|
+
from dashscope.common.error import InputRequired
|
|
12
|
+
from dashscope.multimodal import dialog_state
|
|
13
|
+
from dashscope.multimodal.multimodal_constants import *
|
|
14
|
+
from dashscope.multimodal.multimodal_request_params import RequestParameters, get_random_uuid, DashHeader, \
|
|
15
|
+
RequestBodyInput, DashPayload, RequestToRespondParameters, RequestToRespondBodyInput
|
|
16
|
+
from dashscope.protocol.websocket import ActionType
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class MultiModalCallback:
    """Callback interface for the events raised during a voice chat.

    Every hook is a no-op here; subclass and override the ones you need.
    """

    def on_started(self, dialog_id: str) -> None:
        """Called when the dialog has started.

        :param dialog_id: ID of the dialog passed to the callback
        """

    def on_stopped(self) -> None:
        """Called when the dialog has stopped."""

    def on_state_changed(self, state: 'dialog_state.DialogState') -> None:
        """Called when the dialog state changes.

        :param state: the new dialog state
        """

    def on_speech_audio_data(self, data: bytes) -> None:
        """Called with synthesized audio data.

        :param data: audio data
        """

    def on_error(self, error) -> None:
        """Called when an error occurs.

        :param error: error information
        """

    def on_connected(self) -> None:
        """Called after the connection to the server is established."""

    def on_responding_started(self):
        """Called when the reply starts."""

    def on_responding_ended(self, payload):
        """Called when the reply ends."""

    def on_speech_started(self):
        """Called when the start of speech input is detected."""

    def on_speech_ended(self):
        """Called when the end of speech input is detected."""

    def on_speech_content(self, payload):
        """Called with the speech recognition text.

        :param payload: text
        """

    def on_responding_content(self, payload):
        """Called with the large model's reply text.

        :param payload: text
        """

    def on_request_accepted(self):
        """Called when the interrupt request has been accepted."""

    def on_close(self, close_status_code, close_msg):
        """Called when the connection is closed.

        :param close_status_code: close status code
        :param close_msg: close message
        """
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class MultiModalDialog:
    """Service class that manages a WebSocket connection for voice chat."""

    def __init__(self,
                 workspace_id: str,
                 app_id: str,
                 request_params: RequestParameters,
                 multimodal_callback: MultiModalCallback,
                 url: str = None,
                 api_key: str = None,
                 dialog_id: str = None,
                 model: str = None
                 ):
        """Create a voice dialog session.

        Only stores the parameters needed to talk to the model; the
        connection itself is opened by :meth:`start`.

        :param workspace_id: the customer's workspace id
        :param app_id: id of the application the customer created in the
            console; the dialog system to use can be derived from its value
        :param request_params: collection of request parameters (required)
        :param multimodal_callback: callback object that handles the
            messages coming from the server
        :param url: API URL; defaults to ``dashscope.base_websocket_api_url``
        :param api_key: unique key of the calling application; defaults to
            ``dashscope.api_key``
        :param dialog_id: dialog id; when given, the chat continues the
            earlier context
        :param model: model
        :raises InputRequired: if ``request_params`` is None
        """
        if request_params is None:
            raise InputRequired('request_params is required!')
        if url is None:
            url = dashscope.base_websocket_api_url
        if api_key is None:
            api_key = dashscope.api_key

        self.request_params = request_params
        self.model = model
        self._voice_detection = None
        self.thread = None
        self.ws = None
        self.request = _Request()
        self._callback = multimodal_callback
        self.url = url
        self.api_key = api_key
        self.workspace_id = workspace_id
        self.app_id = app_id
        self.dialog_id = dialog_id
        self.dialog_state = dialog_state.StateMachine()
        # self.close is handed over so the response handler can close the
        # connection itself.
        self.response = _Response(self.dialog_state, self._callback, self.close)

    def _on_message(self, ws, message):
        # Lazy %-style arguments: binary audio frames pass through here too,
        # so the message is only rendered when debug logging is enabled.
        logger.debug("<<<<<<< Received message: %s", message)
        response = self.response
        if response is None:
            # cleanup() has already run; drop frames that arrive late.
            return
        if isinstance(message, str):
            response.handle_text_response(message)
        elif isinstance(message, (bytes, bytearray)):
            response.handle_binary_response(message)

    def _on_error(self, ws, error):
        logger.error(f"Error: {error}")
        if self._callback:
            self._callback.on_error(error)

    def _on_close(self, ws, close_status_code, close_msg):
        try:
            logger.debug("WebSocket connection closed with status {} and message {}".format(close_status_code, close_msg))
            # Substitute defaults when no status/message was supplied.
            if close_status_code is None:
                close_status_code = 1000
            if close_msg is None:
                close_msg = "websocket is closed"
            # cleanup() resets the callback to None; guard like _on_error.
            if self._callback:
                self._callback.on_close(close_status_code, close_msg)
        except Exception as e:
            logger.error(f"Error: {e}")

    def _on_open(self, ws):
        # cleanup() resets the callback to None; guard like _on_error.
        if self._callback:
            self._callback.on_connected()

    # def _on_pong(self, _):
    #     _log.debug("on pong")

    def start(self, dialog_id, enable_voice_detection=False):
        """Open the WebSocket connection and send the start request.

        :param dialog_id: context-inheritance marker. Not needed for a new
            dialog; to inherit an earlier dialog history, record the
            earlier dialog_id and pass it in.
        :param enable_voice_detection: whether to enable voice detection;
            optional, defaults to False
        """
        # NOTE(review): the dialog_id given here is used for the start
        # request only; the other directives send self.dialog_id from
        # __init__ -- confirm the two are meant to differ.
        self._voice_detection = enable_voice_detection
        self._connect(self.api_key)
        logger.debug("connected with server.")
        self._send_start_request(dialog_id, self.request_params)

    def start_speech(self):
        """Start uploading speech data (sends the SendSpeech directive)."""
        _send_speech_json = self.request.generate_common_direction_request("SendSpeech", self.dialog_id)
        self._send_text_frame(_send_speech_json)

    def send_audio_data(self, speech_data: bytes):
        """Send speech data as a binary frame."""
        self.__send_binary_frame(speech_data)

    def stop_speech(self):
        """Stop uploading speech data (sends the StopSpeech directive)."""
        _send_speech_json = self.request.generate_common_direction_request("StopSpeech", self.dialog_id)
        self._send_text_frame(_send_speech_json)

    def interrupt(self):
        """Send the RequestToSpeak directive to the server."""
        _send_speech_json = self.request.generate_common_direction_request("RequestToSpeak", self.dialog_id)
        self._send_text_frame(_send_speech_json)

    def request_to_respond(self,
                           request_type: str,
                           text: str,
                           parameters: RequestToRespondParameters = None):
        """Send a RequestToRespond directive carrying ``text``.

        :param request_type: interaction type the service should use
        :param text: the text to send
        :param parameters: optional parameters for the request body
        """
        _send_speech_json = self.request.generate_request_to_response_json(direction_name="RequestToRespond",
                                                                           dialog_id=self.dialog_id,
                                                                           request_type=request_type, text=text,
                                                                           parameters=parameters)
        self._send_text_frame(_send_speech_json)

    # NOTE(review): this class declares no ABCMeta metaclass, so the
    # decorator below does not prevent instantiation; the method is a stub.
    @abstractmethod
    def request_to_respond_prompt(self, text):
        """Ask the server to answer a text request with a text reply."""
        return

    def local_responding_started(self):
        """Report that local TTS playback has started."""
        _send_speech_json = self.request.generate_common_direction_request("LocalRespondingStarted", self.dialog_id)
        self._send_text_frame(_send_speech_json)

    def local_responding_ended(self):
        """Report that local TTS playback has ended."""
        _send_speech_json = self.request.generate_common_direction_request("LocalRespondingEnded", self.dialog_id)
        self._send_text_frame(_send_speech_json)

    def stop(self):
        """Send the Stop request.

        When the socket is not connected, nothing is sent; the callback's
        ``on_close`` is invoked with code 1001 instead.
        """
        if self.ws is None or not self.ws.sock or not self.ws.sock.connected:
            # cleanup() resets the callback to None; guard like _on_error.
            if self._callback:
                self._callback.on_close(1001, "websocket is not connected")
            return
        _send_speech_json = self.request.generate_stop_request("Stop", self.dialog_id)
        self._send_text_frame(_send_speech_json)

    def get_dialog_state(self) -> dialog_state.DialogState:
        """Return the current dialog state."""
        return self.dialog_state.get_current_state()

    def get_conversation_mode(self) -> str:
        """get mode of conversation: support tap2talk/push2talk/duplex"""
        return self.request_params.upstream.mode

    # ---- internal methods ----

    def _send_start_request(self, dialog_id: str, request_params: RequestParameters):
        """Send the 'Start' request."""
        _start_json = self.request.generate_start_request(
            workspace_id=self.workspace_id,
            direction_name="Start",
            dialog_id=dialog_id,
            app_id=self.app_id,
            request_params=request_params,
            model=self.model
        )
        # send start request
        self._send_text_frame(_start_json)

    def _run_forever(self):
        # Thread target: runs the WebSocket loop with keep-alive pings.
        self.ws.run_forever(ping_interval=5, ping_timeout=4)

    def _connect(self, api_key: str):
        """Create the WebSocket app, run it on a daemon thread and wait
        for the connection to come up."""
        self.ws = websocket.WebSocketApp(self.url, header=self.request.get_websocket_header(api_key),
                                         on_open=self._on_open,
                                         on_message=self._on_message,
                                         on_error=self._on_error,
                                         on_close=self._on_close)
        self.thread = threading.Thread(target=self._run_forever)
        # NOTE(review): _run_forever passes ping_interval=5 explicitly;
        # confirm whether this attribute assignment has any effect.
        self.ws.ping_interval = 3
        self.thread.daemon = True
        self.thread.start()

        self._wait_for_connection()

    def close(self):
        """Close the WebSocket if it is currently connected."""
        if self.ws is None or not self.ws.sock or not self.ws.sock.connected:
            return
        self.ws.close()

    def _wait_for_connection(self):
        """Wait for the WebSocket connection to be established.

        Polls for at most 5 seconds and returns silently on timeout.
        """
        timeout = 5
        start_time = time.time()
        while not (self.ws.sock and self.ws.sock.connected) and (time.time() - start_time) < timeout:
            time.sleep(0.1)  # short sleep to avoid busy polling

    def _send_text_frame(self, text: str):
        logger.info('>>>>>> send text frame : %s', text)
        self.ws.send(text, websocket.ABNF.OPCODE_TEXT)

    def __send_binary_frame(self, binary: bytes):
        # _log.info('send binary frame length: %d' % len(binary))
        self.ws.send(binary, websocket.ABNF.OPCODE_BINARY)

    def __del__(self):
        self.cleanup()

    def cleanup(self):
        """Release all resources held by this dialog."""
        try:
            # getattr: __del__ also runs for an instance whose __init__
            # raised before these attributes were assigned.
            ws = getattr(self, 'ws', None)
            thread = getattr(self, 'thread', None)
            if ws:
                ws.close()
            if thread and thread.is_alive():
                # Give the WebSocket thread a moment to finish.
                thread.join(timeout=2)
            # Drop the references.
            self.ws = None
            self.thread = None
            self._callback = None
            self.response = None
        except Exception as e:
            logger.error(f"Error in cleanup: {e}")
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
class _Request:
    """Builds the WebSocket headers and the JSON request frames."""

    def __init__(self):
        # websocket header
        self.ws_headers = None
        # request body for voice chat
        self.header = None
        self.payload = None
        # params
        self.task_id = None
        # NOTE(review): nothing visible in this class assigns app_id or
        # workspace_id, yet the directive builders below send self.app_id
        # -- confirm whether generate_start_request should store them.
        self.app_id = None
        self.workspace_id = None

    def get_websocket_header(self, api_key):
        """Build, remember and return the WebSocket handshake headers.

        :param api_key: API key sent as the bearer token
        :return: the header dict (also stored in ``self.ws_headers``)
        """
        ua = 'dashscope/%s; python/%s; platform/%s; processor/%s' % (
            # NOTE(review): hard-coded version string -- confirm whether it
            # should follow the installed package version.
            '1.18.0',  # dashscope version
            platform.python_version(),
            platform.platform(),
            platform.processor(),
        )
        self.ws_headers = {
            "User-Agent": ua,
            "Authorization": f"bearer {api_key}",
            "Accept": "application/json"
        }
        # Never write the API key to the log: log a copy of the headers
        # with the credential masked.
        loggable_headers = dict(self.ws_headers, Authorization="bearer ******")
        logger.info('websocket header: {}'.format(loggable_headers))
        return self.ws_headers

    def generate_start_request(self, direction_name: str,
                               dialog_id: str,
                               workspace_id: str,
                               app_id: str,
                               request_params: RequestParameters,
                               model: str = None
                               ) -> str:
        """Build the start request of the voice chat service.

        :param direction_name: name of the directive
        :param dialog_id: dialog id
        :param workspace_id: console workspace id
        :param app_id: console application id
        :param request_params: parameters of the start request body
        :param model: model
        :return: the start request serialized as a JSON string
        """
        self._get_dash_request_header(ActionType.START)
        self._get_dash_request_payload(direction_name, dialog_id, app_id, workspace_id=workspace_id,
                                       request_params=request_params, model=model)
        return json.dumps({"header": self.header, "payload": self.payload})

    def generate_common_direction_request(self, direction_name: str, dialog_id: str) -> str:
        """Build a directive request of the voice chat service.

        :param direction_name: the directive
        :param dialog_id: dialog id
        :return: the directive request serialized as a JSON string
        """
        self._get_dash_request_header(ActionType.CONTINUE)
        self._get_dash_request_payload(direction_name, dialog_id, self.app_id)
        return json.dumps({"header": self.header, "payload": self.payload})

    def generate_stop_request(self, direction_name: str, dialog_id: str) -> str:
        """Build the stop request of the voice chat service.

        :param direction_name: name of the directive
        :param dialog_id: dialog id
        :return: the stop request serialized as a JSON string
        """
        self._get_dash_request_header(ActionType.FINISHED)
        self._get_dash_request_payload(direction_name, dialog_id, self.app_id)
        return json.dumps({"header": self.header, "payload": self.payload})

    # NOTE(review): this class declares no ABCMeta metaclass, so the
    # decorator below has no enforcing effect; the method is concrete.
    @abstractmethod
    def generate_request_to_response_json(self, direction_name: str, dialog_id: str, request_type: str, text: str,
                                          parameters: RequestToRespondParameters = None) -> str:
        """Build a RequestToRespond-style directive request.

        :param direction_name: the directive
        :param dialog_id: dialog id
        :param request_type: interaction type the service should use:
            ``transcript`` converts the text to speech directly,
            ``prompt`` sends the text to the large model to be answered
        :param text: the text
        :param parameters: parameters of the directive request body
        :return: the directive request serialized as a JSON string
        """
        self._get_dash_request_header(ActionType.CONTINUE)

        custom_input = RequestToRespondBodyInput(
            app_id=self.app_id,
            directive=direction_name,
            dialog_id=dialog_id,
            type_=request_type,
            text=text
        )

        self._get_dash_request_payload(direction_name, dialog_id, self.app_id, request_params=parameters,
                                       custom_input=custom_input)
        return json.dumps({"header": self.header, "payload": self.payload})

    def _get_dash_request_header(self, action: str):
        """Build the protocol header of a multimodal dialog request.

        The result is stored in ``self.header``.

        :param action: ActionType, the Bailian protocol action; supported:
            run-task, continue-task, finish-task
        """
        # One task id is generated lazily and reused for later frames.
        if self.task_id is None:
            self.task_id = get_random_uuid()
        self.header = DashHeader(action=action, task_id=self.task_id).to_dict()

    def _get_dash_request_payload(self, direction_name: str,
                                  dialog_id: str, app_id: str, workspace_id: str = None,
                                  request_params: RequestParameters = None, custom_input=None, model: str = None):
        """Build the protocol payload of a multimodal dialog request.

        The result is stored in ``self.payload``.

        :param direction_name: directive name inside the dialog protocol
        :param dialog_id: dialog id
        :param app_id: console application id
        :param workspace_id: console workspace id
        :param request_params: parameters of the request body
        :param custom_input: custom input; used as-is when given
        :param model: model
        """
        if custom_input is not None:
            body_input = custom_input
        else:
            body_input = RequestBodyInput(
                workspace_id=workspace_id,
                app_id=app_id,
                directive=direction_name,
                dialog_id=dialog_id
            )

        self.payload = DashPayload(
            model=model,
            input=body_input,
            parameters=request_params
        ).to_dict()
|
|
497
|
+
|
|
498
|
+
|
|
499
|
+
class _Response:
|
|
500
|
+
def __init__(self, state: dialog_state.StateMachine, callback: MultiModalCallback, close_callback=None):
    """Keep the collaborators used while handling server responses.

    :param state: state machine of the dialog
    :param callback: user callback that receives the events
    :param close_callback: optional callable kept for closing the connection
    """
    super().__init__()
    # Collaborators handed in by the owner.
    self._callback = callback
    self._close_callback = close_callback
    self.dialog_state = state
    # No dialog id is known yet.
    self.dialog_id = None
|
|
506
|
+
|
|
507
|
+
def handle_text_response(self, response_json: str):
    """Handle one text frame of the voice chat service.

    Error frames (a ``status_code`` other than 200, or a ``task-failed``
    event in the header) are logged and passed to the callback's
    ``on_error``. Otherwise the event found in the payload output is
    dispatched.

    :param response_json: raw JSON string received from the service
    """
    logger.info("<<<<<< server response: %s" % response_json)
    try:
        # Try to parse the message as JSON.
        parsed = json.loads(response_json)
        header = parsed["header"]

        if ("status_code" in header and header["status_code"] != 200) or \
                ("event" in header and header["event"] == "task-failed"):
            logger.error("Server returned invalid message: %s" % response_json)
            if self._callback:
                self._callback.on_error(response_json)
            return None

        payload = parsed["payload"]
        # Frames without an output carry nothing to dispatch.
        if "output" not in payload or payload["output"] is None:
            return None

        event_name = payload["output"]["event"]
        logger.info("Server response event: %s" % event_name)
        self._handle_text_response_in_conversation(response_event=event_name, response_json=parsed)

    except json.JSONDecodeError:
        logger.error("Failed to parse message as JSON.")
|
|
536
|
+
|
|
537
|
+
def _handle_text_response_in_conversation(self, response_event: str, response_json: dict):
|
|
538
|
+
payload = response_json["payload"]
|
|
539
|
+
try:
|
|
540
|
+
if response_event == RESPONSE_NAME_STARTED:
|
|
541
|
+
self._handle_started(payload["output"])
|
|
542
|
+
elif response_event == RESPONSE_NAME_STOPPED:
|
|
543
|
+
self._handle_stopped()
|
|
544
|
+
elif response_event == RESPONSE_NAME_STATE_CHANGED:
|
|
545
|
+
self._handle_state_changed(payload["output"]["state"])
|
|
546
|
+
logger.debug("service response change state: %s" % payload["output"]["state"])
|
|
547
|
+
elif response_event == RESPONSE_NAME_REQUEST_ACCEPTED:
|
|
548
|
+
self._handle_request_accepted()
|
|
549
|
+
elif response_event == RESPONSE_NAME_SPEECH_STARTED:
|
|
550
|
+
self._handle_speech_started()
|
|
551
|
+
elif response_event == RESPONSE_NAME_SPEECH_ENDED:
|
|
552
|
+
self._handle_speech_ended()
|
|
553
|
+
elif response_event == RESPONSE_NAME_RESPONDING_STARTED:
|
|
554
|
+
self._handle_responding_started()
|
|
555
|
+
elif response_event == RESPONSE_NAME_RESPONDING_ENDED:
|
|
556
|
+
self._handle_responding_ended(payload)
|
|
557
|
+
elif response_event == RESPONSE_NAME_SPEECH_CONTENT:
|
|
558
|
+
self._handle_speech_content(payload)
|
|
559
|
+
elif response_event == RESPONSE_NAME_RESPONDING_CONTENT:
|
|
560
|
+
self._handle_responding_content(payload)
|
|
561
|
+
elif response_event == RESPONSE_NAME_ERROR:
|
|
562
|
+
self._callback.on_error(json.dumps(response_json))
|
|
563
|
+
else:
|
|
564
|
+
logger.error("Unknown response name: {}", response_event)
|
|
565
|
+
except json.JSONDecodeError:
|
|
566
|
+
logger.error("Failed to parse message as JSON.")
|
|
567
|
+
|
|
568
|
+
def handle_binary_response(self, message: bytes):
|
|
569
|
+
# logger.debug('<<<recv binary {}'.format(len(message)))
|
|
570
|
+
self._callback.on_speech_audio_data(message)
|
|
571
|
+
|
|
572
|
+
def _handle_request_accepted(self):
|
|
573
|
+
self._callback.on_request_accepted()
|
|
574
|
+
|
|
575
|
+
def _handle_started(self, payload: dict):
|
|
576
|
+
self.dialog_id = payload["dialog_id"]
|
|
577
|
+
self._callback.on_started(self.dialog_id)
|
|
578
|
+
|
|
579
|
+
def _handle_stopped(self):
|
|
580
|
+
self._callback.on_stopped()
|
|
581
|
+
if self._close_callback is not None:
|
|
582
|
+
self._close_callback()
|
|
583
|
+
|
|
584
|
+
def _handle_state_changed(self, state: str):
|
|
585
|
+
"""
|
|
586
|
+
处理语音聊天状态流转.
|
|
587
|
+
:param state: 状态.
|
|
588
|
+
"""
|
|
589
|
+
self.dialog_state.change_state(state)
|
|
590
|
+
self._callback.on_state_changed(self.dialog_state.get_current_state())
|
|
591
|
+
|
|
592
|
+
def _handle_speech_started(self):
|
|
593
|
+
self._callback.on_speech_started()
|
|
594
|
+
|
|
595
|
+
def _handle_speech_ended(self):
|
|
596
|
+
self._callback.on_speech_ended()
|
|
597
|
+
|
|
598
|
+
def _handle_responding_started(self):
|
|
599
|
+
self._callback.on_responding_started()
|
|
600
|
+
|
|
601
|
+
def _handle_responding_ended(self, payload: dict):
|
|
602
|
+
self._callback.on_responding_ended(payload)
|
|
603
|
+
|
|
604
|
+
def _handle_speech_content(self, payload: dict):
|
|
605
|
+
self._callback.on_speech_content(payload)
|
|
606
|
+
|
|
607
|
+
def _handle_responding_content(self, payload: dict):
|
|
608
|
+
self._callback.on_responding_content(payload)
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
from dataclasses import dataclass, field, asdict
|
|
2
|
+
import uuid
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def get_random_uuid() -> str:
    """Generate and return a 32-character UUID string."""
    return uuid.uuid4().hex


@dataclass
class DashHeader:
    """Header section of a request message.

    ``task_id`` is generated per instance when the caller does not supply one.
    """
    action: str
    # default_factory runs for every instance; field(default=get_random_uuid())
    # is evaluated once at class creation and would be shared by all headers.
    task_id: str = field(default_factory=get_random_uuid)
    streaming: str = field(default="duplex")  # Defaults to duplex.

    def to_dict(self):
        """Return the header as a dict; ``request_id`` mirrors ``task_id``."""
        return {
            "action": self.action,
            "task_id": self.task_id,
            "request_id": self.task_id,
            "streaming": self.streaming
        }
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class DashPayloadParameters:
    """Base type for the ``parameters`` section of a payload."""

    def to_dict(self):
        """Placeholder serializer; the base implementation returns ``None``."""
        return None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class DashPayloadInput:
    """Base type for the ``input`` section of a payload."""

    def to_dict(self):
        """Placeholder serializer; the base implementation returns ``None``."""
        return None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class DashPayload:
    """Payload section of a request message."""
    task_group: str = "aigc"
    function: str = "generation"
    model: str = ""
    task: str = "multimodal-generation"
    parameters: DashPayloadParameters = None
    input: DashPayloadInput = None

    def to_dict(self):
        """Return the payload as a dict, leaving out sections that are ``None``."""
        result = dict(
            task_group=self.task_group,
            function=self.function,
            model=self.model,
            task=self.task,
        )
        # Optional sections are serialized through their own to_dict().
        for key in ("parameters", "input"):
            section = getattr(self, key)
            if section is not None:
                result[key] = section.to_dict()
        return result
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@dataclass
class RequestBodyInput(DashPayloadInput):
    """``input`` section carrying workspace, app, directive and dialog IDs."""
    workspace_id: str
    app_id: str
    directive: str
    dialog_id: str

    def to_dict(self):
        """Return all four fields as a dict keyed by field name."""
        keys = ("workspace_id", "app_id", "directive", "dialog_id")
        return {key: getattr(self, key) for key in keys}
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@dataclass
class Upstream:
    """Upstream (client to service) settings."""
    audio_format: str = "pcm"  # Upstream audio format, pcm by default; pcm/opus supported.
    type: str = "AudioOnly"  # Upstream type: AudioOnly for voice-only calls; AudioAndVideo uploads video.
    mode: str = "tap2talk"  # Client interaction mode: push2talk/tap2talk/duplex.
    # NOTE: a sample_rate field is disabled here and is not serialized.

    def to_dict(self):
        """Return the upstream settings as a dict."""
        result = {}
        result["type"] = self.type
        result["mode"] = self.mode
        result["audio_format"] = self.audio_format
        return result
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@dataclass
class Downstream:
    """Downstream (service to client) settings."""
    # intermediate_text values:
    #   transcript - return the user's speech recognition result
    #   dialog     - return intermediate results of the dialog system's answer
    # Several values may be given, separated by commas; default is transcript.
    voice: str = ""  # Voice timbre.
    sample_rate: int = 0  # Sample rate of the synthesized audio.
    intermediate_text: str = "transcript"  # Which intermediate texts are returned to the user.
    debug: bool = False  # Whether debug information is returned.
    # NOTE: a downstream "type" field (Text: no audio; Audio: audio output,
    # the default) is disabled here and is not serialized.
    audio_format: str = "pcm"  # Downstream audio format, pcm by default; pcm/mp3 supported.
    volume: int = 50  # Volume, 0-100.
    pitch_rate: int = 100  # Pitch, 50-200.
    speech_rate: int = 100  # Speech rate, 50-200.

    def to_dict(self):
        """Return the settings as a dict.

        ``voice`` and ``sample_rate`` are included only when they differ from
        their empty values (``""`` and ``0``).
        """
        result: dict = dict(
            intermediate_text=self.intermediate_text,
            debug=self.debug,
            audio_format=self.audio_format,
            volume=self.volume,
            pitch_rate=self.pitch_rate,
            speech_rate=self.speech_rate,
        )
        optional = (("voice", self.voice, ""), ("sample_rate", self.sample_rate, 0))
        for key, value, unset in optional:
            if value != unset:
                result[key] = value
        return result
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@dataclass
class DialogAttributes:
    """Dialog attributes: agent ID, prompt and vocabulary ID."""
    agent_id: str = None
    prompt: str = None
    vocabulary_id: str = None

    def to_dict(self):
        """Return all three attributes as a dict, including ``None`` values."""
        keys = ("agent_id", "prompt", "vocabulary_id")
        return {key: getattr(self, key) for key in keys}
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
@dataclass
class Locations:
    """Location information: city name, latitude and longitude."""
    city_name: str = None
    latitude: str = None
    longitude: str = None

    def to_dict(self):
        """Return all three fields as a dict, including ``None`` values."""
        keys = ("city_name", "latitude", "longitude")
        return {key: getattr(self, key) for key in keys}
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
@dataclass
class Network:
    """Network information holding an ``ip`` value."""
    ip: str = None

    def to_dict(self):
        """Return ``{"ip": ...}``, including a ``None`` value."""
        return dict(ip=self.ip)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
@dataclass
class Device:
    """Device information holding a ``uuid`` value."""
    uuid: str = None

    def to_dict(self):
        """Return ``{"uuid": ...}``, including a ``None`` value."""
        return dict(uuid=self.uuid)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
@dataclass
class ClientInfo:
    """Client information: a required user ID plus optional sub-sections."""
    user_id: str
    device: Device = None
    network: Network = None
    location: Locations = None

    def to_dict(self):
        """Return the client info as a dict, leaving out sections that are ``None``."""
        result = {"user_id": self.user_id}
        # Each optional section is serialized through its own to_dict().
        for key in ("device", "network", "location"):
            section = getattr(self, key)
            if section is not None:
                result[key] = section.to_dict()
        return result
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
@dataclass
class BizParams:
    """Optional business parameter dicts; each one defaults to ``None``."""
    user_defined_params: dict = None
    user_defined_tokens: dict = None
    tool_prompts: dict = None
    user_prompt_params: dict = None
    user_query_params: dict = None

    def to_dict(self):
        """Return only the fields that are not ``None``, in declaration order."""
        keys = (
            "user_defined_params",
            "user_defined_tokens",
            "tool_prompts",
            "user_prompt_params",
            "user_query_params",
        )
        result = {}
        for key in keys:
            value = getattr(self, key)
            if value is not None:
                result[key] = value
        return result
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
@dataclass
class RequestParameters(DashPayloadParameters):
    """``parameters`` section with upstream, downstream and client settings."""
    upstream: Upstream
    downstream: Downstream
    client_info: ClientInfo
    dialog_attributes: DialogAttributes = None
    biz_params: BizParams = None

    def to_dict(self):
        """Return the parameters as a dict.

        The three required sections are always present; ``dialog_attributes``
        and ``biz_params`` are included only when they are not ``None``.
        """
        required = ("upstream", "downstream", "client_info")
        result = {key: getattr(self, key).to_dict() for key in required}
        for key in ("dialog_attributes", "biz_params"):
            section = getattr(self, key)
            if section is not None:
                result[key] = section.to_dict()
        return result
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
@dataclass
class RequestToRespondParameters(DashPayloadParameters):
    """``parameters`` section with optional images and business parameters."""
    images: list = None
    biz_params: BizParams = None

    def to_dict(self):
        """Return only the fields that are not ``None``."""
        result = {} if self.images is None else {"images": self.images}
        if self.biz_params is not None:
            result["biz_params"] = self.biz_params.to_dict()
        return result
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
@dataclass
class RequestToRespondBodyInput(DashPayloadInput):
    """``input`` section with app, directive, dialog ID, type and text."""
    app_id: str
    directive: str
    dialog_id: str
    # Named type_ in Python; written out under the "type" key.
    type_: str = field(metadata={"alias": "type"})
    text: str = ""

    def to_dict(self):
        """Return the fields as a dict, with ``type_`` stored under ``"type"``."""
        result = {}
        result["app_id"] = self.app_id
        result["directive"] = self.directive
        result["dialog_id"] = self.dialog_id
        result["type"] = self.type_
        result["text"] = self.text
        return result
|
dashscope/version.py
CHANGED
|
@@ -3,13 +3,13 @@ dashscope/cli.py,sha256=amegoTkGOs6TlHMdoo4JVOqBePo3lGs745rc7leEyrE,24020
|
|
|
3
3
|
dashscope/files.py,sha256=vRDQygm3lOqBZR73o7KNHs1iTBVuvLncuwJNxIYjzAU,3981
|
|
4
4
|
dashscope/model.py,sha256=B5v_BtYLPqj6raClejBgdKg6WTGwhH_f-20pvsQqmsk,1491
|
|
5
5
|
dashscope/models.py,sha256=dE4mzXkl85G343qVylSGpURPRdA5pZSqXlx6PcxqC_Q,1275
|
|
6
|
-
dashscope/version.py,sha256=
|
|
6
|
+
dashscope/version.py,sha256=AEebMkhuRmXDos0SQUqvNS4ui-KBHqJSwnTbSjynlLU,74
|
|
7
7
|
dashscope/aigc/__init__.py,sha256=AuRhu_vA1K0tbs_C6DgcZYhTvxMuzDgpwHJNHzEPIHg,442
|
|
8
8
|
dashscope/aigc/chat_completion.py,sha256=ONlyyssIbfaKKcFo7cEKhHx5OCF2XX810HFzIExW1ho,14813
|
|
9
9
|
dashscope/aigc/code_generation.py,sha256=p_mxDKJLQMW0IjFD46JRlZuEZCRESSVKEfLlAevBtqw,10936
|
|
10
10
|
dashscope/aigc/conversation.py,sha256=95xEEY4ThZJysj5zy3aMw7ql9KLJVfD_1iHv9QZ17Ew,14282
|
|
11
11
|
dashscope/aigc/generation.py,sha256=xMcMu16rICTdjZiD_sPqYV_Ltdp4ewGzzfC7JD9VApY,17948
|
|
12
|
-
dashscope/aigc/image_synthesis.py,sha256=
|
|
12
|
+
dashscope/aigc/image_synthesis.py,sha256=Jgqmyv4jRxikgX7J18QrKKQ4OZAMxs6Q6YXObae3DhI,13363
|
|
13
13
|
dashscope/aigc/multimodal_conversation.py,sha256=1rZZRk_1lCdbVs7Rx1kJ5LvwWE1put5p_dQKdCX0ysY,5574
|
|
14
14
|
dashscope/aigc/video_synthesis.py,sha256=XQ3-NKYFmj5cIbUbLTbI0-FyC_fQp8eds6QmD1ZHj_0,13015
|
|
15
15
|
dashscope/api_entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -34,14 +34,14 @@ dashscope/audio/asr/asr_phrase_manager.py,sha256=vHOLExaKCtjedkihIu7gyfQyarR9rN5
|
|
|
34
34
|
dashscope/audio/asr/recognition.py,sha256=b_aAPvOKjpWdSiYhM_hp30sZ06QdmNBSDJwhiv78kHM,20932
|
|
35
35
|
dashscope/audio/asr/transcription.py,sha256=lYzPjh7jJQwjMoxx8-AY0YCMBKNKO0bi7xd5tZGSHPc,9094
|
|
36
36
|
dashscope/audio/asr/translation_recognizer.py,sha256=JgBmhkIl_kqH8uVwop6Fba5KlXccftKFrhaygN9PKjU,39680
|
|
37
|
-
dashscope/audio/asr/vocabulary.py,sha256=
|
|
37
|
+
dashscope/audio/asr/vocabulary.py,sha256=N0pMS2x1lDxqJ14FgTGKctfuVkR2_hlEsCNWFcgYpTY,6717
|
|
38
38
|
dashscope/audio/qwen_tts/__init__.py,sha256=JS3axY1grqO0aTIJufZ3KS1JsU6yf6y4K2CQlNvUK9I,132
|
|
39
39
|
dashscope/audio/qwen_tts/speech_synthesizer.py,sha256=7LHR-PXhn-VE1cCOp_82Jq0zE9rMc3xy3dszUeyLLNs,2927
|
|
40
40
|
dashscope/audio/tts/__init__.py,sha256=xYpMFseUZGgqgj_70zcX2VsLv-L7qxJ3d-bbdj_hO0I,245
|
|
41
41
|
dashscope/audio/tts/speech_synthesizer.py,sha256=vD1xQV-rew8qAsIaAGH5amsNtB0SqdtNhVHhJHGQ-xk,7622
|
|
42
42
|
dashscope/audio/tts_v2/__init__.py,sha256=me9a3_7KsHQxcJ8hx4SeKlY1e_ThHVvGMw7Yn0uoscM,333
|
|
43
|
-
dashscope/audio/tts_v2/enrollment.py,sha256
|
|
44
|
-
dashscope/audio/tts_v2/speech_synthesizer.py,sha256=
|
|
43
|
+
dashscope/audio/tts_v2/enrollment.py,sha256=-nrlywYSOP73Bm9ETTSxNnlp-B8ezJcUmd59mVvyvgk,6361
|
|
44
|
+
dashscope/audio/tts_v2/speech_synthesizer.py,sha256=qUoLga8HpvNVdbN5n_orxrgZ28yD6Lhwuwqeoi1T7yA,20056
|
|
45
45
|
dashscope/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
46
|
dashscope/client/base_api.py,sha256=aWNy_xm02GXuLKVgWnYJht2nI4ZHSGfYIcr52SML15A,41239
|
|
47
47
|
dashscope/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -64,6 +64,11 @@ dashscope/embeddings/multimodal_embedding.py,sha256=NwjQsdkKgUz51ozGjqFDzVlLcZjY
|
|
|
64
64
|
dashscope/embeddings/text_embedding.py,sha256=4W1V-Ggj0WJhY5MdP2xoUTteXlWk8TUtI6y2gRUVCUk,2060
|
|
65
65
|
dashscope/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
66
66
|
dashscope/io/input_output.py,sha256=0aXrRJFo1ZqYm_AJWR_w88O4-Btn9np2zUhrrUdBdfw,3992
|
|
67
|
+
dashscope/multimodal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
68
|
+
dashscope/multimodal/dialog_state.py,sha256=CtOdfGWhq0ePG3bc8-7inhespETtPD4QDli1513hd1A,1522
|
|
69
|
+
dashscope/multimodal/multimodal_constants.py,sha256=MlKJkOIGoEzp3c3jXzeEE4zw0oNHv4HkEQeJGx7kLaM,1228
|
|
70
|
+
dashscope/multimodal/multimodal_dialog.py,sha256=21yIQyrRzRcfDSbo0IMcEu-KYMji70cEH1p3NLsSR-Q,22372
|
|
71
|
+
dashscope/multimodal/multimodal_request_params.py,sha256=chFDxPvbKcEyLEkVdsBFWUChsHaKnBAvPcG-IOjVRq4,7775
|
|
67
72
|
dashscope/nlp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
68
73
|
dashscope/nlp/understanding.py,sha256=00ado-ibYEzBRT0DgKGd3bohQDNW73xnFhJ_1aa87lw,2880
|
|
69
74
|
dashscope/protocol/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -87,9 +92,9 @@ dashscope/tokenizers/tokenizer.py,sha256=3FQVDvMNkCW9ccYeJdjrd_PIMMD3Xv7aNZkaYOE
|
|
|
87
92
|
dashscope/tokenizers/tokenizer_base.py,sha256=5EJIFuizMWESEmLmbd38yJnfeHmPnzZPwsO4aOGjpl4,707
|
|
88
93
|
dashscope/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
89
94
|
dashscope/utils/oss_utils.py,sha256=L5LN3lN8etVxSL_jkZydstvEKpnTG9CY0zcvPGQ5LBo,7383
|
|
90
|
-
dashscope-1.23.
|
|
91
|
-
dashscope-1.23.
|
|
92
|
-
dashscope-1.23.
|
|
93
|
-
dashscope-1.23.
|
|
94
|
-
dashscope-1.23.
|
|
95
|
-
dashscope-1.23.
|
|
95
|
+
dashscope-1.23.5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
96
|
+
dashscope-1.23.5.dist-info/METADATA,sha256=tERMxFGxdS3EOMhTiNl3jaQCAf-k40aUNJXZsMERc5s,6798
|
|
97
|
+
dashscope-1.23.5.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
|
|
98
|
+
dashscope-1.23.5.dist-info/entry_points.txt,sha256=raEp5dOuj8whJ7yqZlDM8WQ5p2RfnGrGNo0QLQEnatY,50
|
|
99
|
+
dashscope-1.23.5.dist-info/top_level.txt,sha256=woqavFJK9zas5xTqynmALqOtlafghjsk63Xk86powTU,10
|
|
100
|
+
dashscope-1.23.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|