dashscope 1.24.3__py3-none-any.whl → 1.24.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dashscope might be problematic. Click here for more details.
- dashscope/__init__.py +2 -1
- dashscope/aigc/__init__.py +6 -4
- dashscope/aigc/multimodal_conversation.py +41 -20
- dashscope/api_entities/dashscope_response.py +31 -3
- dashscope/audio/tts_v2/speech_synthesizer.py +30 -0
- dashscope/embeddings/multimodal_embedding.py +70 -1
- dashscope/multimodal/__init__.py +20 -0
- dashscope/multimodal/tingwu/__init__.py +10 -0
- dashscope/multimodal/tingwu/tingwu_realtime.py +579 -0
- dashscope/version.py +1 -1
- {dashscope-1.24.3.dist-info → dashscope-1.24.5.dist-info}/METADATA +1 -1
- {dashscope-1.24.3.dist-info → dashscope-1.24.5.dist-info}/RECORD +16 -15
- {dashscope-1.24.3.dist-info → dashscope-1.24.5.dist-info}/WHEEL +0 -0
- {dashscope-1.24.3.dist-info → dashscope-1.24.5.dist-info}/entry_points.txt +0 -0
- {dashscope-1.24.3.dist-info → dashscope-1.24.5.dist-info}/licenses/LICENSE +0 -0
- {dashscope-1.24.3.dist-info → dashscope-1.24.5.dist-info}/top_level.txt +0 -0
dashscope/__init__.py
CHANGED
|
@@ -24,7 +24,7 @@ from dashscope.embeddings.batch_text_embedding_response import \
|
|
|
24
24
|
BatchTextEmbeddingResponse
|
|
25
25
|
from dashscope.embeddings.multimodal_embedding import (
|
|
26
26
|
MultiModalEmbedding, MultiModalEmbeddingItemAudio,
|
|
27
|
-
MultiModalEmbeddingItemImage, MultiModalEmbeddingItemText)
|
|
27
|
+
MultiModalEmbeddingItemImage, MultiModalEmbeddingItemText, AioMultiModalEmbedding)
|
|
28
28
|
from dashscope.embeddings.text_embedding import TextEmbedding
|
|
29
29
|
from dashscope.files import Files
|
|
30
30
|
from dashscope.models import Models
|
|
@@ -55,6 +55,7 @@ __all__ = [
|
|
|
55
55
|
Models,
|
|
56
56
|
TextEmbedding,
|
|
57
57
|
MultiModalEmbedding,
|
|
58
|
+
AioMultiModalEmbedding,
|
|
58
59
|
MultiModalEmbeddingItemAudio,
|
|
59
60
|
MultiModalEmbeddingItemImage,
|
|
60
61
|
MultiModalEmbeddingItemText,
|
dashscope/aigc/__init__.py
CHANGED
|
@@ -1,18 +1,20 @@
|
|
|
1
1
|
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
2
|
-
|
|
3
2
|
from .conversation import Conversation, History, HistoryItem
|
|
4
|
-
from .generation import Generation
|
|
5
|
-
from .image_synthesis import ImageSynthesis
|
|
3
|
+
from .generation import Generation, AioGeneration
|
|
4
|
+
from .image_synthesis import ImageSynthesis, AioImageSynthesis
|
|
6
5
|
from .multimodal_conversation import MultiModalConversation, AioMultiModalConversation
|
|
7
|
-
from .video_synthesis import VideoSynthesis
|
|
6
|
+
from .video_synthesis import VideoSynthesis, AioVideoSynthesis
|
|
8
7
|
|
|
9
8
|
__all__ = [
|
|
10
9
|
Generation,
|
|
10
|
+
AioGeneration,
|
|
11
11
|
Conversation,
|
|
12
12
|
HistoryItem,
|
|
13
13
|
History,
|
|
14
14
|
ImageSynthesis,
|
|
15
|
+
AioImageSynthesis,
|
|
15
16
|
MultiModalConversation,
|
|
16
17
|
AioMultiModalConversation,
|
|
17
18
|
VideoSynthesis,
|
|
19
|
+
AioVideoSynthesis,
|
|
18
20
|
]
|
|
@@ -24,9 +24,10 @@ class MultiModalConversation(BaseApi):
|
|
|
24
24
|
def call(
|
|
25
25
|
cls,
|
|
26
26
|
model: str,
|
|
27
|
-
messages: List,
|
|
27
|
+
messages: List = None,
|
|
28
28
|
api_key: str = None,
|
|
29
29
|
workspace: str = None,
|
|
30
|
+
text: str = None,
|
|
30
31
|
**kwargs
|
|
31
32
|
) -> Union[MultiModalConversationResponse, Generator[
|
|
32
33
|
MultiModalConversationResponse, None, None]]:
|
|
@@ -55,6 +56,7 @@ class MultiModalConversation(BaseApi):
|
|
|
55
56
|
if None, will retrieve by rule [1].
|
|
56
57
|
[1]: https://help.aliyun.com/zh/dashscope/developer-reference/api-key-settings. # noqa E501
|
|
57
58
|
workspace (str): The dashscope workspace id.
|
|
59
|
+
text (str): The text to generate.
|
|
58
60
|
**kwargs:
|
|
59
61
|
stream(bool, `optional`): Enable server-sent events
|
|
60
62
|
(ref: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events) # noqa E501
|
|
@@ -68,8 +70,11 @@ class MultiModalConversation(BaseApi):
|
|
|
68
70
|
tokens with top_p probability mass. So 0.1 means only
|
|
69
71
|
the tokens comprising the top 10% probability mass are
|
|
70
72
|
considered[qwen-turbo,bailian-v1].
|
|
73
|
+
voice(string, `optional`): The voice name of qwen tts, include 'Cherry'/'Ethan'/'Sunny'/'Dylan' and so on,
|
|
74
|
+
you can get the total voice list : https://help.aliyun.com/zh/model-studio/qwen-tts.
|
|
71
75
|
top_k(float, `optional`):
|
|
72
76
|
|
|
77
|
+
|
|
73
78
|
Raises:
|
|
74
79
|
InvalidInput: The history and auto_history are mutually exclusive.
|
|
75
80
|
|
|
@@ -78,18 +83,24 @@ class MultiModalConversation(BaseApi):
|
|
|
78
83
|
Generator[MultiModalConversationResponse, None, None]]: If
|
|
79
84
|
stream is True, return Generator, otherwise MultiModalConversationResponse.
|
|
80
85
|
"""
|
|
81
|
-
if (messages is None or not messages):
|
|
82
|
-
raise InputRequired('prompt or messages is required!')
|
|
83
86
|
if model is None or not model:
|
|
84
87
|
raise ModelRequired('Model is required!')
|
|
85
88
|
task_group, _ = _get_task_group_and_task(__name__)
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
89
|
+
input = {}
|
|
90
|
+
msg_copy = None
|
|
91
|
+
|
|
92
|
+
if messages is not None and messages:
|
|
93
|
+
msg_copy = copy.deepcopy(messages)
|
|
94
|
+
has_upload = cls._preprocess_messages(model, msg_copy, api_key)
|
|
95
|
+
if has_upload:
|
|
96
|
+
headers = kwargs.pop('headers', {})
|
|
97
|
+
headers['X-DashScope-OssResourceResolve'] = 'enable'
|
|
98
|
+
kwargs['headers'] = headers
|
|
99
|
+
|
|
100
|
+
if text is not None and text:
|
|
101
|
+
input.update({'text': text})
|
|
102
|
+
if msg_copy is not None:
|
|
103
|
+
input.update({'messages': msg_copy})
|
|
93
104
|
response = super().call(model=model,
|
|
94
105
|
task_group=task_group,
|
|
95
106
|
task=MultiModalConversation.task,
|
|
@@ -145,9 +156,10 @@ class AioMultiModalConversation(BaseAioApi):
|
|
|
145
156
|
async def call(
|
|
146
157
|
cls,
|
|
147
158
|
model: str,
|
|
148
|
-
messages: List,
|
|
159
|
+
messages: List = None,
|
|
149
160
|
api_key: str = None,
|
|
150
161
|
workspace: str = None,
|
|
162
|
+
text: str = None,
|
|
151
163
|
**kwargs
|
|
152
164
|
) -> Union[MultiModalConversationResponse, Generator[
|
|
153
165
|
MultiModalConversationResponse, None, None]]:
|
|
@@ -176,6 +188,7 @@ class AioMultiModalConversation(BaseAioApi):
|
|
|
176
188
|
if None, will retrieve by rule [1].
|
|
177
189
|
[1]: https://help.aliyun.com/zh/dashscope/developer-reference/api-key-settings. # noqa E501
|
|
178
190
|
workspace (str): The dashscope workspace id.
|
|
191
|
+
text (str): The text to generate.
|
|
179
192
|
**kwargs:
|
|
180
193
|
stream(bool, `optional`): Enable server-sent events
|
|
181
194
|
(ref: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events) # noqa E501
|
|
@@ -189,6 +202,8 @@ class AioMultiModalConversation(BaseAioApi):
|
|
|
189
202
|
tokens with top_p probability mass. So 0.1 means only
|
|
190
203
|
the tokens comprising the top 10% probability mass are
|
|
191
204
|
considered[qwen-turbo,bailian-v1].
|
|
205
|
+
voice(string, `optional`): The voice name of qwen tts, include 'Cherry'/'Ethan'/'Sunny'/'Dylan' and so on,
|
|
206
|
+
you can get the total voice list : https://help.aliyun.com/zh/model-studio/qwen-tts.
|
|
192
207
|
top_k(float, `optional`):
|
|
193
208
|
|
|
194
209
|
Raises:
|
|
@@ -199,18 +214,24 @@ class AioMultiModalConversation(BaseAioApi):
|
|
|
199
214
|
Generator[MultiModalConversationResponse, None, None]]: If
|
|
200
215
|
stream is True, return Generator, otherwise MultiModalConversationResponse.
|
|
201
216
|
"""
|
|
202
|
-
if (messages is None or not messages):
|
|
203
|
-
raise InputRequired('prompt or messages is required!')
|
|
204
217
|
if model is None or not model:
|
|
205
218
|
raise ModelRequired('Model is required!')
|
|
206
219
|
task_group, _ = _get_task_group_and_task(__name__)
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
220
|
+
input = {}
|
|
221
|
+
msg_copy = None
|
|
222
|
+
|
|
223
|
+
if messages is not None and messages:
|
|
224
|
+
msg_copy = copy.deepcopy(messages)
|
|
225
|
+
has_upload = cls._preprocess_messages(model, msg_copy, api_key)
|
|
226
|
+
if has_upload:
|
|
227
|
+
headers = kwargs.pop('headers', {})
|
|
228
|
+
headers['X-DashScope-OssResourceResolve'] = 'enable'
|
|
229
|
+
kwargs['headers'] = headers
|
|
230
|
+
|
|
231
|
+
if text is not None and text:
|
|
232
|
+
input.update({'text': text})
|
|
233
|
+
if msg_copy is not None:
|
|
234
|
+
input.update({'messages': msg_copy})
|
|
214
235
|
response = await super().call(model=model,
|
|
215
236
|
task_group=task_group,
|
|
216
237
|
task=AioMultiModalConversation.task,
|
|
@@ -152,6 +152,26 @@ class Choice(DictMixin):
|
|
|
152
152
|
**kwargs)
|
|
153
153
|
|
|
154
154
|
|
|
155
|
+
@dataclass(init=False)
|
|
156
|
+
class Audio(DictMixin):
|
|
157
|
+
data: str
|
|
158
|
+
url: str
|
|
159
|
+
id: str
|
|
160
|
+
expires_at: int
|
|
161
|
+
|
|
162
|
+
def __init__(self,
|
|
163
|
+
data: str = None,
|
|
164
|
+
url: str = None,
|
|
165
|
+
id: str = None,
|
|
166
|
+
expires_at: int = None,
|
|
167
|
+
**kwargs):
|
|
168
|
+
super().__init__(data=data,
|
|
169
|
+
url=url,
|
|
170
|
+
id=id,
|
|
171
|
+
expires_at=expires_at,
|
|
172
|
+
**kwargs)
|
|
173
|
+
|
|
174
|
+
|
|
155
175
|
@dataclass(init=False)
|
|
156
176
|
class GenerationOutput(DictMixin):
|
|
157
177
|
text: str
|
|
@@ -217,20 +237,25 @@ class GenerationResponse(DashScopeAPIResponse):
|
|
|
217
237
|
@dataclass(init=False)
|
|
218
238
|
class MultiModalConversationOutput(DictMixin):
|
|
219
239
|
choices: List[Choice]
|
|
240
|
+
audio: Audio
|
|
220
241
|
|
|
221
242
|
def __init__(self,
|
|
222
243
|
text: str = None,
|
|
223
244
|
finish_reason: str = None,
|
|
224
245
|
choices: List[Choice] = None,
|
|
246
|
+
audio: Audio = None,
|
|
225
247
|
**kwargs):
|
|
226
248
|
chs = None
|
|
227
249
|
if choices is not None:
|
|
228
250
|
chs = []
|
|
229
251
|
for choice in choices:
|
|
230
252
|
chs.append(Choice(**choice))
|
|
253
|
+
if audio is not None:
|
|
254
|
+
audio = Audio(**audio)
|
|
231
255
|
super().__init__(text=text,
|
|
232
256
|
finish_reason=finish_reason,
|
|
233
257
|
choices=chs,
|
|
258
|
+
audio=audio,
|
|
234
259
|
**kwargs)
|
|
235
260
|
|
|
236
261
|
|
|
@@ -238,15 +263,18 @@ class MultiModalConversationOutput(DictMixin):
|
|
|
238
263
|
class MultiModalConversationUsage(DictMixin):
|
|
239
264
|
input_tokens: int
|
|
240
265
|
output_tokens: int
|
|
266
|
+
characters: int
|
|
241
267
|
|
|
242
268
|
# TODO add image usage info.
|
|
243
269
|
|
|
244
270
|
def __init__(self,
|
|
245
271
|
input_tokens: int = 0,
|
|
246
272
|
output_tokens: int = 0,
|
|
273
|
+
characters: int = 0,
|
|
247
274
|
**kwargs):
|
|
248
275
|
super().__init__(input_tokens=input_tokens,
|
|
249
276
|
output_tokens=output_tokens,
|
|
277
|
+
characters=characters,
|
|
250
278
|
**kwargs)
|
|
251
279
|
|
|
252
280
|
|
|
@@ -378,7 +406,7 @@ class RecognitionResponse(DashScopeAPIResponse):
|
|
|
378
406
|
"""
|
|
379
407
|
result = False
|
|
380
408
|
if sentence is not None and 'end_time' in sentence and sentence[
|
|
381
|
-
|
|
409
|
+
'end_time'] is not None:
|
|
382
410
|
result = True
|
|
383
411
|
return result
|
|
384
412
|
|
|
@@ -445,8 +473,8 @@ class ImageSynthesisOutput(DictMixin):
|
|
|
445
473
|
results: List[ImageSynthesisResult]
|
|
446
474
|
|
|
447
475
|
def __init__(self,
|
|
448
|
-
task_id: str =
|
|
449
|
-
task_status: str =
|
|
476
|
+
task_id: str = None,
|
|
477
|
+
task_status: str = None,
|
|
450
478
|
results: List[ImageSynthesisResult] = [],
|
|
451
479
|
**kwargs):
|
|
452
480
|
res = []
|
|
@@ -98,6 +98,10 @@ class Request:
|
|
|
98
98
|
volume=50,
|
|
99
99
|
speech_rate=1.0,
|
|
100
100
|
pitch_rate=1.0,
|
|
101
|
+
seed=0,
|
|
102
|
+
synthesis_type=0,
|
|
103
|
+
instruction=None,
|
|
104
|
+
language_hints: list = None,
|
|
101
105
|
):
|
|
102
106
|
self.task_id = self.genUid()
|
|
103
107
|
self.apikey = apikey
|
|
@@ -109,6 +113,10 @@ class Request:
|
|
|
109
113
|
self.volume = volume
|
|
110
114
|
self.speech_rate = speech_rate
|
|
111
115
|
self.pitch_rate = pitch_rate
|
|
116
|
+
self.seed = seed
|
|
117
|
+
self.synthesis_type = synthesis_type
|
|
118
|
+
self.instruction = instruction
|
|
119
|
+
self.language_hints = language_hints
|
|
112
120
|
|
|
113
121
|
def genUid(self):
|
|
114
122
|
# 生成随机UUID
|
|
@@ -156,6 +164,8 @@ class Request:
|
|
|
156
164
|
'rate': self.speech_rate,
|
|
157
165
|
'format': self.format,
|
|
158
166
|
'pitch': self.pitch_rate,
|
|
167
|
+
'seed': self.seed,
|
|
168
|
+
'type': self.synthesis_type
|
|
159
169
|
},
|
|
160
170
|
},
|
|
161
171
|
}
|
|
@@ -163,6 +173,10 @@ class Request:
|
|
|
163
173
|
cmd['payload']['parameters']['bit_rate'] = self.bit_rate
|
|
164
174
|
if additional_params:
|
|
165
175
|
cmd['payload']['parameters'].update(additional_params)
|
|
176
|
+
if self.instruction is not None:
|
|
177
|
+
cmd['payload']['parameters']['instruction'] = self.instruction
|
|
178
|
+
if self.language_hints is not None:
|
|
179
|
+
cmd['payload']['parameters']['language_hints'] = self.language_hints
|
|
166
180
|
return json.dumps(cmd)
|
|
167
181
|
|
|
168
182
|
def getContinueRequest(self, text):
|
|
@@ -207,6 +221,10 @@ class SpeechSynthesizer:
|
|
|
207
221
|
volume=50,
|
|
208
222
|
speech_rate=1.0,
|
|
209
223
|
pitch_rate=1.0,
|
|
224
|
+
seed=0,
|
|
225
|
+
synthesis_type=0,
|
|
226
|
+
instruction=None,
|
|
227
|
+
language_hints: list = None,
|
|
210
228
|
headers=None,
|
|
211
229
|
callback: ResultCallback = None,
|
|
212
230
|
workspace=None,
|
|
@@ -237,6 +255,14 @@ class SpeechSynthesizer:
|
|
|
237
255
|
Dashscope workspace ID.
|
|
238
256
|
url: str
|
|
239
257
|
Dashscope WebSocket URL.
|
|
258
|
+
seed: int
|
|
259
|
+
The seed of the synthesizer, with a range from 0 to 65535. Default is 0.
|
|
260
|
+
synthesis_type: int
|
|
261
|
+
The type of the synthesizer, Default is 0.
|
|
262
|
+
instruction: str
|
|
263
|
+
The instruction of the synthesizer, max length is 128.
|
|
264
|
+
language_hints: list
|
|
265
|
+
The language hints of the synthesizer. supported language: zh, en.
|
|
240
266
|
additional_params: Dict
|
|
241
267
|
Additional parameters for the Dashscope API.
|
|
242
268
|
"""
|
|
@@ -271,6 +297,10 @@ class SpeechSynthesizer:
|
|
|
271
297
|
volume=volume,
|
|
272
298
|
speech_rate=speech_rate,
|
|
273
299
|
pitch_rate=pitch_rate,
|
|
300
|
+
seed=seed,
|
|
301
|
+
synthesis_type=synthesis_type,
|
|
302
|
+
instruction=instruction,
|
|
303
|
+
language_hints=language_hints
|
|
274
304
|
)
|
|
275
305
|
self.last_request_id = self.request.task_id
|
|
276
306
|
self.start_event = threading.Event()
|
|
@@ -5,7 +5,7 @@ from typing import List
|
|
|
5
5
|
|
|
6
6
|
from dashscope.api_entities.dashscope_response import (DashScopeAPIResponse,
|
|
7
7
|
DictMixin)
|
|
8
|
-
from dashscope.client.base_api import BaseApi
|
|
8
|
+
from dashscope.client.base_api import BaseApi, BaseAioApi
|
|
9
9
|
from dashscope.common.error import InputRequired, ModelRequired
|
|
10
10
|
from dashscope.common.utils import _get_task_group_and_task
|
|
11
11
|
from dashscope.utils.oss_utils import preprocess_message_element
|
|
@@ -111,3 +111,72 @@ class MultiModalEmbedding(BaseApi):
|
|
|
111
111
|
if is_upload and not has_upload:
|
|
112
112
|
has_upload = True
|
|
113
113
|
return has_upload
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class AioMultiModalEmbedding(BaseAioApi):
|
|
117
|
+
task = 'multimodal-embedding'
|
|
118
|
+
|
|
119
|
+
class Models:
|
|
120
|
+
multimodal_embedding_one_peace_v1 = 'multimodal-embedding-one-peace-v1'
|
|
121
|
+
|
|
122
|
+
@classmethod
|
|
123
|
+
async def call(cls,
|
|
124
|
+
model: str,
|
|
125
|
+
input: List[MultiModalEmbeddingItemBase],
|
|
126
|
+
api_key: str = None,
|
|
127
|
+
workspace: str = None,
|
|
128
|
+
**kwargs) -> DashScopeAPIResponse:
|
|
129
|
+
"""Get embedding multimodal contents..
|
|
130
|
+
|
|
131
|
+
Args:
|
|
132
|
+
model (str): The embedding model name.
|
|
133
|
+
input (List[MultiModalEmbeddingElement]): The embedding elements,
|
|
134
|
+
every element include data, modal, factor field.
|
|
135
|
+
workspace (str): The dashscope workspace id.
|
|
136
|
+
**kwargs:
|
|
137
|
+
auto_truncation(bool, `optional`): Automatically truncate
|
|
138
|
+
audio longer than 15 seconds or text longer than 70 words.
|
|
139
|
+
Default to false(Too long input will result in failure).
|
|
140
|
+
|
|
141
|
+
Returns:
|
|
142
|
+
DashScopeAPIResponse: The embedding result.
|
|
143
|
+
"""
|
|
144
|
+
if input is None or not input:
|
|
145
|
+
raise InputRequired('prompt is required!')
|
|
146
|
+
if model is None or not model:
|
|
147
|
+
raise ModelRequired('Model is required!')
|
|
148
|
+
embedding_input = {}
|
|
149
|
+
has_upload = cls._preprocess_message_inputs(model, input, api_key)
|
|
150
|
+
if has_upload:
|
|
151
|
+
headers = kwargs.pop('headers', {})
|
|
152
|
+
headers['X-DashScope-OssResourceResolve'] = 'enable'
|
|
153
|
+
kwargs['headers'] = headers
|
|
154
|
+
embedding_input['contents'] = input
|
|
155
|
+
kwargs.pop('stream', False) # not support streaming output.
|
|
156
|
+
task_group, function = _get_task_group_and_task(__name__)
|
|
157
|
+
response = await super().call(
|
|
158
|
+
model=model,
|
|
159
|
+
input=embedding_input,
|
|
160
|
+
task_group=task_group,
|
|
161
|
+
task=MultiModalEmbedding.task,
|
|
162
|
+
function=function,
|
|
163
|
+
api_key=api_key,
|
|
164
|
+
workspace=workspace,
|
|
165
|
+
**kwargs)
|
|
166
|
+
return response
|
|
167
|
+
|
|
168
|
+
@classmethod
|
|
169
|
+
def _preprocess_message_inputs(cls, model: str, input: List[dict],
|
|
170
|
+
api_key: str):
|
|
171
|
+
"""preprocess following inputs
|
|
172
|
+
input = [{'factor': 1, 'text': 'hello'},
|
|
173
|
+
{'factor': 2, 'audio': ''},
|
|
174
|
+
{'factor': 3, 'image': ''}]
|
|
175
|
+
"""
|
|
176
|
+
has_upload = False
|
|
177
|
+
for elem in input:
|
|
178
|
+
if not isinstance(elem, (int, float, bool, str, bytes, bytearray)):
|
|
179
|
+
is_upload = preprocess_message_element(model, elem, api_key)
|
|
180
|
+
if is_upload and not has_upload:
|
|
181
|
+
has_upload = True
|
|
182
|
+
return has_upload
|
dashscope/multimodal/__init__.py
CHANGED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
2
|
+
|
|
3
|
+
from .tingwu import tingwu
|
|
4
|
+
from .tingwu.tingwu import TingWu
|
|
5
|
+
from .tingwu.tingwu_realtime import TingWuRealtime, TingWuRealtimeCallback
|
|
6
|
+
|
|
7
|
+
from .multimodal_dialog import MultiModalDialog, MultiModalCallback
|
|
8
|
+
from .dialog_state import DialogState
|
|
9
|
+
from .multimodal_constants import *
|
|
10
|
+
from .multimodal_request_params import *
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
'tingwu',
|
|
14
|
+
'TingWu',
|
|
15
|
+
'TingWuRealtime',
|
|
16
|
+
'TingWuRealtimeCallback',
|
|
17
|
+
'MultiModalDialog',
|
|
18
|
+
'MultiModalCallback',
|
|
19
|
+
'DialogState'
|
|
20
|
+
]
|
|
@@ -0,0 +1,579 @@
|
|
|
1
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import platform
|
|
5
|
+
import threading
|
|
6
|
+
import time
|
|
7
|
+
import uuid
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from queue import Queue
|
|
10
|
+
import dashscope
|
|
11
|
+
from dashscope.client.base_api import BaseApi
|
|
12
|
+
from dashscope.common.error import (InvalidParameter, ModelRequired)
|
|
13
|
+
import websocket
|
|
14
|
+
|
|
15
|
+
from dashscope.common.logging import logger
|
|
16
|
+
from dashscope.protocol.websocket import ActionType
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TingWuRealtimeCallback:
|
|
20
|
+
"""An interface that defines callback methods for getting TingWu results.
|
|
21
|
+
Derive from this class and implement its function to provide your own data.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
def on_open(self) -> None:
|
|
25
|
+
pass
|
|
26
|
+
|
|
27
|
+
def on_started(self, task_id: str) -> None:
|
|
28
|
+
pass
|
|
29
|
+
|
|
30
|
+
def on_speech_listen(self, result: dict):
|
|
31
|
+
pass
|
|
32
|
+
|
|
33
|
+
def on_recognize_result(self, result: dict):
|
|
34
|
+
pass
|
|
35
|
+
|
|
36
|
+
def on_ai_result(self, result: dict):
|
|
37
|
+
pass
|
|
38
|
+
|
|
39
|
+
def on_stopped(self) -> None:
|
|
40
|
+
pass
|
|
41
|
+
|
|
42
|
+
def on_error(self, error_code: str, error_msg: str) -> None:
|
|
43
|
+
pass
|
|
44
|
+
|
|
45
|
+
def on_close(self, close_status_code, close_msg):
|
|
46
|
+
"""
|
|
47
|
+
callback when websocket connection is closed
|
|
48
|
+
|
|
49
|
+
:param close_status_code
|
|
50
|
+
:param close_msg
|
|
51
|
+
"""
|
|
52
|
+
pass
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class TingWuRealtime(BaseApi):
|
|
56
|
+
"""TingWuRealtime interface.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
model (str): The requested model_id.
|
|
60
|
+
callback (TingWuRealtimeCallback): A callback that returns
|
|
61
|
+
speech recognition results.
|
|
62
|
+
app_id (str): The dashscope tingwu app id.
|
|
63
|
+
format (str): The input audio format for TingWu request.
|
|
64
|
+
sample_rate (int): The input audio sample rate.
|
|
65
|
+
terminology (str): The correct instruction set id.
|
|
66
|
+
workspace (str): The dashscope workspace id.
|
|
67
|
+
|
|
68
|
+
**kwargs:
|
|
69
|
+
max_end_silence (int): The maximum end silence time.
|
|
70
|
+
other_params (dict, `optional`): Other parameters.
|
|
71
|
+
|
|
72
|
+
Raises:
|
|
73
|
+
InputRequired: Input is required.
|
|
74
|
+
"""
|
|
75
|
+
|
|
76
|
+
SILENCE_TIMEOUT_S = 60
|
|
77
|
+
|
|
78
|
+
def __init__(self,
|
|
79
|
+
model: str,
|
|
80
|
+
callback: TingWuRealtimeCallback,
|
|
81
|
+
audio_format: str = "pcm",
|
|
82
|
+
sample_rate: int = 16000,
|
|
83
|
+
max_end_silence: int = None,
|
|
84
|
+
app_id: str = None,
|
|
85
|
+
terminology: str = None,
|
|
86
|
+
workspace: str = None,
|
|
87
|
+
api_key: str = None,
|
|
88
|
+
base_address: str = None,
|
|
89
|
+
data_id: str = None,
|
|
90
|
+
**kwargs):
|
|
91
|
+
if api_key is None:
|
|
92
|
+
self.api_key = dashscope.api_key
|
|
93
|
+
else:
|
|
94
|
+
self.api_key = api_key
|
|
95
|
+
if base_address is None:
|
|
96
|
+
self.base_address = dashscope.base_websocket_api_url
|
|
97
|
+
else:
|
|
98
|
+
self.base_address = base_address
|
|
99
|
+
|
|
100
|
+
if model is None:
|
|
101
|
+
raise ModelRequired('Model is required!')
|
|
102
|
+
|
|
103
|
+
self.data_id = data_id
|
|
104
|
+
self.max_end_silence = max_end_silence
|
|
105
|
+
self.model = model
|
|
106
|
+
self.audio_format = audio_format
|
|
107
|
+
self.app_id = app_id
|
|
108
|
+
self.terminology = terminology
|
|
109
|
+
self.sample_rate = sample_rate
|
|
110
|
+
# continuous recognition with start() or once recognition with call()
|
|
111
|
+
self._recognition_once = False
|
|
112
|
+
self._callback = callback
|
|
113
|
+
self._running = False
|
|
114
|
+
self._stream_data = Queue()
|
|
115
|
+
self._worker = None
|
|
116
|
+
self._silence_timer = None
|
|
117
|
+
self._kwargs = kwargs
|
|
118
|
+
self._workspace = workspace
|
|
119
|
+
self._start_stream_timestamp = -1
|
|
120
|
+
self._first_package_timestamp = -1
|
|
121
|
+
self._stop_stream_timestamp = -1
|
|
122
|
+
self._on_complete_timestamp = -1
|
|
123
|
+
self.request_id_confirmed = False
|
|
124
|
+
self.last_request_id = uuid.uuid4().hex
|
|
125
|
+
self.request = _Request()
|
|
126
|
+
self.response = _TingWuResponse(self._callback, self.close) # 传递 self.close 作为回调
|
|
127
|
+
|
|
128
|
+
def _on_message(self, ws, message):
|
|
129
|
+
logger.debug(f"<<<<<<< Received message: {message}")
|
|
130
|
+
if isinstance(message, str):
|
|
131
|
+
self.response.handle_text_response(message)
|
|
132
|
+
elif isinstance(message, (bytes, bytearray)):
|
|
133
|
+
self.response.handle_binary_response(message)
|
|
134
|
+
|
|
135
|
+
def _on_error(self, ws, error):
|
|
136
|
+
logger.error(f"Error: {error}")
|
|
137
|
+
if self._callback:
|
|
138
|
+
error_code = "" # 默认错误码
|
|
139
|
+
if "connection" in str(error).lower():
|
|
140
|
+
error_code = "1001" # 连接错误
|
|
141
|
+
elif "timeout" in str(error).lower():
|
|
142
|
+
error_code = "1002" # 超时错误
|
|
143
|
+
elif "authentication" in str(error).lower():
|
|
144
|
+
error_code = "1003" # 认证错误
|
|
145
|
+
self._callback.on_error(error_code=error_code, error_msg=str(error))
|
|
146
|
+
|
|
147
|
+
def _on_close(self, ws, close_status_code, close_msg):
|
|
148
|
+
try:
|
|
149
|
+
logger.debug(
|
|
150
|
+
"WebSocket connection closed with status {} and message {}".format(close_status_code, close_msg))
|
|
151
|
+
if close_status_code is None:
|
|
152
|
+
close_status_code = 1000
|
|
153
|
+
if close_msg is None:
|
|
154
|
+
close_msg = "websocket is closed"
|
|
155
|
+
self._callback.on_close(close_status_code, close_msg)
|
|
156
|
+
except Exception as e:
|
|
157
|
+
logger.error(f"Error: {e}")
|
|
158
|
+
|
|
159
|
+
def _on_open(self, ws):
|
|
160
|
+
self._callback.on_open()
|
|
161
|
+
self._running = True
|
|
162
|
+
|
|
163
|
+
# def _on_pong(self):
|
|
164
|
+
# logger.debug("on pong")
|
|
165
|
+
|
|
166
|
+
def start(self, **kwargs):
|
|
167
|
+
"""
|
|
168
|
+
interface for starting TingWu connection
|
|
169
|
+
"""
|
|
170
|
+
assert self._callback is not None, 'Please set the callback to get the TingWu result.' # noqa E501
|
|
171
|
+
|
|
172
|
+
if self._running:
|
|
173
|
+
raise InvalidParameter('TingWu client has started.')
|
|
174
|
+
|
|
175
|
+
# self._start_stream_timestamp = -1
|
|
176
|
+
# self._first_package_timestamp = -1
|
|
177
|
+
# self._stop_stream_timestamp = -1
|
|
178
|
+
# self._on_complete_timestamp = -1
|
|
179
|
+
if self._kwargs is not None and len(self._kwargs) != 0:
|
|
180
|
+
self._kwargs.update(**kwargs)
|
|
181
|
+
|
|
182
|
+
self._connect(self.api_key)
|
|
183
|
+
logger.debug("connected with server.")
|
|
184
|
+
self._send_start_request()
|
|
185
|
+
|
|
186
|
+
def send_audio_data(self, speech_data: bytes):
|
|
187
|
+
"""send audio data to server"""
|
|
188
|
+
if self._running:
|
|
189
|
+
self.__send_binary_frame(speech_data)
|
|
190
|
+
|
|
191
|
+
def stop(self):
|
|
192
|
+
if self.ws is None or not self.ws.sock or not self.ws.sock.connected:
|
|
193
|
+
self._callback.on_close(1001, "websocket is not connected")
|
|
194
|
+
return
|
|
195
|
+
_send_speech_json = self.request.generate_stop_request("stop")
|
|
196
|
+
self._send_text_frame(_send_speech_json)
|
|
197
|
+
|
|
198
|
+
"""inner class"""
|
|
199
|
+
|
|
200
|
+
def _send_start_request(self):
|
|
201
|
+
"""send start request"""
|
|
202
|
+
_start_json = self.request.generate_start_request(
|
|
203
|
+
workspace_id=self._workspace,
|
|
204
|
+
direction_name="start",
|
|
205
|
+
app_id=self.app_id,
|
|
206
|
+
model=self.model,
|
|
207
|
+
audio_format=self.audio_format,
|
|
208
|
+
sample_rate=self.sample_rate,
|
|
209
|
+
terminology=self.terminology,
|
|
210
|
+
max_end_silence=self.max_end_silence,
|
|
211
|
+
data_id=self.data_id,
|
|
212
|
+
**self._kwargs
|
|
213
|
+
)
|
|
214
|
+
# send start request
|
|
215
|
+
self._send_text_frame(_start_json)
|
|
216
|
+
|
|
217
|
+
def _run_forever(self):
|
|
218
|
+
self.ws.run_forever(ping_interval=5, ping_timeout=4)
|
|
219
|
+
|
|
220
|
+
def _connect(self, api_key: str):
|
|
221
|
+
"""init websocket connection"""
|
|
222
|
+
self.ws = websocket.WebSocketApp(self.base_address, header=self.request.get_websocket_header(api_key),
|
|
223
|
+
on_open=self._on_open,
|
|
224
|
+
on_message=self._on_message,
|
|
225
|
+
on_error=self._on_error,
|
|
226
|
+
on_close=self._on_close)
|
|
227
|
+
self.thread = threading.Thread(target=self._run_forever)
|
|
228
|
+
# 统一心跳机制配置
|
|
229
|
+
self.ws.ping_interval = 5
|
|
230
|
+
self.ws.ping_timeout = 4
|
|
231
|
+
self.thread.daemon = True
|
|
232
|
+
self.thread.start()
|
|
233
|
+
|
|
234
|
+
self._wait_for_connection()
|
|
235
|
+
|
|
236
|
+
def close(self):
|
|
237
|
+
if self.ws is None or not self.ws.sock or not self.ws.sock.connected:
|
|
238
|
+
return
|
|
239
|
+
self.ws.close()
|
|
240
|
+
|
|
241
|
+
def _wait_for_connection(self):
|
|
242
|
+
"""wait for connection using event instead of busy waiting"""
|
|
243
|
+
timeout = 5
|
|
244
|
+
start_time = time.time()
|
|
245
|
+
while not (self.ws.sock and self.ws.sock.connected) and (time.time() - start_time) < timeout:
|
|
246
|
+
time.sleep(0.1) # 短暂休眠,避免密集轮询
|
|
247
|
+
|
|
248
|
+
def _send_text_frame(self, text: str):
|
|
249
|
+
# 避免在日志中记录敏感信息,如API密钥等
|
|
250
|
+
# 只记录非敏感信息
|
|
251
|
+
if '"Authorization"' not in text:
|
|
252
|
+
logger.info('>>>>>> send text frame : %s' % text)
|
|
253
|
+
else:
|
|
254
|
+
logger.info('>>>>>> send text frame with authorization header')
|
|
255
|
+
self.ws.send(text, websocket.ABNF.OPCODE_TEXT)
|
|
256
|
+
|
|
257
|
+
def __send_binary_frame(self, binary: bytes):
|
|
258
|
+
# _log.info('send binary frame length: %d' % len(binary))
|
|
259
|
+
self.ws.send(binary, websocket.ABNF.OPCODE_BINARY)
|
|
260
|
+
|
|
261
|
+
def __enter__(self):
|
|
262
|
+
return self
|
|
263
|
+
|
|
264
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
265
|
+
self.cleanup()
|
|
266
|
+
return False
|
|
267
|
+
|
|
268
|
+
def cleanup(self):
|
|
269
|
+
"""cleanup resources"""
|
|
270
|
+
try:
|
|
271
|
+
if self.ws:
|
|
272
|
+
self.ws.close()
|
|
273
|
+
if self.thread and self.thread.is_alive():
|
|
274
|
+
# 设置标志位通知线程退出
|
|
275
|
+
self.thread.join(timeout=2)
|
|
276
|
+
# 清除引用
|
|
277
|
+
self.ws = None
|
|
278
|
+
self.thread = None
|
|
279
|
+
self._callback = None
|
|
280
|
+
self.response = None
|
|
281
|
+
except Exception as e:
|
|
282
|
+
logger.error(f"Error in cleanup: {e}")
|
|
283
|
+
|
|
284
|
+
def send_audio_frame(self, buffer: bytes):
|
|
285
|
+
"""Push audio to server
|
|
286
|
+
|
|
287
|
+
Raises:
|
|
288
|
+
InvalidParameter: Cannot send data to an uninitiated recognition.
|
|
289
|
+
"""
|
|
290
|
+
if self._running is False:
|
|
291
|
+
raise InvalidParameter('TingWu client has stopped.')
|
|
292
|
+
|
|
293
|
+
if self._start_stream_timestamp < 0:
|
|
294
|
+
self._start_stream_timestamp = time.time() * 1000
|
|
295
|
+
logger.debug('send_audio_frame: {}'.format(len(buffer)))
|
|
296
|
+
self.__send_binary_frame(buffer)
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
class _Request:
|
|
300
|
+
def __init__(self):
|
|
301
|
+
# websocket header
|
|
302
|
+
self.ws_headers = None
|
|
303
|
+
# request body for voice chat
|
|
304
|
+
self.header = None
|
|
305
|
+
self.payload = None
|
|
306
|
+
# params
|
|
307
|
+
self.task_id = None
|
|
308
|
+
self.app_id = None
|
|
309
|
+
self.workspace_id = None
|
|
310
|
+
|
|
311
|
+
def get_websocket_header(self, api_key):
|
|
312
|
+
ua = 'dashscope/%s; python/%s; platform/%s; processor/%s' % (
|
|
313
|
+
'1.18.0', # dashscope version
|
|
314
|
+
platform.python_version(),
|
|
315
|
+
platform.platform(),
|
|
316
|
+
platform.processor(),
|
|
317
|
+
)
|
|
318
|
+
self.ws_headers = {
|
|
319
|
+
"User-Agent": ua,
|
|
320
|
+
"Authorization": f"bearer {api_key}",
|
|
321
|
+
"Accept": "application/json"
|
|
322
|
+
}
|
|
323
|
+
logger.info('websocket header: {}'.format(self.ws_headers))
|
|
324
|
+
return self.ws_headers
|
|
325
|
+
|
|
326
|
+
def generate_start_request(self, direction_name: str,
|
|
327
|
+
app_id: str,
|
|
328
|
+
model: str = None,
|
|
329
|
+
workspace_id: str = None,
|
|
330
|
+
audio_format: str = None,
|
|
331
|
+
sample_rate: int = None,
|
|
332
|
+
terminology: str = None,
|
|
333
|
+
max_end_silence: int = None,
|
|
334
|
+
data_id: str = None,
|
|
335
|
+
**kwargs
|
|
336
|
+
) -> str:
|
|
337
|
+
"""
|
|
338
|
+
build start request.
|
|
339
|
+
:param app_id: web console app id
|
|
340
|
+
:param direction_name:
|
|
341
|
+
:param workspace_id: web console workspace id
|
|
342
|
+
:param model: model name
|
|
343
|
+
:param audio_format: audio format
|
|
344
|
+
:param sample_rate: sample rate
|
|
345
|
+
:param terminology:
|
|
346
|
+
:param max_end_silence:
|
|
347
|
+
:param data_id:
|
|
348
|
+
:return:
|
|
349
|
+
Args:
|
|
350
|
+
:
|
|
351
|
+
"""
|
|
352
|
+
self._get_dash_request_header(ActionType.START)
|
|
353
|
+
parameters = self._get_start_parameters(audio_format=audio_format, sample_rate=sample_rate,
|
|
354
|
+
max_end_silence=max_end_silence,
|
|
355
|
+
terminology=terminology,
|
|
356
|
+
**kwargs)
|
|
357
|
+
self._get_dash_request_payload(direction_name=direction_name, app_id=app_id, workspace_id=workspace_id,
|
|
358
|
+
model=model,
|
|
359
|
+
data_id=data_id,
|
|
360
|
+
request_params=parameters)
|
|
361
|
+
|
|
362
|
+
cmd = {
|
|
363
|
+
"header": self.header,
|
|
364
|
+
"payload": self.payload
|
|
365
|
+
}
|
|
366
|
+
return json.dumps(cmd)
|
|
367
|
+
|
|
368
|
+
@staticmethod
|
|
369
|
+
def _get_start_parameters(audio_format: str = None,
|
|
370
|
+
sample_rate: int = None,
|
|
371
|
+
terminology: str = None,
|
|
372
|
+
max_end_silence: int = None,
|
|
373
|
+
**kwargs):
|
|
374
|
+
"""
|
|
375
|
+
build start request parameters inner.
|
|
376
|
+
:param kwargs: parameters
|
|
377
|
+
:return
|
|
378
|
+
"""
|
|
379
|
+
parameters = {}
|
|
380
|
+
if audio_format is not None:
|
|
381
|
+
parameters['format'] = audio_format
|
|
382
|
+
if sample_rate is not None:
|
|
383
|
+
parameters['sampleRate'] = sample_rate
|
|
384
|
+
if terminology is not None:
|
|
385
|
+
parameters['terminology'] = terminology
|
|
386
|
+
if max_end_silence is not None:
|
|
387
|
+
parameters['maxEndSilence'] = max_end_silence
|
|
388
|
+
if kwargs is not None and len(kwargs) != 0:
|
|
389
|
+
parameters.update(kwargs)
|
|
390
|
+
return parameters
|
|
391
|
+
|
|
392
|
+
def generate_stop_request(self, direction_name: str) -> str:
|
|
393
|
+
"""
|
|
394
|
+
build stop request.
|
|
395
|
+
:param direction_name
|
|
396
|
+
:return
|
|
397
|
+
"""
|
|
398
|
+
self._get_dash_request_header(ActionType.FINISHED)
|
|
399
|
+
self._get_dash_request_payload(direction_name, self.app_id)
|
|
400
|
+
|
|
401
|
+
cmd = {
|
|
402
|
+
"header": self.header,
|
|
403
|
+
"payload": self.payload
|
|
404
|
+
}
|
|
405
|
+
return json.dumps(cmd)
|
|
406
|
+
|
|
407
|
+
def _get_dash_request_header(self, action: str):
|
|
408
|
+
"""
|
|
409
|
+
:param action: ActionType :run-task, continue-task, finish-task
|
|
410
|
+
"""
|
|
411
|
+
if self.task_id is None:
|
|
412
|
+
self.task_id = get_random_uuid()
|
|
413
|
+
self.header = DashHeader(action=action, task_id=self.task_id).to_dict()
|
|
414
|
+
|
|
415
|
+
def _get_dash_request_payload(self, direction_name: str,
|
|
416
|
+
app_id: str,
|
|
417
|
+
workspace_id: str = None,
|
|
418
|
+
custom_input=None,
|
|
419
|
+
model: str = None,
|
|
420
|
+
data_id: str = None,
|
|
421
|
+
request_params=None,
|
|
422
|
+
):
|
|
423
|
+
"""
|
|
424
|
+
build start request payload inner.
|
|
425
|
+
:param direction_name: inner direction name
|
|
426
|
+
:param app_id: web console app id
|
|
427
|
+
:param request_params: start direction body parameters
|
|
428
|
+
:param custom_input: user custom input
|
|
429
|
+
:param data_id: data id
|
|
430
|
+
:param model: model name
|
|
431
|
+
"""
|
|
432
|
+
if custom_input is not None:
|
|
433
|
+
input = custom_input
|
|
434
|
+
else:
|
|
435
|
+
input = RequestBodyInput(
|
|
436
|
+
workspace_id=workspace_id,
|
|
437
|
+
app_id=app_id,
|
|
438
|
+
directive=direction_name,
|
|
439
|
+
data_id=data_id
|
|
440
|
+
)
|
|
441
|
+
|
|
442
|
+
self.payload = DashPayload(
|
|
443
|
+
model=model,
|
|
444
|
+
input=input.to_dict(),
|
|
445
|
+
parameters=request_params
|
|
446
|
+
).to_dict()
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
class _TingWuResponse:
    """Dispatches server messages (JSON text frames / binary frames) to the
    user-supplied :class:`TingWuRealtimeCallback`."""

    def __init__(self, callback: TingWuRealtimeCallback, close_callback=None):
        super().__init__()
        self.task_id = None  # dialog/task id, set on task-started
        self._callback = callback
        self._close_callback = close_callback  # invoked after speech-end

    def handle_text_response(self, response_json: str):
        """
        handle text response.
        :param response_json: json format response from server
        """
        logger.info("<<<<<< server response: %s" % response_json)
        try:
            # try to parse response as json
            json_data = json.loads(response_json)
            header = json_data.get('header', {})
            if header.get('event') == 'task-failed':
                logger.error('Server returned invalid message: %s' % response_json)
                if self._callback:
                    self._callback.on_error(error_code=header.get('error_code'),
                                            error_msg=header.get('error_message'))
                return
            if header.get('event') == "task-started":
                self._handle_started(header.get('task_id'))
                return

            payload = json_data.get('payload', {})
            # .get with a default does not protect against an explicit
            # "output": null in the message, hence the None check below.
            output = payload.get('output', {})
            if output is not None:
                action = output.get('action')
                logger.info("Server response action: %s" % action)
                self._handle_tingwu_agent_text_response(action=action, response_json=json_data)

        except json.JSONDecodeError:
            logger.error("Failed to parse message as JSON.")

    def handle_binary_response(self, response_binary: bytes):
        """
        handle binary response.
        :param response_binary: server response binary
        """
        logger.info("<<<<<< server response binary length: %d" % len(response_binary))

    def _handle_tingwu_agent_text_response(self, action: str, response_json: dict):
        """Route one parsed agent message to the matching callback hook."""
        payload = response_json.get('payload', {})
        output = payload.get('output', {})
        if action == "task-failed":
            self._callback.on_error(error_code=output.get('errorCode'),
                                    error_msg=output.get('errorMessage'))
        elif action == "speech-listen":
            self._callback.on_speech_listen(response_json)
        elif action == "recognize-result":
            self._callback.on_recognize_result(response_json)
        elif action == "ai-result":
            self._callback.on_ai_result(response_json)
        elif action == "speech-end":  # the ai-result event always precedes speech-end
            self._callback.on_stopped()
            if self._close_callback is not None:
                self._close_callback()
        else:
            # Lazy %-arg logging; the original concatenated
            # "Unknown response name:" + action, which raised TypeError
            # whenever the 'action' key was missing (action is None).
            logger.info("Unknown response name:%s", action)

    def _handle_started(self, task_id: str):
        """Record the task id and notify the callback that the task started."""
        self.task_id = task_id
        self._callback.on_started(self.task_id)
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
def get_random_uuid() -> str:
    """Return a fresh random UUID4 as a 32-character lowercase hex string."""
    random_id = uuid.uuid4()
    return random_id.hex
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
@dataclass
class RequestBodyInput:
    """Input section of a request body: identifies app, directive and data."""
    app_id: str
    directive: str
    data_id: str = None
    workspace_id: str = None

    def to_dict(self):
        """Serialize to a dict, omitting unset optional fields."""
        serialized = {
            "appId": self.app_id,
            "directive": self.directive,
        }
        if self.workspace_id is not None:
            # NOTE(review): the other keys are camelCase ("appId",
            # "dataId") — confirm the server really expects snake_case here.
            serialized["workspace_id"] = self.workspace_id
        if self.data_id is not None:
            serialized["dataId"] = self.data_id
        return serialized
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
@dataclass
class DashHeader:
    """Protocol-level header of a dashscope websocket command frame."""
    action: str
    # Bug fix: the original used field(default=get_random_uuid()), which
    # evaluates the uuid ONCE at class-definition time, so every instance
    # that omitted task_id shared the same id. default_factory generates a
    # fresh id per instance.
    task_id: str = field(default_factory=get_random_uuid)
    streaming: str = field(default="duplex")  # duplex by default

    def to_dict(self):
        return {
            "action": self.action,
            "task_id": self.task_id,
            # request_id deliberately mirrors task_id in this protocol —
            # presumably intentional; confirm against the server contract.
            "request_id": self.task_id,
            "streaming": self.streaming
        }
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
@dataclass
class DashPayload:
    """Payload section of a dashscope websocket command frame."""
    task_group: str = "aigc"
    function: str = "generation"
    model: str = ""
    task: str = "multimodal-generation"
    parameters: dict = None
    input: dict = None

    def to_dict(self):
        """Serialize, emitting parameters/input only when they were set."""
        serialized = {
            "task_group": self.task_group,
            "function": self.function,
            "model": self.model,
            "task": self.task,
        }
        for optional in ("parameters", "input"):
            value = getattr(self, optional)
            if value is not None:
                serialized[optional] = value
        return serialized
|
dashscope/version.py
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
|
-
dashscope/__init__.py,sha256=
|
|
1
|
+
dashscope/__init__.py,sha256=96J137Im9Ii9uxfVOOYkZDJNZXF1sEbcH4-QXFr4xEw,3172
|
|
2
2
|
dashscope/cli.py,sha256=64oGkevgX0RHPPmMg0sevXDgaFLQNA_0vdtjQ7Z2pHM,26492
|
|
3
3
|
dashscope/files.py,sha256=vRDQygm3lOqBZR73o7KNHs1iTBVuvLncuwJNxIYjzAU,3981
|
|
4
4
|
dashscope/model.py,sha256=B5v_BtYLPqj6raClejBgdKg6WTGwhH_f-20pvsQqmsk,1491
|
|
5
5
|
dashscope/models.py,sha256=dE4mzXkl85G343qVylSGpURPRdA5pZSqXlx6PcxqC_Q,1275
|
|
6
|
-
dashscope/version.py,sha256=
|
|
7
|
-
dashscope/aigc/__init__.py,sha256=
|
|
6
|
+
dashscope/version.py,sha256=2fvqw7bZLyWOIDvUb8DEkdi6y_VgyljhOeYdITEksWM,74
|
|
7
|
+
dashscope/aigc/__init__.py,sha256=kYvYEoRK-NUHyMWpBDNQBz4fVA__uOhHRK2kDTBaWgk,617
|
|
8
8
|
dashscope/aigc/chat_completion.py,sha256=ONlyyssIbfaKKcFo7cEKhHx5OCF2XX810HFzIExW1ho,14813
|
|
9
9
|
dashscope/aigc/code_generation.py,sha256=p_mxDKJLQMW0IjFD46JRlZuEZCRESSVKEfLlAevBtqw,10936
|
|
10
10
|
dashscope/aigc/conversation.py,sha256=95xEEY4ThZJysj5zy3aMw7ql9KLJVfD_1iHv9QZ17Ew,14282
|
|
11
11
|
dashscope/aigc/generation.py,sha256=xMcMu16rICTdjZiD_sPqYV_Ltdp4ewGzzfC7JD9VApY,17948
|
|
12
12
|
dashscope/aigc/image_synthesis.py,sha256=Itx9h5brEwC-d3Mj_ntDHGd4qaitqDg9DeGHMJouhMk,28178
|
|
13
|
-
dashscope/aigc/multimodal_conversation.py,sha256=
|
|
13
|
+
dashscope/aigc/multimodal_conversation.py,sha256=BXpUthyGxJHQs18-m_ZzAw6MI5nSM4_NVMUfTDSC1-k,11682
|
|
14
14
|
dashscope/aigc/video_synthesis.py,sha256=RSPjar5-YiF9xclRmf9H7-5QbRxLcsNXO4zS7oTKi2I,24137
|
|
15
15
|
dashscope/api_entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
16
|
dashscope/api_entities/aiohttp_request.py,sha256=1L7XdIJ9L65cQmX8x9JCR4t5hNIMDrbiWADfKKp9yfo,10280
|
|
@@ -18,7 +18,7 @@ dashscope/api_entities/api_request_data.py,sha256=04rpYPNK1HkT3iTPJmZpquH621xcBb
|
|
|
18
18
|
dashscope/api_entities/api_request_factory.py,sha256=ynpbFmxSne4dJkv5m40Vlwt4hJSxQPprAuUgMSQIQDg,5639
|
|
19
19
|
dashscope/api_entities/base_request.py,sha256=W2SzrSAGFS6V8DErfSrayQtSL0T4iO7BrC8flr7nt1w,977
|
|
20
20
|
dashscope/api_entities/chat_completion_types.py,sha256=1WMWPszhM3HaJBVz-ZXx-El4D8-RfVUL3ym65xsDRLk,11435
|
|
21
|
-
dashscope/api_entities/dashscope_response.py,sha256=
|
|
21
|
+
dashscope/api_entities/dashscope_response.py,sha256=31guU41ePkLyFsVVN-1WODXdOHiURzRyxxhrUmX9dGM,22835
|
|
22
22
|
dashscope/api_entities/encryption.py,sha256=rUCZx3wwVvS5oyKXEeWgyWPxM8Y5d4AaVdgxLhizBqA,5517
|
|
23
23
|
dashscope/api_entities/http_request.py,sha256=MTxYsbkK8oYWDp8ZPjrkdY9YbnQ9SEIy87riyJidMXo,16484
|
|
24
24
|
dashscope/api_entities/websocket_request.py,sha256=PS0FU854-HjTbKa68f4GHa7-noFRMzKySJGfPkrrBjw,16146
|
|
@@ -46,7 +46,7 @@ dashscope/audio/tts/__init__.py,sha256=xYpMFseUZGgqgj_70zcX2VsLv-L7qxJ3d-bbdj_hO
|
|
|
46
46
|
dashscope/audio/tts/speech_synthesizer.py,sha256=vD1xQV-rew8qAsIaAGH5amsNtB0SqdtNhVHhJHGQ-xk,7622
|
|
47
47
|
dashscope/audio/tts_v2/__init__.py,sha256=me9a3_7KsHQxcJ8hx4SeKlY1e_ThHVvGMw7Yn0uoscM,333
|
|
48
48
|
dashscope/audio/tts_v2/enrollment.py,sha256=-nrlywYSOP73Bm9ETTSxNnlp-B8ezJcUmd59mVvyvgk,6361
|
|
49
|
-
dashscope/audio/tts_v2/speech_synthesizer.py,sha256=
|
|
49
|
+
dashscope/audio/tts_v2/speech_synthesizer.py,sha256=p764P4TYwLkvvPCpA4VnFwlNbIJbuNbp2d9mxgni7Ws,22047
|
|
50
50
|
dashscope/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
51
51
|
dashscope/client/base_api.py,sha256=znAJ65DeHiFw1H7FWK0YrkLz1CoNcyqUxF8EJ3gujeY,52523
|
|
52
52
|
dashscope/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -65,17 +65,18 @@ dashscope/customize/finetunes.py,sha256=AL_kGTJXMvM2ej-EKsLLd1dUphPQdVTefFVCSVH-
|
|
|
65
65
|
dashscope/embeddings/__init__.py,sha256=XQ7vKr8oZM2CmdOduE53BWy6_Qpn9xUPkma64yw8Gws,291
|
|
66
66
|
dashscope/embeddings/batch_text_embedding.py,sha256=lVhvTS8McYfXuqt_8CmmhA6bPqD0nrGv965kjYG_j0E,8842
|
|
67
67
|
dashscope/embeddings/batch_text_embedding_response.py,sha256=ZfkJMUq8GRsFA6XUTsiAsIySqGJH-VPi2P9Ba1KTU-s,2056
|
|
68
|
-
dashscope/embeddings/multimodal_embedding.py,sha256=
|
|
68
|
+
dashscope/embeddings/multimodal_embedding.py,sha256=PEF7DmtE5cbrXw4k3WQcfmsBKaAY3CTIoei3SyhOl34,6774
|
|
69
69
|
dashscope/embeddings/text_embedding.py,sha256=2MPEyMB99xueDbvFg9kKAe8bgHMDEaFLaFa6GzDWDHg,2108
|
|
70
70
|
dashscope/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
71
71
|
dashscope/io/input_output.py,sha256=0aXrRJFo1ZqYm_AJWR_w88O4-Btn9np2zUhrrUdBdfw,3992
|
|
72
|
-
dashscope/multimodal/__init__.py,sha256=
|
|
72
|
+
dashscope/multimodal/__init__.py,sha256=fyqeolbDLWVn5wSpPZ3nAOnUBRF9k6mlsy6dCmgjPvI,533
|
|
73
73
|
dashscope/multimodal/dialog_state.py,sha256=CtOdfGWhq0ePG3bc8-7inhespETtPD4QDli1513hd1A,1522
|
|
74
74
|
dashscope/multimodal/multimodal_constants.py,sha256=z_QVq01E43FAqKQnDu9vdf89d1zuYlWyANewWTEXVJM,1282
|
|
75
75
|
dashscope/multimodal/multimodal_dialog.py,sha256=HymlaQYp7SgJdoKbT27SNiviyRRoM91zklNBwTHmm1Q,23939
|
|
76
76
|
dashscope/multimodal/multimodal_request_params.py,sha256=Lbxf_kLnFUkhty8AU9wL7ws9tYbmhHPVmsiXLdynlJg,8402
|
|
77
|
-
dashscope/multimodal/tingwu/__init__.py,sha256=
|
|
77
|
+
dashscope/multimodal/tingwu/__init__.py,sha256=Gi9GEM0bdeJlZpvyksSeHOc2--_tG5aF6QAx6TAS2fE,225
|
|
78
78
|
dashscope/multimodal/tingwu/tingwu.py,sha256=01d-QOeuB1QmRhiZqbXJ8pHoGqT0C-xZTjIs_ZBXOyw,2613
|
|
79
|
+
dashscope/multimodal/tingwu/tingwu_realtime.py,sha256=oBeqrZit3uBZHuyI7m9VILz2qaqJRMO0-Nm2eJ5Q63g,20215
|
|
79
80
|
dashscope/nlp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
80
81
|
dashscope/nlp/understanding.py,sha256=00ado-ibYEzBRT0DgKGd3bohQDNW73xnFhJ_1aa87lw,2880
|
|
81
82
|
dashscope/protocol/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -99,9 +100,9 @@ dashscope/tokenizers/tokenizer.py,sha256=3FQVDvMNkCW9ccYeJdjrd_PIMMD3Xv7aNZkaYOE
|
|
|
99
100
|
dashscope/tokenizers/tokenizer_base.py,sha256=5EJIFuizMWESEmLmbd38yJnfeHmPnzZPwsO4aOGjpl4,707
|
|
100
101
|
dashscope/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
101
102
|
dashscope/utils/oss_utils.py,sha256=aZIHlMN2JOfVw6kp0SVrMw_N1MfoTcR_-wiRbJ7DgHw,7501
|
|
102
|
-
dashscope-1.24.
|
|
103
|
-
dashscope-1.24.
|
|
104
|
-
dashscope-1.24.
|
|
105
|
-
dashscope-1.24.
|
|
106
|
-
dashscope-1.24.
|
|
107
|
-
dashscope-1.24.
|
|
103
|
+
dashscope-1.24.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
104
|
+
dashscope-1.24.5.dist-info/METADATA,sha256=eRxoK1TphMD4hn-vUM3aPMtaCsvCzstnDe-QPH9A4Q0,7146
|
|
105
|
+
dashscope-1.24.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
106
|
+
dashscope-1.24.5.dist-info/entry_points.txt,sha256=e9C3sOf9zDYL0O5ROEGX6FT8w-QK_kaGRWmPZDHAFys,49
|
|
107
|
+
dashscope-1.24.5.dist-info/top_level.txt,sha256=woqavFJK9zas5xTqynmALqOtlafghjsk63Xk86powTU,10
|
|
108
|
+
dashscope-1.24.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|