dashscope 1.8.0__py3-none-any.whl → 1.25.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dashscope/__init__.py +61 -14
- dashscope/aigc/__init__.py +10 -3
- dashscope/aigc/chat_completion.py +282 -0
- dashscope/aigc/code_generation.py +145 -0
- dashscope/aigc/conversation.py +71 -12
- dashscope/aigc/generation.py +288 -16
- dashscope/aigc/image_synthesis.py +473 -31
- dashscope/aigc/multimodal_conversation.py +299 -14
- dashscope/aigc/video_synthesis.py +610 -0
- dashscope/api_entities/aiohttp_request.py +8 -5
- dashscope/api_entities/api_request_data.py +4 -2
- dashscope/api_entities/api_request_factory.py +68 -20
- dashscope/api_entities/base_request.py +20 -3
- dashscope/api_entities/chat_completion_types.py +344 -0
- dashscope/api_entities/dashscope_response.py +243 -15
- dashscope/api_entities/encryption.py +179 -0
- dashscope/api_entities/http_request.py +216 -62
- dashscope/api_entities/websocket_request.py +43 -34
- dashscope/app/__init__.py +5 -0
- dashscope/app/application.py +203 -0
- dashscope/app/application_response.py +246 -0
- dashscope/assistants/__init__.py +16 -0
- dashscope/assistants/assistant_types.py +175 -0
- dashscope/assistants/assistants.py +311 -0
- dashscope/assistants/files.py +197 -0
- dashscope/audio/__init__.py +4 -2
- dashscope/audio/asr/__init__.py +17 -1
- dashscope/audio/asr/asr_phrase_manager.py +203 -0
- dashscope/audio/asr/recognition.py +167 -27
- dashscope/audio/asr/transcription.py +107 -14
- dashscope/audio/asr/translation_recognizer.py +1006 -0
- dashscope/audio/asr/vocabulary.py +177 -0
- dashscope/audio/qwen_asr/__init__.py +7 -0
- dashscope/audio/qwen_asr/qwen_transcription.py +189 -0
- dashscope/audio/qwen_omni/__init__.py +11 -0
- dashscope/audio/qwen_omni/omni_realtime.py +524 -0
- dashscope/audio/qwen_tts/__init__.py +5 -0
- dashscope/audio/qwen_tts/speech_synthesizer.py +77 -0
- dashscope/audio/qwen_tts_realtime/__init__.py +10 -0
- dashscope/audio/qwen_tts_realtime/qwen_tts_realtime.py +355 -0
- dashscope/audio/tts/__init__.py +2 -0
- dashscope/audio/tts/speech_synthesizer.py +5 -0
- dashscope/audio/tts_v2/__init__.py +12 -0
- dashscope/audio/tts_v2/enrollment.py +179 -0
- dashscope/audio/tts_v2/speech_synthesizer.py +886 -0
- dashscope/cli.py +157 -37
- dashscope/client/base_api.py +652 -87
- dashscope/common/api_key.py +2 -0
- dashscope/common/base_type.py +135 -0
- dashscope/common/constants.py +13 -16
- dashscope/common/env.py +2 -0
- dashscope/common/error.py +58 -22
- dashscope/common/logging.py +2 -0
- dashscope/common/message_manager.py +2 -0
- dashscope/common/utils.py +276 -46
- dashscope/customize/__init__.py +0 -0
- dashscope/customize/customize_types.py +192 -0
- dashscope/customize/deployments.py +146 -0
- dashscope/customize/finetunes.py +234 -0
- dashscope/embeddings/__init__.py +5 -1
- dashscope/embeddings/batch_text_embedding.py +208 -0
- dashscope/embeddings/batch_text_embedding_response.py +65 -0
- dashscope/embeddings/multimodal_embedding.py +118 -10
- dashscope/embeddings/text_embedding.py +13 -1
- dashscope/{file.py → files.py} +19 -4
- dashscope/io/input_output.py +2 -0
- dashscope/model.py +11 -2
- dashscope/models.py +43 -0
- dashscope/multimodal/__init__.py +20 -0
- dashscope/multimodal/dialog_state.py +56 -0
- dashscope/multimodal/multimodal_constants.py +28 -0
- dashscope/multimodal/multimodal_dialog.py +648 -0
- dashscope/multimodal/multimodal_request_params.py +313 -0
- dashscope/multimodal/tingwu/__init__.py +10 -0
- dashscope/multimodal/tingwu/tingwu.py +80 -0
- dashscope/multimodal/tingwu/tingwu_realtime.py +579 -0
- dashscope/nlp/__init__.py +0 -0
- dashscope/nlp/understanding.py +64 -0
- dashscope/protocol/websocket.py +3 -0
- dashscope/rerank/__init__.py +0 -0
- dashscope/rerank/text_rerank.py +69 -0
- dashscope/resources/qwen.tiktoken +151643 -0
- dashscope/threads/__init__.py +26 -0
- dashscope/threads/messages/__init__.py +0 -0
- dashscope/threads/messages/files.py +113 -0
- dashscope/threads/messages/messages.py +220 -0
- dashscope/threads/runs/__init__.py +0 -0
- dashscope/threads/runs/runs.py +501 -0
- dashscope/threads/runs/steps.py +112 -0
- dashscope/threads/thread_types.py +665 -0
- dashscope/threads/threads.py +212 -0
- dashscope/tokenizers/__init__.py +7 -0
- dashscope/tokenizers/qwen_tokenizer.py +111 -0
- dashscope/tokenizers/tokenization.py +125 -0
- dashscope/tokenizers/tokenizer.py +45 -0
- dashscope/tokenizers/tokenizer_base.py +32 -0
- dashscope/utils/__init__.py +0 -0
- dashscope/utils/message_utils.py +838 -0
- dashscope/utils/oss_utils.py +243 -0
- dashscope/utils/param_utils.py +29 -0
- dashscope/version.py +3 -1
- {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/METADATA +53 -50
- dashscope-1.25.6.dist-info/RECORD +112 -0
- {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/WHEEL +1 -1
- {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/entry_points.txt +0 -1
- {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info/licenses}/LICENSE +2 -4
- dashscope/deployment.py +0 -129
- dashscope/finetune.py +0 -149
- dashscope-1.8.0.dist-info/RECORD +0 -49
- {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import platform
|
|
5
|
+
import threading
|
|
6
|
+
import time
|
|
7
|
+
import uuid
|
|
8
|
+
from enum import Enum, unique
|
|
9
|
+
|
|
10
|
+
import dashscope
|
|
11
|
+
import websocket
|
|
12
|
+
from dashscope.common.error import InputRequired, ModelRequired
|
|
13
|
+
from dashscope.common.logging import logger
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class QwenTtsRealtimeCallback:
    """
    An interface that defines callback methods for getting qwen-tts-realtime
    results.

    Derive from this class and override the methods you need. Every default
    implementation is a no-op.
    """
    def on_open(self) -> None:
        """Called once the websocket connection has been established."""
        pass

    def on_close(self, close_status_code, close_msg) -> None:
        """Called when the websocket connection is closed.

        Parameters
        ----------
        close_status_code:
            Close status code reported by the websocket layer.
        close_msg:
            Close message reported by the websocket layer.
        """
        pass

    def on_event(self, message: dict) -> None:
        """Called for every text message received from the server.

        Parameters
        ----------
        message: dict
            The server event after JSON decoding (not the raw string).
        """
        pass
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@unique
class AudioFormat(Enum):
    """Output audio formats for the realtime TTS session.

    Each member value is a tuple of
    (format, sample_rate, channels, bit_rate, format_str).
    """
    PCM_24000HZ_MONO_16BIT = ('pcm', 24000, 'mono', '16bit', 'pcm16')

    def __init__(self, format, sample_rate, channels, bit_rate, format_str):
        # Unpack the member's value tuple onto named attributes in one step.
        (self.format, self.sample_rate, self.channels, self.bit_rate,
         self.format_str) = (format, sample_rate, channels, bit_rate,
                             format_str)

    def __repr__(self):
        # The short format name doubles as the repr.
        return self.format_str

    def __str__(self):
        template = ('{} with {}Hz sample rate, {} channel, '
                    '{} bit rate: {}')
        return template.format(self.format.upper(), self.sample_rate,
                               self.channels, self.bit_rate, self.format_str)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class QwenTtsRealtime:
    """Websocket client for the Qwen TTS realtime API.

    Typical call order: connect() -> update_session() -> append_text()
    (repeatedly) -> commit() / finish() -> close(). Server events are
    delivered to the optional callback from the websocket thread.
    """
    def __init__(
        self,
        model,
        headers=None,
        callback: QwenTtsRealtimeCallback = None,
        workspace=None,
        url=None,
        additional_params=None,
    ):
        """
        Qwen Tts Realtime SDK
        Parameters:
        -----------
        model: str
            Model name.
        headers: Dict
            User-defined headers.
        callback: QwenTtsRealtimeCallback
            Callback to receive real-time tts events. May be None, in which
            case events are only recorded on this object.
        workspace: str
            Dashscope workspace ID.
        url: str
            Dashscope WebSocket URL. '?model=<model>' is appended to it.
        additional_params: Dict
            Additional parameters for the Dashscope API.
            NOTE(review): accepted but not used anywhere in this class.

        Raises
        ------
        ModelRequired
            If model is None.
        """

        if model is None:
            raise ModelRequired('Model is required!')
        if url is None:
            url = f'wss://dashscope.aliyuncs.com/api-ws/v1/realtime?model={model}'
        else:
            url = f'{url}?model={model}'
        self.url = url
        # The API key is captured once, at construction time.
        self.apikey = dashscope.api_key
        self.user_headers = headers
        self.user_workspace = workspace
        self.model = model
        self.config = {}
        self.callback = callback
        self.ws = None
        self.session_id = None
        self.last_message = None
        self.last_response_id = None
        # Millisecond timestamp of the first append_text() call.
        self.last_first_text_time = None
        # Milliseconds between the first append_text() call and the first
        # 'response.audio.delta' event. Neither value is ever reset.
        self.last_first_audio_delay = None
        self.metrics = []

    def _generate_event_id(self):
        '''
        generate random event id: event_xxxx
        '''
        return 'event_' + uuid.uuid4().hex

    def _get_websocket_header(self, ):
        '''
        build the websocket handshake headers

        SDK-generated 'user-agent' and 'Authorization' override user headers
        with the same key; the workspace header is added last when set.
        '''
        ua = 'dashscope/%s; python/%s; platform/%s; processor/%s' % (
            # NOTE(review): hard-coded SDK version; presumably it should
            # track the installed package version -- confirm.
            '1.18.0',  # dashscope version
            platform.python_version(),
            platform.platform(),
            platform.processor(),
        )
        headers = {
            'user-agent': ua,
            'Authorization': 'bearer ' + self.apikey,
        }
        if self.user_headers:
            headers = {**self.user_headers, **headers}
        if self.user_workspace:
            headers = {
                **headers,
                'X-DashScope-WorkSpace': self.user_workspace,
            }
        return headers

    def connect(self) -> None:
        '''
        connect to server and wait until the websocket is open

        Runs the websocket loop on a daemon thread and polls for up to 5
        seconds. Calls callback.on_open() on success when a callback is set.

        Raises
        ------
        TimeoutError
            If the connection is not established within 5 seconds.
        '''
        self.ws = websocket.WebSocketApp(
            self.url,
            header=self._get_websocket_header(),
            on_message=self.on_message,
            on_error=self.on_error,
            on_close=self.on_close,
        )
        self.thread = threading.Thread(target=self.ws.run_forever)
        self.thread.daemon = True
        self.thread.start()
        timeout = 5  # maximum time to wait, in seconds
        start_time = time.time()
        while (not (self.ws.sock and self.ws.sock.connected)
               and (time.time() - start_time) < timeout):
            time.sleep(0.1)  # sleep briefly to avoid busy polling
        if not (self.ws.sock and self.ws.sock.connected):
            raise TimeoutError(
                'websocket connection could not established within 5s. '
                'Please check your network connection, firewall settings, or server status.'
            )
        # callback defaults to None in __init__, so it must be guarded.
        if self.callback is not None:
            self.callback.on_open()

    def __send_str(self, data: str, enable_log: bool = True):
        '''
        send a text frame to the server, optionally logging it at debug level
        '''
        if enable_log:
            logger.debug('[qwen tts realtime] send string: {}'.format(data))
        self.ws.send(data)

    def update_session(self,
                       voice: str,
                       response_format: AudioFormat = AudioFormat.
                       PCM_24000HZ_MONO_16BIT,
                       mode: str = 'server_commit',
                       sample_rate: int = None,
                       volume: int = None,
                       speech_rate: float = None,
                       audio_format: str = None,
                       pitch_rate: float = None,
                       bit_rate: int = None,
                       language_type: str = None,
                       enable_tn: bool = None,
                       **kwargs) -> None:
        '''
        update session configuration, should be used before create response

        Parameters
        ----------
        voice: str
            voice to be used in session
        response_format: AudioFormat
            output audio format
        mode: str
            response mode, server_commit or commit
        language_type: str
            language type for synthesized audio, default is 'auto'
        sample_rate: int
            sampleRate for tts, range [8000,16000,22050,24000,44100,48000] default is 24000
        volume: int
            volume for tts, range [0,100] default is 50
        speech_rate: float
            speech_rate for tts, range [0.5~2.0] default is 1.0
        audio_format: str
            format for tts, support mp3,wav,pcm,opus, default is 'pcm'
        pitch_rate: float
            pitch_rate for tts, range [0.5~2.0] default is 1.0
        bit_rate: int
            bit_rate for tts, support 6~510,default is 128kbps. only work on format: opus/mp3
        enable_tn: bool
            enable text normalization for tts, default is None
        **kwargs:
            extra session fields, merged last so they override the above
        '''
        self.config = {
            'voice': voice,
            'mode': mode,
            'response_format': response_format.format,
            'sample_rate': response_format.sample_rate,
        }
        # Optional settings are sent only when given. 'sample_rate' and
        # 'audio_format' override the values taken from response_format.
        optional_settings = (
            ('sample_rate', sample_rate),
            ('volume', volume),
            ('speech_rate', speech_rate),
            ('response_format', audio_format),
            ('pitch_rate', pitch_rate),
            ('bit_rate', bit_rate),
            ('enable_tn', enable_tn),
            ('language_type', language_type),
        )
        for key, value in optional_settings:
            if value is not None:
                self.config[key] = value
        self.config.update(kwargs)
        self.__send_str(
            json.dumps({
                'event_id': self._generate_event_id(),
                'type': 'session.update',
                'session': self.config
            }))

    def append_text(self, text: str) -> None:
        '''
        send text

        Parameters
        ----------
        text: str
            text to send
        '''
        self.__send_str(
            json.dumps({
                'event_id': self._generate_event_id(),
                'type': 'input_text_buffer.append',
                'text': text
            }))
        # Remember when the first text was sent, for the first-audio metric.
        if self.last_first_text_time is None:
            self.last_first_text_time = time.time() * 1000

    def commit(self, ) -> None:
        '''
        commit the text sent before, create response and start synthesis audio.
        '''
        self.__send_str(
            json.dumps({
                'event_id': self._generate_event_id(),
                'type': 'input_text_buffer.commit'
            }))

    def clear_appended_text(self, ) -> None:
        '''
        clear the text sent to server before.
        '''
        self.__send_str(
            json.dumps({
                'event_id': self._generate_event_id(),
                'type': 'input_text_buffer.clear'
            }))

    def cancel_response(self, ) -> None:
        '''
        cancel the current response
        '''
        self.__send_str(
            json.dumps({
                'event_id': self._generate_event_id(),
                'type': 'response.cancel'
            }))

    def send_raw(self, raw_data: str) -> None:
        '''
        send raw data to server
        '''
        self.__send_str(raw_data)

    def finish(self, ) -> None:
        '''
        finish input text stream, server will synthesis all text in buffer and close the connection
        '''
        self.__send_str(
            json.dumps({
                'event_id': self._generate_event_id(),
                'type': 'session.finish'
            }))

    def close(self, ) -> None:
        '''
        close the connection to server; does nothing if connect() was never called
        '''
        if self.ws is not None:
            self.ws.close()

    # Callback invoked by the websocket layer for every incoming message.
    def on_message(self, ws, message):
        '''
        decode a server event, forward it to the callback and update state

        Raises
        ------
        Exception
            If a text message is not valid JSON.
        '''
        if isinstance(message, str):
            logger.debug('[qwen tts realtime] receive string {}'.format(
                message[:1024]))
            try:
                json_data = json.loads(message)
            except json.JSONDecodeError as err:
                logger.error('Failed to parse message as JSON.')
                raise Exception('Failed to parse message as JSON.') from err
            self.last_message = json_data
            if self.callback is not None:
                self.callback.on_event(json_data)
            # Read the type from the parsed object. A substring test on the
            # raw text also matches 'type' inside values.
            event_type = (json_data.get('type')
                          if isinstance(json_data, dict) else None)
            if event_type == 'session.created':
                self.session_id = json_data['session']['id']
            elif event_type == 'response.created':
                self.last_response_id = json_data['response']['id']
            elif event_type == 'response.audio.delta':
                if self.last_first_text_time and self.last_first_audio_delay is None:
                    self.last_first_audio_delay = (
                        time.time() * 1000 - self.last_first_text_time)
            elif event_type == 'response.done':
                logger.debug(
                    '[Metric] response: {}, first audio delay: {}'.format(
                        self.last_response_id, self.last_first_audio_delay))
        elif isinstance(message, (bytes, bytearray)):
            # Binary frames are not expected; they are logged and dropped.
            logger.error(
                'should not receive binary message in qwen tts realtime api')
            logger.debug('[qwen tts realtime] receive binary {} bytes'.format(
                len(message)))

    def on_close(self, ws, close_status_code, close_msg):
        '''
        log the close event and forward it to the callback when one is set
        '''
        logger.debug(
            '[qwen tts realtime] connection closed with code {} and message {}'.format(
                close_status_code, close_msg))
        if self.callback is not None:
            self.callback.on_close(close_status_code, close_msg)

    # Callback invoked by the websocket layer when an error occurs.
    def on_error(self, ws, error):
        '''
        log the websocket error and re-raise it as a generic Exception
        '''
        logger.error('websocket closed due to {}'.format(error))
        raise Exception(f'websocket closed due to {error}')

    def get_session_id(self):
        '''
        return the id from the 'session.created' event, or None
        '''
        return self.session_id

    def get_last_message(self):
        '''
        return the most recent JSON-decoded server event, or None
        '''
        return self.last_message

    def get_last_response_id(self):
        '''
        return the id from the latest 'response.created' event, or None
        '''
        return self.last_response_id

    def get_first_audio_delay(self):
        '''
        return milliseconds from the first append_text() to the first audio delta, or None
        '''
        return self.last_first_audio_delay
|
dashscope/audio/tts/__init__.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
2
|
+
|
|
1
3
|
from http import HTTPStatus
|
|
2
4
|
from typing import Dict, List
|
|
3
5
|
|
|
@@ -94,6 +96,7 @@ class SpeechSynthesizer(BaseApi):
|
|
|
94
96
|
model: str,
|
|
95
97
|
text: str,
|
|
96
98
|
callback: ResultCallback = None,
|
|
99
|
+
workspace: str = None,
|
|
97
100
|
**kwargs) -> SpeechSynthesisResult:
|
|
98
101
|
"""Convert text to speech synchronously.
|
|
99
102
|
|
|
@@ -102,6 +105,7 @@ class SpeechSynthesizer(BaseApi):
|
|
|
102
105
|
text (str): Text content used for speech synthesis.
|
|
103
106
|
callback (ResultCallback): A callback that returns
|
|
104
107
|
speech synthesis results.
|
|
108
|
+
workspace (str): The dashscope workspace id.
|
|
105
109
|
**kwargs:
|
|
106
110
|
format(str, `optional`): Audio encoding format,
|
|
107
111
|
such as pcm wav mp3, default is wav.
|
|
@@ -135,6 +139,7 @@ class SpeechSynthesizer(BaseApi):
|
|
|
135
139
|
input={'text': text},
|
|
136
140
|
stream=True,
|
|
137
141
|
api_protocol=ApiProtocol.WEBSOCKET,
|
|
142
|
+
workspace=workspace,
|
|
138
143
|
**kwargs)
|
|
139
144
|
|
|
140
145
|
if _callback is not None:
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
2
|
+
|
|
3
|
+
from .enrollment import VoiceEnrollmentException, VoiceEnrollmentService
|
|
4
|
+
from .speech_synthesizer import (AudioFormat, ResultCallback,
|
|
5
|
+
SpeechSynthesizer,
|
|
6
|
+
SpeechSynthesizerObjectPool)
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
'SpeechSynthesizer', 'ResultCallback', 'AudioFormat',
|
|
10
|
+
'VoiceEnrollmentException', 'VoiceEnrollmentService',
|
|
11
|
+
'SpeechSynthesizerObjectPool'
|
|
12
|
+
]
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import time
|
|
5
|
+
from typing import List
|
|
6
|
+
|
|
7
|
+
import aiohttp
|
|
8
|
+
|
|
9
|
+
from dashscope.client.base_api import BaseApi
|
|
10
|
+
from dashscope.common.constants import ApiProtocol, HTTPMethod
|
|
11
|
+
from dashscope.common.logging import logger
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class VoiceEnrollmentException(Exception):
    """Error raised for a failed voice enrollment request.

    Parameters
    ----------
    request_id: str
        Id of the request that failed.
    status_code: int
        Status code of the response.
    code: str
        Error code of the response.
    error_message: str
        Error message of the response.
    """
    def __init__(self, request_id: str, status_code: int, code: str,
                 error_message: str) -> None:
        # Forward the fields to Exception so that args is populated; repr(),
        # copy and pickle all rely on it.
        super().__init__(request_id, status_code, code, error_message)
        self._request_id = request_id
        self._status_code = status_code
        self._code = code
        self._error_message = error_message

    def __str__(self):
        return f'Request: {self._request_id}, Status Code: {self._status_code}, Code: {self._code}, Error Message: {self._error_message}'
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class VoiceEnrollmentService(BaseApi):
    '''
    API for voice clone service
    '''
    # Maximum number of retries after a timeout / connection error.
    MAX_QUERY_TRY_COUNT = 3

    def __init__(self,
                 api_key=None,
                 workspace=None,
                 model=None,
                 **kwargs) -> None:
        '''
        param: api_key api key passed to every request
        param: workspace workspace passed to every request
        param: model model name, defaults to 'voice-enrollment' when None
        param: kwargs extra keyword arguments forwarded to every request
        '''
        super().__init__()
        self._api_key = api_key
        self._workspace = workspace
        self._kwargs = kwargs
        self._last_request_id = None
        self.model = model
        if self.model is None:
            self.model = 'voice-enrollment'

    def __call_with_input(self, input):
        '''
        POST one customization request and return the response

        Retries up to MAX_QUERY_TRY_COUNT times, sleeping 2 seconds between
        attempts, on asyncio.TimeoutError / aiohttp.ClientConnectorError.
        When retries are exhausted the last error is re-raised.
        '''
        try_count = 0
        while True:
            try:
                response = super().call(model=self.model,
                                        task_group='audio',
                                        task='tts',
                                        function='customization',
                                        input=input,
                                        api_protocol=ApiProtocol.HTTP,
                                        http_method=HTTPMethod.POST,
                                        api_key=self._api_key,
                                        workspace=self._workspace,
                                        **self._kwargs)
                break
            except (asyncio.TimeoutError, aiohttp.ClientConnectorError) as e:
                logger.error(e)
                try_count += 1
                if try_count > VoiceEnrollmentService.MAX_QUERY_TRY_COUNT:
                    # Out of retries: surface the real error; there is no
                    # response to return.
                    raise
                time.sleep(2)
        # A %s placeholder is required for the response to reach the log.
        logger.debug('>>>>recv %s', response)
        return response

    def _ensure_success(self, response):
        '''
        record the request id, raise VoiceEnrollmentException unless status is 200
        '''
        self._last_request_id = response.request_id
        if response.status_code != 200:
            raise VoiceEnrollmentException(response.request_id,
                                           response.status_code,
                                           response.code, response.message)
        return response

    def create_voice(self, target_model: str, prefix: str, url: str, language_hints: List[str] = None) -> str:
        '''
        Create a new cloned voice.
        param: target_model speech synthesis model version the cloned voice is for
        param: prefix custom voice prefix; only digits and lowercase letters, fewer than ten characters
        param: url url of the audio file used for cloning
        param: language_hints target language(s) of the cloned voice
        return: voice_id
        raises: VoiceEnrollmentException if the response status is not 200
        '''

        input_params = {
            'action': 'create_voice',
            'target_model': target_model,
            'prefix': prefix,
            'url': url
        }
        if language_hints is not None:
            input_params['language_hints'] = language_hints
        response = self._ensure_success(
            self.__call_with_input(input_params))
        return response.output['voice_id']

    def list_voices(self,
                    prefix=None,
                    page_index: int = 0,
                    page_size: int = 10) -> List[dict]:
        '''
        List the voices that have been created.
        param: prefix optional voice prefix; sent only when truthy
        param: page_index index of the page to query
        param: page_size size of the page to query
        return: List[dict] voice list with each voice's id, creation time, modification time and status
        raises: VoiceEnrollmentException if the response status is not 200
        '''
        input_params = {'action': 'list_voice'}
        if prefix:
            input_params['prefix'] = prefix
        input_params['page_index'] = page_index
        input_params['page_size'] = page_size
        response = self._ensure_success(
            self.__call_with_input(input=input_params))
        return response.output['voice_list']

    def query_voice(self, voice_id: str) -> List[str]:
        '''
        Query a single voice.
        param: voice_id the voice to query
        return: the 'output' field of the response, unchanged
        NOTE(review): the annotation says List[str] and the old docstring said
        bytes (the enrollment audio); the actual shape is not visible here -- confirm.
        raises: VoiceEnrollmentException if the response status is not 200
        '''
        response = self._ensure_success(
            self.__call_with_input(input={
                'action': 'query_voice',
                'voice_id': voice_id,
            }, ))
        return response.output

    def update_voice(self, voice_id: str, url: str) -> None:
        '''
        Update a voice.
        param: voice_id id of the voice
        param: url url of the audio file used for cloning
        raises: VoiceEnrollmentException if the response status is not 200
        '''
        self._ensure_success(
            self.__call_with_input(input={
                'action': 'update_voice',
                'voice_id': voice_id,
                'url': url,
            }, ))

    def delete_voice(self, voice_id: str) -> None:
        '''
        Delete a voice.
        param: voice_id the voice to delete
        raises: VoiceEnrollmentException if the response status is not 200
        '''
        self._ensure_success(
            self.__call_with_input(input={
                'action': 'delete_voice',
                'voice_id': voice_id,
            }, ))

    def get_last_request_id(self):
        '''
        return the request id of the most recent response, or None
        '''
        return self._last_request_id
|