dashscope 1.20.1-py3-none-any.whl → 1.20.2-py3-none-any.whl
- dashscope/aigc/image_synthesis.py +1 -1
- dashscope/audio/asr/recognition.py +25 -6
- dashscope/audio/tts_v2/speech_synthesizer.py +60 -16
- dashscope/version.py +1 -1
- {dashscope-1.20.1.dist-info → dashscope-1.20.2.dist-info}/METADATA +1 -1
- {dashscope-1.20.1.dist-info → dashscope-1.20.2.dist-info}/RECORD +10 -10
- {dashscope-1.20.1.dist-info → dashscope-1.20.2.dist-info}/LICENSE +0 -0
- {dashscope-1.20.1.dist-info → dashscope-1.20.2.dist-info}/WHEEL +0 -0
- {dashscope-1.20.1.dist-info → dashscope-1.20.2.dist-info}/entry_points.txt +0 -0
- {dashscope-1.20.1.dist-info → dashscope-1.20.2.dist-info}/top_level.txt +0 -0
dashscope/aigc/image_synthesis.py CHANGED

@@ -127,7 +127,7 @@ class ImageSynthesis(BaseAsyncApi):
         if sketch_image_url is not None and sketch_image_url:
             input['sketch_image_url'] = sketch_image_url
         if ref_img is not None and ref_img:
-            input['ref_img']
+            input['ref_img'] = ref_img
         if extra_input is not None and extra_input:
             input = {**input, **extra_input}
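The only code change in image_synthesis.py is the assignment above: 1.20.1 had the bare subscript input['ref_img'], which assigns nothing (and raises KeyError when the key is absent), so a reference image passed by the caller never reached the request payload. A minimal sketch of the fixed payload construction, using a hypothetical build_input helper rather than the SDK's real internals:

    # Hypothetical helper mirroring the fixed payload construction in 1.20.2;
    # this is not the SDK's actual code path.
    def build_input(prompt: str, ref_img: str = None, extra_input: dict = None) -> dict:
        payload = {'prompt': prompt}
        if ref_img is not None and ref_img:
            payload['ref_img'] = ref_img          # 1.20.1 only evaluated payload['ref_img']
        if extra_input is not None and extra_input:
            payload = {**payload, **extra_input}  # later unpacking wins on key conflicts
        return payload

    print(build_input('a watercolor cat', ref_img='https://example.com/cat.png'))
    # {'prompt': 'a watercolor cat', 'ref_img': 'https://example.com/cat.png'}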
dashscope/audio/asr/recognition.py CHANGED

@@ -1,5 +1,6 @@
 import json
 import os
+import time
 import threading
 from http import HTTPStatus
 from threading import Timer
@@ -157,6 +158,8 @@ class Recognition(BaseApi):
         self._silence_timer = None
         self._kwargs = kwargs
         self._workspace = workspace
+        self._start_stream_timestamp = -1
+        self._first_package_timestamp = -1

     def __del__(self):
         if self._running:
@@ -183,12 +186,18 @@ class Recognition(BaseApi):
                 else:
                     usage: Dict[str, Any] = None
                     useags: List[Any] = None
-                    if 'sentence' in part.output
-                    …
-                    …
-                    …
-                    …
-                    …
+                    if 'sentence' in part.output:
+                        if (self._first_package_timestamp < 0):
+                            self._first_package_timestamp = time.time()*1000
+                            logger.debug('first package delay {}'.format(self._first_package_timestamp - self._start_stream_timestamp))
+                        sentence = part.output['sentence']
+                        logger.debug('Recv Result :{}, isEnd: {}'.format(sentence, RecognitionResult.is_sentence_end(sentence)))
+                        if part.usage is not None:
+                            usage = {
+                                'end_time': part.output['sentence']['end_time'],
+                                'usage': part.usage
+                            }
+                            useags = [usage]

                 self._callback.on_event(
                     RecognitionResult(
@@ -260,6 +269,8 @@ class Recognition(BaseApi):
         if self._running:
             raise InvalidParameter('Speech recognition has started.')

+        self._start_stream_timestamp = -1
+        self._first_package_timestamp = -1
         self._phrase = phrase_id
         self._kwargs.update(**kwargs)
         self._recognition_once = False
@@ -307,6 +318,7 @@ class Recognition(BaseApi):
         Returns:
             RecognitionResult: The result of speech recognition.
         """
+        self._start_stream_timestamp = time.time()*1000
         if self._running:
             raise InvalidParameter('Speech recognition has been called.')

@@ -350,7 +362,11 @@ class Recognition(BaseApi):
             for part in responses:
                 if part.status_code == HTTPStatus.OK:
                     if 'sentence' in part.output:
+                        if (self._first_package_timestamp < 0):
+                            self._first_package_timestamp = time.time()*1000
+                            logger.debug('first package delay {}'.format(self._first_package_timestamp - self._start_stream_timestamp))
                         sentence = part.output['sentence']
+                        logger.debug('Recv Result :{}, isEnd: {}'.format(sentence, RecognitionResult.is_sentence_end(sentence)))
                         if RecognitionResult.is_sentence_end(sentence):
                             sentences.append(sentence)

@@ -408,6 +424,9 @@ class Recognition(BaseApi):
         if self._running is False:
             raise InvalidParameter('Speech recognition has stopped.')

+        if (self._start_stream_timestamp < 0):
+            self._start_stream_timestamp = time.time() * 1000
+        logger.debug('send_audio_frame: {}'.format(len(buffer)))
         self._stream_data = self._stream_data + [buffer]

     def _tidy_kwargs(self):
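Taken together, the recognition.py changes add simple first-package latency instrumentation: _start_stream_timestamp is stamped in milliseconds when streaming starts or when the first audio frame is sent, _first_package_timestamp when the first response containing a 'sentence' arrives, and the difference is logged at debug level. A standalone sketch of the same pattern, independent of the SDK classes:

    import logging
    import time

    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger('latency-sketch')

    class FirstPackageTimer:
        """Millisecond timestamps, mirroring the -1 'unset' convention in the diff."""
        def __init__(self):
            self.start_ms = -1
            self.first_package_ms = -1

        def on_send_audio(self):
            # stamp the start only once, on the first outgoing frame
            if self.start_ms < 0:
                self.start_ms = time.time() * 1000

        def on_result(self):
            # stamp the first incoming result and log the delay
            if self.first_package_ms < 0:
                self.first_package_ms = time.time() * 1000
                logger.debug('first package delay %.1f ms',
                             self.first_package_ms - self.start_ms)

    timer = FirstPackageTimer()
    timer.on_send_audio()
    time.sleep(0.05)      # stand-in for network / recognition latency
    timer.on_result()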
dashscope/audio/tts_v2/speech_synthesizer.py CHANGED

@@ -5,9 +5,8 @@ import time
 import uuid
 from enum import Enum, unique

-import websocket
-
 import dashscope
+import websocket
 from dashscope.common.error import InputRequired, InvalidTask, ModelRequired
 from dashscope.common.logging import logger
 from dashscope.protocol.websocket import (ACTION_KEY, EVENT_KEY, HEADER,
@@ -239,6 +238,14 @@ class SpeechSynthesizer:
         self.headers = headers
         self.workspace = workspace
         self.additional_params = additional_params
+        self.model = model
+        self.voice = voice
+        self.aformat = format.format
+        if (self.aformat == 'DEFAULT'):
+            self.aformat = 'mp3'
+        self.sample_rate = format.sample_rate
+        if (self.sample_rate == 0):
+            self.sample_rate = 22050

         self.request = Request(
             apikey=self.apikey,
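The constructor now also records the requested model, voice, audio format, and sample rate, falling back to mp3 and 22050 Hz when the format is the DEFAULT placeholder or reports a sample rate of 0; the stored sample rate is what the real-time-factor logging added later in this file divides by. A small sketch of that fallback, using a hypothetical resolve_format helper rather than the package's AudioFormat type:

    # Hypothetical stand-in for the format/sample-rate fallback added in __init__.
    def resolve_format(format_name: str, sample_rate: int) -> tuple:
        if format_name == 'DEFAULT':   # placeholder format -> assume mp3
            format_name = 'mp3'
        if sample_rate == 0:           # unspecified sample rate -> assume 22050 Hz
            sample_rate = 22050
        return format_name, sample_rate

    assert resolve_format('DEFAULT', 0) == ('mp3', 22050)
    assert resolve_format('pcm', 16000) == ('pcm', 16000)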
@@ -265,12 +272,18 @@ class SpeechSynthesizer:
         # since dashscope sdk will send first text in run-task
         if not self.callback:
             self.async_call = False
+        self._start_stream_timestamp = -1
+        self._first_package_timestamp = -1
+        self._recv_audio_length = 0

     def __send_str(self, data: str):
         logger.debug('>>>send {}'.format(data))
         self.ws.send(data)

     def __start_stream(self, ):
+        self._start_stream_timestamp = time.time() * 1000
+        self._first_package_timestamp = -1
+        self._recv_audio_length = 0
         if self.callback is None:
             raise InputRequired('callback is required!')
         # reset inner params
@@ -332,7 +345,7 @@ class SpeechSynthesizer:
         self.__submit_text(text)
         return None

-    def streaming_complete(self,
+    def streaming_complete(self, complete_timeout_millis=10000):
         """
         Synchronously stop the streaming input speech synthesis task.
         Wait for all remaining synthesized audio before returning
@@ -340,7 +353,9 @@ class SpeechSynthesizer:
         Parameters:
         -----------
         complete_timeout_millis: int
-            Throws TimeoutError exception if it times out.
+            Throws TimeoutError exception if it times out. If the timeout is not None
+            and greater than zero, it will wait for the corresponding number of
+            milliseconds; otherwise, it will wait indefinitely.
         """
         if not self._is_started:
             raise InvalidTask('speech synthesizer has not been started.')
@@ -348,27 +363,38 @@ class SpeechSynthesizer:
             raise InvalidTask('speech synthesizer task has stopped.')
         request = self.request.getFinishRequest()
         self.__send_str(request)
-        if not
-        …
-        …
-        …
+        if complete_timeout_millis is not None and complete_timeout_millis > 0:
+            if not self.complete_event.wait(timeout=complete_timeout_millis /
+                                            1000):
+                raise TimeoutError(
+                    'speech synthesizer wait for complete timeout {}ms'.format(
+                        complete_timeout_millis))
+        else:
+            self.complete_event.wait()
         self.close()
         self._stopped.set()
         self._is_started = False

-    def __waiting_for_complete(self):
-        if not self.complete_event.wait(timeout=
+    def __waiting_for_complete(self, timeout):
+        if not self.complete_event.wait(timeout=timeout / 1000):
             raise TimeoutError(
                 'speech synthesizer wait for complete timeout 10000ms')
         self.close()
         self._stopped.set()
         self._is_started = False

-    def async_streaming_complete(self):
+    def async_streaming_complete(self, complete_timeout_millis=10000):
         """
         Asynchronously stop the streaming input speech synthesis task, returns immediately.
         You need to listen and handle the STREAM_INPUT_TTS_EVENT_SYNTHESIS_COMPLETE event in the on_event callback.
         Do not destroy the object and callback before this event.
+
+        Parameters:
+        -----------
+        complete_timeout_millis: int
+            Throws TimeoutError exception if it times out. If the timeout is not None
+            and greater than zero, it will wait for the corresponding number of
+            milliseconds; otherwise, it will wait indefinitely.
         """

         if not self._is_started:
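streaming_complete and async_streaming_complete now take a complete_timeout_millis argument (default 10000): a positive value is converted to seconds and passed to threading.Event.wait, and a missed wait raises TimeoutError, while None or a non-positive value waits indefinitely. A minimal sketch of that wait policy, assuming only a threading.Event that some worker sets when synthesis finishes:

    import threading

    def wait_for_complete(done: threading.Event, complete_timeout_millis=10000):
        """Wait for `done` the way the 1.20.2 synthesizer waits for task completion."""
        if complete_timeout_millis is not None and complete_timeout_millis > 0:
            # Event.wait takes seconds and returns False on timeout
            if not done.wait(timeout=complete_timeout_millis / 1000):
                raise TimeoutError(
                    'wait for complete timeout {}ms'.format(complete_timeout_millis))
        else:
            done.wait()  # no (or non-positive) timeout: block until set

    # usage sketch: a worker sets the event after ~50 ms
    done = threading.Event()
    threading.Timer(0.05, done.set).start()
    wait_for_complete(done, complete_timeout_millis=1000)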
@@ -377,7 +403,8 @@ class SpeechSynthesizer:
             raise InvalidTask('speech synthesizer task has stopped.')
         request = self.request.getFinishRequest()
         self.__send_str(request)
-        thread = threading.Thread(target=self.__waiting_for_complete
+        thread = threading.Thread(target=self.__waiting_for_complete,
+                                  args=(complete_timeout_millis, ))
        thread.start()

     def streaming_cancel(self):
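The asynchronous path previously started the waiter thread without an argument tuple; now that __waiting_for_complete takes a timeout, the timeout is passed through args. A tiny sketch of the pattern (note the trailing comma that makes args a one-element tuple):

    import threading

    def waiting_for_complete(timeout_millis):
        # stand-in for the private waiter; just reports the timeout it received
        print('would wait up to {} ms'.format(timeout_millis))

    # args must be a tuple: (10000, ) not (10000)
    thread = threading.Thread(target=waiting_for_complete, args=(10000, ))
    thread.start()
    thread.join()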
@@ -428,6 +455,18 @@ class SpeechSynthesizer:
         elif isinstance(message, (bytes, bytearray)):
             # if parsing fails, treat it as a binary message
             logger.debug('<<<recv binary {}'.format(len(message)))
+            if (self._recv_audio_length == 0):
+                self._first_package_timestamp = time.time() * 1000
+                logger.debug('first package delay {}'.format(
+                    self._first_package_timestamp -
+                    self._start_stream_timestamp))
+            self._recv_audio_length += len(message) / (2 * self.sample_rate /
+                                                       1000)
+            current = time.time() * 1000
+            current_rtf = (current - self._first_package_timestamp
+                           ) / self._recv_audio_length
+            logger.debug('total audio {} ms, current_rtf: {}'.format(
+                self._recv_audio_length, current_rtf))
             # only save audio when the call is not asynchronous
             if not self.async_call:
                 if self._audio_data is None:
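The binary-message handler now tracks received audio duration and a running real-time factor (RTF). The duration estimate len(message) / (2 * sample_rate / 1000) assumes 16-bit (2-byte) mono PCM, converting a byte count into milliseconds of audio; RTF is then elapsed wall-clock time since the first audio packet divided by the audio duration received so far, where values below 1 mean synthesis runs faster than real time. A standalone sketch of the same arithmetic:

    def pcm_bytes_to_ms(num_bytes: int, sample_rate: int) -> float:
        # assumes 16-bit mono PCM: 2 bytes per sample, sample_rate samples per second
        return num_bytes / (2 * sample_rate / 1000)

    def real_time_factor(elapsed_ms: float, audio_ms: float) -> float:
        # < 1.0 means audio is produced faster than it plays back
        return elapsed_ms / audio_ms

    audio_ms = pcm_bytes_to_ms(num_bytes=44100, sample_rate=22050)           # 1000.0 ms
    print(audio_ms, real_time_factor(elapsed_ms=300.0, audio_ms=audio_ms))   # 1000.0 0.3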
@@ -437,7 +476,7 @@ class SpeechSynthesizer:
         if self.callback:
             self.callback.on_data(message)

-    def call(self, text: str):
+    def call(self, text: str, timeout_millis=None):
         """
         Speech synthesis.
         If callback is set, the audio will be returned in real-time through the on_event interface.
@@ -447,9 +486,14 @@ class SpeechSynthesizer:
         -----------
         text: str
             utf-8 encoded text
+        timeoutMillis:
+            Integer or None
         return: bytes
             If a callback is not set during initialization, the complete audio is returned
             as the function's return value. Otherwise, the return value is null.
+            If the timeout is set to a value greater than zero and not None,
+            it will wait for the corresponding number of milliseconds;
+            otherwise, it will wait indefinitely.
         """
         # print('the sdk does not yet support non-streaming speech synthesis with the large model; simulating it with streaming')
         if not self.callback:
@@ -457,10 +501,10 @@ class SpeechSynthesizer:
         self.__start_stream()
         self.__submit_text(text)
         if self.async_call:
-            self.async_streaming_complete()
+            self.async_streaming_complete(timeout_millis)
             return None
         else:
-            self.streaming_complete()
+            self.streaming_complete(timeout_millis)
             return self._audio_data

     # callback invoked when the WebSocket closes
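With the new timeout_millis parameter, a blocking call() forwards the timeout to streaming_complete (or to async_streaming_complete when a callback drives the audio). A usage sketch, assuming the tts_v2 SpeechSynthesizer constructor accepts the model and voice keywords shown earlier in this diff and that a valid API key is configured; the model and voice names are placeholders, not taken from this diff:

    import dashscope
    from dashscope.audio.tts_v2 import SpeechSynthesizer

    dashscope.api_key = 'sk-...'  # placeholder

    # model/voice names are illustrative only
    synthesizer = SpeechSynthesizer(model='cosyvoice-v1', voice='longxiaochun')

    try:
        # no callback set, so call() blocks and returns the full audio;
        # give up after 30 s instead of the previous fixed 10 s internal wait
        audio = synthesizer.call('Hello from dashscope 1.20.2', timeout_millis=30000)
        with open('out.mp3', 'wb') as f:
            f.write(audio)
    except TimeoutError:
        print('synthesis did not complete within 30 s')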
@@ -478,4 +522,4 @@ class SpeechSynthesizer:

     # get the taskId of the previous task
     def get_last_request_id(self):
-        return self.last_request_id
+        return self.last_request_id
dashscope/version.py CHANGED

@@ -1 +1 @@
-__version__ = '1.20.1'
+__version__ = '1.20.2'
{dashscope-1.20.1.dist-info → dashscope-1.20.2.dist-info}/RECORD CHANGED

@@ -6,12 +6,12 @@ dashscope/files.py,sha256=QgJjwhtn9F548nCA8jD8OvE6aQEj-20hZqJgYXsUdQU,3930
 dashscope/finetune.py,sha256=_tflDUvu0KagSoCzLaf0hofpG_P8NU6PylL8CPjVhrA,6243
 dashscope/model.py,sha256=UPOn1qMYFhX-ovXi3BMxZEBk8qOK7WLJOYHMbPZwYBo,1440
 dashscope/models.py,sha256=1-bc-Ue68zurgu_y6RhfFr9uzeQMF5AZq-C32lJGMGU,1224
-dashscope/version.py,sha256=
+dashscope/version.py,sha256=6KIFuOk_uHObRUtO_hf7gfIcH-9norzWjEkjzb1Dclg,23
 dashscope/aigc/__init__.py,sha256=s-MCA87KYiVumYtKtJi5IMN7xelSF6TqEU3s3_7RF-Y,327
 dashscope/aigc/code_generation.py,sha256=KAJVrGp6tiNFBBg64Ovs9RfcP5SrIhrbW3wdA89NKso,10885
 dashscope/aigc/conversation.py,sha256=xRoJlCR-IXHjSdkDrK74a9ut1FJg0FZhTNXZAJC18MA,14231
 dashscope/aigc/generation.py,sha256=53oMCmN5ZbqeqAsKxmdunXlRh-XP8ZtnA7hB2id4Koo,17897
-dashscope/aigc/image_synthesis.py,sha256=
+dashscope/aigc/image_synthesis.py,sha256=_TGkh4L_yBNQNmoJUKlfXfljKfxX-SSyxHHQBDm1AC8,10418
 dashscope/aigc/multimodal_conversation.py,sha256=SlNnnsUPV19gdx8fYJAtsMFWPNGY6vhk5IGHZ5ZczpI,5369
 dashscope/api_entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dashscope/api_entities/aiohttp_request.py,sha256=aE3AeWba8Ig_xHMYjrAdkq0N61l_L2VFTG6HYh912X0,10229
@@ -31,12 +31,12 @@ dashscope/assistants/files.py,sha256=pwLVJ_pjpRFWyfI_MRvhH7Si7FzGDj4ChzZgWTpLOhg
 dashscope/audio/__init__.py,sha256=-ZRxrK-gV4QsUtlThIT-XwqB6vmyEsnhxIxdLmhCUuc,61
 dashscope/audio/asr/__init__.py,sha256=-s180qWn_JPSpCo1q0aDJJ5HQ3zTzD4z5yUwsRqH4aU,275
 dashscope/audio/asr/asr_phrase_manager.py,sha256=EjtbI3zz9UQGS1qv6Yb4zzEMj4OJJVXmwkqZyIrzvEA,7642
-dashscope/audio/asr/recognition.py,sha256=
+dashscope/audio/asr/recognition.py,sha256=cEooE3wGf8kKfJIVbaXEytl5X6F0hMsLe8g4Bj9Fn4w,18768
 dashscope/audio/asr/transcription.py,sha256=1WAg9WH89antVzRYEKXb5LQP9xylZmX4YKp7v5oMYjY,8931
 dashscope/audio/tts/__init__.py,sha256=fbnieZX9yNFNh5BsxLpLXb63jlxzxrdCJakV3ignjlQ,194
 dashscope/audio/tts/speech_synthesizer.py,sha256=dnKx9FDDdO_ETHAjhK8zaMVaH6SfoTtN5YxXXqgY1JA,7571
 dashscope/audio/tts_v2/__init__.py,sha256=ve5a81qTbWDcRaSuritZtJBzryOIol2_dxzfqqdCw-k,345
-dashscope/audio/tts_v2/speech_synthesizer.py,sha256=
+dashscope/audio/tts_v2/speech_synthesizer.py,sha256=RenOvxp6BOs_-E_K5akMEBFw9G0HXoLnvHw1IOTUgeU,18991
 dashscope/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dashscope/client/base_api.py,sha256=rXN97XGyDhCCaD_dz_clpFDjOJfpGjqiH7yX3LaD-GE,41233
 dashscope/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -82,9 +82,9 @@ dashscope/tokenizers/tokenizer.py,sha256=y6P91qTCYo__pEx_0VHAcj9YECfbUdRqZU1fdGT
 dashscope/tokenizers/tokenizer_base.py,sha256=REDhzRyDT13iequ61-a6_KcTy0GFKlihQve5HkyoyRs,656
 dashscope/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dashscope/utils/oss_utils.py,sha256=fi8-PPsN-iR-iv5k2NS5Z8nlWkpgUhr56FRWm4BDh4A,6984
-dashscope-1.20.
-dashscope-1.20.
-dashscope-1.20.
-dashscope-1.20.
-dashscope-1.20.
-dashscope-1.20.
+dashscope-1.20.2.dist-info/LICENSE,sha256=Izp5L1DF1Mbza6qojkqNNWlE_mYLnr4rmzx2EBF8YFw,11413
+dashscope-1.20.2.dist-info/METADATA,sha256=I52DKGYRGq77FO4pFbQwUHN-2g4qLLNz-jMO0DMQ008,6641
+dashscope-1.20.2.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
+dashscope-1.20.2.dist-info/entry_points.txt,sha256=raEp5dOuj8whJ7yqZlDM8WQ5p2RfnGrGNo0QLQEnatY,50
+dashscope-1.20.2.dist-info/top_level.txt,sha256=woqavFJK9zas5xTqynmALqOtlafghjsk63Xk86powTU,10
+dashscope-1.20.2.dist-info/RECORD,,
{dashscope-1.20.1.dist-info → dashscope-1.20.2.dist-info}/LICENSE: file without changes
{dashscope-1.20.1.dist-info → dashscope-1.20.2.dist-info}/WHEEL: file without changes
{dashscope-1.20.1.dist-info → dashscope-1.20.2.dist-info}/entry_points.txt: file without changes
{dashscope-1.20.1.dist-info → dashscope-1.20.2.dist-info}/top_level.txt: file without changes