dashscope-1.20.1-py3-none-any.whl → dashscope-1.20.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dashscope might be problematic. Click here for more details.

@@ -127,7 +127,7 @@ class ImageSynthesis(BaseAsyncApi):
127
127
  if sketch_image_url is not None and sketch_image_url:
128
128
  input['sketch_image_url'] = sketch_image_url
129
129
  if ref_img is not None and ref_img:
130
- input['ref_img'] == ref_img
130
+ input['ref_img'] = ref_img
131
131
  if extra_input is not None and extra_input:
132
132
  input = {**input, **extra_input}
133
133
 
@@ -1,5 +1,6 @@
1
1
  import json
2
2
  import os
3
+ import time
3
4
  import threading
4
5
  from http import HTTPStatus
5
6
  from threading import Timer
@@ -157,6 +158,8 @@ class Recognition(BaseApi):
157
158
  self._silence_timer = None
158
159
  self._kwargs = kwargs
159
160
  self._workspace = workspace
161
+ self._start_stream_timestamp = -1
162
+ self._first_package_timestamp = -1
160
163
 
161
164
  def __del__(self):
162
165
  if self._running:
@@ -183,12 +186,18 @@ class Recognition(BaseApi):
183
186
  else:
184
187
  usage: Dict[str, Any] = None
185
188
  useags: List[Any] = None
186
- if 'sentence' in part.output and part.usage is not None:
187
- usage = {
188
- 'end_time': part.output['sentence']['end_time'],
189
- 'usage': part.usage
190
- }
191
- useags = [usage]
189
+ if 'sentence' in part.output:
190
+ if (self._first_package_timestamp < 0):
191
+ self._first_package_timestamp = time.time()*1000
192
+ logger.debug('first package delay {}'.format(self._first_package_timestamp - self._start_stream_timestamp))
193
+ sentence = part.output['sentence']
194
+ logger.debug('Recv Result :{}, isEnd: {}'.format(sentence, RecognitionResult.is_sentence_end(sentence)))
195
+ if part.usage is not None:
196
+ usage = {
197
+ 'end_time': part.output['sentence']['end_time'],
198
+ 'usage': part.usage
199
+ }
200
+ useags = [usage]
192
201
 
193
202
  self._callback.on_event(
194
203
  RecognitionResult(
@@ -260,6 +269,8 @@ class Recognition(BaseApi):
260
269
  if self._running:
261
270
  raise InvalidParameter('Speech recognition has started.')
262
271
 
272
+ self._start_stream_timestamp = -1
273
+ self._first_package_timestamp = -1
263
274
  self._phrase = phrase_id
264
275
  self._kwargs.update(**kwargs)
265
276
  self._recognition_once = False
@@ -307,6 +318,7 @@ class Recognition(BaseApi):
307
318
  Returns:
308
319
  RecognitionResult: The result of speech recognition.
309
320
  """
321
+ self._start_stream_timestamp = time.time()*1000
310
322
  if self._running:
311
323
  raise InvalidParameter('Speech recognition has been called.')
312
324
 
@@ -350,7 +362,11 @@ class Recognition(BaseApi):
350
362
  for part in responses:
351
363
  if part.status_code == HTTPStatus.OK:
352
364
  if 'sentence' in part.output:
365
+ if (self._first_package_timestamp < 0):
366
+ self._first_package_timestamp = time.time()*1000
367
+ logger.debug('first package delay {}'.format(self._first_package_timestamp - self._start_stream_timestamp))
353
368
  sentence = part.output['sentence']
369
+ logger.debug('Recv Result :{}, isEnd: {}'.format(sentence, RecognitionResult.is_sentence_end(sentence)))
354
370
  if RecognitionResult.is_sentence_end(sentence):
355
371
  sentences.append(sentence)
356
372
 
@@ -408,6 +424,9 @@ class Recognition(BaseApi):
408
424
  if self._running is False:
409
425
  raise InvalidParameter('Speech recognition has stopped.')
410
426
 
427
+ if (self._start_stream_timestamp < 0):
428
+ self._start_stream_timestamp = time.time() * 1000
429
+ logger.debug('send_audio_frame: {}'.format(len(buffer)))
411
430
  self._stream_data = self._stream_data + [buffer]
412
431
 
413
432
  def _tidy_kwargs(self):
@@ -5,9 +5,8 @@ import time
5
5
  import uuid
6
6
  from enum import Enum, unique
7
7
 
8
- import websocket
9
-
10
8
  import dashscope
9
+ import websocket
11
10
  from dashscope.common.error import InputRequired, InvalidTask, ModelRequired
12
11
  from dashscope.common.logging import logger
13
12
  from dashscope.protocol.websocket import (ACTION_KEY, EVENT_KEY, HEADER,
@@ -239,6 +238,14 @@ class SpeechSynthesizer:
239
238
  self.headers = headers
240
239
  self.workspace = workspace
241
240
  self.additional_params = additional_params
241
+ self.model = model
242
+ self.voice = voice
243
+ self.aformat = format.format
244
+ if (self.aformat == 'DEFAULT'):
245
+ self.aformat = 'mp3'
246
+ self.sample_rate = format.sample_rate
247
+ if (self.sample_rate == 0):
248
+ self.sample_rate = 22050
242
249
 
243
250
  self.request = Request(
244
251
  apikey=self.apikey,
@@ -265,12 +272,18 @@ class SpeechSynthesizer:
265
272
  # since dashscope sdk will send first text in run-task
266
273
  if not self.callback:
267
274
  self.async_call = False
275
+ self._start_stream_timestamp = -1
276
+ self._first_package_timestamp = -1
277
+ self._recv_audio_length = 0
268
278
 
269
279
  def __send_str(self, data: str):
270
280
  logger.debug('>>>send {}'.format(data))
271
281
  self.ws.send(data)
272
282
 
273
283
  def __start_stream(self, ):
284
+ self._start_stream_timestamp = time.time() * 1000
285
+ self._first_package_timestamp = -1
286
+ self._recv_audio_length = 0
274
287
  if self.callback is None:
275
288
  raise InputRequired('callback is required!')
276
289
  # reset inner params
@@ -332,7 +345,7 @@ class SpeechSynthesizer:
332
345
  self.__submit_text(text)
333
346
  return None
334
347
 
335
- def streaming_complete(self, complete_timeout_millie=10000):
348
+ def streaming_complete(self, complete_timeout_millis=10000):
336
349
  """
337
350
  Synchronously stop the streaming input speech synthesis task.
338
351
  Wait for all remaining synthesized audio before returning
@@ -340,7 +353,9 @@ class SpeechSynthesizer:
340
353
  Parameters:
341
354
  -----------
342
355
  complete_timeout_millis: int
343
- Throws TimeoutError exception if it times out.
356
+ Throws TimeoutError exception if it times out. If the timeout is not None
357
+ and greater than zero, it will wait for the corresponding number of
358
+ milliseconds; otherwise, it will wait indefinitely.
344
359
  """
345
360
  if not self._is_started:
346
361
  raise InvalidTask('speech synthesizer has not been started.')
@@ -348,27 +363,38 @@ class SpeechSynthesizer:
348
363
  raise InvalidTask('speech synthesizer task has stopped.')
349
364
  request = self.request.getFinishRequest()
350
365
  self.__send_str(request)
351
- if not self.complete_event.wait(timeout=complete_timeout_millie):
352
- raise TimeoutError(
353
- 'speech synthesizer wait for complete timeout {}ms'.format(
354
- complete_timeout_millie))
366
+ if complete_timeout_millis is not None and complete_timeout_millis > 0:
367
+ if not self.complete_event.wait(timeout=complete_timeout_millis /
368
+ 1000):
369
+ raise TimeoutError(
370
+ 'speech synthesizer wait for complete timeout {}ms'.format(
371
+ complete_timeout_millis))
372
+ else:
373
+ self.complete_event.wait()
355
374
  self.close()
356
375
  self._stopped.set()
357
376
  self._is_started = False
358
377
 
359
- def __waiting_for_complete(self):
360
- if not self.complete_event.wait(timeout=10000):
378
+ def __waiting_for_complete(self, timeout):
379
+ if not self.complete_event.wait(timeout=timeout / 1000):
361
380
  raise TimeoutError(
362
381
  'speech synthesizer wait for complete timeout 10000ms')
363
382
  self.close()
364
383
  self._stopped.set()
365
384
  self._is_started = False
366
385
 
367
- def async_streaming_complete(self):
386
+ def async_streaming_complete(self, complete_timeout_millis=10000):
368
387
  """
369
388
  Asynchronously stop the streaming input speech synthesis task, returns immediately.
370
389
  You need to listen and handle the STREAM_INPUT_TTS_EVENT_SYNTHESIS_COMPLETE event in the on_event callback.
371
390
  Do not destroy the object and callback before this event.
391
+
392
+ Parameters:
393
+ -----------
394
+ complete_timeout_millis: int
395
+ Throws TimeoutError exception if it times out. If the timeout is not None
396
+ and greater than zero, it will wait for the corresponding number of
397
+ milliseconds; otherwise, it will wait indefinitely.
372
398
  """
373
399
 
374
400
  if not self._is_started:
@@ -377,7 +403,8 @@ class SpeechSynthesizer:
377
403
  raise InvalidTask('speech synthesizer task has stopped.')
378
404
  request = self.request.getFinishRequest()
379
405
  self.__send_str(request)
380
- thread = threading.Thread(target=self.__waiting_for_complete)
406
+ thread = threading.Thread(target=self.__waiting_for_complete,
407
+ args=(complete_timeout_millis, ))
381
408
  thread.start()
382
409
 
383
410
  def streaming_cancel(self):
@@ -428,6 +455,18 @@ class SpeechSynthesizer:
428
455
  elif isinstance(message, (bytes, bytearray)):
429
456
  # 如果失败,认为是二进制消息
430
457
  logger.debug('<<<recv binary {}'.format(len(message)))
458
+ if (self._recv_audio_length == 0):
459
+ self._first_package_timestamp = time.time() * 1000
460
+ logger.debug('first package delay {}'.format(
461
+ self._first_package_timestamp -
462
+ self._start_stream_timestamp))
463
+ self._recv_audio_length += len(message) / (2 * self.sample_rate /
464
+ 1000)
465
+ current = time.time() * 1000
466
+ current_rtf = (current - self._first_package_timestamp
467
+ ) / self._recv_audio_length
468
+ logger.debug('total audio {} ms, current_rtf: {}'.format(
469
+ self._recv_audio_length, current_rtf))
431
470
  # 只有在非异步调用的时候保存音频
432
471
  if not self.async_call:
433
472
  if self._audio_data is None:
@@ -437,7 +476,7 @@ class SpeechSynthesizer:
437
476
  if self.callback:
438
477
  self.callback.on_data(message)
439
478
 
440
- def call(self, text: str):
479
+ def call(self, text: str, timeout_millis=None):
441
480
  """
442
481
  Speech synthesis.
443
482
  If callback is set, the audio will be returned in real-time through the on_event interface.
@@ -447,9 +486,14 @@ class SpeechSynthesizer:
447
486
  -----------
448
487
  text: str
449
488
  utf-8 encoded text
489
+ timeoutMillis:
490
+ Integer or None
450
491
  return: bytes
451
492
  If a callback is not set during initialization, the complete audio is returned
452
493
  as the function's return value. Otherwise, the return value is null.
494
+ If the timeout is set to a value greater than zero and not None,
495
+ it will wait for the corresponding number of milliseconds;
496
+ otherwise, it will wait indefinitely.
453
497
  """
454
498
  # print('还不支持非流式语音合成sdk调用大模型,使用流式模拟')
455
499
  if not self.callback:
@@ -457,10 +501,10 @@ class SpeechSynthesizer:
457
501
  self.__start_stream()
458
502
  self.__submit_text(text)
459
503
  if self.async_call:
460
- self.async_streaming_complete()
504
+ self.async_streaming_complete(timeout_millis)
461
505
  return None
462
506
  else:
463
- self.streaming_complete()
507
+ self.streaming_complete(timeout_millis)
464
508
  return self._audio_data
465
509
 
466
510
  # WebSocket关闭的回调函数
@@ -478,4 +522,4 @@ class SpeechSynthesizer:
478
522
 
479
523
  # 获取上一个任务的taskId
480
524
  def get_last_request_id(self):
481
- return self.last_request_id
525
+ return self.last_request_id
dashscope/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = '1.20.1'
1
+ __version__ = '1.20.2'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dashscope
3
- Version: 1.20.1
3
+ Version: 1.20.2
4
4
  Summary: dashscope client sdk library
5
5
  Home-page: https://dashscope.aliyun.com/
6
6
  Author: Alibaba Cloud
@@ -6,12 +6,12 @@ dashscope/files.py,sha256=QgJjwhtn9F548nCA8jD8OvE6aQEj-20hZqJgYXsUdQU,3930
6
6
  dashscope/finetune.py,sha256=_tflDUvu0KagSoCzLaf0hofpG_P8NU6PylL8CPjVhrA,6243
7
7
  dashscope/model.py,sha256=UPOn1qMYFhX-ovXi3BMxZEBk8qOK7WLJOYHMbPZwYBo,1440
8
8
  dashscope/models.py,sha256=1-bc-Ue68zurgu_y6RhfFr9uzeQMF5AZq-C32lJGMGU,1224
9
- dashscope/version.py,sha256=50ZydhgZIM_dwezjbX2voC6nDV_wx5kxvUuz3Nl2Eig,23
9
+ dashscope/version.py,sha256=6KIFuOk_uHObRUtO_hf7gfIcH-9norzWjEkjzb1Dclg,23
10
10
  dashscope/aigc/__init__.py,sha256=s-MCA87KYiVumYtKtJi5IMN7xelSF6TqEU3s3_7RF-Y,327
11
11
  dashscope/aigc/code_generation.py,sha256=KAJVrGp6tiNFBBg64Ovs9RfcP5SrIhrbW3wdA89NKso,10885
12
12
  dashscope/aigc/conversation.py,sha256=xRoJlCR-IXHjSdkDrK74a9ut1FJg0FZhTNXZAJC18MA,14231
13
13
  dashscope/aigc/generation.py,sha256=53oMCmN5ZbqeqAsKxmdunXlRh-XP8ZtnA7hB2id4Koo,17897
14
- dashscope/aigc/image_synthesis.py,sha256=Hg2r6H7Vj4MsXwm62lHf2lTpUb6nA3xWGEYX2o-2TLQ,10419
14
+ dashscope/aigc/image_synthesis.py,sha256=_TGkh4L_yBNQNmoJUKlfXfljKfxX-SSyxHHQBDm1AC8,10418
15
15
  dashscope/aigc/multimodal_conversation.py,sha256=SlNnnsUPV19gdx8fYJAtsMFWPNGY6vhk5IGHZ5ZczpI,5369
16
16
  dashscope/api_entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  dashscope/api_entities/aiohttp_request.py,sha256=aE3AeWba8Ig_xHMYjrAdkq0N61l_L2VFTG6HYh912X0,10229
@@ -31,12 +31,12 @@ dashscope/assistants/files.py,sha256=pwLVJ_pjpRFWyfI_MRvhH7Si7FzGDj4ChzZgWTpLOhg
31
31
  dashscope/audio/__init__.py,sha256=-ZRxrK-gV4QsUtlThIT-XwqB6vmyEsnhxIxdLmhCUuc,61
32
32
  dashscope/audio/asr/__init__.py,sha256=-s180qWn_JPSpCo1q0aDJJ5HQ3zTzD4z5yUwsRqH4aU,275
33
33
  dashscope/audio/asr/asr_phrase_manager.py,sha256=EjtbI3zz9UQGS1qv6Yb4zzEMj4OJJVXmwkqZyIrzvEA,7642
34
- dashscope/audio/asr/recognition.py,sha256=F2iz6hyXg16Z6DGlPwGpKfRNcAZIIsqXnNPtaZp4Fzo,17369
34
+ dashscope/audio/asr/recognition.py,sha256=cEooE3wGf8kKfJIVbaXEytl5X6F0hMsLe8g4Bj9Fn4w,18768
35
35
  dashscope/audio/asr/transcription.py,sha256=1WAg9WH89antVzRYEKXb5LQP9xylZmX4YKp7v5oMYjY,8931
36
36
  dashscope/audio/tts/__init__.py,sha256=fbnieZX9yNFNh5BsxLpLXb63jlxzxrdCJakV3ignjlQ,194
37
37
  dashscope/audio/tts/speech_synthesizer.py,sha256=dnKx9FDDdO_ETHAjhK8zaMVaH6SfoTtN5YxXXqgY1JA,7571
38
38
  dashscope/audio/tts_v2/__init__.py,sha256=ve5a81qTbWDcRaSuritZtJBzryOIol2_dxzfqqdCw-k,345
39
- dashscope/audio/tts_v2/speech_synthesizer.py,sha256=sv5f4vi17rFcYDif5bSS4UuX_2eVUL2q5rTnXK1EoAg,16650
39
+ dashscope/audio/tts_v2/speech_synthesizer.py,sha256=RenOvxp6BOs_-E_K5akMEBFw9G0HXoLnvHw1IOTUgeU,18991
40
40
  dashscope/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
41
  dashscope/client/base_api.py,sha256=rXN97XGyDhCCaD_dz_clpFDjOJfpGjqiH7yX3LaD-GE,41233
42
42
  dashscope/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -82,9 +82,9 @@ dashscope/tokenizers/tokenizer.py,sha256=y6P91qTCYo__pEx_0VHAcj9YECfbUdRqZU1fdGT
82
82
  dashscope/tokenizers/tokenizer_base.py,sha256=REDhzRyDT13iequ61-a6_KcTy0GFKlihQve5HkyoyRs,656
83
83
  dashscope/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
84
84
  dashscope/utils/oss_utils.py,sha256=fi8-PPsN-iR-iv5k2NS5Z8nlWkpgUhr56FRWm4BDh4A,6984
85
- dashscope-1.20.1.dist-info/LICENSE,sha256=Izp5L1DF1Mbza6qojkqNNWlE_mYLnr4rmzx2EBF8YFw,11413
86
- dashscope-1.20.1.dist-info/METADATA,sha256=pcY6Qv8KtqGlNAiqRr0JEfAzXBH8N56KlUD3o3expoA,6641
87
- dashscope-1.20.1.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
88
- dashscope-1.20.1.dist-info/entry_points.txt,sha256=raEp5dOuj8whJ7yqZlDM8WQ5p2RfnGrGNo0QLQEnatY,50
89
- dashscope-1.20.1.dist-info/top_level.txt,sha256=woqavFJK9zas5xTqynmALqOtlafghjsk63Xk86powTU,10
90
- dashscope-1.20.1.dist-info/RECORD,,
85
+ dashscope-1.20.2.dist-info/LICENSE,sha256=Izp5L1DF1Mbza6qojkqNNWlE_mYLnr4rmzx2EBF8YFw,11413
86
+ dashscope-1.20.2.dist-info/METADATA,sha256=I52DKGYRGq77FO4pFbQwUHN-2g4qLLNz-jMO0DMQ008,6641
87
+ dashscope-1.20.2.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
88
+ dashscope-1.20.2.dist-info/entry_points.txt,sha256=raEp5dOuj8whJ7yqZlDM8WQ5p2RfnGrGNo0QLQEnatY,50
89
+ dashscope-1.20.2.dist-info/top_level.txt,sha256=woqavFJK9zas5xTqynmALqOtlafghjsk63Xk86powTU,10
90
+ dashscope-1.20.2.dist-info/RECORD,,