dashscope-1.20.1-py3-none-any.whl → dashscope-1.20.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dashscope might be problematic. (The original page linked to more details here; the link target was not preserved in this text.)

dashscope/aigc/image_synthesis.py CHANGED
@@ -127,7 +127,7 @@ class ImageSynthesis(BaseAsyncApi):
127
127
  if sketch_image_url is not None and sketch_image_url:
128
128
  input['sketch_image_url'] = sketch_image_url
129
129
  if ref_img is not None and ref_img:
130
- input['ref_img'] == ref_img
130
+ input['ref_img'] = ref_img
131
131
  if extra_input is not None and extra_input:
132
132
  input = {**input, **extra_input}
133
133
 
dashscope/audio/asr/recognition.py CHANGED
@@ -1,5 +1,6 @@
1
1
  import json
2
2
  import os
3
+ import time
3
4
  import threading
4
5
  from http import HTTPStatus
5
6
  from threading import Timer
@@ -157,6 +158,8 @@ class Recognition(BaseApi):
157
158
  self._silence_timer = None
158
159
  self._kwargs = kwargs
159
160
  self._workspace = workspace
161
+ self._start_stream_timestamp = -1
162
+ self._first_package_timestamp = -1
160
163
 
161
164
  def __del__(self):
162
165
  if self._running:
@@ -183,12 +186,18 @@ class Recognition(BaseApi):
183
186
  else:
184
187
  usage: Dict[str, Any] = None
185
188
  useags: List[Any] = None
186
- if 'sentence' in part.output and part.usage is not None:
187
- usage = {
188
- 'end_time': part.output['sentence']['end_time'],
189
- 'usage': part.usage
190
- }
191
- useags = [usage]
189
+ if 'sentence' in part.output:
190
+ if (self._first_package_timestamp < 0):
191
+ self._first_package_timestamp = time.time()*1000
192
+ logger.debug('first package delay {}'.format(self._first_package_timestamp - self._start_stream_timestamp))
193
+ sentence = part.output['sentence']
194
+ logger.debug('Recv Result :{}, isEnd: {}'.format(sentence, RecognitionResult.is_sentence_end(sentence)))
195
+ if part.usage is not None:
196
+ usage = {
197
+ 'end_time': part.output['sentence']['end_time'],
198
+ 'usage': part.usage
199
+ }
200
+ useags = [usage]
192
201
 
193
202
  self._callback.on_event(
194
203
  RecognitionResult(
@@ -260,6 +269,8 @@ class Recognition(BaseApi):
260
269
  if self._running:
261
270
  raise InvalidParameter('Speech recognition has started.')
262
271
 
272
+ self._start_stream_timestamp = -1
273
+ self._first_package_timestamp = -1
263
274
  self._phrase = phrase_id
264
275
  self._kwargs.update(**kwargs)
265
276
  self._recognition_once = False
@@ -307,6 +318,7 @@ class Recognition(BaseApi):
307
318
  Returns:
308
319
  RecognitionResult: The result of speech recognition.
309
320
  """
321
+ self._start_stream_timestamp = time.time()*1000
310
322
  if self._running:
311
323
  raise InvalidParameter('Speech recognition has been called.')
312
324
 
@@ -350,7 +362,11 @@ class Recognition(BaseApi):
350
362
  for part in responses:
351
363
  if part.status_code == HTTPStatus.OK:
352
364
  if 'sentence' in part.output:
365
+ if (self._first_package_timestamp < 0):
366
+ self._first_package_timestamp = time.time()*1000
367
+ logger.debug('first package delay {}'.format(self._first_package_timestamp - self._start_stream_timestamp))
353
368
  sentence = part.output['sentence']
369
+ logger.debug('Recv Result :{}, isEnd: {}'.format(sentence, RecognitionResult.is_sentence_end(sentence)))
354
370
  if RecognitionResult.is_sentence_end(sentence):
355
371
  sentences.append(sentence)
356
372
 
@@ -408,6 +424,9 @@ class Recognition(BaseApi):
408
424
  if self._running is False:
409
425
  raise InvalidParameter('Speech recognition has stopped.')
410
426
 
427
+ if (self._start_stream_timestamp < 0):
428
+ self._start_stream_timestamp = time.time() * 1000
429
+ logger.debug('send_audio_frame: {}'.format(len(buffer)))
411
430
  self._stream_data = self._stream_data + [buffer]
412
431
 
413
432
  def _tidy_kwargs(self):
dashscope/audio/tts_v2/speech_synthesizer.py CHANGED
@@ -5,9 +5,8 @@ import time
5
5
  import uuid
6
6
  from enum import Enum, unique
7
7
 
8
- import websocket
9
-
10
8
  import dashscope
9
+ import websocket
11
10
  from dashscope.common.error import InputRequired, InvalidTask, ModelRequired
12
11
  from dashscope.common.logging import logger
13
12
  from dashscope.protocol.websocket import (ACTION_KEY, EVENT_KEY, HEADER,
@@ -239,6 +238,14 @@ class SpeechSynthesizer:
239
238
  self.headers = headers
240
239
  self.workspace = workspace
241
240
  self.additional_params = additional_params
241
+ self.model = model
242
+ self.voice = voice
243
+ self.aformat = format.format
244
+ if (self.aformat == 'DEFAULT'):
245
+ self.aformat = 'mp3'
246
+ self.sample_rate = format.sample_rate
247
+ if (self.sample_rate == 0):
248
+ self.sample_rate = 22050
242
249
 
243
250
  self.request = Request(
244
251
  apikey=self.apikey,
@@ -265,12 +272,18 @@ class SpeechSynthesizer:
265
272
  # since dashscope sdk will send first text in run-task
266
273
  if not self.callback:
267
274
  self.async_call = False
275
+ self._start_stream_timestamp = -1
276
+ self._first_package_timestamp = -1
277
+ self._recv_audio_length = 0
268
278
 
269
279
  def __send_str(self, data: str):
270
280
  logger.debug('>>>send {}'.format(data))
271
281
  self.ws.send(data)
272
282
 
273
283
  def __start_stream(self, ):
284
+ self._start_stream_timestamp = time.time() * 1000
285
+ self._first_package_timestamp = -1
286
+ self._recv_audio_length = 0
274
287
  if self.callback is None:
275
288
  raise InputRequired('callback is required!')
276
289
  # reset inner params
@@ -332,7 +345,7 @@ class SpeechSynthesizer:
332
345
  self.__submit_text(text)
333
346
  return None
334
347
 
335
- def streaming_complete(self, complete_timeout_millie=10000):
348
+ def streaming_complete(self, complete_timeout_millis=600000):
336
349
  """
337
350
  Synchronously stop the streaming input speech synthesis task.
338
351
  Wait for all remaining synthesized audio before returning
@@ -340,7 +353,9 @@ class SpeechSynthesizer:
340
353
  Parameters:
341
354
  -----------
342
355
  complete_timeout_millis: int
343
- Throws TimeoutError exception if it times out.
356
+ Throws TimeoutError exception if it times out. If the timeout is not None
357
+ and greater than zero, it will wait for the corresponding number of
358
+ milliseconds; otherwise, it will wait indefinitely.
344
359
  """
345
360
  if not self._is_started:
346
361
  raise InvalidTask('speech synthesizer has not been started.')
@@ -348,27 +363,42 @@ class SpeechSynthesizer:
348
363
  raise InvalidTask('speech synthesizer task has stopped.')
349
364
  request = self.request.getFinishRequest()
350
365
  self.__send_str(request)
351
- if not self.complete_event.wait(timeout=complete_timeout_millie):
352
- raise TimeoutError(
353
- 'speech synthesizer wait for complete timeout {}ms'.format(
354
- complete_timeout_millie))
366
+ if complete_timeout_millis is not None and complete_timeout_millis > 0:
367
+ if not self.complete_event.wait(timeout=complete_timeout_millis /
368
+ 1000):
369
+ raise TimeoutError(
370
+ 'speech synthesizer wait for complete timeout {}ms'.format(
371
+ complete_timeout_millis))
372
+ else:
373
+ self.complete_event.wait()
355
374
  self.close()
356
375
  self._stopped.set()
357
376
  self._is_started = False
358
377
 
359
- def __waiting_for_complete(self):
360
- if not self.complete_event.wait(timeout=10000):
361
- raise TimeoutError(
362
- 'speech synthesizer wait for complete timeout 10000ms')
378
+ def __waiting_for_complete(self, timeout):
379
+ if timeout is not None and timeout > 0:
380
+ if not self.complete_event.wait(timeout=timeout / 1000):
381
+ raise TimeoutError(
382
+ f'speech synthesizer wait for complete timeout {timeout}ms'
383
+ )
384
+ else:
385
+ self.complete_event.wait()
363
386
  self.close()
364
387
  self._stopped.set()
365
388
  self._is_started = False
366
389
 
367
- def async_streaming_complete(self):
390
+ def async_streaming_complete(self, complete_timeout_millis=600000):
368
391
  """
369
392
  Asynchronously stop the streaming input speech synthesis task, returns immediately.
370
393
  You need to listen and handle the STREAM_INPUT_TTS_EVENT_SYNTHESIS_COMPLETE event in the on_event callback.
371
394
  Do not destroy the object and callback before this event.
395
+
396
+ Parameters:
397
+ -----------
398
+ complete_timeout_millis: int
399
+ Throws TimeoutError exception if it times out. If the timeout is not None
400
+ and greater than zero, it will wait for the corresponding number of
401
+ milliseconds; otherwise, it will wait indefinitely.
372
402
  """
373
403
 
374
404
  if not self._is_started:
@@ -377,7 +407,8 @@ class SpeechSynthesizer:
377
407
  raise InvalidTask('speech synthesizer task has stopped.')
378
408
  request = self.request.getFinishRequest()
379
409
  self.__send_str(request)
380
- thread = threading.Thread(target=self.__waiting_for_complete)
410
+ thread = threading.Thread(target=self.__waiting_for_complete,
411
+ args=(complete_timeout_millis, ))
381
412
  thread.start()
382
413
 
383
414
  def streaming_cancel(self):
@@ -418,6 +449,7 @@ class SpeechSynthesizer:
418
449
  self.callback.on_close()
419
450
  else:
420
451
  logger.error(f'TaskFailed: {message}')
452
+ raise Exception(f'TaskFailed: {message}')
421
453
  elif EventType.GENERATED == event:
422
454
  if self.callback:
423
455
  self.callback.on_event(message)
@@ -425,9 +457,22 @@ class SpeechSynthesizer:
425
457
  pass
426
458
  except json.JSONDecodeError:
427
459
  logger.error('Failed to parse message as JSON.')
460
+ raise Exception('Failed to parse message as JSON.')
428
461
  elif isinstance(message, (bytes, bytearray)):
429
462
  # 如果失败,认为是二进制消息
430
463
  logger.debug('<<<recv binary {}'.format(len(message)))
464
+ if (self._recv_audio_length == 0):
465
+ self._first_package_timestamp = time.time() * 1000
466
+ logger.debug('first package delay {}'.format(
467
+ self._first_package_timestamp -
468
+ self._start_stream_timestamp))
469
+ self._recv_audio_length += len(message) / (2 * self.sample_rate /
470
+ 1000)
471
+ current = time.time() * 1000
472
+ current_rtf = (current - self._first_package_timestamp
473
+ ) / self._recv_audio_length
474
+ logger.debug('total audio {} ms, current_rtf: {}'.format(
475
+ self._recv_audio_length, current_rtf))
431
476
  # 只有在非异步调用的时候保存音频
432
477
  if not self.async_call:
433
478
  if self._audio_data is None:
@@ -437,7 +482,7 @@ class SpeechSynthesizer:
437
482
  if self.callback:
438
483
  self.callback.on_data(message)
439
484
 
440
- def call(self, text: str):
485
+ def call(self, text: str, timeout_millis=None):
441
486
  """
442
487
  Speech synthesis.
443
488
  If callback is set, the audio will be returned in real-time through the on_event interface.
@@ -447,9 +492,14 @@ class SpeechSynthesizer:
447
492
  -----------
448
493
  text: str
449
494
  utf-8 encoded text
495
+ timeoutMillis:
496
+ Integer or None
450
497
  return: bytes
451
498
  If a callback is not set during initialization, the complete audio is returned
452
499
  as the function's return value. Otherwise, the return value is null.
500
+ If the timeout is set to a value greater than zero and not None,
501
+ it will wait for the corresponding number of milliseconds;
502
+ otherwise, it will wait indefinitely.
453
503
  """
454
504
  # print('还不支持非流式语音合成sdk调用大模型,使用流式模拟')
455
505
  if not self.callback:
@@ -457,19 +507,19 @@ class SpeechSynthesizer:
457
507
  self.__start_stream()
458
508
  self.__submit_text(text)
459
509
  if self.async_call:
460
- self.async_streaming_complete()
510
+ self.async_streaming_complete(timeout_millis)
461
511
  return None
462
512
  else:
463
- self.streaming_complete()
513
+ self.streaming_complete(timeout_millis)
464
514
  return self._audio_data
465
515
 
466
516
  # WebSocket关闭的回调函数
467
517
  def on_close(self, ws, close_status_code, close_msg):
468
518
  pass
469
- # print("### websocket closed msg [{}]{} ###".format(close_status_code, close_msg))
470
519
 
471
520
  # WebSocket发生错误的回调函数
472
521
  def on_error(self, ws, error):
522
+ print(f'websocket closed due to {error}')
473
523
  raise Exception(f'websocket closed due to {error}')
474
524
 
475
525
  # 关闭WebSocket连接
@@ -478,4 +528,4 @@ class SpeechSynthesizer:
478
528
 
479
529
  # 获取上一个任务的taskId
480
530
  def get_last_request_id(self):
481
- return self.last_request_id
531
+ return self.last_request_id
dashscope/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = '1.20.1'
1
+ __version__ = '1.20.3'
dashscope-1.20.1.dist-info/METADATA → dashscope-1.20.3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dashscope
3
- Version: 1.20.1
3
+ Version: 1.20.3
4
4
  Summary: dashscope client sdk library
5
5
  Home-page: https://dashscope.aliyun.com/
6
6
  Author: Alibaba Cloud
dashscope-1.20.1.dist-info/RECORD → dashscope-1.20.3.dist-info/RECORD CHANGED
@@ -6,12 +6,12 @@ dashscope/files.py,sha256=QgJjwhtn9F548nCA8jD8OvE6aQEj-20hZqJgYXsUdQU,3930
6
6
  dashscope/finetune.py,sha256=_tflDUvu0KagSoCzLaf0hofpG_P8NU6PylL8CPjVhrA,6243
7
7
  dashscope/model.py,sha256=UPOn1qMYFhX-ovXi3BMxZEBk8qOK7WLJOYHMbPZwYBo,1440
8
8
  dashscope/models.py,sha256=1-bc-Ue68zurgu_y6RhfFr9uzeQMF5AZq-C32lJGMGU,1224
9
- dashscope/version.py,sha256=50ZydhgZIM_dwezjbX2voC6nDV_wx5kxvUuz3Nl2Eig,23
9
+ dashscope/version.py,sha256=axe7tbDmiC_ViryS2MD_XVQpBGZsB2RQ0ksPIJnHWeo,23
10
10
  dashscope/aigc/__init__.py,sha256=s-MCA87KYiVumYtKtJi5IMN7xelSF6TqEU3s3_7RF-Y,327
11
11
  dashscope/aigc/code_generation.py,sha256=KAJVrGp6tiNFBBg64Ovs9RfcP5SrIhrbW3wdA89NKso,10885
12
12
  dashscope/aigc/conversation.py,sha256=xRoJlCR-IXHjSdkDrK74a9ut1FJg0FZhTNXZAJC18MA,14231
13
13
  dashscope/aigc/generation.py,sha256=53oMCmN5ZbqeqAsKxmdunXlRh-XP8ZtnA7hB2id4Koo,17897
14
- dashscope/aigc/image_synthesis.py,sha256=Hg2r6H7Vj4MsXwm62lHf2lTpUb6nA3xWGEYX2o-2TLQ,10419
14
+ dashscope/aigc/image_synthesis.py,sha256=_TGkh4L_yBNQNmoJUKlfXfljKfxX-SSyxHHQBDm1AC8,10418
15
15
  dashscope/aigc/multimodal_conversation.py,sha256=SlNnnsUPV19gdx8fYJAtsMFWPNGY6vhk5IGHZ5ZczpI,5369
16
16
  dashscope/api_entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  dashscope/api_entities/aiohttp_request.py,sha256=aE3AeWba8Ig_xHMYjrAdkq0N61l_L2VFTG6HYh912X0,10229
@@ -31,12 +31,12 @@ dashscope/assistants/files.py,sha256=pwLVJ_pjpRFWyfI_MRvhH7Si7FzGDj4ChzZgWTpLOhg
31
31
  dashscope/audio/__init__.py,sha256=-ZRxrK-gV4QsUtlThIT-XwqB6vmyEsnhxIxdLmhCUuc,61
32
32
  dashscope/audio/asr/__init__.py,sha256=-s180qWn_JPSpCo1q0aDJJ5HQ3zTzD4z5yUwsRqH4aU,275
33
33
  dashscope/audio/asr/asr_phrase_manager.py,sha256=EjtbI3zz9UQGS1qv6Yb4zzEMj4OJJVXmwkqZyIrzvEA,7642
34
- dashscope/audio/asr/recognition.py,sha256=F2iz6hyXg16Z6DGlPwGpKfRNcAZIIsqXnNPtaZp4Fzo,17369
34
+ dashscope/audio/asr/recognition.py,sha256=cEooE3wGf8kKfJIVbaXEytl5X6F0hMsLe8g4Bj9Fn4w,18768
35
35
  dashscope/audio/asr/transcription.py,sha256=1WAg9WH89antVzRYEKXb5LQP9xylZmX4YKp7v5oMYjY,8931
36
36
  dashscope/audio/tts/__init__.py,sha256=fbnieZX9yNFNh5BsxLpLXb63jlxzxrdCJakV3ignjlQ,194
37
37
  dashscope/audio/tts/speech_synthesizer.py,sha256=dnKx9FDDdO_ETHAjhK8zaMVaH6SfoTtN5YxXXqgY1JA,7571
38
38
  dashscope/audio/tts_v2/__init__.py,sha256=ve5a81qTbWDcRaSuritZtJBzryOIol2_dxzfqqdCw-k,345
39
- dashscope/audio/tts_v2/speech_synthesizer.py,sha256=sv5f4vi17rFcYDif5bSS4UuX_2eVUL2q5rTnXK1EoAg,16650
39
+ dashscope/audio/tts_v2/speech_synthesizer.py,sha256=bpzj9gx2D_FfOzgsjU-GBGmeWvEdewNPFd447mOgM-o,19220
40
40
  dashscope/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
41
  dashscope/client/base_api.py,sha256=rXN97XGyDhCCaD_dz_clpFDjOJfpGjqiH7yX3LaD-GE,41233
42
42
  dashscope/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -82,9 +82,9 @@ dashscope/tokenizers/tokenizer.py,sha256=y6P91qTCYo__pEx_0VHAcj9YECfbUdRqZU1fdGT
82
82
  dashscope/tokenizers/tokenizer_base.py,sha256=REDhzRyDT13iequ61-a6_KcTy0GFKlihQve5HkyoyRs,656
83
83
  dashscope/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
84
84
  dashscope/utils/oss_utils.py,sha256=fi8-PPsN-iR-iv5k2NS5Z8nlWkpgUhr56FRWm4BDh4A,6984
85
- dashscope-1.20.1.dist-info/LICENSE,sha256=Izp5L1DF1Mbza6qojkqNNWlE_mYLnr4rmzx2EBF8YFw,11413
86
- dashscope-1.20.1.dist-info/METADATA,sha256=pcY6Qv8KtqGlNAiqRr0JEfAzXBH8N56KlUD3o3expoA,6641
87
- dashscope-1.20.1.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
88
- dashscope-1.20.1.dist-info/entry_points.txt,sha256=raEp5dOuj8whJ7yqZlDM8WQ5p2RfnGrGNo0QLQEnatY,50
89
- dashscope-1.20.1.dist-info/top_level.txt,sha256=woqavFJK9zas5xTqynmALqOtlafghjsk63Xk86powTU,10
90
- dashscope-1.20.1.dist-info/RECORD,,
85
+ dashscope-1.20.3.dist-info/LICENSE,sha256=Izp5L1DF1Mbza6qojkqNNWlE_mYLnr4rmzx2EBF8YFw,11413
86
+ dashscope-1.20.3.dist-info/METADATA,sha256=JUS8qY6hiQtwyE2f65w3JBnaqUsyhAV3kfwE-4KkSoM,6641
87
+ dashscope-1.20.3.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
88
+ dashscope-1.20.3.dist-info/entry_points.txt,sha256=raEp5dOuj8whJ7yqZlDM8WQ5p2RfnGrGNo0QLQEnatY,50
89
+ dashscope-1.20.3.dist-info/top_level.txt,sha256=woqavFJK9zas5xTqynmALqOtlafghjsk63Xk86powTU,10
90
+ dashscope-1.20.3.dist-info/RECORD,,