dashscope 1.20.1-py3-none-any.whl → 1.20.2-py3-none-any.whl
- dashscope/aigc/image_synthesis.py +1 -1
- dashscope/audio/asr/recognition.py +25 -6
- dashscope/audio/tts_v2/speech_synthesizer.py +60 -16
- dashscope/version.py +1 -1
- {dashscope-1.20.1.dist-info → dashscope-1.20.2.dist-info}/METADATA +1 -1
- {dashscope-1.20.1.dist-info → dashscope-1.20.2.dist-info}/RECORD +10 -10
- {dashscope-1.20.1.dist-info → dashscope-1.20.2.dist-info}/LICENSE +0 -0
- {dashscope-1.20.1.dist-info → dashscope-1.20.2.dist-info}/WHEEL +0 -0
- {dashscope-1.20.1.dist-info → dashscope-1.20.2.dist-info}/entry_points.txt +0 -0
- {dashscope-1.20.1.dist-info → dashscope-1.20.2.dist-info}/top_level.txt +0 -0
dashscope/aigc/image_synthesis.py CHANGED

@@ -127,7 +127,7 @@ class ImageSynthesis(BaseAsyncApi):
         if sketch_image_url is not None and sketch_image_url:
             input['sketch_image_url'] = sketch_image_url
         if ref_img is not None and ref_img:
-            input['ref_img']
+            input['ref_img'] = ref_img
         if extra_input is not None and extra_input:
             input = {**input, **extra_input}
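The only code change in image_synthesis.py is the assignment above: 1.20.1 had the bare subscript input['ref_img'], which assigns nothing (and raises KeyError when the key is absent), so a reference image passed by the caller never reached the request payload. A minimal sketch of the fixed payload construction, using a hypothetical build_input helper rather than the SDK's real internals:

    # Hypothetical helper mirroring the fixed payload construction in 1.20.2;
    # this is not the SDK's actual code path.
    def build_input(prompt: str, ref_img: str = None, extra_input: dict = None) -> dict:
        payload = {'prompt': prompt}
        if ref_img is not None and ref_img:
            payload['ref_img'] = ref_img          # 1.20.1 only evaluated payload['ref_img']
        if extra_input is not None and extra_input:
            payload = {**payload, **extra_input}  # later unpacking wins on key conflicts
        return payload

    print(build_input('a watercolor cat', ref_img='https://example.com/cat.png'))
    # {'prompt': 'a watercolor cat', 'ref_img': 'https://example.com/cat.png'}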
dashscope/audio/asr/recognition.py CHANGED

@@ -1,5 +1,6 @@
 import json
 import os
+import time
 import threading
 from http import HTTPStatus
 from threading import Timer
@@ -157,6 +158,8 @@ class Recognition(BaseApi):
         self._silence_timer = None
         self._kwargs = kwargs
         self._workspace = workspace
+        self._start_stream_timestamp = -1
+        self._first_package_timestamp = -1

     def __del__(self):
         if self._running:
@@ -183,12 +186,18 @@ class Recognition(BaseApi):
                 else:
                     usage: Dict[str, Any] = None
                     useags: List[Any] = None
-                    if 'sentence' in part.output
-                    …
-                    …
-                    …
-                    …
-                    …
+                    if 'sentence' in part.output:
+                        if (self._first_package_timestamp < 0):
+                            self._first_package_timestamp = time.time()*1000
+                            logger.debug('first package delay {}'.format(self._first_package_timestamp - self._start_stream_timestamp))
+                        sentence = part.output['sentence']
+                        logger.debug('Recv Result :{}, isEnd: {}'.format(sentence, RecognitionResult.is_sentence_end(sentence)))
+                        if part.usage is not None:
+                            usage = {
+                                'end_time': part.output['sentence']['end_time'],
+                                'usage': part.usage
+                            }
+                            useags = [usage]

                 self._callback.on_event(
                     RecognitionResult(
@@ -260,6 +269,8 @@ class Recognition(BaseApi):
         if self._running:
             raise InvalidParameter('Speech recognition has started.')

+        self._start_stream_timestamp = -1
+        self._first_package_timestamp = -1
         self._phrase = phrase_id
         self._kwargs.update(**kwargs)
         self._recognition_once = False
@@ -307,6 +318,7 @@ class Recognition(BaseApi):
         Returns:
             RecognitionResult: The result of speech recognition.
         """
+        self._start_stream_timestamp = time.time()*1000
         if self._running:
             raise InvalidParameter('Speech recognition has been called.')

@@ -350,7 +362,11 @@ class Recognition(BaseApi):
             for part in responses:
                 if part.status_code == HTTPStatus.OK:
                     if 'sentence' in part.output:
+                        if (self._first_package_timestamp < 0):
+                            self._first_package_timestamp = time.time()*1000
+                            logger.debug('first package delay {}'.format(self._first_package_timestamp - self._start_stream_timestamp))
                         sentence = part.output['sentence']
+                        logger.debug('Recv Result :{}, isEnd: {}'.format(sentence, RecognitionResult.is_sentence_end(sentence)))
                         if RecognitionResult.is_sentence_end(sentence):
                             sentences.append(sentence)

@@ -408,6 +424,9 @@ class Recognition(BaseApi):
         if self._running is False:
             raise InvalidParameter('Speech recognition has stopped.')

+        if (self._start_stream_timestamp < 0):
+            self._start_stream_timestamp = time.time() * 1000
+        logger.debug('send_audio_frame: {}'.format(len(buffer)))
         self._stream_data = self._stream_data + [buffer]

     def _tidy_kwargs(self):
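Taken together, the recognition.py changes add simple first-package latency instrumentation: _start_stream_timestamp is stamped in milliseconds when streaming starts or when the first audio frame is sent, _first_package_timestamp when the first response containing a 'sentence' arrives, and the difference is logged at debug level. A standalone sketch of the same pattern, independent of the SDK classes:

    import logging
    import time

    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger('latency-sketch')

    class FirstPackageTimer:
        """Millisecond timestamps, mirroring the -1 'unset' convention in the diff."""
        def __init__(self):
            self.start_ms = -1
            self.first_package_ms = -1

        def on_send_audio(self):
            # stamp the start only once, on the first outgoing frame
            if self.start_ms < 0:
                self.start_ms = time.time() * 1000

        def on_result(self):
            # stamp the first incoming result and log the delay
            if self.first_package_ms < 0:
                self.first_package_ms = time.time() * 1000
                logger.debug('first package delay %.1f ms',
                             self.first_package_ms - self.start_ms)

    timer = FirstPackageTimer()
    timer.on_send_audio()
    time.sleep(0.05)      # stand-in for network / recognition latency
    timer.on_result()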
dashscope/audio/tts_v2/speech_synthesizer.py CHANGED

@@ -5,9 +5,8 @@ import time
 import uuid
 from enum import Enum, unique

-import websocket
-
 import dashscope
+import websocket
 from dashscope.common.error import InputRequired, InvalidTask, ModelRequired
 from dashscope.common.logging import logger
 from dashscope.protocol.websocket import (ACTION_KEY, EVENT_KEY, HEADER,
@@ -239,6 +238,14 @@ class SpeechSynthesizer:
         self.headers = headers
         self.workspace = workspace
         self.additional_params = additional_params
+        self.model = model
+        self.voice = voice
+        self.aformat = format.format
+        if (self.aformat == 'DEFAULT'):
+            self.aformat = 'mp3'
+        self.sample_rate = format.sample_rate
+        if (self.sample_rate == 0):
+            self.sample_rate = 22050

         self.request = Request(
             apikey=self.apikey,
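The constructor now also records the requested model, voice, audio format, and sample rate, falling back to mp3 and 22050 Hz when the format is the DEFAULT placeholder or reports a sample rate of 0; the stored sample rate is what the real-time-factor logging added later in this file divides by. A small sketch of that fallback, using a hypothetical resolve_format helper rather than the package's AudioFormat type:

    # Hypothetical stand-in for the format/sample-rate fallback added in __init__.
    def resolve_format(format_name: str, sample_rate: int) -> tuple:
        if format_name == 'DEFAULT':   # placeholder format -> assume mp3
            format_name = 'mp3'
        if sample_rate == 0:           # unspecified sample rate -> assume 22050 Hz
            sample_rate = 22050
        return format_name, sample_rate

    assert resolve_format('DEFAULT', 0) == ('mp3', 22050)
    assert resolve_format('pcm', 16000) == ('pcm', 16000)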
@@ -265,12 +272,18 @@ class SpeechSynthesizer:
         # since dashscope sdk will send first text in run-task
         if not self.callback:
             self.async_call = False
+        self._start_stream_timestamp = -1
+        self._first_package_timestamp = -1
+        self._recv_audio_length = 0

     def __send_str(self, data: str):
         logger.debug('>>>send {}'.format(data))
         self.ws.send(data)

     def __start_stream(self, ):
+        self._start_stream_timestamp = time.time() * 1000
+        self._first_package_timestamp = -1
+        self._recv_audio_length = 0
         if self.callback is None:
             raise InputRequired('callback is required!')
         # reset inner params
@@ -332,7 +345,7 @@ class SpeechSynthesizer:
         self.__submit_text(text)
         return None

-    def streaming_complete(self,
+    def streaming_complete(self, complete_timeout_millis=10000):
         """
         Synchronously stop the streaming input speech synthesis task.
         Wait for all remaining synthesized audio before returning
@@ -340,7 +353,9 @@ class SpeechSynthesizer:
         Parameters:
         -----------
         complete_timeout_millis: int
-            Throws TimeoutError exception if it times out.
+            Throws TimeoutError exception if it times out. If the timeout is not None
+            and greater than zero, it will wait for the corresponding number of
+            milliseconds; otherwise, it will wait indefinitely.
         """
         if not self._is_started:
             raise InvalidTask('speech synthesizer has not been started.')
@@ -348,27 +363,38 @@ class SpeechSynthesizer:
             raise InvalidTask('speech synthesizer task has stopped.')
         request = self.request.getFinishRequest()
         self.__send_str(request)
-        if not
-        …
-        …
-        …
+        if complete_timeout_millis is not None and complete_timeout_millis > 0:
+            if not self.complete_event.wait(timeout=complete_timeout_millis /
+                                            1000):
+                raise TimeoutError(
+                    'speech synthesizer wait for complete timeout {}ms'.format(
+                        complete_timeout_millis))
+        else:
+            self.complete_event.wait()
         self.close()
         self._stopped.set()
         self._is_started = False

-    def __waiting_for_complete(self):
-        if not self.complete_event.wait(timeout=
+    def __waiting_for_complete(self, timeout):
+        if not self.complete_event.wait(timeout=timeout / 1000):
             raise TimeoutError(
                 'speech synthesizer wait for complete timeout 10000ms')
         self.close()
         self._stopped.set()
         self._is_started = False

-    def async_streaming_complete(self):
+    def async_streaming_complete(self, complete_timeout_millis=10000):
         """
         Asynchronously stop the streaming input speech synthesis task, returns immediately.
         You need to listen and handle the STREAM_INPUT_TTS_EVENT_SYNTHESIS_COMPLETE event in the on_event callback.
         Do not destroy the object and callback before this event.
+
+        Parameters:
+        -----------
+        complete_timeout_millis: int
+            Throws TimeoutError exception if it times out. If the timeout is not None
+            and greater than zero, it will wait for the corresponding number of
+            milliseconds; otherwise, it will wait indefinitely.
         """

         if not self._is_started:
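streaming_complete and async_streaming_complete now take a complete_timeout_millis argument (default 10000): a positive value is converted to seconds and passed to threading.Event.wait, and a missed wait raises TimeoutError, while None or a non-positive value waits indefinitely. A minimal sketch of that wait policy, assuming only a threading.Event that some worker sets when synthesis finishes:

    import threading

    def wait_for_complete(done: threading.Event, complete_timeout_millis=10000):
        """Wait for `done` the way the 1.20.2 synthesizer waits for task completion."""
        if complete_timeout_millis is not None and complete_timeout_millis > 0:
            # Event.wait takes seconds and returns False on timeout
            if not done.wait(timeout=complete_timeout_millis / 1000):
                raise TimeoutError(
                    'wait for complete timeout {}ms'.format(complete_timeout_millis))
        else:
            done.wait()  # no (or non-positive) timeout: block until set

    # usage sketch: a worker sets the event after ~50 ms
    done = threading.Event()
    threading.Timer(0.05, done.set).start()
    wait_for_complete(done, complete_timeout_millis=1000)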
@@ -377,7 +403,8 @@ class SpeechSynthesizer:
             raise InvalidTask('speech synthesizer task has stopped.')
         request = self.request.getFinishRequest()
         self.__send_str(request)
-        thread = threading.Thread(target=self.__waiting_for_complete
+        thread = threading.Thread(target=self.__waiting_for_complete,
+                                  args=(complete_timeout_millis, ))
        thread.start()

     def streaming_cancel(self):
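The asynchronous path previously started the waiter thread without an argument tuple; now that __waiting_for_complete takes a timeout, the timeout is passed through args. A tiny sketch of the pattern (note the trailing comma that makes args a one-element tuple):

    import threading

    def waiting_for_complete(timeout_millis):
        # stand-in for the private waiter; just reports the timeout it received
        print('would wait up to {} ms'.format(timeout_millis))

    # args must be a tuple: (10000, ) not (10000)
    thread = threading.Thread(target=waiting_for_complete, args=(10000, ))
    thread.start()
    thread.join()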
@@ -428,6 +455,18 @@ class SpeechSynthesizer:
         elif isinstance(message, (bytes, bytearray)):
             # if parsing fails, treat it as a binary message
             logger.debug('<<<recv binary {}'.format(len(message)))
+            if (self._recv_audio_length == 0):
+                self._first_package_timestamp = time.time() * 1000
+                logger.debug('first package delay {}'.format(
+                    self._first_package_timestamp -
+                    self._start_stream_timestamp))
+            self._recv_audio_length += len(message) / (2 * self.sample_rate /
+                                                       1000)
+            current = time.time() * 1000
+            current_rtf = (current - self._first_package_timestamp
+                           ) / self._recv_audio_length
+            logger.debug('total audio {} ms, current_rtf: {}'.format(
+                self._recv_audio_length, current_rtf))
             # only save audio when the call is not asynchronous
             if not self.async_call:
                 if self._audio_data is None:
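The binary-message handler now tracks received audio duration and a running real-time factor (RTF). The duration estimate len(message) / (2 * sample_rate / 1000) assumes 16-bit (2-byte) mono PCM, converting a byte count into milliseconds of audio; RTF is then elapsed wall-clock time since the first audio packet divided by the audio duration received so far, where values below 1 mean synthesis runs faster than real time. A standalone sketch of the same arithmetic:

    def pcm_bytes_to_ms(num_bytes: int, sample_rate: int) -> float:
        # assumes 16-bit mono PCM: 2 bytes per sample, sample_rate samples per second
        return num_bytes / (2 * sample_rate / 1000)

    def real_time_factor(elapsed_ms: float, audio_ms: float) -> float:
        # < 1.0 means audio is produced faster than it plays back
        return elapsed_ms / audio_ms

    audio_ms = pcm_bytes_to_ms(num_bytes=44100, sample_rate=22050)           # 1000.0 ms
    print(audio_ms, real_time_factor(elapsed_ms=300.0, audio_ms=audio_ms))   # 1000.0 0.3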
@@ -437,7 +476,7 @@ class SpeechSynthesizer:
         if self.callback:
             self.callback.on_data(message)

-    def call(self, text: str):
+    def call(self, text: str, timeout_millis=None):
         """
         Speech synthesis.
         If callback is set, the audio will be returned in real-time through the on_event interface.
@@ -447,9 +486,14 @@ class SpeechSynthesizer:
         -----------
         text: str
             utf-8 encoded text
+        timeoutMillis:
+            Integer or None
         return: bytes
             If a callback is not set during initialization, the complete audio is returned
             as the function's return value. Otherwise, the return value is null.
+            If the timeout is set to a value greater than zero and not None,
+            it will wait for the corresponding number of milliseconds;
+            otherwise, it will wait indefinitely.
         """
         # print('the sdk does not yet support non-streaming speech synthesis with the large model; simulating it with streaming')
         if not self.callback:
@@ -457,10 +501,10 @@ class SpeechSynthesizer:
         self.__start_stream()
         self.__submit_text(text)
         if self.async_call:
-            self.async_streaming_complete()
+            self.async_streaming_complete(timeout_millis)
             return None
         else:
-            self.streaming_complete()
+            self.streaming_complete(timeout_millis)
             return self._audio_data

     # callback invoked when the WebSocket closes
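With the new timeout_millis parameter, a blocking call() forwards the timeout to streaming_complete (or to async_streaming_complete when a callback drives the audio). A usage sketch, assuming the tts_v2 SpeechSynthesizer constructor accepts the model and voice keywords shown earlier in this diff and that a valid API key is configured; the model and voice names are placeholders, not taken from this diff:

    import dashscope
    from dashscope.audio.tts_v2 import SpeechSynthesizer

    dashscope.api_key = 'sk-...'  # placeholder

    # model/voice names are illustrative only
    synthesizer = SpeechSynthesizer(model='cosyvoice-v1', voice='longxiaochun')

    try:
        # no callback set, so call() blocks and returns the full audio;
        # give up after 30 s instead of the previous fixed 10 s internal wait
        audio = synthesizer.call('Hello from dashscope 1.20.2', timeout_millis=30000)
        with open('out.mp3', 'wb') as f:
            f.write(audio)
    except TimeoutError:
        print('synthesis did not complete within 30 s')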
@@ -478,4 +522,4 @@ class SpeechSynthesizer:

     # get the taskId of the previous task
     def get_last_request_id(self):
-        return self.last_request_id
+        return self.last_request_id
dashscope/version.py CHANGED

@@ -1 +1 @@
-__version__ = '1.20.1'
+__version__ = '1.20.2'
{dashscope-1.20.1.dist-info → dashscope-1.20.2.dist-info}/RECORD CHANGED

@@ -6,12 +6,12 @@ dashscope/files.py,sha256=QgJjwhtn9F548nCA8jD8OvE6aQEj-20hZqJgYXsUdQU,3930
 dashscope/finetune.py,sha256=_tflDUvu0KagSoCzLaf0hofpG_P8NU6PylL8CPjVhrA,6243
 dashscope/model.py,sha256=UPOn1qMYFhX-ovXi3BMxZEBk8qOK7WLJOYHMbPZwYBo,1440
 dashscope/models.py,sha256=1-bc-Ue68zurgu_y6RhfFr9uzeQMF5AZq-C32lJGMGU,1224
-dashscope/version.py,sha256=
+dashscope/version.py,sha256=6KIFuOk_uHObRUtO_hf7gfIcH-9norzWjEkjzb1Dclg,23
 dashscope/aigc/__init__.py,sha256=s-MCA87KYiVumYtKtJi5IMN7xelSF6TqEU3s3_7RF-Y,327
 dashscope/aigc/code_generation.py,sha256=KAJVrGp6tiNFBBg64Ovs9RfcP5SrIhrbW3wdA89NKso,10885
 dashscope/aigc/conversation.py,sha256=xRoJlCR-IXHjSdkDrK74a9ut1FJg0FZhTNXZAJC18MA,14231
 dashscope/aigc/generation.py,sha256=53oMCmN5ZbqeqAsKxmdunXlRh-XP8ZtnA7hB2id4Koo,17897
-dashscope/aigc/image_synthesis.py,sha256=
+dashscope/aigc/image_synthesis.py,sha256=_TGkh4L_yBNQNmoJUKlfXfljKfxX-SSyxHHQBDm1AC8,10418
 dashscope/aigc/multimodal_conversation.py,sha256=SlNnnsUPV19gdx8fYJAtsMFWPNGY6vhk5IGHZ5ZczpI,5369
 dashscope/api_entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dashscope/api_entities/aiohttp_request.py,sha256=aE3AeWba8Ig_xHMYjrAdkq0N61l_L2VFTG6HYh912X0,10229
@@ -31,12 +31,12 @@ dashscope/assistants/files.py,sha256=pwLVJ_pjpRFWyfI_MRvhH7Si7FzGDj4ChzZgWTpLOhg
 dashscope/audio/__init__.py,sha256=-ZRxrK-gV4QsUtlThIT-XwqB6vmyEsnhxIxdLmhCUuc,61
 dashscope/audio/asr/__init__.py,sha256=-s180qWn_JPSpCo1q0aDJJ5HQ3zTzD4z5yUwsRqH4aU,275
 dashscope/audio/asr/asr_phrase_manager.py,sha256=EjtbI3zz9UQGS1qv6Yb4zzEMj4OJJVXmwkqZyIrzvEA,7642
-dashscope/audio/asr/recognition.py,sha256=
+dashscope/audio/asr/recognition.py,sha256=cEooE3wGf8kKfJIVbaXEytl5X6F0hMsLe8g4Bj9Fn4w,18768
 dashscope/audio/asr/transcription.py,sha256=1WAg9WH89antVzRYEKXb5LQP9xylZmX4YKp7v5oMYjY,8931
 dashscope/audio/tts/__init__.py,sha256=fbnieZX9yNFNh5BsxLpLXb63jlxzxrdCJakV3ignjlQ,194
 dashscope/audio/tts/speech_synthesizer.py,sha256=dnKx9FDDdO_ETHAjhK8zaMVaH6SfoTtN5YxXXqgY1JA,7571
 dashscope/audio/tts_v2/__init__.py,sha256=ve5a81qTbWDcRaSuritZtJBzryOIol2_dxzfqqdCw-k,345
-dashscope/audio/tts_v2/speech_synthesizer.py,sha256=
+dashscope/audio/tts_v2/speech_synthesizer.py,sha256=RenOvxp6BOs_-E_K5akMEBFw9G0HXoLnvHw1IOTUgeU,18991
 dashscope/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dashscope/client/base_api.py,sha256=rXN97XGyDhCCaD_dz_clpFDjOJfpGjqiH7yX3LaD-GE,41233
 dashscope/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -82,9 +82,9 @@ dashscope/tokenizers/tokenizer.py,sha256=y6P91qTCYo__pEx_0VHAcj9YECfbUdRqZU1fdGT
 dashscope/tokenizers/tokenizer_base.py,sha256=REDhzRyDT13iequ61-a6_KcTy0GFKlihQve5HkyoyRs,656
 dashscope/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dashscope/utils/oss_utils.py,sha256=fi8-PPsN-iR-iv5k2NS5Z8nlWkpgUhr56FRWm4BDh4A,6984
-dashscope-1.20.
-dashscope-1.20.
-dashscope-1.20.
-dashscope-1.20.
-dashscope-1.20.
-dashscope-1.20.
+dashscope-1.20.2.dist-info/LICENSE,sha256=Izp5L1DF1Mbza6qojkqNNWlE_mYLnr4rmzx2EBF8YFw,11413
+dashscope-1.20.2.dist-info/METADATA,sha256=I52DKGYRGq77FO4pFbQwUHN-2g4qLLNz-jMO0DMQ008,6641
+dashscope-1.20.2.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
+dashscope-1.20.2.dist-info/entry_points.txt,sha256=raEp5dOuj8whJ7yqZlDM8WQ5p2RfnGrGNo0QLQEnatY,50
+dashscope-1.20.2.dist-info/top_level.txt,sha256=woqavFJK9zas5xTqynmALqOtlafghjsk63Xk86powTU,10
+dashscope-1.20.2.dist-info/RECORD,,
{dashscope-1.20.1.dist-info → dashscope-1.20.2.dist-info}/LICENSE: file without changes
{dashscope-1.20.1.dist-info → dashscope-1.20.2.dist-info}/WHEEL: file without changes
{dashscope-1.20.1.dist-info → dashscope-1.20.2.dist-info}/entry_points.txt: file without changes
{dashscope-1.20.1.dist-info → dashscope-1.20.2.dist-info}/top_level.txt: file without changes