dashscope 1.24.3__py3-none-any.whl → 1.24.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dashscope might be problematic. Click here for more details.
- dashscope/__init__.py +2 -1
- dashscope/aigc/__init__.py +6 -4
- dashscope/aigc/multimodal_conversation.py +41 -20
- dashscope/api_entities/dashscope_response.py +31 -3
- dashscope/audio/tts_v2/speech_synthesizer.py +30 -0
- dashscope/embeddings/multimodal_embedding.py +70 -1
- dashscope/multimodal/__init__.py +20 -0
- dashscope/multimodal/tingwu/__init__.py +10 -0
- dashscope/multimodal/tingwu/tingwu_realtime.py +579 -0
- dashscope/version.py +1 -1
- {dashscope-1.24.3.dist-info → dashscope-1.24.5.dist-info}/METADATA +1 -1
- {dashscope-1.24.3.dist-info → dashscope-1.24.5.dist-info}/RECORD +16 -15
- {dashscope-1.24.3.dist-info → dashscope-1.24.5.dist-info}/WHEEL +0 -0
- {dashscope-1.24.3.dist-info → dashscope-1.24.5.dist-info}/entry_points.txt +0 -0
- {dashscope-1.24.3.dist-info → dashscope-1.24.5.dist-info}/licenses/LICENSE +0 -0
- {dashscope-1.24.3.dist-info → dashscope-1.24.5.dist-info}/top_level.txt +0 -0
dashscope/__init__.py
CHANGED
|
@@ -24,7 +24,7 @@ from dashscope.embeddings.batch_text_embedding_response import \
|
|
|
24
24
|
BatchTextEmbeddingResponse
|
|
25
25
|
from dashscope.embeddings.multimodal_embedding import (
|
|
26
26
|
MultiModalEmbedding, MultiModalEmbeddingItemAudio,
|
|
27
|
-
MultiModalEmbeddingItemImage, MultiModalEmbeddingItemText)
|
|
27
|
+
MultiModalEmbeddingItemImage, MultiModalEmbeddingItemText, AioMultiModalEmbedding)
|
|
28
28
|
from dashscope.embeddings.text_embedding import TextEmbedding
|
|
29
29
|
from dashscope.files import Files
|
|
30
30
|
from dashscope.models import Models
|
|
@@ -55,6 +55,7 @@ __all__ = [
|
|
|
55
55
|
Models,
|
|
56
56
|
TextEmbedding,
|
|
57
57
|
MultiModalEmbedding,
|
|
58
|
+
AioMultiModalEmbedding,
|
|
58
59
|
MultiModalEmbeddingItemAudio,
|
|
59
60
|
MultiModalEmbeddingItemImage,
|
|
60
61
|
MultiModalEmbeddingItemText,
|
dashscope/aigc/__init__.py
CHANGED
|
@@ -1,18 +1,20 @@
|
|
|
1
1
|
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
2
|
-
|
|
3
2
|
from .conversation import Conversation, History, HistoryItem
|
|
4
|
-
from .generation import Generation
|
|
5
|
-
from .image_synthesis import ImageSynthesis
|
|
3
|
+
from .generation import Generation, AioGeneration
|
|
4
|
+
from .image_synthesis import ImageSynthesis, AioImageSynthesis
|
|
6
5
|
from .multimodal_conversation import MultiModalConversation, AioMultiModalConversation
|
|
7
|
-
from .video_synthesis import VideoSynthesis
|
|
6
|
+
from .video_synthesis import VideoSynthesis, AioVideoSynthesis
|
|
8
7
|
|
|
9
8
|
__all__ = [
|
|
10
9
|
Generation,
|
|
10
|
+
AioGeneration,
|
|
11
11
|
Conversation,
|
|
12
12
|
HistoryItem,
|
|
13
13
|
History,
|
|
14
14
|
ImageSynthesis,
|
|
15
|
+
AioImageSynthesis,
|
|
15
16
|
MultiModalConversation,
|
|
16
17
|
AioMultiModalConversation,
|
|
17
18
|
VideoSynthesis,
|
|
19
|
+
AioVideoSynthesis,
|
|
18
20
|
]
|
|
@@ -24,9 +24,10 @@ class MultiModalConversation(BaseApi):
|
|
|
24
24
|
def call(
|
|
25
25
|
cls,
|
|
26
26
|
model: str,
|
|
27
|
-
messages: List,
|
|
27
|
+
messages: List = None,
|
|
28
28
|
api_key: str = None,
|
|
29
29
|
workspace: str = None,
|
|
30
|
+
text: str = None,
|
|
30
31
|
**kwargs
|
|
31
32
|
) -> Union[MultiModalConversationResponse, Generator[
|
|
32
33
|
MultiModalConversationResponse, None, None]]:
|
|
@@ -55,6 +56,7 @@ class MultiModalConversation(BaseApi):
|
|
|
55
56
|
if None, will retrieve by rule [1].
|
|
56
57
|
[1]: https://help.aliyun.com/zh/dashscope/developer-reference/api-key-settings. # noqa E501
|
|
57
58
|
workspace (str): The dashscope workspace id.
|
|
59
|
+
text (str): The text to generate.
|
|
58
60
|
**kwargs:
|
|
59
61
|
stream(bool, `optional`): Enable server-sent events
|
|
60
62
|
(ref: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events) # noqa E501
|
|
@@ -68,8 +70,11 @@ class MultiModalConversation(BaseApi):
|
|
|
68
70
|
tokens with top_p probability mass. So 0.1 means only
|
|
69
71
|
the tokens comprising the top 10% probability mass are
|
|
70
72
|
considered[qwen-turbo,bailian-v1].
|
|
73
|
+
voice(string, `optional`): The voice name of qwen tts, include 'Cherry'/'Ethan'/'Sunny'/'Dylan' and so on,
|
|
74
|
+
you can get the total voice list : https://help.aliyun.com/zh/model-studio/qwen-tts.
|
|
71
75
|
top_k(float, `optional`):
|
|
72
76
|
|
|
77
|
+
|
|
73
78
|
Raises:
|
|
74
79
|
InvalidInput: The history and auto_history are mutually exclusive.
|
|
75
80
|
|
|
@@ -78,18 +83,24 @@ class MultiModalConversation(BaseApi):
|
|
|
78
83
|
Generator[MultiModalConversationResponse, None, None]]: If
|
|
79
84
|
stream is True, return Generator, otherwise MultiModalConversationResponse.
|
|
80
85
|
"""
|
|
81
|
-
if (messages is None or not messages):
|
|
82
|
-
raise InputRequired('prompt or messages is required!')
|
|
83
86
|
if model is None or not model:
|
|
84
87
|
raise ModelRequired('Model is required!')
|
|
85
88
|
task_group, _ = _get_task_group_and_task(__name__)
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
89
|
+
input = {}
|
|
90
|
+
msg_copy = None
|
|
91
|
+
|
|
92
|
+
if messages is not None and messages:
|
|
93
|
+
msg_copy = copy.deepcopy(messages)
|
|
94
|
+
has_upload = cls._preprocess_messages(model, msg_copy, api_key)
|
|
95
|
+
if has_upload:
|
|
96
|
+
headers = kwargs.pop('headers', {})
|
|
97
|
+
headers['X-DashScope-OssResourceResolve'] = 'enable'
|
|
98
|
+
kwargs['headers'] = headers
|
|
99
|
+
|
|
100
|
+
if text is not None and text:
|
|
101
|
+
input.update({'text': text})
|
|
102
|
+
if msg_copy is not None:
|
|
103
|
+
input.update({'messages': msg_copy})
|
|
93
104
|
response = super().call(model=model,
|
|
94
105
|
task_group=task_group,
|
|
95
106
|
task=MultiModalConversation.task,
|
|
@@ -145,9 +156,10 @@ class AioMultiModalConversation(BaseAioApi):
|
|
|
145
156
|
async def call(
|
|
146
157
|
cls,
|
|
147
158
|
model: str,
|
|
148
|
-
messages: List,
|
|
159
|
+
messages: List = None,
|
|
149
160
|
api_key: str = None,
|
|
150
161
|
workspace: str = None,
|
|
162
|
+
text: str = None,
|
|
151
163
|
**kwargs
|
|
152
164
|
) -> Union[MultiModalConversationResponse, Generator[
|
|
153
165
|
MultiModalConversationResponse, None, None]]:
|
|
@@ -176,6 +188,7 @@ class AioMultiModalConversation(BaseAioApi):
|
|
|
176
188
|
if None, will retrieve by rule [1].
|
|
177
189
|
[1]: https://help.aliyun.com/zh/dashscope/developer-reference/api-key-settings. # noqa E501
|
|
178
190
|
workspace (str): The dashscope workspace id.
|
|
191
|
+
text (str): The text to generate.
|
|
179
192
|
**kwargs:
|
|
180
193
|
stream(bool, `optional`): Enable server-sent events
|
|
181
194
|
(ref: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events) # noqa E501
|
|
@@ -189,6 +202,8 @@ class AioMultiModalConversation(BaseAioApi):
|
|
|
189
202
|
tokens with top_p probability mass. So 0.1 means only
|
|
190
203
|
the tokens comprising the top 10% probability mass are
|
|
191
204
|
considered[qwen-turbo,bailian-v1].
|
|
205
|
+
voice(string, `optional`): The voice name of qwen tts, include 'Cherry'/'Ethan'/'Sunny'/'Dylan' and so on,
|
|
206
|
+
you can get the total voice list : https://help.aliyun.com/zh/model-studio/qwen-tts.
|
|
192
207
|
top_k(float, `optional`):
|
|
193
208
|
|
|
194
209
|
Raises:
|
|
@@ -199,18 +214,24 @@ class AioMultiModalConversation(BaseAioApi):
|
|
|
199
214
|
Generator[MultiModalConversationResponse, None, None]]: If
|
|
200
215
|
stream is True, return Generator, otherwise MultiModalConversationResponse.
|
|
201
216
|
"""
|
|
202
|
-
if (messages is None or not messages):
|
|
203
|
-
raise InputRequired('prompt or messages is required!')
|
|
204
217
|
if model is None or not model:
|
|
205
218
|
raise ModelRequired('Model is required!')
|
|
206
219
|
task_group, _ = _get_task_group_and_task(__name__)
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
220
|
+
input = {}
|
|
221
|
+
msg_copy = None
|
|
222
|
+
|
|
223
|
+
if messages is not None and messages:
|
|
224
|
+
msg_copy = copy.deepcopy(messages)
|
|
225
|
+
has_upload = cls._preprocess_messages(model, msg_copy, api_key)
|
|
226
|
+
if has_upload:
|
|
227
|
+
headers = kwargs.pop('headers', {})
|
|
228
|
+
headers['X-DashScope-OssResourceResolve'] = 'enable'
|
|
229
|
+
kwargs['headers'] = headers
|
|
230
|
+
|
|
231
|
+
if text is not None and text:
|
|
232
|
+
input.update({'text': text})
|
|
233
|
+
if msg_copy is not None:
|
|
234
|
+
input.update({'messages': msg_copy})
|
|
214
235
|
response = await super().call(model=model,
|
|
215
236
|
task_group=task_group,
|
|
216
237
|
task=AioMultiModalConversation.task,
|
|
@@ -152,6 +152,26 @@ class Choice(DictMixin):
|
|
|
152
152
|
**kwargs)
|
|
153
153
|
|
|
154
154
|
|
|
155
|
+
@dataclass(init=False)
|
|
156
|
+
class Audio(DictMixin):
|
|
157
|
+
data: str
|
|
158
|
+
url: str
|
|
159
|
+
id: str
|
|
160
|
+
expires_at: int
|
|
161
|
+
|
|
162
|
+
def __init__(self,
|
|
163
|
+
data: str = None,
|
|
164
|
+
url: str = None,
|
|
165
|
+
id: str = None,
|
|
166
|
+
expires_at: int = None,
|
|
167
|
+
**kwargs):
|
|
168
|
+
super().__init__(data=data,
|
|
169
|
+
url=url,
|
|
170
|
+
id=id,
|
|
171
|
+
expires_at=expires_at,
|
|
172
|
+
**kwargs)
|
|
173
|
+
|
|
174
|
+
|
|
155
175
|
@dataclass(init=False)
|
|
156
176
|
class GenerationOutput(DictMixin):
|
|
157
177
|
text: str
|
|
@@ -217,20 +237,25 @@ class GenerationResponse(DashScopeAPIResponse):
|
|
|
217
237
|
@dataclass(init=False)
|
|
218
238
|
class MultiModalConversationOutput(DictMixin):
|
|
219
239
|
choices: List[Choice]
|
|
240
|
+
audio: Audio
|
|
220
241
|
|
|
221
242
|
def __init__(self,
|
|
222
243
|
text: str = None,
|
|
223
244
|
finish_reason: str = None,
|
|
224
245
|
choices: List[Choice] = None,
|
|
246
|
+
audio: Audio = None,
|
|
225
247
|
**kwargs):
|
|
226
248
|
chs = None
|
|
227
249
|
if choices is not None:
|
|
228
250
|
chs = []
|
|
229
251
|
for choice in choices:
|
|
230
252
|
chs.append(Choice(**choice))
|
|
253
|
+
if audio is not None:
|
|
254
|
+
audio = Audio(**audio)
|
|
231
255
|
super().__init__(text=text,
|
|
232
256
|
finish_reason=finish_reason,
|
|
233
257
|
choices=chs,
|
|
258
|
+
audio=audio,
|
|
234
259
|
**kwargs)
|
|
235
260
|
|
|
236
261
|
|
|
@@ -238,15 +263,18 @@ class MultiModalConversationOutput(DictMixin):
|
|
|
238
263
|
class MultiModalConversationUsage(DictMixin):
|
|
239
264
|
input_tokens: int
|
|
240
265
|
output_tokens: int
|
|
266
|
+
characters: int
|
|
241
267
|
|
|
242
268
|
# TODO add image usage info.
|
|
243
269
|
|
|
244
270
|
def __init__(self,
|
|
245
271
|
input_tokens: int = 0,
|
|
246
272
|
output_tokens: int = 0,
|
|
273
|
+
characters: int = 0,
|
|
247
274
|
**kwargs):
|
|
248
275
|
super().__init__(input_tokens=input_tokens,
|
|
249
276
|
output_tokens=output_tokens,
|
|
277
|
+
characters=characters,
|
|
250
278
|
**kwargs)
|
|
251
279
|
|
|
252
280
|
|
|
@@ -378,7 +406,7 @@ class RecognitionResponse(DashScopeAPIResponse):
|
|
|
378
406
|
"""
|
|
379
407
|
result = False
|
|
380
408
|
if sentence is not None and 'end_time' in sentence and sentence[
|
|
381
|
-
|
|
409
|
+
'end_time'] is not None:
|
|
382
410
|
result = True
|
|
383
411
|
return result
|
|
384
412
|
|
|
@@ -445,8 +473,8 @@ class ImageSynthesisOutput(DictMixin):
|
|
|
445
473
|
results: List[ImageSynthesisResult]
|
|
446
474
|
|
|
447
475
|
def __init__(self,
|
|
448
|
-
task_id: str =
|
|
449
|
-
task_status: str =
|
|
476
|
+
task_id: str = None,
|
|
477
|
+
task_status: str = None,
|
|
450
478
|
results: List[ImageSynthesisResult] = [],
|
|
451
479
|
**kwargs):
|
|
452
480
|
res = []
|
|
@@ -98,6 +98,10 @@ class Request:
|
|
|
98
98
|
volume=50,
|
|
99
99
|
speech_rate=1.0,
|
|
100
100
|
pitch_rate=1.0,
|
|
101
|
+
seed=0,
|
|
102
|
+
synthesis_type=0,
|
|
103
|
+
instruction=None,
|
|
104
|
+
language_hints: list = None,
|
|
101
105
|
):
|
|
102
106
|
self.task_id = self.genUid()
|
|
103
107
|
self.apikey = apikey
|
|
@@ -109,6 +113,10 @@ class Request:
|
|
|
109
113
|
self.volume = volume
|
|
110
114
|
self.speech_rate = speech_rate
|
|
111
115
|
self.pitch_rate = pitch_rate
|
|
116
|
+
self.seed = seed
|
|
117
|
+
self.synthesis_type = synthesis_type
|
|
118
|
+
self.instruction = instruction
|
|
119
|
+
self.language_hints = language_hints
|
|
112
120
|
|
|
113
121
|
def genUid(self):
|
|
114
122
|
# 生成随机UUID
|
|
@@ -156,6 +164,8 @@ class Request:
|
|
|
156
164
|
'rate': self.speech_rate,
|
|
157
165
|
'format': self.format,
|
|
158
166
|
'pitch': self.pitch_rate,
|
|
167
|
+
'seed': self.seed,
|
|
168
|
+
'type': self.synthesis_type
|
|
159
169
|
},
|
|
160
170
|
},
|
|
161
171
|
}
|
|
@@ -163,6 +173,10 @@ class Request:
|
|
|
163
173
|
cmd['payload']['parameters']['bit_rate'] = self.bit_rate
|
|
164
174
|
if additional_params:
|
|
165
175
|
cmd['payload']['parameters'].update(additional_params)
|
|
176
|
+
if self.instruction is not None:
|
|
177
|
+
cmd['payload']['parameters']['instruction'] = self.instruction
|
|
178
|
+
if self.language_hints is not None:
|
|
179
|
+
cmd['payload']['parameters']['language_hints'] = self.language_hints
|
|
166
180
|
return json.dumps(cmd)
|
|
167
181
|
|
|
168
182
|
def getContinueRequest(self, text):
|
|
@@ -207,6 +221,10 @@ class SpeechSynthesizer:
|
|
|
207
221
|
volume=50,
|
|
208
222
|
speech_rate=1.0,
|
|
209
223
|
pitch_rate=1.0,
|
|
224
|
+
seed=0,
|
|
225
|
+
synthesis_type=0,
|
|
226
|
+
instruction=None,
|
|
227
|
+
language_hints: list = None,
|
|
210
228
|
headers=None,
|
|
211
229
|
callback: ResultCallback = None,
|
|
212
230
|
workspace=None,
|
|
@@ -237,6 +255,14 @@ class SpeechSynthesizer:
|
|
|
237
255
|
Dashscope workspace ID.
|
|
238
256
|
url: str
|
|
239
257
|
Dashscope WebSocket URL.
|
|
258
|
+
seed: int
|
|
259
|
+
The seed of the synthesizer, with a range from 0 to 65535. Default is 0.
|
|
260
|
+
synthesis_type: int
|
|
261
|
+
The type of the synthesizer, Default is 0.
|
|
262
|
+
instruction: str
|
|
263
|
+
The instruction of the synthesizer, max length is 128.
|
|
264
|
+
language_hints: list
|
|
265
|
+
The language hints of the synthesizer. supported language: zh, en.
|
|
240
266
|
additional_params: Dict
|
|
241
267
|
Additional parameters for the Dashscope API.
|
|
242
268
|
"""
|
|
@@ -271,6 +297,10 @@ class SpeechSynthesizer:
|
|
|
271
297
|
volume=volume,
|
|
272
298
|
speech_rate=speech_rate,
|
|
273
299
|
pitch_rate=pitch_rate,
|
|
300
|
+
seed=seed,
|
|
301
|
+
synthesis_type=synthesis_type,
|
|
302
|
+
instruction=instruction,
|
|
303
|
+
language_hints=language_hints
|
|
274
304
|
)
|
|
275
305
|
self.last_request_id = self.request.task_id
|
|
276
306
|
self.start_event = threading.Event()
|
|
@@ -5,7 +5,7 @@ from typing import List
|
|
|
5
5
|
|
|
6
6
|
from dashscope.api_entities.dashscope_response import (DashScopeAPIResponse,
|
|
7
7
|
DictMixin)
|
|
8
|
-
from dashscope.client.base_api import BaseApi
|
|
8
|
+
from dashscope.client.base_api import BaseApi, BaseAioApi
|
|
9
9
|
from dashscope.common.error import InputRequired, ModelRequired
|
|
10
10
|
from dashscope.common.utils import _get_task_group_and_task
|
|
11
11
|
from dashscope.utils.oss_utils import preprocess_message_element
|
|
@@ -111,3 +111,72 @@ class MultiModalEmbedding(BaseApi):
|
|
|
111
111
|
if is_upload and not has_upload:
|
|
112
112
|
has_upload = True
|
|
113
113
|
return has_upload
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class AioMultiModalEmbedding(BaseAioApi):
|
|
117
|
+
task = 'multimodal-embedding'
|
|
118
|
+
|
|
119
|
+
class Models:
|
|
120
|
+
multimodal_embedding_one_peace_v1 = 'multimodal-embedding-one-peace-v1'
|
|
121
|
+
|
|
122
|
+
@classmethod
|
|
123
|
+
async def call(cls,
|
|
124
|
+
model: str,
|
|
125
|
+
input: List[MultiModalEmbeddingItemBase],
|
|
126
|
+
api_key: str = None,
|
|
127
|
+
workspace: str = None,
|
|
128
|
+
**kwargs) -> DashScopeAPIResponse:
|
|
129
|
+
"""Get embedding multimodal contents..
|
|
130
|
+
|
|
131
|
+
Args:
|
|
132
|
+
model (str): The embedding model name.
|
|
133
|
+
input (List[MultiModalEmbeddingElement]): The embedding elements,
|
|
134
|
+
every element include data, modal, factor field.
|
|
135
|
+
workspace (str): The dashscope workspace id.
|
|
136
|
+
**kwargs:
|
|
137
|
+
auto_truncation(bool, `optional`): Automatically truncate
|
|
138
|
+
audio longer than 15 seconds or text longer than 70 words.
|
|
139
|
+
Default to false(Too long input will result in failure).
|
|
140
|
+
|
|
141
|
+
Returns:
|
|
142
|
+
DashScopeAPIResponse: The embedding result.
|
|
143
|
+
"""
|
|
144
|
+
if input is None or not input:
|
|
145
|
+
raise InputRequired('prompt is required!')
|
|
146
|
+
if model is None or not model:
|
|
147
|
+
raise ModelRequired('Model is required!')
|
|
148
|
+
embedding_input = {}
|
|
149
|
+
has_upload = cls._preprocess_message_inputs(model, input, api_key)
|
|
150
|
+
if has_upload:
|
|
151
|
+
headers = kwargs.pop('headers', {})
|
|
152
|
+
headers['X-DashScope-OssResourceResolve'] = 'enable'
|
|
153
|
+
kwargs['headers'] = headers
|
|
154
|
+
embedding_input['contents'] = input
|
|
155
|
+
kwargs.pop('stream', False) # not support streaming output.
|
|
156
|
+
task_group, function = _get_task_group_and_task(__name__)
|
|
157
|
+
response = await super().call(
|
|
158
|
+
model=model,
|
|
159
|
+
input=embedding_input,
|
|
160
|
+
task_group=task_group,
|
|
161
|
+
task=MultiModalEmbedding.task,
|
|
162
|
+
function=function,
|
|
163
|
+
api_key=api_key,
|
|
164
|
+
workspace=workspace,
|
|
165
|
+
**kwargs)
|
|
166
|
+
return response
|
|
167
|
+
|
|
168
|
+
@classmethod
|
|
169
|
+
def _preprocess_message_inputs(cls, model: str, input: List[dict],
|
|
170
|
+
api_key: str):
|
|
171
|
+
"""preprocess following inputs
|
|
172
|
+
input = [{'factor': 1, 'text': 'hello'},
|
|
173
|
+
{'factor': 2, 'audio': ''},
|
|
174
|
+
{'factor': 3, 'image': ''}]
|
|
175
|
+
"""
|
|
176
|
+
has_upload = False
|
|
177
|
+
for elem in input:
|
|
178
|
+
if not isinstance(elem, (int, float, bool, str, bytes, bytearray)):
|
|
179
|
+
is_upload = preprocess_message_element(model, elem, api_key)
|
|
180
|
+
if is_upload and not has_upload:
|
|
181
|
+
has_upload = True
|
|
182
|
+
return has_upload
|
dashscope/multimodal/__init__.py
CHANGED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
2
|
+
|
|
3
|
+
from .tingwu import tingwu
|
|
4
|
+
from .tingwu.tingwu import TingWu
|
|
5
|
+
from .tingwu.tingwu_realtime import TingWuRealtime, TingWuRealtimeCallback
|
|
6
|
+
|
|
7
|
+
from .multimodal_dialog import MultiModalDialog, MultiModalCallback
|
|
8
|
+
from .dialog_state import DialogState
|
|
9
|
+
from .multimodal_constants import *
|
|
10
|
+
from .multimodal_request_params import *
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
'tingwu',
|
|
14
|
+
'TingWu',
|
|
15
|
+
'TingWuRealtime',
|
|
16
|
+
'TingWuRealtimeCallback',
|
|
17
|
+
'MultiModalDialog',
|
|
18
|
+
'MultiModalCallback',
|
|
19
|
+
'DialogState'
|
|
20
|
+
]
|
|
@@ -0,0 +1,579 @@
|
|
|
1
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import platform
|
|
5
|
+
import threading
|
|
6
|
+
import time
|
|
7
|
+
import uuid
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from queue import Queue
|
|
10
|
+
import dashscope
|
|
11
|
+
from dashscope.client.base_api import BaseApi
|
|
12
|
+
from dashscope.common.error import (InvalidParameter, ModelRequired)
|
|
13
|
+
import websocket
|
|
14
|
+
|
|
15
|
+
from dashscope.common.logging import logger
|
|
16
|
+
from dashscope.protocol.websocket import ActionType
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TingWuRealtimeCallback:
|
|
20
|
+
"""An interface that defines callback methods for getting TingWu results.
|
|
21
|
+
Derive from this class and implement its function to provide your own data.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
def on_open(self) -> None:
|
|
25
|
+
pass
|
|
26
|
+
|
|
27
|
+
def on_started(self, task_id: str) -> None:
|
|
28
|
+
pass
|
|
29
|
+
|
|
30
|
+
def on_speech_listen(self, result: dict):
|
|
31
|
+
pass
|
|
32
|
+
|
|
33
|
+
def on_recognize_result(self, result: dict):
|
|
34
|
+
pass
|
|
35
|
+
|
|
36
|
+
def on_ai_result(self, result: dict):
|
|
37
|
+
pass
|
|
38
|
+
|
|
39
|
+
def on_stopped(self) -> None:
|
|
40
|
+
pass
|
|
41
|
+
|
|
42
|
+
def on_error(self, error_code: str, error_msg: str) -> None:
|
|
43
|
+
pass
|
|
44
|
+
|
|
45
|
+
def on_close(self, close_status_code, close_msg):
|
|
46
|
+
"""
|
|
47
|
+
callback when websocket connection is closed
|
|
48
|
+
|
|
49
|
+
:param close_status_code
|
|
50
|
+
:param close_msg
|
|
51
|
+
"""
|
|
52
|
+
pass
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class TingWuRealtime(BaseApi):
|
|
56
|
+
"""TingWuRealtime interface.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
model (str): The requested model_id.
|
|
60
|
+
callback (TingWuRealtimeCallback): A callback that returns
|
|
61
|
+
speech recognition results.
|
|
62
|
+
app_id (str): The dashscope tingwu app id.
|
|
63
|
+
format (str): The input audio format for TingWu request.
|
|
64
|
+
sample_rate (int): The input audio sample rate.
|
|
65
|
+
terminology (str): The correct instruction set id.
|
|
66
|
+
workspace (str): The dashscope workspace id.
|
|
67
|
+
|
|
68
|
+
**kwargs:
|
|
69
|
+
max_end_silence (int): The maximum end silence time.
|
|
70
|
+
other_params (dict, `optional`): Other parameters.
|
|
71
|
+
|
|
72
|
+
Raises:
|
|
73
|
+
InputRequired: Input is required.
|
|
74
|
+
"""
|
|
75
|
+
|
|
76
|
+
SILENCE_TIMEOUT_S = 60
|
|
77
|
+
|
|
78
|
+
def __init__(self,
|
|
79
|
+
model: str,
|
|
80
|
+
callback: TingWuRealtimeCallback,
|
|
81
|
+
audio_format: str = "pcm",
|
|
82
|
+
sample_rate: int = 16000,
|
|
83
|
+
max_end_silence: int = None,
|
|
84
|
+
app_id: str = None,
|
|
85
|
+
terminology: str = None,
|
|
86
|
+
workspace: str = None,
|
|
87
|
+
api_key: str = None,
|
|
88
|
+
base_address: str = None,
|
|
89
|
+
data_id: str = None,
|
|
90
|
+
**kwargs):
|
|
91
|
+
if api_key is None:
|
|
92
|
+
self.api_key = dashscope.api_key
|
|
93
|
+
else:
|
|
94
|
+
self.api_key = api_key
|
|
95
|
+
if base_address is None:
|
|
96
|
+
self.base_address = dashscope.base_websocket_api_url
|
|
97
|
+
else:
|
|
98
|
+
self.base_address = base_address
|
|
99
|
+
|
|
100
|
+
if model is None:
|
|
101
|
+
raise ModelRequired('Model is required!')
|
|
102
|
+
|
|
103
|
+
self.data_id = data_id
|
|
104
|
+
self.max_end_silence = max_end_silence
|
|
105
|
+
self.model = model
|
|
106
|
+
self.audio_format = audio_format
|
|
107
|
+
self.app_id = app_id
|
|
108
|
+
self.terminology = terminology
|
|
109
|
+
self.sample_rate = sample_rate
|
|
110
|
+
# continuous recognition with start() or once recognition with call()
|
|
111
|
+
self._recognition_once = False
|
|
112
|
+
self._callback = callback
|
|
113
|
+
self._running = False
|
|
114
|
+
self._stream_data = Queue()
|
|
115
|
+
self._worker = None
|
|
116
|
+
self._silence_timer = None
|
|
117
|
+
self._kwargs = kwargs
|
|
118
|
+
self._workspace = workspace
|
|
119
|
+
self._start_stream_timestamp = -1
|
|
120
|
+
self._first_package_timestamp = -1
|
|
121
|
+
self._stop_stream_timestamp = -1
|
|
122
|
+
self._on_complete_timestamp = -1
|
|
123
|
+
self.request_id_confirmed = False
|
|
124
|
+
self.last_request_id = uuid.uuid4().hex
|
|
125
|
+
self.request = _Request()
|
|
126
|
+
self.response = _TingWuResponse(self._callback, self.close) # 传递 self.close 作为回调
|
|
127
|
+
|
|
128
|
+
def _on_message(self, ws, message):
|
|
129
|
+
logger.debug(f"<<<<<<< Received message: {message}")
|
|
130
|
+
if isinstance(message, str):
|
|
131
|
+
self.response.handle_text_response(message)
|
|
132
|
+
elif isinstance(message, (bytes, bytearray)):
|
|
133
|
+
self.response.handle_binary_response(message)
|
|
134
|
+
|
|
135
|
+
def _on_error(self, ws, error):
|
|
136
|
+
logger.error(f"Error: {error}")
|
|
137
|
+
if self._callback:
|
|
138
|
+
error_code = "" # 默认错误码
|
|
139
|
+
if "connection" in str(error).lower():
|
|
140
|
+
error_code = "1001" # 连接错误
|
|
141
|
+
elif "timeout" in str(error).lower():
|
|
142
|
+
error_code = "1002" # 超时错误
|
|
143
|
+
elif "authentication" in str(error).lower():
|
|
144
|
+
error_code = "1003" # 认证错误
|
|
145
|
+
self._callback.on_error(error_code=error_code, error_msg=str(error))
|
|
146
|
+
|
|
147
|
+
def _on_close(self, ws, close_status_code, close_msg):
|
|
148
|
+
try:
|
|
149
|
+
logger.debug(
|
|
150
|
+
"WebSocket connection closed with status {} and message {}".format(close_status_code, close_msg))
|
|
151
|
+
if close_status_code is None:
|
|
152
|
+
close_status_code = 1000
|
|
153
|
+
if close_msg is None:
|
|
154
|
+
close_msg = "websocket is closed"
|
|
155
|
+
self._callback.on_close(close_status_code, close_msg)
|
|
156
|
+
except Exception as e:
|
|
157
|
+
logger.error(f"Error: {e}")
|
|
158
|
+
|
|
159
|
+
def _on_open(self, ws):
|
|
160
|
+
self._callback.on_open()
|
|
161
|
+
self._running = True
|
|
162
|
+
|
|
163
|
+
# def _on_pong(self):
|
|
164
|
+
# logger.debug("on pong")
|
|
165
|
+
|
|
166
|
+
def start(self, **kwargs):
|
|
167
|
+
"""
|
|
168
|
+
interface for starting TingWu connection
|
|
169
|
+
"""
|
|
170
|
+
assert self._callback is not None, 'Please set the callback to get the TingWu result.' # noqa E501
|
|
171
|
+
|
|
172
|
+
if self._running:
|
|
173
|
+
raise InvalidParameter('TingWu client has started.')
|
|
174
|
+
|
|
175
|
+
# self._start_stream_timestamp = -1
|
|
176
|
+
# self._first_package_timestamp = -1
|
|
177
|
+
# self._stop_stream_timestamp = -1
|
|
178
|
+
# self._on_complete_timestamp = -1
|
|
179
|
+
if self._kwargs is not None and len(self._kwargs) != 0:
|
|
180
|
+
self._kwargs.update(**kwargs)
|
|
181
|
+
|
|
182
|
+
self._connect(self.api_key)
|
|
183
|
+
logger.debug("connected with server.")
|
|
184
|
+
self._send_start_request()
|
|
185
|
+
|
|
186
|
+
def send_audio_data(self, speech_data: bytes):
|
|
187
|
+
"""send audio data to server"""
|
|
188
|
+
if self._running:
|
|
189
|
+
self.__send_binary_frame(speech_data)
|
|
190
|
+
|
|
191
|
+
def stop(self):
|
|
192
|
+
if self.ws is None or not self.ws.sock or not self.ws.sock.connected:
|
|
193
|
+
self._callback.on_close(1001, "websocket is not connected")
|
|
194
|
+
return
|
|
195
|
+
_send_speech_json = self.request.generate_stop_request("stop")
|
|
196
|
+
self._send_text_frame(_send_speech_json)
|
|
197
|
+
|
|
198
|
+
"""inner class"""
|
|
199
|
+
|
|
200
|
+
def _send_start_request(self):
|
|
201
|
+
"""send start request"""
|
|
202
|
+
_start_json = self.request.generate_start_request(
|
|
203
|
+
workspace_id=self._workspace,
|
|
204
|
+
direction_name="start",
|
|
205
|
+
app_id=self.app_id,
|
|
206
|
+
model=self.model,
|
|
207
|
+
audio_format=self.audio_format,
|
|
208
|
+
sample_rate=self.sample_rate,
|
|
209
|
+
terminology=self.terminology,
|
|
210
|
+
max_end_silence=self.max_end_silence,
|
|
211
|
+
data_id=self.data_id,
|
|
212
|
+
**self._kwargs
|
|
213
|
+
)
|
|
214
|
+
# send start request
|
|
215
|
+
self._send_text_frame(_start_json)
|
|
216
|
+
|
|
217
|
+
def _run_forever(self):
|
|
218
|
+
self.ws.run_forever(ping_interval=5, ping_timeout=4)
|
|
219
|
+
|
|
220
|
+
def _connect(self, api_key: str):
|
|
221
|
+
"""init websocket connection"""
|
|
222
|
+
self.ws = websocket.WebSocketApp(self.base_address, header=self.request.get_websocket_header(api_key),
|
|
223
|
+
on_open=self._on_open,
|
|
224
|
+
on_message=self._on_message,
|
|
225
|
+
on_error=self._on_error,
|
|
226
|
+
on_close=self._on_close)
|
|
227
|
+
self.thread = threading.Thread(target=self._run_forever)
|
|
228
|
+
# 统一心跳机制配置
|
|
229
|
+
self.ws.ping_interval = 5
|
|
230
|
+
self.ws.ping_timeout = 4
|
|
231
|
+
self.thread.daemon = True
|
|
232
|
+
self.thread.start()
|
|
233
|
+
|
|
234
|
+
self._wait_for_connection()
|
|
235
|
+
|
|
236
|
+
def close(self):
|
|
237
|
+
if self.ws is None or not self.ws.sock or not self.ws.sock.connected:
|
|
238
|
+
return
|
|
239
|
+
self.ws.close()
|
|
240
|
+
|
|
241
|
+
def _wait_for_connection(self):
|
|
242
|
+
"""wait for connection using event instead of busy waiting"""
|
|
243
|
+
timeout = 5
|
|
244
|
+
start_time = time.time()
|
|
245
|
+
while not (self.ws.sock and self.ws.sock.connected) and (time.time() - start_time) < timeout:
|
|
246
|
+
time.sleep(0.1) # 短暂休眠,避免密集轮询
|
|
247
|
+
|
|
248
|
+
def _send_text_frame(self, text: str):
|
|
249
|
+
# 避免在日志中记录敏感信息,如API密钥等
|
|
250
|
+
# 只记录非敏感信息
|
|
251
|
+
if '"Authorization"' not in text:
|
|
252
|
+
logger.info('>>>>>> send text frame : %s' % text)
|
|
253
|
+
else:
|
|
254
|
+
logger.info('>>>>>> send text frame with authorization header')
|
|
255
|
+
self.ws.send(text, websocket.ABNF.OPCODE_TEXT)
|
|
256
|
+
|
|
257
|
+
def __send_binary_frame(self, binary: bytes):
|
|
258
|
+
# _log.info('send binary frame length: %d' % len(binary))
|
|
259
|
+
self.ws.send(binary, websocket.ABNF.OPCODE_BINARY)
|
|
260
|
+
|
|
261
|
+
def __enter__(self):
|
|
262
|
+
return self
|
|
263
|
+
|
|
264
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
265
|
+
self.cleanup()
|
|
266
|
+
return False
|
|
267
|
+
|
|
268
|
+
def cleanup(self):
|
|
269
|
+
"""cleanup resources"""
|
|
270
|
+
try:
|
|
271
|
+
if self.ws:
|
|
272
|
+
self.ws.close()
|
|
273
|
+
if self.thread and self.thread.is_alive():
|
|
274
|
+
# 设置标志位通知线程退出
|
|
275
|
+
self.thread.join(timeout=2)
|
|
276
|
+
# 清除引用
|
|
277
|
+
self.ws = None
|
|
278
|
+
self.thread = None
|
|
279
|
+
self._callback = None
|
|
280
|
+
self.response = None
|
|
281
|
+
except Exception as e:
|
|
282
|
+
logger.error(f"Error in cleanup: {e}")
|
|
283
|
+
|
|
284
|
+
def send_audio_frame(self, buffer: bytes):
|
|
285
|
+
"""Push audio to server
|
|
286
|
+
|
|
287
|
+
Raises:
|
|
288
|
+
InvalidParameter: Cannot send data to an uninitiated recognition.
|
|
289
|
+
"""
|
|
290
|
+
if self._running is False:
|
|
291
|
+
raise InvalidParameter('TingWu client has stopped.')
|
|
292
|
+
|
|
293
|
+
if self._start_stream_timestamp < 0:
|
|
294
|
+
self._start_stream_timestamp = time.time() * 1000
|
|
295
|
+
logger.debug('send_audio_frame: {}'.format(len(buffer)))
|
|
296
|
+
self.__send_binary_frame(buffer)
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
class _Request:
|
|
300
|
+
def __init__(self):
|
|
301
|
+
# websocket header
|
|
302
|
+
self.ws_headers = None
|
|
303
|
+
# request body for voice chat
|
|
304
|
+
self.header = None
|
|
305
|
+
self.payload = None
|
|
306
|
+
# params
|
|
307
|
+
self.task_id = None
|
|
308
|
+
self.app_id = None
|
|
309
|
+
self.workspace_id = None
|
|
310
|
+
|
|
311
|
+
def get_websocket_header(self, api_key):
|
|
312
|
+
ua = 'dashscope/%s; python/%s; platform/%s; processor/%s' % (
|
|
313
|
+
'1.18.0', # dashscope version
|
|
314
|
+
platform.python_version(),
|
|
315
|
+
platform.platform(),
|
|
316
|
+
platform.processor(),
|
|
317
|
+
)
|
|
318
|
+
self.ws_headers = {
|
|
319
|
+
"User-Agent": ua,
|
|
320
|
+
"Authorization": f"bearer {api_key}",
|
|
321
|
+
"Accept": "application/json"
|
|
322
|
+
}
|
|
323
|
+
logger.info('websocket header: {}'.format(self.ws_headers))
|
|
324
|
+
return self.ws_headers
|
|
325
|
+
|
|
326
|
+
def generate_start_request(self, direction_name: str,
|
|
327
|
+
app_id: str,
|
|
328
|
+
model: str = None,
|
|
329
|
+
workspace_id: str = None,
|
|
330
|
+
audio_format: str = None,
|
|
331
|
+
sample_rate: int = None,
|
|
332
|
+
terminology: str = None,
|
|
333
|
+
max_end_silence: int = None,
|
|
334
|
+
data_id: str = None,
|
|
335
|
+
**kwargs
|
|
336
|
+
) -> str:
|
|
337
|
+
"""
|
|
338
|
+
build start request.
|
|
339
|
+
:param app_id: web console app id
|
|
340
|
+
:param direction_name:
|
|
341
|
+
:param workspace_id: web console workspace id
|
|
342
|
+
:param model: model name
|
|
343
|
+
:param audio_format: audio format
|
|
344
|
+
:param sample_rate: sample rate
|
|
345
|
+
:param terminology:
|
|
346
|
+
:param max_end_silence:
|
|
347
|
+
:param data_id:
|
|
348
|
+
:return:
|
|
349
|
+
Args:
|
|
350
|
+
:
|
|
351
|
+
"""
|
|
352
|
+
self._get_dash_request_header(ActionType.START)
|
|
353
|
+
parameters = self._get_start_parameters(audio_format=audio_format, sample_rate=sample_rate,
|
|
354
|
+
max_end_silence=max_end_silence,
|
|
355
|
+
terminology=terminology,
|
|
356
|
+
**kwargs)
|
|
357
|
+
self._get_dash_request_payload(direction_name=direction_name, app_id=app_id, workspace_id=workspace_id,
|
|
358
|
+
model=model,
|
|
359
|
+
data_id=data_id,
|
|
360
|
+
request_params=parameters)
|
|
361
|
+
|
|
362
|
+
cmd = {
|
|
363
|
+
"header": self.header,
|
|
364
|
+
"payload": self.payload
|
|
365
|
+
}
|
|
366
|
+
return json.dumps(cmd)
|
|
367
|
+
|
|
368
|
+
@staticmethod
|
|
369
|
+
def _get_start_parameters(audio_format: str = None,
|
|
370
|
+
sample_rate: int = None,
|
|
371
|
+
terminology: str = None,
|
|
372
|
+
max_end_silence: int = None,
|
|
373
|
+
**kwargs):
|
|
374
|
+
"""
|
|
375
|
+
build start request parameters inner.
|
|
376
|
+
:param kwargs: parameters
|
|
377
|
+
:return
|
|
378
|
+
"""
|
|
379
|
+
parameters = {}
|
|
380
|
+
if audio_format is not None:
|
|
381
|
+
parameters['format'] = audio_format
|
|
382
|
+
if sample_rate is not None:
|
|
383
|
+
parameters['sampleRate'] = sample_rate
|
|
384
|
+
if terminology is not None:
|
|
385
|
+
parameters['terminology'] = terminology
|
|
386
|
+
if max_end_silence is not None:
|
|
387
|
+
parameters['maxEndSilence'] = max_end_silence
|
|
388
|
+
if kwargs is not None and len(kwargs) != 0:
|
|
389
|
+
parameters.update(kwargs)
|
|
390
|
+
return parameters
|
|
391
|
+
|
|
392
|
+
def generate_stop_request(self, direction_name: str) -> str:
|
|
393
|
+
"""
|
|
394
|
+
build stop request.
|
|
395
|
+
:param direction_name
|
|
396
|
+
:return
|
|
397
|
+
"""
|
|
398
|
+
self._get_dash_request_header(ActionType.FINISHED)
|
|
399
|
+
self._get_dash_request_payload(direction_name, self.app_id)
|
|
400
|
+
|
|
401
|
+
cmd = {
|
|
402
|
+
"header": self.header,
|
|
403
|
+
"payload": self.payload
|
|
404
|
+
}
|
|
405
|
+
return json.dumps(cmd)
|
|
406
|
+
|
|
407
|
+
def _get_dash_request_header(self, action: str):
|
|
408
|
+
"""
|
|
409
|
+
:param action: ActionType :run-task, continue-task, finish-task
|
|
410
|
+
"""
|
|
411
|
+
if self.task_id is None:
|
|
412
|
+
self.task_id = get_random_uuid()
|
|
413
|
+
self.header = DashHeader(action=action, task_id=self.task_id).to_dict()
|
|
414
|
+
|
|
415
|
+
def _get_dash_request_payload(self, direction_name: str,
|
|
416
|
+
app_id: str,
|
|
417
|
+
workspace_id: str = None,
|
|
418
|
+
custom_input=None,
|
|
419
|
+
model: str = None,
|
|
420
|
+
data_id: str = None,
|
|
421
|
+
request_params=None,
|
|
422
|
+
):
|
|
423
|
+
"""
|
|
424
|
+
build start request payload inner.
|
|
425
|
+
:param direction_name: inner direction name
|
|
426
|
+
:param app_id: web console app id
|
|
427
|
+
:param request_params: start direction body parameters
|
|
428
|
+
:param custom_input: user custom input
|
|
429
|
+
:param data_id: data id
|
|
430
|
+
:param model: model name
|
|
431
|
+
"""
|
|
432
|
+
if custom_input is not None:
|
|
433
|
+
input = custom_input
|
|
434
|
+
else:
|
|
435
|
+
input = RequestBodyInput(
|
|
436
|
+
workspace_id=workspace_id,
|
|
437
|
+
app_id=app_id,
|
|
438
|
+
directive=direction_name,
|
|
439
|
+
data_id=data_id
|
|
440
|
+
)
|
|
441
|
+
|
|
442
|
+
self.payload = DashPayload(
|
|
443
|
+
model=model,
|
|
444
|
+
input=input.to_dict(),
|
|
445
|
+
parameters=request_params
|
|
446
|
+
).to_dict()
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
class _TingWuResponse:
    """Dispatches server messages (JSON text frames / binary frames) to the
    user-supplied :class:`TingWuRealtimeCallback`."""

    def __init__(self, callback: TingWuRealtimeCallback, close_callback=None):
        super().__init__()
        self.task_id = None  # dialog/task id, set on task-started
        self._callback = callback
        self._close_callback = close_callback  # invoked after speech-end

    def handle_text_response(self, response_json: str):
        """
        handle text response.
        :param response_json: json format response from server
        """
        logger.info("<<<<<< server response: %s" % response_json)
        try:
            # try to parse response as json
            json_data = json.loads(response_json)
            header = json_data.get('header', {})
            if header.get('event') == 'task-failed':
                logger.error('Server returned invalid message: %s' % response_json)
                if self._callback:
                    self._callback.on_error(error_code=header.get('error_code'),
                                            error_msg=header.get('error_message'))
                return
            if header.get('event') == "task-started":
                self._handle_started(header.get('task_id'))
                return

            payload = json_data.get('payload', {})
            # .get with a default does not protect against an explicit
            # "output": null in the message, hence the None check below.
            output = payload.get('output', {})
            if output is not None:
                action = output.get('action')
                logger.info("Server response action: %s" % action)
                self._handle_tingwu_agent_text_response(action=action, response_json=json_data)

        except json.JSONDecodeError:
            logger.error("Failed to parse message as JSON.")

    def handle_binary_response(self, response_binary: bytes):
        """
        handle binary response.
        :param response_binary: server response binary
        """
        logger.info("<<<<<< server response binary length: %d" % len(response_binary))

    def _handle_tingwu_agent_text_response(self, action: str, response_json: dict):
        """Route one parsed agent message to the matching callback hook."""
        payload = response_json.get('payload', {})
        output = payload.get('output', {})
        if action == "task-failed":
            self._callback.on_error(error_code=output.get('errorCode'),
                                    error_msg=output.get('errorMessage'))
        elif action == "speech-listen":
            self._callback.on_speech_listen(response_json)
        elif action == "recognize-result":
            self._callback.on_recognize_result(response_json)
        elif action == "ai-result":
            self._callback.on_ai_result(response_json)
        elif action == "speech-end":  # the ai-result event always precedes speech-end
            self._callback.on_stopped()
            if self._close_callback is not None:
                self._close_callback()
        else:
            # Lazy %-arg logging; the original concatenated
            # "Unknown response name:" + action, which raised TypeError
            # whenever the 'action' key was missing (action is None).
            logger.info("Unknown response name:%s", action)

    def _handle_started(self, task_id: str):
        """Record the task id and notify the callback that the task started."""
        self.task_id = task_id
        self._callback.on_started(self.task_id)
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
def get_random_uuid() -> str:
    """Return a fresh random UUID4 as a 32-character lowercase hex string."""
    random_id = uuid.uuid4()
    return random_id.hex
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
@dataclass
class RequestBodyInput:
    """Input section of a request body: identifies app, directive and data."""
    app_id: str
    directive: str
    data_id: str = None
    workspace_id: str = None

    def to_dict(self):
        """Serialize to a dict, omitting unset optional fields."""
        serialized = {
            "appId": self.app_id,
            "directive": self.directive,
        }
        if self.workspace_id is not None:
            # NOTE(review): the other keys are camelCase ("appId",
            # "dataId") — confirm the server really expects snake_case here.
            serialized["workspace_id"] = self.workspace_id
        if self.data_id is not None:
            serialized["dataId"] = self.data_id
        return serialized
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
@dataclass
class DashHeader:
    """Protocol-level header of a dashscope websocket command frame."""
    action: str
    # Bug fix: the original used field(default=get_random_uuid()), which
    # evaluates the uuid ONCE at class-definition time, so every instance
    # that omitted task_id shared the same id. default_factory generates a
    # fresh id per instance.
    task_id: str = field(default_factory=get_random_uuid)
    streaming: str = field(default="duplex")  # duplex by default

    def to_dict(self):
        return {
            "action": self.action,
            "task_id": self.task_id,
            # request_id deliberately mirrors task_id in this protocol —
            # presumably intentional; confirm against the server contract.
            "request_id": self.task_id,
            "streaming": self.streaming
        }
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
@dataclass
class DashPayload:
    """Payload section of a dashscope websocket command frame."""
    task_group: str = "aigc"
    function: str = "generation"
    model: str = ""
    task: str = "multimodal-generation"
    parameters: dict = None
    input: dict = None

    def to_dict(self):
        """Serialize, emitting parameters/input only when they were set."""
        serialized = {
            "task_group": self.task_group,
            "function": self.function,
            "model": self.model,
            "task": self.task,
        }
        for optional in ("parameters", "input"):
            value = getattr(self, optional)
            if value is not None:
                serialized[optional] = value
        return serialized
|
dashscope/version.py
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
|
-
dashscope/__init__.py,sha256=
|
|
1
|
+
dashscope/__init__.py,sha256=96J137Im9Ii9uxfVOOYkZDJNZXF1sEbcH4-QXFr4xEw,3172
|
|
2
2
|
dashscope/cli.py,sha256=64oGkevgX0RHPPmMg0sevXDgaFLQNA_0vdtjQ7Z2pHM,26492
|
|
3
3
|
dashscope/files.py,sha256=vRDQygm3lOqBZR73o7KNHs1iTBVuvLncuwJNxIYjzAU,3981
|
|
4
4
|
dashscope/model.py,sha256=B5v_BtYLPqj6raClejBgdKg6WTGwhH_f-20pvsQqmsk,1491
|
|
5
5
|
dashscope/models.py,sha256=dE4mzXkl85G343qVylSGpURPRdA5pZSqXlx6PcxqC_Q,1275
|
|
6
|
-
dashscope/version.py,sha256=
|
|
7
|
-
dashscope/aigc/__init__.py,sha256=
|
|
6
|
+
dashscope/version.py,sha256=2fvqw7bZLyWOIDvUb8DEkdi6y_VgyljhOeYdITEksWM,74
|
|
7
|
+
dashscope/aigc/__init__.py,sha256=kYvYEoRK-NUHyMWpBDNQBz4fVA__uOhHRK2kDTBaWgk,617
|
|
8
8
|
dashscope/aigc/chat_completion.py,sha256=ONlyyssIbfaKKcFo7cEKhHx5OCF2XX810HFzIExW1ho,14813
|
|
9
9
|
dashscope/aigc/code_generation.py,sha256=p_mxDKJLQMW0IjFD46JRlZuEZCRESSVKEfLlAevBtqw,10936
|
|
10
10
|
dashscope/aigc/conversation.py,sha256=95xEEY4ThZJysj5zy3aMw7ql9KLJVfD_1iHv9QZ17Ew,14282
|
|
11
11
|
dashscope/aigc/generation.py,sha256=xMcMu16rICTdjZiD_sPqYV_Ltdp4ewGzzfC7JD9VApY,17948
|
|
12
12
|
dashscope/aigc/image_synthesis.py,sha256=Itx9h5brEwC-d3Mj_ntDHGd4qaitqDg9DeGHMJouhMk,28178
|
|
13
|
-
dashscope/aigc/multimodal_conversation.py,sha256=
|
|
13
|
+
dashscope/aigc/multimodal_conversation.py,sha256=BXpUthyGxJHQs18-m_ZzAw6MI5nSM4_NVMUfTDSC1-k,11682
|
|
14
14
|
dashscope/aigc/video_synthesis.py,sha256=RSPjar5-YiF9xclRmf9H7-5QbRxLcsNXO4zS7oTKi2I,24137
|
|
15
15
|
dashscope/api_entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
16
|
dashscope/api_entities/aiohttp_request.py,sha256=1L7XdIJ9L65cQmX8x9JCR4t5hNIMDrbiWADfKKp9yfo,10280
|
|
@@ -18,7 +18,7 @@ dashscope/api_entities/api_request_data.py,sha256=04rpYPNK1HkT3iTPJmZpquH621xcBb
|
|
|
18
18
|
dashscope/api_entities/api_request_factory.py,sha256=ynpbFmxSne4dJkv5m40Vlwt4hJSxQPprAuUgMSQIQDg,5639
|
|
19
19
|
dashscope/api_entities/base_request.py,sha256=W2SzrSAGFS6V8DErfSrayQtSL0T4iO7BrC8flr7nt1w,977
|
|
20
20
|
dashscope/api_entities/chat_completion_types.py,sha256=1WMWPszhM3HaJBVz-ZXx-El4D8-RfVUL3ym65xsDRLk,11435
|
|
21
|
-
dashscope/api_entities/dashscope_response.py,sha256=
|
|
21
|
+
dashscope/api_entities/dashscope_response.py,sha256=31guU41ePkLyFsVVN-1WODXdOHiURzRyxxhrUmX9dGM,22835
|
|
22
22
|
dashscope/api_entities/encryption.py,sha256=rUCZx3wwVvS5oyKXEeWgyWPxM8Y5d4AaVdgxLhizBqA,5517
|
|
23
23
|
dashscope/api_entities/http_request.py,sha256=MTxYsbkK8oYWDp8ZPjrkdY9YbnQ9SEIy87riyJidMXo,16484
|
|
24
24
|
dashscope/api_entities/websocket_request.py,sha256=PS0FU854-HjTbKa68f4GHa7-noFRMzKySJGfPkrrBjw,16146
|
|
@@ -46,7 +46,7 @@ dashscope/audio/tts/__init__.py,sha256=xYpMFseUZGgqgj_70zcX2VsLv-L7qxJ3d-bbdj_hO
|
|
|
46
46
|
dashscope/audio/tts/speech_synthesizer.py,sha256=vD1xQV-rew8qAsIaAGH5amsNtB0SqdtNhVHhJHGQ-xk,7622
|
|
47
47
|
dashscope/audio/tts_v2/__init__.py,sha256=me9a3_7KsHQxcJ8hx4SeKlY1e_ThHVvGMw7Yn0uoscM,333
|
|
48
48
|
dashscope/audio/tts_v2/enrollment.py,sha256=-nrlywYSOP73Bm9ETTSxNnlp-B8ezJcUmd59mVvyvgk,6361
|
|
49
|
-
dashscope/audio/tts_v2/speech_synthesizer.py,sha256=
|
|
49
|
+
dashscope/audio/tts_v2/speech_synthesizer.py,sha256=p764P4TYwLkvvPCpA4VnFwlNbIJbuNbp2d9mxgni7Ws,22047
|
|
50
50
|
dashscope/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
51
51
|
dashscope/client/base_api.py,sha256=znAJ65DeHiFw1H7FWK0YrkLz1CoNcyqUxF8EJ3gujeY,52523
|
|
52
52
|
dashscope/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -65,17 +65,18 @@ dashscope/customize/finetunes.py,sha256=AL_kGTJXMvM2ej-EKsLLd1dUphPQdVTefFVCSVH-
|
|
|
65
65
|
dashscope/embeddings/__init__.py,sha256=XQ7vKr8oZM2CmdOduE53BWy6_Qpn9xUPkma64yw8Gws,291
|
|
66
66
|
dashscope/embeddings/batch_text_embedding.py,sha256=lVhvTS8McYfXuqt_8CmmhA6bPqD0nrGv965kjYG_j0E,8842
|
|
67
67
|
dashscope/embeddings/batch_text_embedding_response.py,sha256=ZfkJMUq8GRsFA6XUTsiAsIySqGJH-VPi2P9Ba1KTU-s,2056
|
|
68
|
-
dashscope/embeddings/multimodal_embedding.py,sha256=
|
|
68
|
+
dashscope/embeddings/multimodal_embedding.py,sha256=PEF7DmtE5cbrXw4k3WQcfmsBKaAY3CTIoei3SyhOl34,6774
|
|
69
69
|
dashscope/embeddings/text_embedding.py,sha256=2MPEyMB99xueDbvFg9kKAe8bgHMDEaFLaFa6GzDWDHg,2108
|
|
70
70
|
dashscope/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
71
71
|
dashscope/io/input_output.py,sha256=0aXrRJFo1ZqYm_AJWR_w88O4-Btn9np2zUhrrUdBdfw,3992
|
|
72
|
-
dashscope/multimodal/__init__.py,sha256=
|
|
72
|
+
dashscope/multimodal/__init__.py,sha256=fyqeolbDLWVn5wSpPZ3nAOnUBRF9k6mlsy6dCmgjPvI,533
|
|
73
73
|
dashscope/multimodal/dialog_state.py,sha256=CtOdfGWhq0ePG3bc8-7inhespETtPD4QDli1513hd1A,1522
|
|
74
74
|
dashscope/multimodal/multimodal_constants.py,sha256=z_QVq01E43FAqKQnDu9vdf89d1zuYlWyANewWTEXVJM,1282
|
|
75
75
|
dashscope/multimodal/multimodal_dialog.py,sha256=HymlaQYp7SgJdoKbT27SNiviyRRoM91zklNBwTHmm1Q,23939
|
|
76
76
|
dashscope/multimodal/multimodal_request_params.py,sha256=Lbxf_kLnFUkhty8AU9wL7ws9tYbmhHPVmsiXLdynlJg,8402
|
|
77
|
-
dashscope/multimodal/tingwu/__init__.py,sha256=
|
|
77
|
+
dashscope/multimodal/tingwu/__init__.py,sha256=Gi9GEM0bdeJlZpvyksSeHOc2--_tG5aF6QAx6TAS2fE,225
|
|
78
78
|
dashscope/multimodal/tingwu/tingwu.py,sha256=01d-QOeuB1QmRhiZqbXJ8pHoGqT0C-xZTjIs_ZBXOyw,2613
|
|
79
|
+
dashscope/multimodal/tingwu/tingwu_realtime.py,sha256=oBeqrZit3uBZHuyI7m9VILz2qaqJRMO0-Nm2eJ5Q63g,20215
|
|
79
80
|
dashscope/nlp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
80
81
|
dashscope/nlp/understanding.py,sha256=00ado-ibYEzBRT0DgKGd3bohQDNW73xnFhJ_1aa87lw,2880
|
|
81
82
|
dashscope/protocol/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -99,9 +100,9 @@ dashscope/tokenizers/tokenizer.py,sha256=3FQVDvMNkCW9ccYeJdjrd_PIMMD3Xv7aNZkaYOE
|
|
|
99
100
|
dashscope/tokenizers/tokenizer_base.py,sha256=5EJIFuizMWESEmLmbd38yJnfeHmPnzZPwsO4aOGjpl4,707
|
|
100
101
|
dashscope/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
101
102
|
dashscope/utils/oss_utils.py,sha256=aZIHlMN2JOfVw6kp0SVrMw_N1MfoTcR_-wiRbJ7DgHw,7501
|
|
102
|
-
dashscope-1.24.
|
|
103
|
-
dashscope-1.24.
|
|
104
|
-
dashscope-1.24.
|
|
105
|
-
dashscope-1.24.
|
|
106
|
-
dashscope-1.24.
|
|
107
|
-
dashscope-1.24.
|
|
103
|
+
dashscope-1.24.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
104
|
+
dashscope-1.24.5.dist-info/METADATA,sha256=eRxoK1TphMD4hn-vUM3aPMtaCsvCzstnDe-QPH9A4Q0,7146
|
|
105
|
+
dashscope-1.24.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
106
|
+
dashscope-1.24.5.dist-info/entry_points.txt,sha256=e9C3sOf9zDYL0O5ROEGX6FT8w-QK_kaGRWmPZDHAFys,49
|
|
107
|
+
dashscope-1.24.5.dist-info/top_level.txt,sha256=woqavFJK9zas5xTqynmALqOtlafghjsk63Xk86powTU,10
|
|
108
|
+
dashscope-1.24.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|