dashscope 1.23.7__py3-none-any.whl → 1.23.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dashscope might be problematic. See the registry advisory for details.

dashscope/assistants/assistants.py CHANGED
@@ -23,8 +23,8 @@ class Assistants(CreateMixin, CancelMixin, DeleteMixin, ListObjectMixin,
         name: str = None,
         description: str = None,
         instructions: str = None,
-        tools: Optional[str] = [],
-        file_ids: Optional[str] = [],
+        tools: Optional[List[Dict]] = None,
+        file_ids: Optional[List[str]] = [],
         metadata: Dict = {},
     ):
         obj = {}
@@ -36,7 +36,7 @@ class Assistants(CreateMixin, CancelMixin, DeleteMixin, ListObjectMixin,
         obj['description'] = description
         if instructions:
             obj['instructions'] = instructions
-        if tools:
+        if tools is not None:
            obj['tools'] = tools
         obj['file_ids'] = file_ids
         obj['metadata'] = metadata
@@ -50,7 +50,7 @@ class Assistants(CreateMixin, CancelMixin, DeleteMixin, ListObjectMixin,
         name: str = None,
         description: str = None,
         instructions: str = None,
-        tools: Optional[List[Dict]] = [],
+        tools: Optional[List[Dict]] = None,
         file_ids: Optional[List[str]] = [],
         metadata: Dict = None,
         workspace: str = None,
@@ -93,7 +93,7 @@ class Assistants(CreateMixin, CancelMixin, DeleteMixin, ListObjectMixin,
         name: str = None,
         description: str = None,
         instructions: str = None,
-        tools: Optional[List[Dict]] = [],
+        tools: Optional[List[Dict]] = None,
         file_ids: Optional[List[str]] = [],
         metadata: Dict = None,
         workspace: str = None,
@@ -219,8 +219,8 @@ class Assistants(CreateMixin, CancelMixin, DeleteMixin, ListObjectMixin,
         name: str = None,
         description: str = None,
         instructions: str = None,
-        tools: Optional[str] = [],
-        file_ids: Optional[str] = [],
+        tools: Optional[List[Dict]] = None,
+        file_ids: Optional[List[str]] = [],
         metadata: Dict = None,
         workspace: str = None,
         api_key: str = None,
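
Every hunk above makes the same two fixes: the tools parameter's annotation is corrected from Optional[str] to Optional[List[Dict]], and its mutable default [] is replaced by None with an explicit `is not None` check. A minimal standalone sketch (not dashscope code) of why both changes matter: a default list is created once at function-definition time and shared across calls, and a truthiness test like `if tools:` silently discards an explicitly passed empty list.

    from typing import Dict, List, Optional

    def create_bad(tools: Optional[List[Dict]] = []):
        # The single default list is shared by every call that omits `tools`,
        # so one call's mutation leaks into the next.
        tools.append({'type': 'code_interpreter'})
        return tools

    def create_good(tools: Optional[List[Dict]] = None):
        # `None` sentinel: each call that omits `tools` gets a fresh list,
        # and `is not None` keeps an explicitly passed [] intact instead of
        # dropping it the way a truthiness check (`if tools:`) would.
        if tools is None:
            tools = []
        return tools

    print(create_bad())   # [{'type': 'code_interpreter'}]
    print(create_bad())   # two entries now: state leaked between calls
    print(create_good())  # []
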
dashscope/audio/__init__.py CHANGED
@@ -1,5 +1,5 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 
-from . import asr, tts, tts_v2, qwen_tts
+from . import asr, tts, tts_v2, qwen_tts, qwen_tts_realtime, qwen_omni
 
-__all__ = [asr, tts, tts_v2, qwen_tts]
+__all__ = [asr, tts, tts_v2, qwen_tts, qwen_tts_realtime, qwen_omni]
dashscope/audio/qwen_omni/__init__.py ADDED
@@ -0,0 +1,11 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+from .omni_realtime import (AudioFormat, MultiModality, OmniRealtimeCallback,
+                            OmniRealtimeConversation)
+
+__all__ = [
+    'OmniRealtimeCallback',
+    'AudioFormat',
+    'MultiModality',
+    'OmniRealtimeConversation',
+]
dashscope/audio/qwen_omni/omni_realtime.py ADDED
@@ -0,0 +1,415 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import json
+import platform
+import threading
+import time
+from typing import List
+import uuid
+from enum import Enum, unique
+
+import dashscope
+import websocket
+from dashscope.common.error import InputRequired, ModelRequired
+from dashscope.common.logging import logger
+
+
+class OmniRealtimeCallback:
+    """
+    An interface that defines callback methods for getting omni-realtime results. # noqa E501
+    Derive from this class and implement its function to provide your own data.
+    """
+    def on_open(self) -> None:
+        pass
+
+    def on_close(self, close_status_code, close_msg) -> None:
+        pass
+
+    def on_event(self, message: str) -> None:
+        pass
+
+
+@unique
+class AudioFormat(Enum):
+    # format, sample_rate, channels, bit_rate, name
+    PCM_16000HZ_MONO_16BIT = ('pcm', 16000, 'mono', '16bit', 'pcm16')
+    PCM_24000HZ_MONO_16BIT = ('pcm', 24000, 'mono', '16bit', 'pcm16')
+
+    def __init__(self, format, sample_rate, channels, bit_rate, format_str):
+        self.format = format
+        self.sample_rate = sample_rate
+        self.channels = channels
+        self.bit_rate = bit_rate
+        self.format_str = format_str
+
+    def __repr__(self):
+        return self.format_str
+
+    def __str__(self):
+        return f'{self.format.upper()} with {self.sample_rate}Hz sample rate, {self.channels} channel, {self.bit_rate} bit rate: {self.format_str}'
+
+
+class MultiModality(Enum):
+    """
+    MultiModality
+    """
+    TEXT = 'text'
+    AUDIO = 'audio'
+
+    def __str__(self):
+        return self.name
+
+
+class OmniRealtimeConversation:
+    def __init__(
+        self,
+        model,
+        callback: OmniRealtimeCallback,
+        headers=None,
+        workspace=None,
+        url=None,
+        additional_params=None,
+    ):
+        """
+        Qwen Omni Realtime SDK
+        Parameters:
+        -----------
+        model: str
+            Model name.
+        headers: Dict
+            User-defined headers.
+        callback: OmniRealtimeCallback
+            Callback to receive real-time omni results.
+        workspace: str
+            Dashscope workspace ID.
+        url: str
+            Dashscope WebSocket URL.
+        additional_params: Dict
+            Additional parameters for the Dashscope API.
+        """
+
+        if model is None:
+            raise ModelRequired('Model is required!')
+        if callback is None:
+            raise ModelRequired('Callback is required!')
+        if url is None:
+            url = f'wss://dashscope.aliyuncs.com/api-ws/v1/realtime?model={model}'
+        else:
+            url = f'{url}?model={model}'
+        self.url = url
+        self.apikey = dashscope.api_key
+        self.user_headers = headers
+        self.user_workspace = workspace
+        self.model = model
+        self.config = {}
+        self.callback = callback
+        self.ws = None
+        self.session_id = None
+        self.last_message = None
+        self.last_response_id = None
+        self.last_response_create_time = None
+        self.last_first_text_delay = None
+        self.last_first_audio_delay = None
+        self.metrics = []
+
+    def _generate_event_id(self):
+        '''
+        generate random event id: event_xxxx
+        '''
+        return 'event_' + uuid.uuid4().hex
+
+    def _get_websocket_header(self, ):
+        ua = 'dashscope/%s; python/%s; platform/%s; processor/%s' % (
+            '1.18.0',  # dashscope version
+            platform.python_version(),
+            platform.platform(),
+            platform.processor(),
+        )
+        headers = {
+            'user-agent': ua,
+            'Authorization': 'bearer ' + self.apikey,
+        }
+        if self.user_headers:
+            headers = {**self.user_headers, **headers}
+        if self.user_workspace:
+            headers = {
+                **headers,
+                'X-DashScope-WorkSpace': self.user_workspace,
+            }
+        return headers
+
+    def connect(self) -> None:
+        '''
+        connect to server, create session and return default session configuration
+        '''
+        self.ws = websocket.WebSocketApp(
+            self.url,
+            header=self._get_websocket_header(),
+            on_message=self.on_message,
+            on_error=self.on_error,
+            on_close=self.on_close,
+        )
+        self.thread = threading.Thread(target=self.ws.run_forever)
+        self.thread.daemon = True
+        self.thread.start()
+        timeout = 5  # maximum wait time in seconds
+        start_time = time.time()
+        while (not (self.ws.sock and self.ws.sock.connected)
+               and (time.time() - start_time) < timeout):
+            time.sleep(0.1)  # sleep briefly to avoid busy polling
+        if not (self.ws.sock and self.ws.sock.connected):
+            raise TimeoutError(
+                'websocket connection could not established within 5s. '
+                'Please check your network connection, firewall settings, or server status.'
+            )
+        self.callback.on_open()
+
+    def __send_str(self, data: str, enable_log: bool = True):
+        if enable_log:
+            logger.debug('[omni realtime] send string: {}'.format(data))
+        self.ws.send(data)
+
+    def update_session(self,
+                       output_modalities: List[MultiModality],
+                       voice: str,
+                       input_audio_format: AudioFormat = AudioFormat.
+                       PCM_16000HZ_MONO_16BIT,
+                       output_audio_format: AudioFormat = AudioFormat.
+                       PCM_24000HZ_MONO_16BIT,
+                       enable_input_audio_transcription: bool = True,
+                       input_audio_transcription_model: str = None,
+                       enable_turn_detection: bool = True,
+                       turn_detection_type: str = 'server_vad',
+                       prefix_padding_ms: int = 300,
+                       turn_detection_threshold: float = 0.2,
+                       turn_detection_silence_duration_ms: int = 800,
+                       turn_detection_param: dict = None,
+                       **kwargs) -> None:
+        '''
+        update session configuration, should be used before create response
+
+        Parameters
+        ----------
+        output_modalities: list[MultiModality]
+            omni output modalities to be used in session
+        voice: str
+            voice to be used in session
+        input_audio_format: AudioFormat
+            input audio format
+        output_audio_format: AudioFormat
+            output audio format
+        enable_turn_detection: bool
+            enable turn detection
+        turn_detection_threshold: float
+            turn detection threshold, range [-1, 1]
+            In a noisy environment, it may be necessary to increase the threshold to reduce false detections
+            In a quiet environment, it may be necessary to decrease the threshold to improve sensitivity
+        turn_detection_silence_duration_ms: int
+            duration of silence in milliseconds to detect turn, range [200, 6000]
+        '''
+        self.config = {
+            'modalities': [m.value for m in output_modalities],
+            'voice': voice,
+            'input_audio_format': input_audio_format.format_str,
+            'output_audio_format': output_audio_format.format_str,
+        }
+        if enable_input_audio_transcription:
+            self.config['input_audio_transcription'] = {
+                'model': input_audio_transcription_model,
+            }
+        else:
+            self.config['input_audio_transcription'] = None
+        if enable_turn_detection:
+            self.config['turn_detection'] = {
+                'type': turn_detection_type,
+                'threshold': turn_detection_threshold,
+                'prefix_padding_ms': prefix_padding_ms,
+                'silence_duration_ms': turn_detection_silence_duration_ms,
+            }
+            if turn_detection_param is not None:
+                self.config['turn_detection'].update(turn_detection_param)
+        else:
+            self.config['turn_detection'] = None
+        self.config.update(kwargs)
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'session.update',
+                'session': self.config
+            }))
+
+    def append_audio(self, audio_b64: str) -> None:
+        '''
+        send audio in base64 format
+
+        Parameters
+        ----------
+        audio_b64: str
+            base64 audio string
+        '''
+        logger.debug('[omni realtime] append audio: {}'.format(len(audio_b64)))
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'input_audio_buffer.append',
+                'audio': audio_b64
+            }), False)
+
+    def append_video(self, video_b64: str) -> None:
+        '''
+        send one image frame in video in base64 format
+
+        Parameters
+        ----------
+        video_b64: str
+            base64 image string
+        '''
+        logger.debug('[omni realtime] append video: {}'.format(len(video_b64)))
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'input_image_buffer.append',
+                'image': video_b64
+            }), False)
+
+    def commit(self, ) -> None:
+        '''
+        Commit the audio and video sent before.
+        When in Server VAD mode, the client does not need to use this method,
+        the server will commit the audio automatically after detecting vad end.
+        '''
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'input_audio_buffer.commit'
+            }))
+
+    def clear_appended_audio(self, ) -> None:
+        '''
+        clear the audio sent to server before.
+        '''
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'input_audio_buffer.clear'
+            }))
+
+    def create_response(self,
+                        instructions: str = None,
+                        output_modalities: List[MultiModality] = None) -> None:
+        '''
+        create response, use audio and video commited before to request llm.
+        When in Server VAD mode, the client does not need to use this method,
+        the server will create response automatically after detecting vad
+        and sending commit.
+
+        Parameters
+        ----------
+        instructions: str
+            instructions to llm
+        output_modalities: list[MultiModality]
+            omni output modalities to be used in session
+        '''
+        request = {
+            'event_id': self._generate_event_id(),
+            'type': 'response.create',
+            'response': {}
+        }
+        request['response']['instructions'] = instructions
+        if output_modalities:
+            request['response']['modalities'] = [
+                m.value for m in output_modalities
+            ]
+        self.__send_str(json.dumps(request))
+
+    def cancel_response(self, ) -> None:
+        '''
+        cancel the current response
+        '''
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'response.cancel'
+            }))
+
+    def send_raw(self, raw_data: str) -> None:
+        '''
+        send raw data to server
+        '''
+        self.__send_str(raw_data)
+
+    def close(self, ) -> None:
+        '''
+        close the connection to server
+        '''
+        self.ws.close()
+
+    # callback for incoming messages
+    def on_message(self, ws, message):
+        if isinstance(message, str):
+            logger.debug('[omni realtime] receive string {}'.format(
+                message[:1024]))
+            try:
+                # try to parse the message as JSON
+                json_data = json.loads(message)
+                self.last_message = json_data
+                self.callback.on_event(json_data)
+                if 'type' in message:
+                    if 'session.created' == json_data['type']:
+                        self.session_id = json_data['session']['id']
+                    if 'response.created' == json_data['type']:
+                        self.last_response_id = json_data['response']['id']
+                        self.last_response_create_time = time.time() * 1000
+                        self.last_first_audio_delay = None
+                        self.last_first_text_delay = None
+                    elif 'response.audio_transcript.delta' == json_data[
+                            'type']:
+                        if self.last_response_create_time and self.last_first_text_delay is None:
+                            self.last_first_text_delay = time.time(
+                            ) * 1000 - self.last_response_create_time
+                    elif 'response.audio.delta' == json_data['type']:
+                        if self.last_response_create_time and self.last_first_audio_delay is None:
+                            self.last_first_audio_delay = time.time(
+                            ) * 1000 - self.last_response_create_time
+                    elif 'response.done' == json_data['type']:
+                        logger.info(
+                            '[Metric] response: {}, first text delay: {}, first audio delay: {}'
+                            .format(self.last_response_id,
+                                    self.last_first_text_delay,
+                                    self.last_first_audio_delay))
+            except json.JSONDecodeError:
+                logger.error('Failed to parse message as JSON.')
+                raise Exception('Failed to parse message as JSON.')
+        elif isinstance(message, (bytes, bytearray)):
+            # otherwise treat it as a binary message
+            logger.error(
+                'should not receive binary message in omni realtime api')
+            logger.debug('[omni realtime] receive binary {} bytes'.format(
+                len(message)))
+
+    def on_close(self, ws, close_status_code, close_msg):
+        self.callback.on_close(close_status_code, close_msg)
+
+    # callback for WebSocket errors
+    def on_error(self, ws, error):
+        print(f'websocket closed due to {error}')
+        raise Exception(f'websocket closed due to {error}')
+
+    # get the taskId of the last task
+    def get_session_id(self) -> str:
+        return self.session_id
+
+    def get_last_message(self) -> str:
+        return self.last_message
+
+    def get_last_message(self) -> str:
+        return self.last_message
+
+    def get_last_response_id(self) -> str:
+        return self.last_response_id
+
+    def get_last_first_text_delay(self):
+        return self.last_first_text_delay
+
+    def get_last_first_audio_delay(self):
+        return self.last_first_audio_delay
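
The module above drives the wss://dashscope.aliyuncs.com/api-ws/v1/realtime endpoint from a background websocket thread and surfaces parsed events through the callback. A minimal usage sketch, assuming dashscope.api_key is set and a realtime-capable model; the model and voice names below are illustrative placeholders, not values confirmed by this diff:

    import base64

    from dashscope.audio.qwen_omni import (AudioFormat, MultiModality,
                                           OmniRealtimeCallback,
                                           OmniRealtimeConversation)

    class PrintEvents(OmniRealtimeCallback):
        def on_open(self):
            print('connected')

        def on_event(self, message):
            # on_message above hands the callback an already-parsed dict
            print('event:', message.get('type'))

        def on_close(self, close_status_code, close_msg):
            print('closed:', close_status_code, close_msg)

    conversation = OmniRealtimeConversation(
        model='qwen-omni-realtime',  # illustrative model name
        callback=PrintEvents())
    conversation.connect()
    conversation.update_session(
        output_modalities=[MultiModality.TEXT, MultiModality.AUDIO],
        voice='Chelsie',  # illustrative voice name
        input_audio_format=AudioFormat.PCM_16000HZ_MONO_16BIT,
        enable_turn_detection=False)  # manual commit for this sketch
    with open('question.pcm', 'rb') as f:
        conversation.append_audio(base64.b64encode(f.read()).decode('ascii'))
    conversation.commit()
    conversation.create_response()
    # ... wait for a response.done event via the callback, then:
    conversation.close()

With enable_turn_detection=True (the default, server VAD), the explicit commit() and create_response() calls are unnecessary, per the docstrings above.
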
dashscope/audio/qwen_tts_realtime/__init__.py ADDED
@@ -0,0 +1,10 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+from .qwen_tts_realtime import (AudioFormat, QwenTtsRealtimeCallback,
+                                QwenTtsRealtime)
+
+__all__ = [
+    'AudioFormat',
+    'QwenTtsRealtimeCallback',
+    'QwenTtsRealtime',
+]
dashscope/audio/qwen_tts_realtime/qwen_tts_realtime.py ADDED
@@ -0,0 +1,314 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import json
+import platform
+import threading
+import time
+import uuid
+from enum import Enum, unique
+
+import dashscope
+import websocket
+from dashscope.common.error import InputRequired, ModelRequired
+from dashscope.common.logging import logger
+
+
+class QwenTtsRealtimeCallback:
+    """
+    An interface that defines callback methods for getting omni-realtime results. # noqa E501
+    Derive from this class and implement its function to provide your own data.
+    """
+    def on_open(self) -> None:
+        pass
+
+    def on_close(self, close_status_code, close_msg) -> None:
+        pass
+
+    def on_event(self, message: str) -> None:
+        pass
+
+
+@unique
+class AudioFormat(Enum):
+    # format, sample_rate, channels, bit_rate, name
+    PCM_24000HZ_MONO_16BIT = ('pcm', 24000, 'mono', '16bit', 'pcm16')
+
+    def __init__(self, format, sample_rate, channels, bit_rate, format_str):
+        self.format = format
+        self.sample_rate = sample_rate
+        self.channels = channels
+        self.bit_rate = bit_rate
+        self.format_str = format_str
+
+    def __repr__(self):
+        return self.format_str
+
+    def __str__(self):
+        return f'{self.format.upper()} with {self.sample_rate}Hz sample rate, {self.channels} channel, {self.bit_rate} bit rate: {self.format_str}'
+
+
+class QwenTtsRealtime:
+    def __init__(
+        self,
+        model,
+        headers=None,
+        callback: QwenTtsRealtimeCallback = None,
+        workspace=None,
+        url=None,
+        additional_params=None,
+    ):
+        """
+        Qwen Tts Realtime SDK
+        Parameters:
+        -----------
+        model: str
+            Model name.
+        headers: Dict
+            User-defined headers.
+        callback: OmniRealtimeCallback
+            Callback to receive real-time omni results.
+        workspace: str
+            Dashscope workspace ID.
+        url: str
+            Dashscope WebSocket URL.
+        additional_params: Dict
+            Additional parameters for the Dashscope API.
+        """
+
+        if model is None:
+            raise ModelRequired('Model is required!')
+        if url is None:
+            url = f'wss://dashscope.aliyuncs.com/api-ws/v1/realtime?model={model}'
+        else:
+            url = f'{url}?model={model}'
+        self.url = url
+        self.apikey = dashscope.api_key
+        self.user_headers = headers
+        self.user_workspace = workspace
+        self.model = model
+        self.config = {}
+        self.callback = callback
+        self.ws = None
+        self.session_id = None
+        self.last_message = None
+        self.last_response_id = None
+        self.last_first_text_time = None
+        self.last_first_audio_delay = None
+        self.metrics = []
+
+    def _generate_event_id(self):
+        '''
+        generate random event id: event_xxxx
+        '''
+        return 'event_' + uuid.uuid4().hex
+
+    def _get_websocket_header(self, ):
+        ua = 'dashscope/%s; python/%s; platform/%s; processor/%s' % (
+            '1.18.0',  # dashscope version
+            platform.python_version(),
+            platform.platform(),
+            platform.processor(),
+        )
+        headers = {
+            'user-agent': ua,
+            'Authorization': 'bearer ' + self.apikey,
+        }
+        if self.user_headers:
+            headers = {**self.user_headers, **headers}
+        if self.user_workspace:
+            headers = {
+                **headers,
+                'X-DashScope-WorkSpace': self.user_workspace,
+            }
+        return headers
+
+    def connect(self) -> None:
+        '''
+        connect to server, create session and return default session configuration
+        '''
+        self.ws = websocket.WebSocketApp(
+            self.url,
+            header=self._get_websocket_header(),
+            on_message=self.on_message,
+            on_error=self.on_error,
+            on_close=self.on_close,
+        )
+        self.thread = threading.Thread(target=self.ws.run_forever)
+        self.thread.daemon = True
+        self.thread.start()
+        timeout = 5  # maximum wait time in seconds
+        start_time = time.time()
+        while (not (self.ws.sock and self.ws.sock.connected)
+               and (time.time() - start_time) < timeout):
+            time.sleep(0.1)  # sleep briefly to avoid busy polling
+        if not (self.ws.sock and self.ws.sock.connected):
+            raise TimeoutError(
+                'websocket connection could not established within 5s. '
+                'Please check your network connection, firewall settings, or server status.'
+            )
+        self.callback.on_open()
+
+    def __send_str(self, data: str, enable_log: bool = True):
+        if enable_log:
+            logger.debug('[qwen tts realtime] send string: {}'.format(data))
+        self.ws.send(data)
+
+    def update_session(self,
+                       voice: str,
+                       response_format: AudioFormat = AudioFormat.
+                       PCM_24000HZ_MONO_16BIT,
+                       mode: str = 'server_commit',
+                       **kwargs) -> None:
+        '''
+        update session configuration, should be used before create response
+
+        Parameters
+        ----------
+        voice: str
+            voice to be used in session
+        response_format: AudioFormat
+            output audio format
+        mode: str
+            response mode, server_commit or commit
+        '''
+        self.config = {
+            'voice': voice,
+            'mode': mode,
+            'response_format': response_format.format,
+            'sample_rate': response_format.sample_rate,
+        }
+        self.config.update(kwargs)
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'session.update',
+                'session': self.config
+            }))
+
+    def append_text(self, text: str) -> None:
+        '''
+        send text
+
+        Parameters
+        ----------
+        text: str
+            text to send
+        '''
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'input_text_buffer.append',
+                'text': text
+            }))
+        if self.last_first_text_time is None:
+            self.last_first_text_time = time.time() * 1000
+
+    def commit(self, ) -> None:
+        '''
+        commit the text sent before, create response and start synthesis audio.
+        '''
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'input_text_buffer.commit'
+            }))
+
+    def clear_appended_text(self, ) -> None:
+        '''
+        clear the text sent to server before.
+        '''
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'input_text_buffer.clear'
+            }))
+
+    def cancel_response(self, ) -> None:
+        '''
+        cancel the current response
+        '''
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'response.cancel'
+            }))
+
+    def send_raw(self, raw_data: str) -> None:
+        '''
+        send raw data to server
+        '''
+        self.__send_str(raw_data)
+
+    def finish(self, ) -> None:
+        '''
+        finish input text stream, server will synthesis all text in buffer and close the connection
+        '''
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'session.finish'
+            }))
+
+    def close(self, ) -> None:
+        '''
+        close the connection to server
+        '''
+        self.ws.close()
+
+    # callback for incoming messages
+    def on_message(self, ws, message):
+        if isinstance(message, str):
+            logger.debug('[omni realtime] receive string {}'.format(
+                message[:1024]))
+            try:
+                # try to parse the message as JSON
+                json_data = json.loads(message)
+                self.last_message = json_data
+                self.callback.on_event(json_data)
+                if 'type' in message:
+                    if 'session.created' == json_data['type']:
+                        self.session_id = json_data['session']['id']
+                    if 'response.created' == json_data['type']:
+                        self.last_response_id = json_data['response']['id']
+                    elif 'response.audio.delta' == json_data['type']:
+                        if self.last_first_text_time and self.last_first_audio_delay is None:
+                            self.last_first_audio_delay = time.time(
+                            ) * 1000 - self.last_first_text_time
+                    elif 'response.done' == json_data['type']:
+                        logger.debug(
+                            '[Metric] response: {}, first audio delay: {}'
+                            .format(self.last_response_id,
+                                    self.last_first_audio_delay))
+            except json.JSONDecodeError:
+                logger.error('Failed to parse message as JSON.')
+                raise Exception('Failed to parse message as JSON.')
+        elif isinstance(message, (bytes, bytearray)):
+            # otherwise treat it as a binary message
+            logger.error(
+                'should not receive binary message in omni realtime api')
+            logger.debug('[omni realtime] receive binary {} bytes'.format(
+                len(message)))
+
+    def on_close(self, ws, close_status_code, close_msg):
+        logger.debug(
+            '[omni realtime] connection closed with code {} and message {}'.format(
+                close_status_code, close_msg))
+        self.callback.on_close(close_status_code, close_msg)
+
+    # callback for WebSocket errors
+    def on_error(self, ws, error):
+        print(f'websocket closed due to {error}')
+        raise Exception(f'websocket closed due to {error}')
+
+    # get the taskId of the last task
+    def get_session_id(self):
+        return self.session_id
+
+    def get_last_message(self):
+        return self.last_message
+
+    def get_last_response_id(self):
+        return self.last_response_id
+
+    def get_first_audio_delay(self):
+        return self.last_first_audio_delay
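
QwenTtsRealtime mirrors the omni client but streams text in and synthesized audio out. A minimal sketch, assuming dashscope.api_key is set; the model and voice names are illustrative, and the 'delta' payload key on response.audio.delta events is an assumption inferred from the event name, not something this diff confirms:

    import base64

    from dashscope.audio.qwen_tts_realtime import (AudioFormat, QwenTtsRealtime,
                                                   QwenTtsRealtimeCallback)

    class CollectAudio(QwenTtsRealtimeCallback):
        def __init__(self):
            self.pcm = bytearray()

        def on_event(self, message):
            if message.get('type') == 'response.audio.delta':
                # assumed payload key; adjust to the server's actual schema
                self.pcm.extend(base64.b64decode(message['delta']))

    collector = CollectAudio()
    tts = QwenTtsRealtime(model='qwen-tts-realtime',  # illustrative model name
                          callback=collector)
    tts.connect()
    tts.update_session(voice='Chelsie',  # illustrative voice name
                       response_format=AudioFormat.PCM_24000HZ_MONO_16BIT,
                       mode='server_commit')
    tts.append_text('Hello from the realtime TTS sketch.')
    tts.finish()  # server synthesizes buffered text, then closes the session
    # collector.pcm then holds 24 kHz mono 16-bit PCM once response.done arrives
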
dashscope/embeddings/text_embedding.py CHANGED
@@ -15,6 +15,7 @@ class TextEmbedding(BaseApi):
     text_embedding_v1 = 'text-embedding-v1'
     text_embedding_v2 = 'text-embedding-v2'
     text_embedding_v3 = 'text-embedding-v3'
+    text_embedding_v4 = 'text-embedding-v4'
 
     @classmethod
     def call(cls,
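
The only change here registers the new 'text-embedding-v4' model name. A minimal call sketch, assuming a configured API key; the response field layout below is an assumption carried over from the v1-v3 models, not confirmed by this diff:

    import dashscope
    from dashscope import TextEmbedding

    dashscope.api_key = 'sk-...'  # placeholder, set your real key
    resp = TextEmbedding.call(model='text-embedding-v4',
                              input='hello world')
    if resp.status_code == 200:
        # assumes the v1-v3 output layout: a list of per-input embeddings
        vector = resp.output['embeddings'][0]['embedding']
        print(len(vector))
    else:
        print(resp.code, resp.message)
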
dashscope/multimodal/multimodal_request_params.py CHANGED
@@ -83,12 +83,15 @@ class Upstream:
     # sample_rate: int  # synthesized-audio sample rate
 
     def to_dict(self):
-        return {
+        upstream: dict = {
             "type": self.type,
             "mode": self.mode,
             "audio_format": self.audio_format,
             # "sample_rate": self.sample_rate
         }
+        if self.pass_through_params is not None:
+            upstream.update(self.pass_through_params)
+        return upstream
 
 
 @dataclass
@@ -105,6 +108,7 @@ class Downstream:
     volume: int = field(default=50)  # speech volume, 0-100
     pitch_rate: int = field(default=100)  # speech pitch, 50-200
     speech_rate: int = field(default=100)  # speech rate, 50-200
+    pass_through_params: dict = field(default=None)
 
     def to_dict(self):
         stream: dict = {
@@ -120,6 +124,8 @@ class Downstream:
             stream["voice"] = self.voice
         if self.sample_rate != 0:
             stream["sample_rate"] = self.sample_rate
+        if self.pass_through_params is not None:
+            stream.update(self.pass_through_params)
         return stream
 
 
@@ -199,6 +205,7 @@ class BizParams:
     user_prompt_params: dict = field(default=None)
     user_query_params: dict = field(default=None)
     videos: list = field(default=None)
+    pass_through_params: dict = field(default=None)
 
     def to_dict(self):
         params = {}
@@ -214,6 +221,8 @@ class BizParams:
             params["user_query_params"] = self.user_query_params
         if self.videos is not None:
             params["videos"] = self.videos
+        if self.pass_through_params is not None:
+            params.update(self.pass_through_params)
         return params
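The pattern added to each of these to_dict methods is the same: an optional pass_through_params dict is merged into the serialized payload, letting callers forward server-side fields the dataclasses do not model yet. A standalone sketch of the merge semantics (the field names and the 'enable_ssml' key are illustrative, not dashscope API):

    from dataclasses import dataclass, field

    @dataclass
    class Downstream:
        voice: str = 'default-voice'  # illustrative value
        volume: int = field(default=50)
        pass_through_params: dict = field(default=None)

        def to_dict(self):
            stream: dict = {'voice': self.voice, 'volume': self.volume}
            if self.pass_through_params is not None:
                # dict.update: pass-through keys win on collision, so a
                # caller can also override fields the dataclass models
                stream.update(self.pass_through_params)
            return stream

    d = Downstream(pass_through_params={'enable_ssml': True, 'volume': 80})
    print(d.to_dict())
    # {'voice': 'default-voice', 'volume': 80, 'enable_ssml': True}
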
dashscope/version.py CHANGED
@@ -1,3 +1,3 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 
-__version__ = '1.23.7'
+__version__ = '1.23.9'
dashscope-1.23.9.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dashscope
-Version: 1.23.7
+Version: 1.23.9
 Summary: dashscope client sdk library
 Home-page: https://dashscope.aliyun.com/
 Author: Alibaba Cloud
dashscope-1.23.9.dist-info/RECORD CHANGED
@@ -3,7 +3,7 @@ dashscope/cli.py,sha256=amegoTkGOs6TlHMdoo4JVOqBePo3lGs745rc7leEyrE,24020
 dashscope/files.py,sha256=vRDQygm3lOqBZR73o7KNHs1iTBVuvLncuwJNxIYjzAU,3981
 dashscope/model.py,sha256=B5v_BtYLPqj6raClejBgdKg6WTGwhH_f-20pvsQqmsk,1491
 dashscope/models.py,sha256=dE4mzXkl85G343qVylSGpURPRdA5pZSqXlx6PcxqC_Q,1275
-dashscope/version.py,sha256=HwgMDxa-K4tdaSJnt84_qDc7qTyV4eqR_QjdjT4ulxc,74
+dashscope/version.py,sha256=ACHUqhomdzZ7aih_Y368NUmXBBfj2GY5GqSf2NoTta4,74
 dashscope/aigc/__init__.py,sha256=AuRhu_vA1K0tbs_C6DgcZYhTvxMuzDgpwHJNHzEPIHg,442
 dashscope/aigc/chat_completion.py,sha256=ONlyyssIbfaKKcFo7cEKhHx5OCF2XX810HFzIExW1ho,14813
 dashscope/aigc/code_generation.py,sha256=p_mxDKJLQMW0IjFD46JRlZuEZCRESSVKEfLlAevBtqw,10936
@@ -27,17 +27,21 @@ dashscope/app/application.py,sha256=Whf_ij4RHOaY12_xdS8uj8HVNCwkTp_MRdrFTryF1Kg,
 dashscope/app/application_response.py,sha256=z9BKcb9QuV_TzHkqtwaCwap2GQ1mP48uDD7gIc2il98,7038
 dashscope/assistants/__init__.py,sha256=hjCTuv13yFaXyUqlexAU-RaO0Ahq3P7VK9_LkSbkGVU,434
 dashscope/assistants/assistant_types.py,sha256=DQ_lOust10wjiV38Nlsu3HaGYNbGlJoaLjEza82Wotk,4262
-dashscope/assistants/assistants.py,sha256=_ADX4s8Mlfhfq7kb8xMLztNX_09NAW7DouvbVLY0jpw,10884
+dashscope/assistants/assistants.py,sha256=t8BGu9K2YbNpBenUyFFDMx5WpZ5x5OtbssJkj0xfVo0,10930
 dashscope/assistants/files.py,sha256=Ol2h7L2vNV8kgWqum2B-3B9vtLEHB_2KWt0K7e96Bmg,6750
-dashscope/audio/__init__.py,sha256=RKw-A1PLA5F4jiXI3AnuRSYeHFzQl2OW2ZGW6xyHS1Q,132
+dashscope/audio/__init__.py,sha256=7e3ejVsDJxEbMHN-9E0nEDfU-CnnQ4JgtgUxqNs0IG4,192
 dashscope/audio/asr/__init__.py,sha256=JoCenJAUVOQXPmAn1toKeFYCfc8BqNn0NKpqjuJvNJc,1055
 dashscope/audio/asr/asr_phrase_manager.py,sha256=vHOLExaKCtjedkihIu7gyfQyarR9rN5JZn79LvlCpco,7693
 dashscope/audio/asr/recognition.py,sha256=b_aAPvOKjpWdSiYhM_hp30sZ06QdmNBSDJwhiv78kHM,20932
 dashscope/audio/asr/transcription.py,sha256=lYzPjh7jJQwjMoxx8-AY0YCMBKNKO0bi7xd5tZGSHPc,9094
 dashscope/audio/asr/translation_recognizer.py,sha256=JgBmhkIl_kqH8uVwop6Fba5KlXccftKFrhaygN9PKjU,39680
 dashscope/audio/asr/vocabulary.py,sha256=N0pMS2x1lDxqJ14FgTGKctfuVkR2_hlEsCNWFcgYpTY,6717
+dashscope/audio/qwen_omni/__init__.py,sha256=MEFxmyxr5H6bW22l_R9073Pl6Ka6knvhrATGT-4UBjI,298
+dashscope/audio/qwen_omni/omni_realtime.py,sha256=eBmoOxuKcfzMHuXsQWCrIIKmso9iEzYylOeYZ5upv-w,14869
 dashscope/audio/qwen_tts/__init__.py,sha256=JS3axY1grqO0aTIJufZ3KS1JsU6yf6y4K2CQlNvUK9I,132
 dashscope/audio/qwen_tts/speech_synthesizer.py,sha256=7LHR-PXhn-VE1cCOp_82Jq0zE9rMc3xy3dszUeyLLNs,2927
+dashscope/audio/qwen_tts_realtime/__init__.py,sha256=vVkmeJr_mEAn_O0Rh5AU3ICg6qIZqppUryJ5lY8VYPo,254
+dashscope/audio/qwen_tts_realtime/qwen_tts_realtime.py,sha256=8bOAMcDasTHwSLb9xAGJoj9eUPpQHh2aWvonV6Kf1U4,10367
 dashscope/audio/tts/__init__.py,sha256=xYpMFseUZGgqgj_70zcX2VsLv-L7qxJ3d-bbdj_hO0I,245
 dashscope/audio/tts/speech_synthesizer.py,sha256=vD1xQV-rew8qAsIaAGH5amsNtB0SqdtNhVHhJHGQ-xk,7622
 dashscope/audio/tts_v2/__init__.py,sha256=me9a3_7KsHQxcJ8hx4SeKlY1e_ThHVvGMw7Yn0uoscM,333
@@ -62,14 +66,14 @@ dashscope/embeddings/__init__.py,sha256=XQ7vKr8oZM2CmdOduE53BWy6_Qpn9xUPkma64yw8
 dashscope/embeddings/batch_text_embedding.py,sha256=lVhvTS8McYfXuqt_8CmmhA6bPqD0nrGv965kjYG_j0E,8842
 dashscope/embeddings/batch_text_embedding_response.py,sha256=ZfkJMUq8GRsFA6XUTsiAsIySqGJH-VPi2P9Ba1KTU-s,2056
 dashscope/embeddings/multimodal_embedding.py,sha256=NwjQsdkKgUz51ozGjqFDzVlLcZjY0m1JNdH1EyAY0a4,4109
-dashscope/embeddings/text_embedding.py,sha256=4W1V-Ggj0WJhY5MdP2xoUTteXlWk8TUtI6y2gRUVCUk,2060
+dashscope/embeddings/text_embedding.py,sha256=2MPEyMB99xueDbvFg9kKAe8bgHMDEaFLaFa6GzDWDHg,2108
 dashscope/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dashscope/io/input_output.py,sha256=0aXrRJFo1ZqYm_AJWR_w88O4-Btn9np2zUhrrUdBdfw,3992
 dashscope/multimodal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dashscope/multimodal/dialog_state.py,sha256=CtOdfGWhq0ePG3bc8-7inhespETtPD4QDli1513hd1A,1522
 dashscope/multimodal/multimodal_constants.py,sha256=z_QVq01E43FAqKQnDu9vdf89d1zuYlWyANewWTEXVJM,1282
 dashscope/multimodal/multimodal_dialog.py,sha256=HymlaQYp7SgJdoKbT27SNiviyRRoM91zklNBwTHmm1Q,23939
-dashscope/multimodal/multimodal_request_params.py,sha256=7A4UhsbYjcX7aAJwWI1xZEt0e1bSgPcu5pJAinaZyx0,7907
+dashscope/multimodal/multimodal_request_params.py,sha256=9Dlvyy0u67K5FtMfWkCRLHKsevTM8jvT2V-OljZP5sM,8350
 dashscope/nlp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dashscope/nlp/understanding.py,sha256=00ado-ibYEzBRT0DgKGd3bohQDNW73xnFhJ_1aa87lw,2880
 dashscope/protocol/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -93,9 +97,9 @@ dashscope/tokenizers/tokenizer.py,sha256=3FQVDvMNkCW9ccYeJdjrd_PIMMD3Xv7aNZkaYOE
 dashscope/tokenizers/tokenizer_base.py,sha256=5EJIFuizMWESEmLmbd38yJnfeHmPnzZPwsO4aOGjpl4,707
 dashscope/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dashscope/utils/oss_utils.py,sha256=L5LN3lN8etVxSL_jkZydstvEKpnTG9CY0zcvPGQ5LBo,7383
-dashscope-1.23.7.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-dashscope-1.23.7.dist-info/METADATA,sha256=JxFpTi5zU9f1O1uLoTH1OoV6iV3K3aAqshhD1MTDtFY,7123
-dashscope-1.23.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dashscope-1.23.7.dist-info/entry_points.txt,sha256=e9C3sOf9zDYL0O5ROEGX6FT8w-QK_kaGRWmPZDHAFys,49
-dashscope-1.23.7.dist-info/top_level.txt,sha256=woqavFJK9zas5xTqynmALqOtlafghjsk63Xk86powTU,10
-dashscope-1.23.7.dist-info/RECORD,,
+dashscope-1.23.9.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+dashscope-1.23.9.dist-info/METADATA,sha256=tQQhkm5TxNwI2TCmWzT11dwWd9UGFb7cI1cojbo9Dpw,7123
+dashscope-1.23.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dashscope-1.23.9.dist-info/entry_points.txt,sha256=e9C3sOf9zDYL0O5ROEGX6FT8w-QK_kaGRWmPZDHAFys,49
+dashscope-1.23.9.dist-info/top_level.txt,sha256=woqavFJK9zas5xTqynmALqOtlafghjsk63Xk86powTU,10
+dashscope-1.23.9.dist-info/RECORD,,