dashscope-1.12.0-py3-none-any.whl → dashscope-1.13.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dashscope/__init__.py +8 -27
- dashscope/aigc/code_generation.py +14 -17
- dashscope/aigc/conversation.py +26 -20
- dashscope/aigc/generation.py +11 -1
- dashscope/aigc/multimodal_conversation.py +39 -4
- dashscope/api_entities/api_request_data.py +2 -2
- dashscope/api_entities/api_request_factory.py +4 -10
- dashscope/api_entities/dashscope_response.py +18 -9
- dashscope/audio/asr/__init__.py +5 -1
- dashscope/audio/asr/asr_phrase_manager.py +179 -0
- dashscope/audio/asr/recognition.py +61 -3
- dashscope/audio/asr/transcription.py +55 -2
- dashscope/client/base_api.py +13 -8
- dashscope/common/constants.py +5 -2
- dashscope/common/error.py +4 -0
- dashscope/common/utils.py +12 -2
- dashscope/embeddings/batch_text_embedding.py +3 -2
- dashscope/embeddings/multimodal_embedding.py +37 -9
- dashscope/embeddings/text_embedding.py +1 -0
- dashscope/finetune.py +2 -0
- dashscope/nlp/understanding.py +11 -16
- dashscope/tokenizers/__init__.py +1 -1
- dashscope/utils/__init__.py +0 -0
- dashscope/utils/oss_utils.py +133 -0
- dashscope/version.py +1 -1
- {dashscope-1.12.0.dist-info → dashscope-1.13.1.dist-info}/METADATA +1 -1
- dashscope-1.13.1.dist-info/RECORD +59 -0
- dashscope-1.12.0.dist-info/RECORD +0 -56
- {dashscope-1.12.0.dist-info → dashscope-1.13.1.dist-info}/LICENSE +0 -0
- {dashscope-1.12.0.dist-info → dashscope-1.13.1.dist-info}/WHEEL +0 -0
- {dashscope-1.12.0.dist-info → dashscope-1.13.1.dist-info}/entry_points.txt +0 -0
- {dashscope-1.12.0.dist-info → dashscope-1.13.1.dist-info}/top_level.txt +0 -0
dashscope/audio/asr/recognition.py
CHANGED

@@ -112,8 +112,17 @@ class Recognition(BaseApi):
         sample_rate (int): The input audio sample rate for speech recognition.

         **kwargs:
+            phrase_id (list, `optional`): The ID of phrase.
             disfluency_removal_enabled(bool, `optional`): Filter mood words,
                 turned off by default.
+            diarization_enabled (bool, `optional`): Speech auto diarization,
+                turned off by default.
+            speaker_count (int, `optional`): The number of speakers.
+            timestamp_alignment_enabled (bool, `optional`): Timestamp-alignment
+                calibration, turned off by default.
+            special_word_filter(str, `optional`): Sensitive word filter.
+            audio_event_detection_enabled(bool, `optional`):
+                Audio event detection, turned off by default.

     Raises:
         InputRequired: Input is required.

@@ -190,6 +199,15 @@ class Recognition(BaseApi):
     def __launch_request(self):
         """Initiate real-time speech recognition requests.
         """
+        resources_list: list = []
+        if self._phrase is not None and len(self._phrase) > 0:
+            item = {'resource_id': self._phrase, 'resource_type': 'asr_phrase'}
+            resources_list.append(item)
+
+        if len(resources_list) > 0:
+            self._kwargs['resources'] = resources_list
+
+        self._tidy_kwargs()
         task_name, _ = _get_task_group_and_task(__name__)
         responses = super().call(model=self.model,
                                  task_group='audio',

@@ -205,10 +223,25 @@ class Recognition(BaseApi):
                                  **self._kwargs)
         return responses

-    def start(self):
+    def start(self, phrase_id: str = None, **kwargs):
         """Real-time speech recognition in asynchronous mode.
         Please call 'stop()' after you have completed recognition.

+        Args:
+            phrase_id (str, `optional`): The ID of phrase.
+
+            **kwargs:
+                disfluency_removal_enabled(bool, `optional`):
+                    Filter mood words, turned off by default.
+                diarization_enabled (bool, `optional`):
+                    Speech auto diarization, turned off by default.
+                speaker_count (int, `optional`): The number of speakers.
+                timestamp_alignment_enabled (bool, `optional`):
+                    Timestamp-alignment calibration, turned off by default.
+                special_word_filter(str, `optional`): Sensitive word filter.
+                audio_event_detection_enabled(bool, `optional`):
+                    Audio event detection, turned off by default.
+
         Raises:
             InvalidParameter: This interface cannot be called again
                 if it has already been started.

@@ -219,6 +252,8 @@ class Recognition(BaseApi):
         if self._running:
             raise InvalidParameter('Speech recognition has started.')

+        self._phrase = phrase_id
+        self._kwargs.update(**kwargs)
         self._recognition_once = False
         self._worker = threading.Thread(target=self.__receive_worker)
         self._worker.start()

@@ -234,11 +269,27 @@ class Recognition(BaseApi):
             self._running = False
             raise InvalidTask('Invalid task, task create failed.')

-    def call(self,
+    def call(self,
+             file: str,
+             phrase_id: str = None,
+             **kwargs) -> RecognitionResult:
         """Real-time speech recognition in synchronous mode.

         Args:
             file (str): The path to the local audio file.
+            phrase_id (str, `optional`): The ID of phrase.
+
+            **kwargs:
+                disfluency_removal_enabled(bool, `optional`):
+                    Filter mood words, turned off by default.
+                diarization_enabled (bool, `optional`):
+                    Speech auto diarization, turned off by default.
+                speaker_count (int, `optional`): The number of speakers.
+                timestamp_alignment_enabled (bool, `optional`):
+                    Timestamp-alignment calibration, turned off by default.
+                special_word_filter(str, `optional`): Sensitive word filter.
+                audio_event_detection_enabled(bool, `optional`):
+                    Audio event detection, turned off by default.

         Raises:
             InvalidParameter: This interface cannot be called again

@@ -258,12 +309,14 @@ class Recognition(BaseApi):
             raise FileNotFoundError('No such file or directory: ' + file)

         self._recognition_once = True
+        self._stream_data.clear()
+        self._phrase = phrase_id
+        self._kwargs.update(**kwargs)
         error_flag: bool = False
         sentences: List[Any] = []
         usages: List[Any] = []
         response: RecognitionResponse = None
         result: RecognitionResult = None
-        self._stream_data.clear()

         try:
             audio_data: bytes = None

@@ -349,6 +402,11 @@ class Recognition(BaseApi):

         self._stream_data = self._stream_data + [buffer]

+    def _tidy_kwargs(self):
+        for k in self._kwargs.copy():
+            if self._kwargs[k] is None:
+                self._kwargs.pop(k, None)
+
     def _input_stream_cycle(self):
         while self._running:
             while len(self._stream_data) == 0:
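Taken together, these changes let callers attach a hot-phrase resource and the new tuning flags in both modes. A minimal usage sketch; the model id, file name, and phrase id below are placeholders, not values from this diff:

from dashscope.audio.asr import Recognition

# Sync mode: phrase_id and the extra kwargs are stored on the instance
# and folded into a 'resources' list by __launch_request().
recognition = Recognition(model='paraformer-realtime-v1',  # assumed model id
                          format='wav',
                          sample_rate=16000,
                          callback=None)
result = recognition.call('sample.wav',
                          phrase_id='your-phrase-id',
                          diarization_enabled=True,
                          speaker_count=2)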
dashscope/audio/asr/transcription.py
CHANGED

@@ -27,6 +27,7 @@ class Transcription(BaseAsyncApi):
     def call(cls,
              model: str,
              file_urls: List[str],
+             phrase_id: str = None,
              api_key: str = None,
              **kwargs) -> TranscriptionResponse:
         """Transcribe the given files synchronously.

@@ -34,11 +35,27 @@ class Transcription(BaseAsyncApi):
         Args:
             model (str): The requested model_id.
             file_urls (List[str]): List of stored URLs.
-
+            phrase_id (str, `optional`): The ID of phrase.
+
+            **kwargs:
+                channel_id (List[int], optional):
+                    The selected channel_id of audio file.
+                disfluency_removal_enabled(bool, `optional`):
+                    Filter mood words, turned off by default.
+                diarization_enabled (bool, `optional`):
+                    Speech auto diarization, turned off by default.
+                speaker_count (int, `optional`): The number of speakers.
+                timestamp_alignment_enabled (bool, `optional`):
+                    Timestamp-alignment calibration, turned off by default.
+                special_word_filter(str, `optional`): Sensitive word filter.
+                audio_event_detection_enabled(bool, `optional`):
+                    Audio event detection, turned off by default.

         Returns:
             TranscriptionResponse: The result of batch transcription.
         """
+        kwargs.update(cls._fill_resource_id(phrase_id, **kwargs))
+        kwargs = cls._tidy_kwargs(**kwargs)
         response = super().call(model, file_urls, api_key=api_key, **kwargs)
         return TranscriptionResponse.from_api_response(response)

@@ -46,6 +63,7 @@ class Transcription(BaseAsyncApi):
     def async_call(cls,
                    model: str,
                    file_urls: List[str],
+                   phrase_id: str = None,
                    api_key: str = None,
                    **kwargs) -> TranscriptionResponse:
         """Transcribe the given files asynchronously,

@@ -54,11 +72,27 @@ class Transcription(BaseAsyncApi):
         Args:
             model (str): The requested model, such as paraformer-16k-1
             file_urls (List[str]): List of stored URLs.
-
+            phrase_id (str, `optional`): The ID of phrase.
+
+            **kwargs:
+                channel_id (List[int], optional):
+                    The selected channel_id of audio file.
+                disfluency_removal_enabled(bool, `optional`):
+                    Filter mood words, turned off by default.
+                diarization_enabled (bool, `optional`):
+                    Speech auto diarization, turned off by default.
+                speaker_count (int, `optional`): The number of speakers.
+                timestamp_alignment_enabled (bool, `optional`):
+                    Timestamp-alignment calibration, turned off by default.
+                special_word_filter(str, `optional`): Sensitive word filter.
+                audio_event_detection_enabled(bool, `optional`):
+                    Audio event detection, turned off by default.

         Returns:
             TranscriptionResponse: The response including task_id.
         """
+        kwargs.update(cls._fill_resource_id(phrase_id, **kwargs))
+        kwargs = cls._tidy_kwargs(**kwargs)
         response = cls._launch_request(model,
                                        file_urls,
                                        api_key=api_key,

@@ -154,3 +188,22 @@ class Transcription(BaseAsyncApi):
                 break

         return response
+
+    @classmethod
+    def _fill_resource_id(cls, phrase_id: str, **kwargs):
+        resources_list: list = []
+        if phrase_id is not None and len(phrase_id) > 0:
+            item = {'resource_id': phrase_id, 'resource_type': 'asr_phrase'}
+            resources_list.append(item)
+
+        if len(resources_list) > 0:
+            kwargs['resources'] = resources_list
+
+        return kwargs
+
+    @classmethod
+    def _tidy_kwargs(cls, **kwargs):
+        for k in kwargs.copy():
+            if kwargs[k] is None:
+                kwargs.pop(k, None)
+        return kwargs
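The same phrase plumbing on the batch side: _fill_resource_id() rewrites phrase_id into kwargs['resources'] before the request is launched. A sketch with placeholder model id and URL, assuming BaseAsyncApi.wait() accepts the returned task_id:

from dashscope.audio.asr import Transcription

task = Transcription.async_call(
    model='paraformer-v1',  # assumed model id
    file_urls=['https://example.com/audio/sample.wav'],
    phrase_id='your-phrase-id',
    channel_id=[0])
# Poll until the batch task finishes.
response = Transcription.wait(task=task.output.task_id)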
dashscope/client/base_api.py
CHANGED

@@ -2,9 +2,8 @@ import time
 from http import HTTPStatus
 from typing import List, Union

-import requests
-
 import dashscope
+import requests
 from dashscope.api_entities.api_request_factory import _build_api_request
 from dashscope.api_entities.dashscope_response import DashScopeAPIResponse
 from dashscope.common.api_key import get_default_api_key

@@ -231,8 +230,10 @@ class BaseAsyncApi(AsyncTaskGetMixin):
                 for example: 20230420000000. Defaults to None.
             end_time (str, optional): The tasks end time,
                 for example: 20230420000000. Defaults to None.
-            model_name (str, optional): The tasks model name.
-
+            model_name (str, optional): The tasks model name.
+                Defaults to None.
+            api_key_id (str, optional): The tasks api-key-id.
+                Defaults to None.
             region (str, optional): The service region,
                 for example: cn-beijing. Defaults to None.
             status (str, optional): The status of tasks[PENDING,

@@ -321,9 +322,12 @@ class BaseAsyncApi(AsyncTaskGetMixin):
         step = 0
         while True:
             step += 1
-            # we start by querying once every second, and double
-            #
-            #
+            # we start by querying once every second, and double
+            # the query interval after every 3(increment_steps)
+            # intervals, until we hit the max waiting interval
+            # of 5(seconds)
+            # TODO: investigate if we can use long-poll
+            # (server side return immediately when ready)
             if wait_seconds < max_wait_seconds and step % increment_steps == 0:
                 wait_seconds = min(wait_seconds * 2, max_wait_seconds)
             rsp = cls._get(task_id, api_key)

@@ -460,6 +464,7 @@ class GetMixin():
     def get(cls,
             target,
             api_key: str = None,
+            params: dict = {},
             **kwargs) -> DashScopeAPIResponse:
         """Get object information.

@@ -473,7 +478,7 @@ class GetMixin():
         """
         url = join_url(dashscope.base_http_api_url, cls.SUB_PATH.lower(),
                        target)
-        return _get(url, api_key, **kwargs)
+        return _get(url, api_key=api_key, params=params, **kwargs)


 class GetStatusMixin():
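The restored comment describes the exponential backoff used while polling an async task. A self-contained sketch of that schedule (illustration only, not SDK code):

import time  # the real loop sleeps between polls

def poll_intervals(increment_steps=3, max_wait_seconds=5, polls=10):
    # Mirrors the BaseAsyncApi loop: start at 1s, double after every
    # `increment_steps` polls, cap at `max_wait_seconds`.
    wait_seconds, step = 1, 0
    for _ in range(polls):
        step += 1
        if wait_seconds < max_wait_seconds and step % increment_steps == 0:
            wait_seconds = min(wait_seconds * 2, max_wait_seconds)
        yield wait_seconds

print(list(poll_intervals()))  # [1, 1, 2, 2, 2, 4, 4, 4, 5, 5]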
dashscope/common/constants.py
CHANGED

@@ -28,9 +28,12 @@ SERVICE_503_MESSAGE = 'Service temporarily unavailable, possibly overloaded or n
 WEBSOCKET_ERROR_CODE = 44
 SSE_CONTENT_TYPE = 'text/event-stream'
 DEPRECATED_MESSAGE = 'history and auto_history are deprecated for qwen serial models and will be remove in future, use messages'  # noqa E501
-SCENE =
+SCENE = 'scene'
 MESSAGE = 'message'
-
+REQUEST_CONTENT_TEXT = 'text'
+REQUEST_CONTENT_IMAGE = 'image'
+REQUEST_CONTENT_AUDIO = 'audio'
+FILE_PATH_SCHEMA = 'file://'

 REPEATABLE_STATUS = [
     HTTPStatus.SERVICE_UNAVAILABLE, HTTPStatus.GATEWAY_TIMEOUT
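The new FILE_PATH_SCHEMA constant gives the SDK a marker for local files that should be uploaded (see the new dashscope/utils/oss_utils.py in the file list). A sketch of the check it enables; the usage pattern is an assumption, not code from this diff:

FILE_PATH_SCHEMA = 'file://'

path = 'file:///tmp/sample.wav'
if path.startswith(FILE_PATH_SCHEMA):
    # Strip the schema to get the path to upload.
    local_path = path[len(FILE_PATH_SCHEMA):]
    print(local_path)  # /tmp/sample.wav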
dashscope/common/error.py
CHANGED
dashscope/common/utils.py
CHANGED

@@ -8,7 +8,6 @@ from urllib.parse import urlparse

 import aiohttp
 import requests
-
 from dashscope.api_entities.dashscope_response import DashScopeAPIResponse
 from dashscope.common.api_key import get_default_api_key
 from dashscope.version import __version__

@@ -97,6 +96,16 @@ def async_to_sync(async_generator):
         yield message


+def get_user_agent():
+    ua = 'dashscope/%s; python/%s; platform/%s; processor/%s' % (
+        __version__,
+        platform.python_version(),
+        platform.platform(),
+        platform.processor(),
+    )
+    return ua
+
+
 def default_headers(api_key: str = None) -> Dict[str, str]:
     ua = 'dashscope/%s; python/%s; platform/%s; processor/%s' % (
         __version__,

@@ -117,7 +126,8 @@ def join_url(base_url, *args):
         base_url = base_url + '/'
     url = base_url
     for arg in args:
-        url += arg + '/'
+        if arg:
+            url += arg + '/'
     return url[:-1]
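The join_url fix skips falsy segments instead of producing a double slash. A quick check of the new behavior, grounded directly in the function above:

from dashscope.common.utils import join_url

print(join_url('https://dashscope.aliyuncs.com/api/v1', 'tasks', '', 'abc'))
# https://dashscope.aliyuncs.com/api/v1/tasks/abc  (the empty segment is skipped)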
dashscope/embeddings/batch_text_embedding.py
CHANGED

@@ -15,6 +15,7 @@ class BatchTextEmbedding(BaseAsyncApi):
     """
     class Models:
         text_embedding_async_v1 = 'text-embedding-async-v1'
+        text_embedding_async_v2 = 'text-embedding-async-v2'

     @classmethod
     def call(cls,

@@ -26,7 +27,7 @@ class BatchTextEmbedding(BaseAsyncApi):

         Args:
             model (str): The model, reference ``Models``.
-            url (Any): The async request file url, which contains text
+            url (Any): The async request file url, which contains text
                 to embedding line by line.
             api_key (str, optional): The api api_key. Defaults to None.
             **kwargs:

@@ -58,7 +59,7 @@ class BatchTextEmbedding(BaseAsyncApi):

         Args:
             model (str): The model, reference ``Models``.
-            url (Any): The async request file url, which contains text
+            url (Any): The async request file url, which contains text
                 to embedding line by line.
             api_key (str, optional): The api api_key. Defaults to None.
             **kwargs:
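Beyond docstring whitespace fixes, this file only gains a model constant. A sketch of selecting it; the import path and input URL are assumptions:

from dashscope import BatchTextEmbedding  # import path assumed

task = BatchTextEmbedding.async_call(
    model=BatchTextEmbedding.Models.text_embedding_async_v2,
    url='https://example.com/texts-one-per-line.txt')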
dashscope/embeddings/multimodal_embedding.py
CHANGED

@@ -1,11 +1,12 @@
 from dataclasses import dataclass
 from typing import List

-from dashscope.api_entities.dashscope_response import (DashScopeAPIResponse,
+from dashscope.api_entities.dashscope_response import (DashScopeAPIResponse,
                                                        DictMixin)
 from dashscope.client.base_api import BaseApi
 from dashscope.common.error import InputRequired, ModelRequired
 from dashscope.common.utils import _get_task_group_and_task
+from dashscope.utils.oss_utils import preprocess_message_element


 @dataclass(init=False)

@@ -15,30 +16,34 @@ class MultiModalEmbeddingItemBase(DictMixin):
     def __init__(self, factor: float, **kwargs):
         super().__init__(factor=factor, **kwargs)

+
 @dataclass(init=False)
 class MultiModalEmbeddingItemText(MultiModalEmbeddingItemBase):
     text: str
-
-    def __init__(self, text: str,
+
+    def __init__(self, text: str, factor: float, **kwargs):
         super().__init__(factor, **kwargs)
         self.text = text

+
 @dataclass(init=False)
 class MultiModalEmbeddingItemImage(MultiModalEmbeddingItemBase):
     image: str
-
+
     def __init__(self, image: str, factor: float, **kwargs):
         super().__init__(factor, **kwargs)
         self.image = image
-
+
+
 @dataclass(init=False)
 class MultiModalEmbeddingItemAudio(MultiModalEmbeddingItemBase):
     audio: str
-
+
     def __init__(self, audio: str, factor: float, **kwargs):
         super().__init__(factor, **kwargs)
         self.audio = audio
-
+
+
 class MultiModalEmbedding(BaseApi):
     task = 'multimodal-embedding'

@@ -46,7 +51,10 @@ class MultiModalEmbedding(BaseApi):
         multimodal_embedding_one_peace_v1 = 'multimodal-embedding-one-peace-v1'

     @classmethod
-    def call(cls,
+    def call(cls,
+             model: str,
+             input: List[MultiModalEmbeddingItemBase],
+             api_key: str = None,
              **kwargs) -> DashScopeAPIResponse:
         """Get embedding multimodal contents..

@@ -55,7 +63,7 @@ class MultiModalEmbedding(BaseApi):
             input (List[MultiModalEmbeddingElement]): The embedding elements,
                 every element include data, modal, factor field.
             **kwargs:
-                auto_truncation(bool, `optional`): Automatically truncate
+                auto_truncation(bool, `optional`): Automatically truncate
                     audio longer than 15 seconds or text longer than 70 words.
                     Default to false(Too long input will result in failure).

@@ -67,6 +75,11 @@ class MultiModalEmbedding(BaseApi):
         if model is None or not model:
             raise ModelRequired('Model is required!')
         embedding_input = {}
+        has_upload = cls._preprocess_message_inputs(model, input, api_key)
+        if has_upload:
+            headers = kwargs.pop('headers', {})
+            headers['X-DashScope-OssResourceResolve'] = 'enable'
+            kwargs['headers'] = headers
         embedding_input['contents'] = input
         kwargs.pop('stream', False)  # not support streaming output.
         task_group, function = _get_task_group_and_task(__name__)

@@ -76,3 +89,18 @@ class MultiModalEmbedding(BaseApi):
                             task=MultiModalEmbedding.task,
                             function=function,
                             **kwargs)
+
+    @classmethod
+    def _preprocess_message_inputs(cls, model: str, input: List[dict],
+                                   api_key: str):
+        """preprocess following inputs
+        input = [{'factor': 1, 'text': 'hello'},
+                 {'factor': 2, 'audio': ''},
+                 {'factor': 3, 'image': ''}]
+        """
+        has_upload = False
+        for elem in input:
+            is_upload = preprocess_message_element(model, elem, api_key)
+            if is_upload and not has_upload:
+                has_upload = True
+            return has_upload
dashscope/finetune.py
CHANGED

@@ -43,6 +43,8 @@ class FineTune(CreateMixin, CancelMixin, DeleteMixin, ListMixin,
         }
         if mode is not None:
             request['training_type'] = mode
+        if 'finetuned_output' in kwargs:
+            request['finetuned_output'] = kwargs['finetuned_output']
         return super().call(request, api_key, **kwargs)

     @classmethod
dashscope/nlp/understanding.py
CHANGED

@@ -10,20 +10,17 @@ class Understanding(BaseApi):
     """API for AI-Generated Content(AIGC) models.

     """
-
     class Models:
         opennlu_v1 = 'opennlu-v1'

     @classmethod
-    def call(
-            cls,
-            model: str,
-            sentence: str = None,
-            labels: str = None,
-            task: str = None,
-            api_key: str = None,
-            **kwargs
-    ) -> DashScopeAPIResponse:
+    def call(cls,
+             model: str,
+             sentence: str = None,
+             labels: str = None,
+             task: str = None,
+             api_key: str = None,
+             **kwargs) -> DashScopeAPIResponse:
         """Call generation model service.

         Args:

@@ -37,7 +34,8 @@ class Understanding(BaseApi):
         Returns:
             DashScopeAPIResponse: The understanding result.
         """
-        if (sentence is None or not sentence) or (labels is None
+        if (sentence is None or not sentence) or (labels is None
+                                                  or not labels):
             raise InputRequired('sentence and labels is required!')
         if model is None or not model:
             raise ModelRequired('Model is required!')

@@ -57,11 +55,8 @@ class Understanding(BaseApi):
     @classmethod
     def _build_input_parameters(cls, model, sentence, labels, task, **kwargs):
         parameters = {}
-        input = {
-            "sentence": sentence,
-            "labels": labels
-        }
+        input = {'sentence': sentence, 'labels': labels}
         if task is not None and task:
-            input[
+            input['task'] = task

         return input, {**parameters, **kwargs}
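With the reformatted signature, a call looks like this; the model constant comes from the diff, while the export path, sentence, labels, and task values are placeholders:

from dashscope import Understanding  # export path assumed

rsp = Understanding.call(model=Understanding.Models.opennlu_v1,
                         sentence='This phone has great battery life.',
                         labels='positive,negative',
                         task='classification')
print(rsp.output)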
dashscope/tokenizers/__init__.py
CHANGED