dashscope 1.8.0__py3-none-any.whl → 1.25.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. dashscope/__init__.py +61 -14
  2. dashscope/aigc/__init__.py +10 -3
  3. dashscope/aigc/chat_completion.py +282 -0
  4. dashscope/aigc/code_generation.py +145 -0
  5. dashscope/aigc/conversation.py +71 -12
  6. dashscope/aigc/generation.py +288 -16
  7. dashscope/aigc/image_synthesis.py +473 -31
  8. dashscope/aigc/multimodal_conversation.py +299 -14
  9. dashscope/aigc/video_synthesis.py +610 -0
  10. dashscope/api_entities/aiohttp_request.py +8 -5
  11. dashscope/api_entities/api_request_data.py +4 -2
  12. dashscope/api_entities/api_request_factory.py +68 -20
  13. dashscope/api_entities/base_request.py +20 -3
  14. dashscope/api_entities/chat_completion_types.py +344 -0
  15. dashscope/api_entities/dashscope_response.py +243 -15
  16. dashscope/api_entities/encryption.py +179 -0
  17. dashscope/api_entities/http_request.py +216 -62
  18. dashscope/api_entities/websocket_request.py +43 -34
  19. dashscope/app/__init__.py +5 -0
  20. dashscope/app/application.py +203 -0
  21. dashscope/app/application_response.py +246 -0
  22. dashscope/assistants/__init__.py +16 -0
  23. dashscope/assistants/assistant_types.py +175 -0
  24. dashscope/assistants/assistants.py +311 -0
  25. dashscope/assistants/files.py +197 -0
  26. dashscope/audio/__init__.py +4 -2
  27. dashscope/audio/asr/__init__.py +17 -1
  28. dashscope/audio/asr/asr_phrase_manager.py +203 -0
  29. dashscope/audio/asr/recognition.py +167 -27
  30. dashscope/audio/asr/transcription.py +107 -14
  31. dashscope/audio/asr/translation_recognizer.py +1006 -0
  32. dashscope/audio/asr/vocabulary.py +177 -0
  33. dashscope/audio/qwen_asr/__init__.py +7 -0
  34. dashscope/audio/qwen_asr/qwen_transcription.py +189 -0
  35. dashscope/audio/qwen_omni/__init__.py +11 -0
  36. dashscope/audio/qwen_omni/omni_realtime.py +524 -0
  37. dashscope/audio/qwen_tts/__init__.py +5 -0
  38. dashscope/audio/qwen_tts/speech_synthesizer.py +77 -0
  39. dashscope/audio/qwen_tts_realtime/__init__.py +10 -0
  40. dashscope/audio/qwen_tts_realtime/qwen_tts_realtime.py +355 -0
  41. dashscope/audio/tts/__init__.py +2 -0
  42. dashscope/audio/tts/speech_synthesizer.py +5 -0
  43. dashscope/audio/tts_v2/__init__.py +12 -0
  44. dashscope/audio/tts_v2/enrollment.py +179 -0
  45. dashscope/audio/tts_v2/speech_synthesizer.py +886 -0
  46. dashscope/cli.py +157 -37
  47. dashscope/client/base_api.py +652 -87
  48. dashscope/common/api_key.py +2 -0
  49. dashscope/common/base_type.py +135 -0
  50. dashscope/common/constants.py +13 -16
  51. dashscope/common/env.py +2 -0
  52. dashscope/common/error.py +58 -22
  53. dashscope/common/logging.py +2 -0
  54. dashscope/common/message_manager.py +2 -0
  55. dashscope/common/utils.py +276 -46
  56. dashscope/customize/__init__.py +0 -0
  57. dashscope/customize/customize_types.py +192 -0
  58. dashscope/customize/deployments.py +146 -0
  59. dashscope/customize/finetunes.py +234 -0
  60. dashscope/embeddings/__init__.py +5 -1
  61. dashscope/embeddings/batch_text_embedding.py +208 -0
  62. dashscope/embeddings/batch_text_embedding_response.py +65 -0
  63. dashscope/embeddings/multimodal_embedding.py +118 -10
  64. dashscope/embeddings/text_embedding.py +13 -1
  65. dashscope/{file.py → files.py} +19 -4
  66. dashscope/io/input_output.py +2 -0
  67. dashscope/model.py +11 -2
  68. dashscope/models.py +43 -0
  69. dashscope/multimodal/__init__.py +20 -0
  70. dashscope/multimodal/dialog_state.py +56 -0
  71. dashscope/multimodal/multimodal_constants.py +28 -0
  72. dashscope/multimodal/multimodal_dialog.py +648 -0
  73. dashscope/multimodal/multimodal_request_params.py +313 -0
  74. dashscope/multimodal/tingwu/__init__.py +10 -0
  75. dashscope/multimodal/tingwu/tingwu.py +80 -0
  76. dashscope/multimodal/tingwu/tingwu_realtime.py +579 -0
  77. dashscope/nlp/__init__.py +0 -0
  78. dashscope/nlp/understanding.py +64 -0
  79. dashscope/protocol/websocket.py +3 -0
  80. dashscope/rerank/__init__.py +0 -0
  81. dashscope/rerank/text_rerank.py +69 -0
  82. dashscope/resources/qwen.tiktoken +151643 -0
  83. dashscope/threads/__init__.py +26 -0
  84. dashscope/threads/messages/__init__.py +0 -0
  85. dashscope/threads/messages/files.py +113 -0
  86. dashscope/threads/messages/messages.py +220 -0
  87. dashscope/threads/runs/__init__.py +0 -0
  88. dashscope/threads/runs/runs.py +501 -0
  89. dashscope/threads/runs/steps.py +112 -0
  90. dashscope/threads/thread_types.py +665 -0
  91. dashscope/threads/threads.py +212 -0
  92. dashscope/tokenizers/__init__.py +7 -0
  93. dashscope/tokenizers/qwen_tokenizer.py +111 -0
  94. dashscope/tokenizers/tokenization.py +125 -0
  95. dashscope/tokenizers/tokenizer.py +45 -0
  96. dashscope/tokenizers/tokenizer_base.py +32 -0
  97. dashscope/utils/__init__.py +0 -0
  98. dashscope/utils/message_utils.py +838 -0
  99. dashscope/utils/oss_utils.py +243 -0
  100. dashscope/utils/param_utils.py +29 -0
  101. dashscope/version.py +3 -1
  102. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/METADATA +53 -50
  103. dashscope-1.25.6.dist-info/RECORD +112 -0
  104. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/WHEEL +1 -1
  105. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/entry_points.txt +0 -1
  106. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info/licenses}/LICENSE +2 -4
  107. dashscope/deployment.py +0 -129
  108. dashscope/finetune.py +0 -149
  109. dashscope-1.8.0.dist-info/RECORD +0 -49
  110. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,17 @@
1
- from typing import Generator, List, Union
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
2
 
3
- from dashscope.api_entities.dashscope_response import (MultiModalConversationResponse)
4
- from dashscope.client.base_api import BaseApi
3
+ import copy
4
+ from typing import AsyncGenerator, Generator, List, Union
5
+
6
+ from dashscope.api_entities.dashscope_response import \
7
+ MultiModalConversationResponse
8
+ from dashscope.client.base_api import BaseAioApi, BaseApi
5
9
  from dashscope.common.error import InputRequired, ModelRequired
6
10
  from dashscope.common.utils import _get_task_group_and_task
11
+ from dashscope.utils.oss_utils import preprocess_message_element
12
+ from dashscope.utils.param_utils import ParamUtil
13
+ from dashscope.utils.message_utils import merge_multimodal_single_response
14
+
7
15
 
8
16
  class MultiModalConversation(BaseApi):
9
17
  """MultiModal conversational robot interface.
@@ -18,10 +26,15 @@ class MultiModalConversation(BaseApi):
18
26
  def call(
19
27
  cls,
20
28
  model: str,
21
- messages: List,
29
+ messages: List = None,
22
30
  api_key: str = None,
31
+ workspace: str = None,
32
+ text: str = None,
33
+ voice: str = None,
34
+ language_type: str = None,
23
35
  **kwargs
24
- ) -> Union[MultiModalConversationResponse, Generator[MultiModalConversationResponse, None, None]]:
36
+ ) -> Union[MultiModalConversationResponse, Generator[
37
+ MultiModalConversationResponse, None, None]]:
25
38
  """Call the conversation model service.
26
39
 
27
40
  Args:
@@ -46,20 +59,26 @@ class MultiModalConversation(BaseApi):
46
59
  api_key (str, optional): The api api_key, can be None,
47
60
  if None, will retrieve by rule [1].
48
61
  [1]: https://help.aliyun.com/zh/dashscope/developer-reference/api-key-settings. # noqa E501
62
+ workspace (str): The dashscope workspace id.
63
+ text (str): The text to generate.
64
+ voice (str): The voice name of qwen tts, include 'Cherry'/'Ethan'/'Sunny'/'Dylan' and so on,
65
+ you can get the total voice list : https://help.aliyun.com/zh/model-studio/qwen-tts.
66
+ language_type (str): The synthesized language type, default is 'auto', useful for [qwen3-tts].
49
67
  **kwargs:
50
68
  stream(bool, `optional`): Enable server-sent events
51
69
  (ref: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events) # noqa E501
52
- the result will back partially[qwen-v1,bailian-v1].
70
+ the result will back partially[qwen-turbo,bailian-v1].
53
71
  max_length(int, `optional`): The maximum length of tokens to
54
72
  generate. The token count of your prompt plus max_length
55
73
  cannot exceed the model's context length. Most models
56
- have a context length of 2000 tokens[qwen-v1,bailian-v1]. # noqa E501
74
+ have a context length of 2000 tokens[qwen-turbo,bailian-v1]. # noqa E501
57
75
  top_p(float, `optional`): A sampling strategy, called nucleus
58
76
  sampling, where the model considers the results of the
59
77
  tokens with top_p probability mass. So 0.1 means only
60
78
  the tokens comprising the top 10% probability mass are
61
- considered[qwen-v1,bailian-v1].
62
- top_k(float, `optional`):
79
+ considered[qwen-turbo,bailian-v1].
80
+ top_k(float, `optional`):
81
+
63
82
 
64
83
  Raises:
65
84
  InvalidInput: The history and auto_history are mutually exclusive.
@@ -69,22 +88,288 @@ class MultiModalConversation(BaseApi):
69
88
  Generator[MultiModalConversationResponse, None, None]]: If
70
89
  stream is True, return Generator, otherwise MultiModalConversationResponse.
71
90
  """
72
- if (messages is None or not messages):
73
- raise InputRequired('prompt or messages is required!')
74
91
  if model is None or not model:
75
92
  raise ModelRequired('Model is required!')
76
93
  task_group, _ = _get_task_group_and_task(__name__)
77
- input = {'messages': messages}
94
+ input = {}
95
+ msg_copy = None
96
+
97
+ if messages is not None and messages:
98
+ msg_copy = copy.deepcopy(messages)
99
+ has_upload = cls._preprocess_messages(model, msg_copy, api_key)
100
+ if has_upload:
101
+ headers = kwargs.pop('headers', {})
102
+ headers['X-DashScope-OssResourceResolve'] = 'enable'
103
+ kwargs['headers'] = headers
104
+
105
+ if text is not None and text:
106
+ input.update({'text': text})
107
+ if voice is not None and voice:
108
+ input.update({'voice': voice})
109
+ if language_type is not None and language_type:
110
+ input.update({'language_type': language_type})
111
+ if msg_copy is not None:
112
+ input.update({'messages': msg_copy})
113
+
114
+ # Check if we need to merge incremental output
115
+ is_incremental_output = kwargs.get('incremental_output', None)
116
+ to_merge_incremental_output = False
117
+ is_stream = kwargs.get('stream', False)
118
+ if (ParamUtil.should_modify_incremental_output(model) and
119
+ is_stream and is_incremental_output is not None and is_incremental_output is False):
120
+ to_merge_incremental_output = True
121
+ kwargs['incremental_output'] = True
122
+
123
+ # Pass incremental_to_full flag via headers user-agent
124
+ if 'headers' not in kwargs:
125
+ kwargs['headers'] = {}
126
+ flag = '1' if to_merge_incremental_output else '0'
127
+ kwargs['headers']['user-agent'] = f'incremental_to_full/{flag}'
128
+
78
129
  response = super().call(model=model,
79
130
  task_group=task_group,
80
131
  task=MultiModalConversation.task,
81
132
  function=MultiModalConversation.function,
82
133
  api_key=api_key,
83
134
  input=input,
135
+ workspace=workspace,
84
136
  **kwargs)
137
+ if is_stream:
138
+ if to_merge_incremental_output:
139
+ # Extract n parameter for merge logic
140
+ n = kwargs.get('n', 1)
141
+ return cls._merge_multimodal_response(response, n)
142
+ else:
143
+ return (MultiModalConversationResponse.from_api_response(rsp)
144
+ for rsp in response)
145
+ else:
146
+ return MultiModalConversationResponse.from_api_response(response)
147
+
148
+ @classmethod
149
+ def _preprocess_messages(cls, model: str, messages: List[dict],
150
+ api_key: str):
151
+ """
152
+ messages = [
153
+ {
154
+ "role": "user",
155
+ "content": [
156
+ {"image": ""},
157
+ {"text": ""},
158
+ ]
159
+ }
160
+ ]
161
+ """
162
+ has_upload = False
163
+ upload_certificate = None
164
+
165
+ for message in messages:
166
+ content = message['content']
167
+ for elem in content:
168
+ if not isinstance(elem,
169
+ (int, float, bool, str, bytes, bytearray)):
170
+ is_upload, upload_certificate = preprocess_message_element(
171
+ model, elem, api_key, upload_certificate)
172
+ if is_upload and not has_upload:
173
+ has_upload = True
174
+ return has_upload
175
+
176
+ @classmethod
177
+ def _merge_multimodal_response(cls, response, n=1) -> Generator[MultiModalConversationResponse, None, None]:
178
+ """Merge incremental response chunks to simulate non-incremental output."""
179
+ accumulated_data = {}
180
+
181
+ for rsp in response:
182
+ parsed_response = MultiModalConversationResponse.from_api_response(rsp)
183
+ result = merge_multimodal_single_response(parsed_response, accumulated_data, n)
184
+ if result is True:
185
+ yield parsed_response
186
+ elif isinstance(result, list):
187
+ # Multiple responses to yield (for n>1 non-stop cases)
188
+ for resp in result:
189
+ yield resp
190
+
191
+
192
+ class AioMultiModalConversation(BaseAioApi):
193
+ """Async MultiModal conversational robot interface.
194
+ """
195
+ task = 'multimodal-generation'
196
+ function = 'generation'
197
+
198
+ class Models:
199
+ qwen_vl_chat_v1 = 'qwen-vl-chat-v1'
200
+
201
+ @classmethod
202
+ async def call(
203
+ cls,
204
+ model: str,
205
+ messages: List = None,
206
+ api_key: str = None,
207
+ workspace: str = None,
208
+ text: str = None,
209
+ voice: str = None,
210
+ language_type: str = None,
211
+ **kwargs
212
+ ) -> Union[MultiModalConversationResponse, AsyncGenerator[
213
+ MultiModalConversationResponse, None]]:
214
+ """Call the conversation model service asynchronously.
215
+
216
+ Args:
217
+ model (str): The requested model, such as 'qwen-multimodal-v1'
218
+ messages (list): The generation messages.
219
+ examples:
220
+ [
221
+ {
222
+ "role": "system",
223
+ "content": [
224
+ {"text": "你是达摩院的生活助手机器人。"}
225
+ ]
226
+ },
227
+ {
228
+ "role": "user",
229
+ "content": [
230
+ {"image": "http://XXXX"},
231
+ {"text": "这个图片是哪里?"},
232
+ ]
233
+ }
234
+ ]
235
+ api_key (str, optional): The api api_key, can be None,
236
+ if None, will retrieve by rule [1].
237
+ [1]: https://help.aliyun.com/zh/dashscope/developer-reference/api-key-settings. # noqa E501
238
+ workspace (str): The dashscope workspace id.
239
+ text (str): The text to generate.
240
+ voice (str): The voice name of qwen tts, include 'Cherry'/'Ethan'/'Sunny'/'Dylan' and so on,
241
+ you can get the total voice list : https://help.aliyun.com/zh/model-studio/qwen-tts.
242
+ language_type (str): The synthesized language type, default is 'auto', useful for [qwen3-tts].
243
+ **kwargs:
244
+ stream(bool, `optional`): Enable server-sent events
245
+ (ref: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events) # noqa E501
246
+ the result will back partially[qwen-turbo,bailian-v1].
247
+ max_length(int, `optional`): The maximum length of tokens to
248
+ generate. The token count of your prompt plus max_length
249
+ cannot exceed the model's context length. Most models
250
+ have a context length of 2000 tokens[qwen-turbo,bailian-v1]. # noqa E501
251
+ top_p(float, `optional`): A sampling strategy, called nucleus
252
+ sampling, where the model considers the results of the
253
+ tokens with top_p probability mass. So 0.1 means only
254
+ the tokens comprising the top 10% probability mass are
255
+ considered[qwen-turbo,bailian-v1].
256
+ top_k(float, `optional`):
257
+
258
+ Raises:
259
+ InvalidInput: The history and auto_history are mutually exclusive.
260
+
261
+ Returns:
262
+ Union[MultiModalConversationResponse,
263
+ AsyncGenerator[MultiModalConversationResponse, None]]: If
264
+ stream is True, return AsyncGenerator, otherwise MultiModalConversationResponse.
265
+ """
266
+ if model is None or not model:
267
+ raise ModelRequired('Model is required!')
268
+ task_group, _ = _get_task_group_and_task(__name__)
269
+ input = {}
270
+ msg_copy = None
271
+
272
+ if messages is not None and messages:
273
+ msg_copy = copy.deepcopy(messages)
274
+ has_upload = cls._preprocess_messages(model, msg_copy, api_key)
275
+ if has_upload:
276
+ headers = kwargs.pop('headers', {})
277
+ headers['X-DashScope-OssResourceResolve'] = 'enable'
278
+ kwargs['headers'] = headers
279
+
280
+ if text is not None and text:
281
+ input.update({'text': text})
282
+ if voice is not None and voice:
283
+ input.update({'voice': voice})
284
+ if language_type is not None and language_type:
285
+ input.update({'language_type': language_type})
286
+ if msg_copy is not None:
287
+ input.update({'messages': msg_copy})
288
+
289
+ # Check if we need to merge incremental output
290
+ is_incremental_output = kwargs.get('incremental_output', None)
291
+ to_merge_incremental_output = False
85
292
  is_stream = kwargs.get('stream', False)
293
+ if (ParamUtil.should_modify_incremental_output(model) and
294
+ is_stream and is_incremental_output is not None and is_incremental_output is False):
295
+ to_merge_incremental_output = True
296
+ kwargs['incremental_output'] = True
297
+
298
+ # Pass incremental_to_full flag via headers user-agent
299
+ if 'headers' not in kwargs:
300
+ kwargs['headers'] = {}
301
+ flag = '1' if to_merge_incremental_output else '0'
302
+ kwargs['headers']['user-agent'] = (
303
+ kwargs['headers'].get('user-agent', '') +
304
+ f'; incremental_to_full/{flag}'
305
+ )
306
+
307
+ response = await super().call(model=model,
308
+ task_group=task_group,
309
+ task=AioMultiModalConversation.task,
310
+ function=AioMultiModalConversation.function,
311
+ api_key=api_key,
312
+ input=input,
313
+ workspace=workspace,
314
+ **kwargs)
86
315
  if is_stream:
87
- return (MultiModalConversationResponse.from_api_response(rsp)
88
- for rsp in response)
316
+ if to_merge_incremental_output:
317
+ # Extract n parameter for merge logic
318
+ n = kwargs.get('n', 1)
319
+ return cls._merge_multimodal_response(response, n)
320
+ else:
321
+ return cls._stream_responses(response)
89
322
  else:
90
323
  return MultiModalConversationResponse.from_api_response(response)
324
+
325
+ @classmethod
326
+ def _preprocess_messages(cls, model: str, messages: List[dict],
327
+ api_key: str):
328
+ """
329
+ messages = [
330
+ {
331
+ "role": "user",
332
+ "content": [
333
+ {"image": ""},
334
+ {"text": ""},
335
+ ]
336
+ }
337
+ ]
338
+ """
339
+ has_upload = False
340
+ upload_certificate = None
341
+
342
+ for message in messages:
343
+ content = message['content']
344
+ for elem in content:
345
+ if not isinstance(elem,
346
+ (int, float, bool, str, bytes, bytearray)):
347
+ is_upload, upload_certificate = preprocess_message_element(
348
+ model, elem, api_key, upload_certificate)
349
+ if is_upload and not has_upload:
350
+ has_upload = True
351
+ return has_upload
352
+
353
+ @classmethod
354
+ async def _stream_responses(cls, response) -> AsyncGenerator[MultiModalConversationResponse, None]:
355
+ """Convert async response stream to MultiModalConversationResponse stream."""
356
+ # Type hint: when stream=True, response is actually an AsyncIterable
357
+ async for rsp in response: # type: ignore
358
+ yield MultiModalConversationResponse.from_api_response(rsp)
359
+
360
+ @classmethod
361
+ async def _merge_multimodal_response(cls, response, n=1) -> AsyncGenerator[MultiModalConversationResponse, None]:
362
+ """Async version of merge incremental response chunks."""
363
+ accumulated_data = {}
364
+
365
+ async for rsp in response:
366
+ parsed_response = MultiModalConversationResponse.from_api_response(rsp)
367
+ result = merge_multimodal_single_response(parsed_response, accumulated_data, n)
368
+ if result is True:
369
+ yield parsed_response
370
+ elif isinstance(result, list):
371
+ # Multiple responses to yield (for n>1 non-stop cases)
372
+ for resp in result:
373
+ yield resp
374
+
375
+