dashscope 1.8.0__py3-none-any.whl → 1.25.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. dashscope/__init__.py +61 -14
  2. dashscope/aigc/__init__.py +10 -3
  3. dashscope/aigc/chat_completion.py +282 -0
  4. dashscope/aigc/code_generation.py +145 -0
  5. dashscope/aigc/conversation.py +71 -12
  6. dashscope/aigc/generation.py +288 -16
  7. dashscope/aigc/image_synthesis.py +473 -31
  8. dashscope/aigc/multimodal_conversation.py +299 -14
  9. dashscope/aigc/video_synthesis.py +610 -0
  10. dashscope/api_entities/aiohttp_request.py +8 -5
  11. dashscope/api_entities/api_request_data.py +4 -2
  12. dashscope/api_entities/api_request_factory.py +68 -20
  13. dashscope/api_entities/base_request.py +20 -3
  14. dashscope/api_entities/chat_completion_types.py +344 -0
  15. dashscope/api_entities/dashscope_response.py +243 -15
  16. dashscope/api_entities/encryption.py +179 -0
  17. dashscope/api_entities/http_request.py +216 -62
  18. dashscope/api_entities/websocket_request.py +43 -34
  19. dashscope/app/__init__.py +5 -0
  20. dashscope/app/application.py +203 -0
  21. dashscope/app/application_response.py +246 -0
  22. dashscope/assistants/__init__.py +16 -0
  23. dashscope/assistants/assistant_types.py +175 -0
  24. dashscope/assistants/assistants.py +311 -0
  25. dashscope/assistants/files.py +197 -0
  26. dashscope/audio/__init__.py +4 -2
  27. dashscope/audio/asr/__init__.py +17 -1
  28. dashscope/audio/asr/asr_phrase_manager.py +203 -0
  29. dashscope/audio/asr/recognition.py +167 -27
  30. dashscope/audio/asr/transcription.py +107 -14
  31. dashscope/audio/asr/translation_recognizer.py +1006 -0
  32. dashscope/audio/asr/vocabulary.py +177 -0
  33. dashscope/audio/qwen_asr/__init__.py +7 -0
  34. dashscope/audio/qwen_asr/qwen_transcription.py +189 -0
  35. dashscope/audio/qwen_omni/__init__.py +11 -0
  36. dashscope/audio/qwen_omni/omni_realtime.py +524 -0
  37. dashscope/audio/qwen_tts/__init__.py +5 -0
  38. dashscope/audio/qwen_tts/speech_synthesizer.py +77 -0
  39. dashscope/audio/qwen_tts_realtime/__init__.py +10 -0
  40. dashscope/audio/qwen_tts_realtime/qwen_tts_realtime.py +355 -0
  41. dashscope/audio/tts/__init__.py +2 -0
  42. dashscope/audio/tts/speech_synthesizer.py +5 -0
  43. dashscope/audio/tts_v2/__init__.py +12 -0
  44. dashscope/audio/tts_v2/enrollment.py +179 -0
  45. dashscope/audio/tts_v2/speech_synthesizer.py +886 -0
  46. dashscope/cli.py +157 -37
  47. dashscope/client/base_api.py +652 -87
  48. dashscope/common/api_key.py +2 -0
  49. dashscope/common/base_type.py +135 -0
  50. dashscope/common/constants.py +13 -16
  51. dashscope/common/env.py +2 -0
  52. dashscope/common/error.py +58 -22
  53. dashscope/common/logging.py +2 -0
  54. dashscope/common/message_manager.py +2 -0
  55. dashscope/common/utils.py +276 -46
  56. dashscope/customize/__init__.py +0 -0
  57. dashscope/customize/customize_types.py +192 -0
  58. dashscope/customize/deployments.py +146 -0
  59. dashscope/customize/finetunes.py +234 -0
  60. dashscope/embeddings/__init__.py +5 -1
  61. dashscope/embeddings/batch_text_embedding.py +208 -0
  62. dashscope/embeddings/batch_text_embedding_response.py +65 -0
  63. dashscope/embeddings/multimodal_embedding.py +118 -10
  64. dashscope/embeddings/text_embedding.py +13 -1
  65. dashscope/{file.py → files.py} +19 -4
  66. dashscope/io/input_output.py +2 -0
  67. dashscope/model.py +11 -2
  68. dashscope/models.py +43 -0
  69. dashscope/multimodal/__init__.py +20 -0
  70. dashscope/multimodal/dialog_state.py +56 -0
  71. dashscope/multimodal/multimodal_constants.py +28 -0
  72. dashscope/multimodal/multimodal_dialog.py +648 -0
  73. dashscope/multimodal/multimodal_request_params.py +313 -0
  74. dashscope/multimodal/tingwu/__init__.py +10 -0
  75. dashscope/multimodal/tingwu/tingwu.py +80 -0
  76. dashscope/multimodal/tingwu/tingwu_realtime.py +579 -0
  77. dashscope/nlp/__init__.py +0 -0
  78. dashscope/nlp/understanding.py +64 -0
  79. dashscope/protocol/websocket.py +3 -0
  80. dashscope/rerank/__init__.py +0 -0
  81. dashscope/rerank/text_rerank.py +69 -0
  82. dashscope/resources/qwen.tiktoken +151643 -0
  83. dashscope/threads/__init__.py +26 -0
  84. dashscope/threads/messages/__init__.py +0 -0
  85. dashscope/threads/messages/files.py +113 -0
  86. dashscope/threads/messages/messages.py +220 -0
  87. dashscope/threads/runs/__init__.py +0 -0
  88. dashscope/threads/runs/runs.py +501 -0
  89. dashscope/threads/runs/steps.py +112 -0
  90. dashscope/threads/thread_types.py +665 -0
  91. dashscope/threads/threads.py +212 -0
  92. dashscope/tokenizers/__init__.py +7 -0
  93. dashscope/tokenizers/qwen_tokenizer.py +111 -0
  94. dashscope/tokenizers/tokenization.py +125 -0
  95. dashscope/tokenizers/tokenizer.py +45 -0
  96. dashscope/tokenizers/tokenizer_base.py +32 -0
  97. dashscope/utils/__init__.py +0 -0
  98. dashscope/utils/message_utils.py +838 -0
  99. dashscope/utils/oss_utils.py +243 -0
  100. dashscope/utils/param_utils.py +29 -0
  101. dashscope/version.py +3 -1
  102. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/METADATA +53 -50
  103. dashscope-1.25.6.dist-info/RECORD +112 -0
  104. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/WHEEL +1 -1
  105. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/entry_points.txt +0 -1
  106. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info/licenses}/LICENSE +2 -4
  107. dashscope/deployment.py +0 -129
  108. dashscope/finetune.py +0 -149
  109. dashscope-1.8.0.dist-info/RECORD +0 -49
  110. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/top_level.txt +0 -0
dashscope/aigc/conversation.py
@@ -1,12 +1,14 @@
+ # Copyright (c) Alibaba, Inc. and its affiliates.
+
+ import json
  from copy import deepcopy
  from http import HTTPStatus
- from typing import Any, Generator, List, Union
+ from typing import Any, Dict, Generator, List, Union

  from dashscope.api_entities.dashscope_response import (ConversationResponse,
  Message, Role)
  from dashscope.client.base_api import BaseApi
- from dashscope.common.constants import (DEPRECATED_MESSAGE, HISTORY, MESSAGES,
- PROMPT)
+ from dashscope.common.constants import DEPRECATED_MESSAGE, HISTORY, PROMPT
  from dashscope.common.error import InputRequired, InvalidInput, ModelRequired
  from dashscope.common.logging import logger
  from dashscope.common.utils import _get_task_group_and_task
@@ -93,11 +95,16 @@ class Conversation(BaseApi):
  task = 'generation'

  class Models:
+ """@deprecated, use qwen_turbo instead"""
  qwen_v1 = 'qwen-v1'
+ """@deprecated, use qwen_plus instead"""
  qwen_plus_v1 = 'qwen-plus-v1'

- def __init__(self,
- history: History = None) -> None:
+ qwen_turbo = 'qwen-turbo'
+ qwen_plus = 'qwen-plus'
+ qwen_max = 'qwen-max'
+
+ def __init__(self, history: History = None) -> None:
  """Init a chat.

  Args:
@@ -121,6 +128,8 @@ class Conversation(BaseApi):
  n_history: int = -1,
  api_key: str = None,
  messages: List[Message] = None,
+ plugins: Union[str, Dict[str, Any]] = None,
+ workspace: str = None,
  **kwargs
  ) -> Union[ConversationResponse, Generator[ConversationResponse, None,
  None]]:
@@ -145,25 +154,59 @@ class Conversation(BaseApi):
  [{'role': 'user',
  'content': 'The weather is fine today.'},
  {'role': 'assistant', 'content': 'Suitable for outings'}]
- **kwargs(qwen-v1, qawen-plus-v1):
+ plugins (Any): The plugin config, Can be plugins config str, or dict.
+ **kwargs(qwen-turbo, qwen-plus):
  stream(bool, `optional`): Enable server-sent events
  (ref: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events) # noqa E501
  the result will back partially.
- max_length(int, `optional`): The maximum length of tokens to
- generate. The token count of your prompt plus max_length
- cannot exceed the model's context length. Most models
- have a context length of 2000 tokens.
+ temperature(float, `optional`): Used to control the degree
+ of randomness and diversity. Specifically, the temperature
+ value controls the degree to which the probability distribution
+ of each candidate word is smoothed when generating text.
+ A higher temperature value will reduce the peak value of
+ the probability, allowing more low-probability words to be
+ selected, and the generated results will be more diverse;
+ while a lower temperature value will enhance the peak value
+ of the probability, making it easier for high-probability
+ words to be selected, the generated results are more
+ deterministic,range(0, 2) .[qwen-turbo,qwen-plus].
  top_p(float, `optional`): A sampling strategy, called nucleus
  sampling, where the model considers the results of the
  tokens with top_p probability mass. So 0.1 means only
  the tokens comprising the top 10% probability mass are
  considered.
+ top_k(int, `optional`): The size of the sample candidate set when generated. # noqa E501
+ For example, when the value is 50, only the 50 highest-scoring tokens # noqa E501
+ in a single generation form a randomly sampled candidate set. # noqa E501
+ The larger the value, the higher the randomness generated; # noqa E501
+ the smaller the value, the higher the certainty generated. # noqa E501
+ The default value is 0, which means the top_k policy is # noqa E501
+ not enabled. At this time, only the top_p policy takes effect. # noqa E501
  enable_search(bool, `optional`): Whether to enable web search(quark). # noqa E501
  Currently works best only on the first round of conversation.
- Default to False.
+ Default to False, support model: [qwen-turbo].
+ customized_model_id(str, required) The enterprise-specific
+ large model id, which needs to be generated from the
+ operation background of the enterprise-specific
+ large model product, support model: [bailian-v1].
  result_format(str, `optional`): [message|text] Set result result format. # noqa E501
  Default result is text
-
+ incremental_output(bool, `optional`): Used to control the streaming output mode. # noqa E501
+ If true, the subsequent output will include the previously input content. # noqa E501
+ Otherwise, the subsequent output will not include the previously output # noqa E501
+ content. Default false.
+ stop(list[str] or list[list[int]], `optional`): Used to control the generation to stop # noqa E501
+ when encountering setting str or token ids, the result will not include # noqa E501
+ stop words or tokens.
+ max_tokens(int, `optional`): The maximum token num expected to be output. It should be # noqa E501
+ noted that the length generated by the model will only be less than max_tokens, # noqa E501
+ not necessarily equal to it. If max_tokens is set too large, the service will # noqa E501
+ directly prompt that the length exceeds the limit. It is generally # noqa E501
+ not recommended to set this value.
+ repetition_penalty(float, `optional`): Used to control the repeatability when generating models. # noqa E501
+ Increasing repetition_penalty can reduce the duplication of model generation. # noqa E501
+ 1.0 means no punishment.
+ workspace (str): The dashscope workspace id.
  Raises:
  InputRequired: The prompt cannot be empty.
  InvalidInput: The history and auto_history are mutually exclusive.
@@ -180,6 +223,13 @@ class Conversation(BaseApi):
  if model is None or not model:
  raise ModelRequired('Model is required!')
  task_group, _ = _get_task_group_and_task(__name__)
+ if plugins is not None:
+ headers = kwargs.pop('headers', {})
+ if isinstance(plugins, str):
+ headers['X-DashScope-Plugin'] = plugins
+ else:
+ headers['X-DashScope-Plugin'] = json.dumps(plugins)
+ kwargs['headers'] = headers
  input, parameters = self._build_input_parameters(
  model, prompt, history, auto_history, n_history, messages,
  **kwargs)
@@ -189,6 +239,7 @@ class Conversation(BaseApi):
  function='generation',
  api_key=api_key,
  input=input,
+ workspace=workspace,
  **parameters)
  is_stream = kwargs.get('stream', False)
  return self._handle_response(prompt, response, is_stream)
@@ -220,6 +271,14 @@ class Conversation(BaseApi):

  def _build_input_parameters(self, model, prompt, history, auto_history,
  n_history, messages, **kwargs):
+ if model == Conversation.Models.qwen_v1:
+ logger.warning(
+ 'Model %s is deprecated, use %s instead!' %
+ (Conversation.Models.qwen_v1, Conversation.Models.qwen_turbo))
+ if model == Conversation.Models.qwen_plus_v1:
+ logger.warning('Model %s is deprecated, use %s instead!' %
+ (Conversation.Models.qwen_plus_v1,
+ Conversation.Models.qwen_plus))
  parameters = {}
  if history is not None and auto_history:
  raise InvalidInput('auto_history is True, history must None')
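The conversation.py changes above add `plugins` and `workspace` parameters and route plugin configuration into the `X-DashScope-Plugin` request header (strings pass through, dicts are JSON-encoded). A minimal usage sketch; the plugin name and workspace id are illustrative placeholders, not values taken from this diff:

```python
from http import HTTPStatus

from dashscope import Conversation

conversation = Conversation()
response = conversation.call(
    model=Conversation.Models.qwen_turbo,      # replaces the deprecated qwen-v1
    prompt='What is the weather like today?',
    plugins={'web_search': {'enable': True}},  # dict is json.dumps()-ed into X-DashScope-Plugin
    workspace='ws_example_id',                 # optional DashScope workspace id (placeholder)
    result_format='message')
if response.status_code == HTTPStatus.OK:
    print(response.output)
else:
    print(response.code, response.message)
```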
dashscope/aigc/generation.py
@@ -1,15 +1,20 @@
+ # Copyright (c) Alibaba, Inc. and its affiliates.
+
  import copy
- from typing import Any, Generator, List, Union
+ import json
+ from typing import Any, Dict, Generator, List, Union, AsyncGenerator

  from dashscope.api_entities.dashscope_response import (GenerationResponse,
  Message, Role)
- from dashscope.client.base_api import BaseApi
+ from dashscope.client.base_api import BaseAioApi, BaseApi
  from dashscope.common.constants import (CUSTOMIZED_MODEL_ID,
  DEPRECATED_MESSAGE, HISTORY, MESSAGES,
  PROMPT)
  from dashscope.common.error import InputRequired, ModelRequired
  from dashscope.common.logging import logger
  from dashscope.common.utils import _get_task_group_and_task
+ from dashscope.utils.param_utils import ParamUtil
+ from dashscope.utils.message_utils import merge_single_response


  class Generation(BaseApi):
@@ -18,10 +23,16 @@ class Generation(BaseApi):

  """
  class Models:
+ """@deprecated, use qwen_turbo instead"""
  qwen_v1 = 'qwen-v1'
+ """@deprecated, use qwen_plus instead"""
  qwen_plus_v1 = 'qwen-plus-v1'
+
  bailian_v1 = 'bailian-v1'
  dolly_12b_v2 = 'dolly-12b-v2'
+ qwen_turbo = 'qwen-turbo'
+ qwen_plus = 'qwen-plus'
+ qwen_max = 'qwen-max'

  @classmethod
  def call(
@@ -31,12 +42,14 @@ class Generation(BaseApi):
  history: list = None,
  api_key: str = None,
  messages: List[Message] = None,
+ plugins: Union[str, Dict[str, Any]] = None,
+ workspace: str = None,
  **kwargs
  ) -> Union[GenerationResponse, Generator[GenerationResponse, None, None]]:
  """Call generation model service.

  Args:
- model (str): The requested model, such as gpt3-v2
+ model (str): The requested model, such as qwen-turbo
  prompt (Any): The input prompt.
  history (list):The user provided history, deprecated
  examples:
@@ -50,29 +63,59 @@ class Generation(BaseApi):
  [{'role': 'user',
  'content': 'The weather is fine today.'},
  {'role': 'assistant', 'content': 'Suitable for outings'}]
+ plugins (Any): The plugin config. Can be plugins config str, or dict.
  **kwargs:
  stream(bool, `optional`): Enable server-sent events
  (ref: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events) # noqa E501
- the result will back partially[qwen-v1,bailian-v1].
- max_length(int, `optional`): The maximum length of tokens to
- generate. The token count of your prompt plus max_length
- cannot exceed the model's context length. Most models
- have a context length of 2000 tokens[qwen-v1,bailian-v1]. # noqa E501
+ the result will back partially[qwen-turbo,bailian-v1].
+ temperature(float, `optional`): Used to control the degree
+ of randomness and diversity. Specifically, the temperature
+ value controls the degree to which the probability distribution
+ of each candidate word is smoothed when generating text.
+ A higher temperature value will reduce the peak value of
+ the probability, allowing more low-probability words to be
+ selected, and the generated results will be more diverse;
+ while a lower temperature value will enhance the peak value
+ of the probability, making it easier for high-probability
+ words to be selected, the generated results are more
+ deterministic, range(0, 2) .[qwen-turbo,qwen-plus].
  top_p(float, `optional`): A sampling strategy, called nucleus
  sampling, where the model considers the results of the
  tokens with top_p probability mass. So 0.1 means only
  the tokens comprising the top 10% probability mass are
- considered[qwen-v1,bailian-v1].
+ considered[qwen-turbo,bailian-v1].
+ top_k(int, `optional`): The size of the sample candidate set when generated. # noqa E501
+ For example, when the value is 50, only the 50 highest-scoring tokens # noqa E501
+ in a single generation form a randomly sampled candidate set. # noqa E501
+ The larger the value, the higher the randomness generated; # noqa E501
+ the smaller the value, the higher the certainty generated. # noqa E501
+ The default value is 0, which means the top_k policy is # noqa E501
+ not enabled. At this time, only the top_p policy takes effect. # noqa E501
  enable_search(bool, `optional`): Whether to enable web search(quark). # noqa E501
  Currently works best only on the first round of conversation.
- Default to False, support model: [qwen-v1].
+ Default to False, support model: [qwen-turbo].
  customized_model_id(str, required) The enterprise-specific
  large model id, which needs to be generated from the
  operation background of the enterprise-specific
  large model product, support model: [bailian-v1].
  result_format(str, `optional`): [message|text] Set result result format. # noqa E501
  Default result is text
-
+ incremental_output(bool, `optional`): Used to control the streaming output mode. # noqa E501
+ If true, the subsequent output will include the previously input content. # noqa E501
+ Otherwise, the subsequent output will not include the previously output # noqa E501
+ content. Default false.
+ stop(list[str] or list[list[int]], `optional`): Used to control the generation to stop # noqa E501
+ when encountering setting str or token ids, the result will not include # noqa E501
+ stop words or tokens.
+ max_tokens(int, `optional`): The maximum token num expected to be output. It should be # noqa E501
+ noted that the length generated by the model will only be less than max_tokens, # noqa E501
+ not necessarily equal to it. If max_tokens is set too large, the service will # noqa E501
+ directly prompt that the length exceeds the limit. It is generally # noqa E501
+ not recommended to set this value.
+ repetition_penalty(float, `optional`): Used to control the repeatability when generating models. # noqa E501
+ Increasing repetition_penalty can reduce the duplication of model generation. # noqa E501
+ 1.0 means no punishment.
+ workspace (str): The dashscope workspace id.
  Raises:
  InvalidInput: The history and auto_history are mutually exclusive.

@@ -87,32 +130,68 @@ class Generation(BaseApi):
  if model is None or not model:
  raise ModelRequired('Model is required!')
  task_group, function = _get_task_group_and_task(__name__)
+ if plugins is not None:
+ headers = kwargs.pop('headers', {})
+ if isinstance(plugins, str):
+ headers['X-DashScope-Plugin'] = plugins
+ else:
+ headers['X-DashScope-Plugin'] = json.dumps(plugins)
+ kwargs['headers'] = headers
  input, parameters = cls._build_input_parameters(
  model, prompt, history, messages, **kwargs)
+
+ is_stream = parameters.get('stream', False)
+ # Check if we need to merge incremental output
+ is_incremental_output = kwargs.get('incremental_output', None)
+ to_merge_incremental_output = False
+ if (ParamUtil.should_modify_incremental_output(model) and
+ is_stream and is_incremental_output is False):
+ to_merge_incremental_output = True
+ parameters['incremental_output'] = True
+
+ # Pass incremental_to_full flag via headers user-agent
+ if 'headers' not in parameters:
+ parameters['headers'] = {}
+ flag = '1' if to_merge_incremental_output else '0'
+ parameters['headers']['user-agent'] = f'incremental_to_full/{flag}'
+
  response = super().call(model=model,
  task_group=task_group,
  task=Generation.task,
  function=function,
  api_key=api_key,
  input=input,
+ workspace=workspace,
  **parameters)
- is_stream = kwargs.get('stream', False)
  if is_stream:
- return (GenerationResponse.from_api_response(rsp)
- for rsp in response)
+ if to_merge_incremental_output:
+ # Extract n parameter for merge logic
+ n = parameters.get('n', 1)
+ return cls._merge_generation_response(response, n)
+ else:
+ return (GenerationResponse.from_api_response(rsp)
+ for rsp in response)
  else:
  return GenerationResponse.from_api_response(response)

  @classmethod
  def _build_input_parameters(cls, model, prompt, history, messages,
  **kwargs):
+ if model == Generation.Models.qwen_v1:
+ logger.warning(
+ 'Model %s is deprecated, use %s instead!' %
+ (Generation.Models.qwen_v1, Generation.Models.qwen_turbo))
+ if model == Generation.Models.qwen_plus_v1:
+ logger.warning(
+ 'Model %s is deprecated, use %s instead!' %
+ (Generation.Models.qwen_plus_v1, Generation.Models.qwen_plus))
  parameters = {}
  input = {}
  if history is not None:
  logger.warning(DEPRECATED_MESSAGE)
  input[HISTORY] = history
  if prompt is not None and prompt:
- input[PROMPT] = prompt
+ input[PROMPT] = prompt
  elif messages is not None:
  msgs = copy.deepcopy(messages)
  if prompt is not None and prompt:
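The hunk above wires the new plugins/workspace parameters into the request and, when stream=True, returns a generator of GenerationResponse chunks (optionally merging incremental output). A hedged streaming example using the parameters documented in the docstring diff earlier; the values are arbitrary illustrations, not recommended defaults:

```python
from dashscope import Generation

responses = Generation.call(
    model=Generation.Models.qwen_turbo,
    messages=[{'role': 'user', 'content': 'Write a two-line poem about autumn.'}],
    result_format='message',
    stream=True,               # server-sent events; results arrive partially
    incremental_output=True,   # each chunk carries only newly generated text
    temperature=0.8,
    top_p=0.9,
    top_k=50,
    max_tokens=128,
    repetition_penalty=1.1,
    stop=['\n\n'])
for rsp in responses:
    # With result_format='message', text is under output.choices[0].message.content.
    print(rsp.output.choices[0].message.content, end='', flush=True)
```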
@@ -120,7 +199,7 @@ class Generation(BaseApi):
  input = {MESSAGES: msgs}
  else:
  input[PROMPT] = prompt
-
+
  if model.startswith('qwen'):
  enable_search = kwargs.pop('enable_search', False)
  if enable_search:
@@ -133,3 +212,196 @@ class Generation(BaseApi):
  input[CUSTOMIZED_MODEL_ID] = customized_model_id

  return input, {**parameters, **kwargs}
+
+ @classmethod
+ def _merge_generation_response(cls, response, n=1) -> Generator[GenerationResponse, None, None]:
+ """Merge incremental response chunks to simulate non-incremental output."""
+ accumulated_data = {}
+ for rsp in response:
+ parsed_response = GenerationResponse.from_api_response(rsp)
+ result = merge_single_response(parsed_response, accumulated_data, n)
+ if result is True:
+ yield parsed_response
+ elif isinstance(result, list):
+ # Multiple responses to yield (for n>1 non-stop cases)
+ for resp in result:
+ yield resp
+
+
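The `_merge_generation_response` helper above re-assembles incremental chunks via `merge_single_response` when the SDK forces `incremental_output=True` on the wire but the caller asked for full output. A conceptual, self-contained sketch of that re-assembly idea; the dict keys (`choices`, `index`, `delta`, `text`) are illustrative and this is not the SDK's actual `merge_single_response`:

```python
from typing import Dict, Iterable, Iterator


def merge_incremental(chunks: Iterable[dict]) -> Iterator[dict]:
    """Accumulate per-choice text deltas so each yielded chunk carries the
    full text generated so far, mimicking incremental_output=False."""
    accumulated: Dict[int, str] = {}             # choice index -> text so far
    for chunk in chunks:
        for choice in chunk.get('choices', []):  # hypothetical chunk layout
            idx = choice.get('index', 0)
            accumulated[idx] = accumulated.get(idx, '') + choice.get('delta', '')
            choice['text'] = accumulated[idx]    # replace delta view with cumulative view
        yield chunk
```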
+ class AioGeneration(BaseAioApi):
+ task = 'text-generation'
+ """API for AI-Generated Content(AIGC) models.
+
+ """
+ class Models:
+ """@deprecated, use qwen_turbo instead"""
+ qwen_v1 = 'qwen-v1'
+ """@deprecated, use qwen_plus instead"""
+ qwen_plus_v1 = 'qwen-plus-v1'
+
+ bailian_v1 = 'bailian-v1'
+ dolly_12b_v2 = 'dolly-12b-v2'
+ qwen_turbo = 'qwen-turbo'
+ qwen_plus = 'qwen-plus'
+ qwen_max = 'qwen-max'
+
+ @classmethod
+ async def call(
+ cls,
+ model: str,
+ prompt: Any = None,
+ history: list = None,
+ api_key: str = None,
+ messages: List[Message] = None,
+ plugins: Union[str, Dict[str, Any]] = None,
+ workspace: str = None,
+ **kwargs
+ ) -> Union[GenerationResponse, AsyncGenerator[GenerationResponse, None]]:
+ """Call generation model service.
+
+ Args:
+ model (str): The requested model, such as qwen-turbo
+ prompt (Any): The input prompt.
+ history (list):The user provided history, deprecated
+ examples:
+ [{'user':'The weather is fine today.',
+ 'bot': 'Suitable for outings'}].
+ Defaults to None.
+ api_key (str, optional): The api api_key, can be None,
+ if None, will get by default rule(TODO: api key doc).
+ messages (list): The generation messages.
+ examples:
+ [{'role': 'user',
+ 'content': 'The weather is fine today.'},
+ {'role': 'assistant', 'content': 'Suitable for outings'}]
+ plugins (Any): The plugin config. Can be plugins config str, or dict.
+ **kwargs:
+ stream(bool, `optional`): Enable server-sent events
+ (ref: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events) # noqa E501
+ the result will back partially[qwen-turbo,bailian-v1].
+ temperature(float, `optional`): Used to control the degree
+ of randomness and diversity. Specifically, the temperature
+ value controls the degree to which the probability distribution
+ of each candidate word is smoothed when generating text.
+ A higher temperature value will reduce the peak value of
+ the probability, allowing more low-probability words to be
+ selected, and the generated results will be more diverse;
+ while a lower temperature value will enhance the peak value
+ of the probability, making it easier for high-probability
+ words to be selected, the generated results are more
+ deterministic, range(0, 2) .[qwen-turbo,qwen-plus].
+ top_p(float, `optional`): A sampling strategy, called nucleus
+ sampling, where the model considers the results of the
+ tokens with top_p probability mass. So 0.1 means only
+ the tokens comprising the top 10% probability mass are
+ considered[qwen-turbo,bailian-v1].
+ top_k(int, `optional`): The size of the sample candidate set when generated. # noqa E501
+ For example, when the value is 50, only the 50 highest-scoring tokens # noqa E501
+ in a single generation form a randomly sampled candidate set. # noqa E501
+ The larger the value, the higher the randomness generated; # noqa E501
+ the smaller the value, the higher the certainty generated. # noqa E501
+ The default value is 0, which means the top_k policy is # noqa E501
+ not enabled. At this time, only the top_p policy takes effect. # noqa E501
+ enable_search(bool, `optional`): Whether to enable web search(quark). # noqa E501
+ Currently works best only on the first round of conversation.
+ Default to False, support model: [qwen-turbo].
+ customized_model_id(str, required) The enterprise-specific
+ large model id, which needs to be generated from the
+ operation background of the enterprise-specific
+ large model product, support model: [bailian-v1].
+ result_format(str, `optional`): [message|text] Set result result format. # noqa E501
+ Default result is text
+ incremental_output(bool, `optional`): Used to control the streaming output mode. # noqa E501
+ If true, the subsequent output will include the previously input content. # noqa E501
+ Otherwise, the subsequent output will not include the previously output # noqa E501
+ content. Default false.
+ stop(list[str] or list[list[int]], `optional`): Used to control the generation to stop # noqa E501
+ when encountering setting str or token ids, the result will not include # noqa E501
+ stop words or tokens.
+ max_tokens(int, `optional`): The maximum token num expected to be output. It should be # noqa E501
+ noted that the length generated by the model will only be less than max_tokens, # noqa E501
+ not necessarily equal to it. If max_tokens is set too large, the service will # noqa E501
+ directly prompt that the length exceeds the limit. It is generally # noqa E501
+ not recommended to set this value.
+ repetition_penalty(float, `optional`): Used to control the repeatability when generating models. # noqa E501
+ Increasing repetition_penalty can reduce the duplication of model generation. # noqa E501
+ 1.0 means no punishment.
+ workspace (str): The dashscope workspace id.
+ Raises:
+ InvalidInput: The history and auto_history are mutually exclusive.
+
+ Returns:
+ Union[GenerationResponse,
+ AsyncGenerator[GenerationResponse, None]]: If
+ stream is True, return AsyncGenerator, otherwise GenerationResponse.
+ """
+ if (prompt is None or not prompt) and (messages is None
+ or not messages):
+ raise InputRequired('prompt or messages is required!')
+ if model is None or not model:
+ raise ModelRequired('Model is required!')
+ task_group, function = _get_task_group_and_task(__name__)
+ if plugins is not None:
+ headers = kwargs.pop('headers', {})
+ if isinstance(plugins, str):
+ headers['X-DashScope-Plugin'] = plugins
+ else:
+ headers['X-DashScope-Plugin'] = json.dumps(plugins)
+ kwargs['headers'] = headers
+ input, parameters = Generation._build_input_parameters(
+ model, prompt, history, messages, **kwargs)
+
+ is_stream = parameters.get('stream', False)
+ # Check if we need to merge incremental output
+ is_incremental_output = kwargs.get('incremental_output', None)
+ to_merge_incremental_output = False
+ if (ParamUtil.should_modify_incremental_output(model) and
+ is_stream and is_incremental_output is False):
+ to_merge_incremental_output = True
+ parameters['incremental_output'] = True
+
+ # Pass incremental_to_full flag via headers user-agent
+ if 'headers' not in parameters:
+ parameters['headers'] = {}
+ flag = '1' if to_merge_incremental_output else '0'
+ parameters['headers']['user-agent'] = f'incremental_to_full/{flag}'
+
+ response = await super().call(model=model,
+ task_group=task_group,
+ task=Generation.task,
+ function=function,
+ api_key=api_key,
+ input=input,
+ workspace=workspace,
+ **parameters)
+ if is_stream:
+ if to_merge_incremental_output:
+ # Extract n parameter for merge logic
+ n = parameters.get('n', 1)
+ return cls._merge_generation_response(response, n)
+ else:
+ return cls._stream_responses(response)
+ else:
+ return GenerationResponse.from_api_response(response)
+
+ @classmethod
+ async def _stream_responses(cls, response) -> AsyncGenerator[GenerationResponse, None]:
+ """Convert async response stream to GenerationResponse stream."""
+ # Type hint: when stream=True, response is actually an AsyncIterable
+ async for rsp in response: # type: ignore
+ yield GenerationResponse.from_api_response(rsp)
+
+ @classmethod
+ async def _merge_generation_response(cls, response, n=1) -> AsyncGenerator[GenerationResponse, None]:
+ """Async version of merge incremental response chunks."""
+ accumulated_data = {}
+
+ async for rsp in response: # type: ignore
+ parsed_response = GenerationResponse.from_api_response(rsp)
+ result = merge_single_response(parsed_response, accumulated_data, n)
+ if result is True:
+ yield parsed_response
+ elif isinstance(result, list):
+ # Multiple responses to yield (for n>1 non-stop cases)
+ for resp in result:
+ yield resp
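The new AioGeneration class mirrors Generation.call as a coroutine and, with stream=True, resolves to an async generator of GenerationResponse objects. A hedged usage sketch, assuming the class is imported from dashscope.aigc.generation as added in this version:

```python
import asyncio

from dashscope.aigc.generation import AioGeneration


async def main():
    responses = await AioGeneration.call(
        model='qwen-turbo',
        messages=[{'role': 'user', 'content': 'Say hello in one sentence.'}],
        result_format='message',
        stream=True)
    # With stream=True, the awaited result is an AsyncGenerator of GenerationResponse.
    async for rsp in responses:
        print(rsp.output)


asyncio.run(main())
```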