dashscope 1.8.0__py3-none-any.whl → 1.25.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. dashscope/__init__.py +61 -14
  2. dashscope/aigc/__init__.py +10 -3
  3. dashscope/aigc/chat_completion.py +282 -0
  4. dashscope/aigc/code_generation.py +145 -0
  5. dashscope/aigc/conversation.py +71 -12
  6. dashscope/aigc/generation.py +288 -16
  7. dashscope/aigc/image_synthesis.py +473 -31
  8. dashscope/aigc/multimodal_conversation.py +299 -14
  9. dashscope/aigc/video_synthesis.py +610 -0
  10. dashscope/api_entities/aiohttp_request.py +8 -5
  11. dashscope/api_entities/api_request_data.py +4 -2
  12. dashscope/api_entities/api_request_factory.py +68 -20
  13. dashscope/api_entities/base_request.py +20 -3
  14. dashscope/api_entities/chat_completion_types.py +344 -0
  15. dashscope/api_entities/dashscope_response.py +243 -15
  16. dashscope/api_entities/encryption.py +179 -0
  17. dashscope/api_entities/http_request.py +216 -62
  18. dashscope/api_entities/websocket_request.py +43 -34
  19. dashscope/app/__init__.py +5 -0
  20. dashscope/app/application.py +203 -0
  21. dashscope/app/application_response.py +246 -0
  22. dashscope/assistants/__init__.py +16 -0
  23. dashscope/assistants/assistant_types.py +175 -0
  24. dashscope/assistants/assistants.py +311 -0
  25. dashscope/assistants/files.py +197 -0
  26. dashscope/audio/__init__.py +4 -2
  27. dashscope/audio/asr/__init__.py +17 -1
  28. dashscope/audio/asr/asr_phrase_manager.py +203 -0
  29. dashscope/audio/asr/recognition.py +167 -27
  30. dashscope/audio/asr/transcription.py +107 -14
  31. dashscope/audio/asr/translation_recognizer.py +1006 -0
  32. dashscope/audio/asr/vocabulary.py +177 -0
  33. dashscope/audio/qwen_asr/__init__.py +7 -0
  34. dashscope/audio/qwen_asr/qwen_transcription.py +189 -0
  35. dashscope/audio/qwen_omni/__init__.py +11 -0
  36. dashscope/audio/qwen_omni/omni_realtime.py +524 -0
  37. dashscope/audio/qwen_tts/__init__.py +5 -0
  38. dashscope/audio/qwen_tts/speech_synthesizer.py +77 -0
  39. dashscope/audio/qwen_tts_realtime/__init__.py +10 -0
  40. dashscope/audio/qwen_tts_realtime/qwen_tts_realtime.py +355 -0
  41. dashscope/audio/tts/__init__.py +2 -0
  42. dashscope/audio/tts/speech_synthesizer.py +5 -0
  43. dashscope/audio/tts_v2/__init__.py +12 -0
  44. dashscope/audio/tts_v2/enrollment.py +179 -0
  45. dashscope/audio/tts_v2/speech_synthesizer.py +886 -0
  46. dashscope/cli.py +157 -37
  47. dashscope/client/base_api.py +652 -87
  48. dashscope/common/api_key.py +2 -0
  49. dashscope/common/base_type.py +135 -0
  50. dashscope/common/constants.py +13 -16
  51. dashscope/common/env.py +2 -0
  52. dashscope/common/error.py +58 -22
  53. dashscope/common/logging.py +2 -0
  54. dashscope/common/message_manager.py +2 -0
  55. dashscope/common/utils.py +276 -46
  56. dashscope/customize/__init__.py +0 -0
  57. dashscope/customize/customize_types.py +192 -0
  58. dashscope/customize/deployments.py +146 -0
  59. dashscope/customize/finetunes.py +234 -0
  60. dashscope/embeddings/__init__.py +5 -1
  61. dashscope/embeddings/batch_text_embedding.py +208 -0
  62. dashscope/embeddings/batch_text_embedding_response.py +65 -0
  63. dashscope/embeddings/multimodal_embedding.py +118 -10
  64. dashscope/embeddings/text_embedding.py +13 -1
  65. dashscope/{file.py → files.py} +19 -4
  66. dashscope/io/input_output.py +2 -0
  67. dashscope/model.py +11 -2
  68. dashscope/models.py +43 -0
  69. dashscope/multimodal/__init__.py +20 -0
  70. dashscope/multimodal/dialog_state.py +56 -0
  71. dashscope/multimodal/multimodal_constants.py +28 -0
  72. dashscope/multimodal/multimodal_dialog.py +648 -0
  73. dashscope/multimodal/multimodal_request_params.py +313 -0
  74. dashscope/multimodal/tingwu/__init__.py +10 -0
  75. dashscope/multimodal/tingwu/tingwu.py +80 -0
  76. dashscope/multimodal/tingwu/tingwu_realtime.py +579 -0
  77. dashscope/nlp/__init__.py +0 -0
  78. dashscope/nlp/understanding.py +64 -0
  79. dashscope/protocol/websocket.py +3 -0
  80. dashscope/rerank/__init__.py +0 -0
  81. dashscope/rerank/text_rerank.py +69 -0
  82. dashscope/resources/qwen.tiktoken +151643 -0
  83. dashscope/threads/__init__.py +26 -0
  84. dashscope/threads/messages/__init__.py +0 -0
  85. dashscope/threads/messages/files.py +113 -0
  86. dashscope/threads/messages/messages.py +220 -0
  87. dashscope/threads/runs/__init__.py +0 -0
  88. dashscope/threads/runs/runs.py +501 -0
  89. dashscope/threads/runs/steps.py +112 -0
  90. dashscope/threads/thread_types.py +665 -0
  91. dashscope/threads/threads.py +212 -0
  92. dashscope/tokenizers/__init__.py +7 -0
  93. dashscope/tokenizers/qwen_tokenizer.py +111 -0
  94. dashscope/tokenizers/tokenization.py +125 -0
  95. dashscope/tokenizers/tokenizer.py +45 -0
  96. dashscope/tokenizers/tokenizer_base.py +32 -0
  97. dashscope/utils/__init__.py +0 -0
  98. dashscope/utils/message_utils.py +838 -0
  99. dashscope/utils/oss_utils.py +243 -0
  100. dashscope/utils/param_utils.py +29 -0
  101. dashscope/version.py +3 -1
  102. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/METADATA +53 -50
  103. dashscope-1.25.6.dist-info/RECORD +112 -0
  104. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/WHEEL +1 -1
  105. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/entry_points.txt +0 -1
  106. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info/licenses}/LICENSE +2 -4
  107. dashscope/deployment.py +0 -129
  108. dashscope/finetune.py +0 -149
  109. dashscope-1.8.0.dist-info/RECORD +0 -49
  110. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/top_level.txt +0 -0
dashscope/aigc/conversation.py
@@ -1,12 +1,14 @@
+ # Copyright (c) Alibaba, Inc. and its affiliates.
+
+ import json
  from copy import deepcopy
  from http import HTTPStatus
- from typing import Any, Generator, List, Union
+ from typing import Any, Dict, Generator, List, Union

  from dashscope.api_entities.dashscope_response import (ConversationResponse,
  Message, Role)
  from dashscope.client.base_api import BaseApi
- from dashscope.common.constants import (DEPRECATED_MESSAGE, HISTORY, MESSAGES,
- PROMPT)
+ from dashscope.common.constants import DEPRECATED_MESSAGE, HISTORY, PROMPT
  from dashscope.common.error import InputRequired, InvalidInput, ModelRequired
  from dashscope.common.logging import logger
  from dashscope.common.utils import _get_task_group_and_task
@@ -93,11 +95,16 @@ class Conversation(BaseApi):
  task = 'generation'

  class Models:
+ """@deprecated, use qwen_turbo instead"""
  qwen_v1 = 'qwen-v1'
+ """@deprecated, use qwen_plus instead"""
  qwen_plus_v1 = 'qwen-plus-v1'

- def __init__(self,
- history: History = None) -> None:
+ qwen_turbo = 'qwen-turbo'
+ qwen_plus = 'qwen-plus'
+ qwen_max = 'qwen-max'
+
+ def __init__(self, history: History = None) -> None:
  """Init a chat.

  Args:
@@ -121,6 +128,8 @@ class Conversation(BaseApi):
  n_history: int = -1,
  api_key: str = None,
  messages: List[Message] = None,
+ plugins: Union[str, Dict[str, Any]] = None,
+ workspace: str = None,
  **kwargs
  ) -> Union[ConversationResponse, Generator[ConversationResponse, None,
  None]]:
@@ -145,25 +154,59 @@ class Conversation(BaseApi):
  [{'role': 'user',
  'content': 'The weather is fine today.'},
  {'role': 'assistant', 'content': 'Suitable for outings'}]
- **kwargs(qwen-v1, qawen-plus-v1):
+ plugins (Any): The plugin config, Can be plugins config str, or dict.
+ **kwargs(qwen-turbo, qwen-plus):
  stream(bool, `optional`): Enable server-sent events
  (ref: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events) # noqa E501
  the result will back partially.
- max_length(int, `optional`): The maximum length of tokens to
- generate. The token count of your prompt plus max_length
- cannot exceed the model's context length. Most models
- have a context length of 2000 tokens.
+ temperature(float, `optional`): Used to control the degree
+ of randomness and diversity. Specifically, the temperature
+ value controls the degree to which the probability distribution
+ of each candidate word is smoothed when generating text.
+ A higher temperature value will reduce the peak value of
+ the probability, allowing more low-probability words to be
+ selected, and the generated results will be more diverse;
+ while a lower temperature value will enhance the peak value
+ of the probability, making it easier for high-probability
+ words to be selected, the generated results are more
+ deterministic,range(0, 2) .[qwen-turbo,qwen-plus].
  top_p(float, `optional`): A sampling strategy, called nucleus
  sampling, where the model considers the results of the
  tokens with top_p probability mass. So 0.1 means only
  the tokens comprising the top 10% probability mass are
  considered.
+ top_k(int, `optional`): The size of the sample candidate set when generated. # noqa E501
+ For example, when the value is 50, only the 50 highest-scoring tokens # noqa E501
+ in a single generation form a randomly sampled candidate set. # noqa E501
+ The larger the value, the higher the randomness generated; # noqa E501
+ the smaller the value, the higher the certainty generated. # noqa E501
+ The default value is 0, which means the top_k policy is # noqa E501
+ not enabled. At this time, only the top_p policy takes effect. # noqa E501
  enable_search(bool, `optional`): Whether to enable web search(quark). # noqa E501
  Currently works best only on the first round of conversation.
- Default to False.
+ Default to False, support model: [qwen-turbo].
+ customized_model_id(str, required) The enterprise-specific
+ large model id, which needs to be generated from the
+ operation background of the enterprise-specific
+ large model product, support model: [bailian-v1].
  result_format(str, `optional`): [message|text] Set result result format. # noqa E501
  Default result is text
-
+ incremental_output(bool, `optional`): Used to control the streaming output mode. # noqa E501
+ If true, the subsequent output will include the previously input content. # noqa E501
+ Otherwise, the subsequent output will not include the previously output # noqa E501
+ content. Default false.
+ stop(list[str] or list[list[int]], `optional`): Used to control the generation to stop # noqa E501
+ when encountering setting str or token ids, the result will not include # noqa E501
+ stop words or tokens.
+ max_tokens(int, `optional`): The maximum token num expected to be output. It should be # noqa E501
+ noted that the length generated by the model will only be less than max_tokens, # noqa E501
+ not necessarily equal to it. If max_tokens is set too large, the service will # noqa E501
+ directly prompt that the length exceeds the limit. It is generally # noqa E501
+ not recommended to set this value.
+ repetition_penalty(float, `optional`): Used to control the repeatability when generating models. # noqa E501
+ Increasing repetition_penalty can reduce the duplication of model generation. # noqa E501
+ 1.0 means no punishment.
+ workspace (str): The dashscope workspace id.
  Raises:
  InputRequired: The prompt cannot be empty.
  InvalidInput: The history and auto_history are mutually exclusive.
@@ -180,6 +223,13 @@ class Conversation(BaseApi):
  if model is None or not model:
  raise ModelRequired('Model is required!')
  task_group, _ = _get_task_group_and_task(__name__)
+ if plugins is not None:
+ headers = kwargs.pop('headers', {})
+ if isinstance(plugins, str):
+ headers['X-DashScope-Plugin'] = plugins
+ else:
+ headers['X-DashScope-Plugin'] = json.dumps(plugins)
+ kwargs['headers'] = headers
  input, parameters = self._build_input_parameters(
  model, prompt, history, auto_history, n_history, messages,
  **kwargs)
@@ -189,6 +239,7 @@ class Conversation(BaseApi):
  function='generation',
  api_key=api_key,
  input=input,
+ workspace=workspace,
  **parameters)
  is_stream = kwargs.get('stream', False)
  return self._handle_response(prompt, response, is_stream)
@@ -220,6 +271,14 @@ class Conversation(BaseApi):

  def _build_input_parameters(self, model, prompt, history, auto_history,
  n_history, messages, **kwargs):
+ if model == Conversation.Models.qwen_v1:
+ logger.warning(
+ 'Model %s is deprecated, use %s instead!' %
+ (Conversation.Models.qwen_v1, Conversation.Models.qwen_turbo))
+ if model == Conversation.Models.qwen_plus_v1:
+ logger.warning('Model %s is deprecated, use %s instead!' %
+ (Conversation.Models.qwen_plus_v1,
+ Conversation.Models.qwen_plus))
  parameters = {}
  if history is not None and auto_history:
  raise InvalidInput('auto_history is True, history must None')
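The conversation.py changes above add `plugins` and `workspace` parameters and route plugin configuration into the `X-DashScope-Plugin` request header (strings pass through, dicts are JSON-encoded). A minimal usage sketch; the plugin name and workspace id are illustrative placeholders, not values taken from this diff:

```python
from http import HTTPStatus

from dashscope import Conversation

conversation = Conversation()
response = conversation.call(
    model=Conversation.Models.qwen_turbo,      # replaces the deprecated qwen-v1
    prompt='What is the weather like today?',
    plugins={'web_search': {'enable': True}},  # dict is json.dumps()-ed into X-DashScope-Plugin
    workspace='ws_example_id',                 # optional DashScope workspace id (placeholder)
    result_format='message')
if response.status_code == HTTPStatus.OK:
    print(response.output)
else:
    print(response.code, response.message)
```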
dashscope/aigc/generation.py
@@ -1,15 +1,20 @@
+ # Copyright (c) Alibaba, Inc. and its affiliates.
+
  import copy
- from typing import Any, Generator, List, Union
+ import json
+ from typing import Any, Dict, Generator, List, Union, AsyncGenerator

  from dashscope.api_entities.dashscope_response import (GenerationResponse,
  Message, Role)
- from dashscope.client.base_api import BaseApi
+ from dashscope.client.base_api import BaseAioApi, BaseApi
  from dashscope.common.constants import (CUSTOMIZED_MODEL_ID,
  DEPRECATED_MESSAGE, HISTORY, MESSAGES,
  PROMPT)
  from dashscope.common.error import InputRequired, ModelRequired
  from dashscope.common.logging import logger
  from dashscope.common.utils import _get_task_group_and_task
+ from dashscope.utils.param_utils import ParamUtil
+ from dashscope.utils.message_utils import merge_single_response


  class Generation(BaseApi):
@@ -18,10 +23,16 @@ class Generation(BaseApi):

  """
  class Models:
+ """@deprecated, use qwen_turbo instead"""
  qwen_v1 = 'qwen-v1'
+ """@deprecated, use qwen_plus instead"""
  qwen_plus_v1 = 'qwen-plus-v1'
+
  bailian_v1 = 'bailian-v1'
  dolly_12b_v2 = 'dolly-12b-v2'
+ qwen_turbo = 'qwen-turbo'
+ qwen_plus = 'qwen-plus'
+ qwen_max = 'qwen-max'

  @classmethod
  def call(
@@ -31,12 +42,14 @@ class Generation(BaseApi):
  history: list = None,
  api_key: str = None,
  messages: List[Message] = None,
+ plugins: Union[str, Dict[str, Any]] = None,
+ workspace: str = None,
  **kwargs
  ) -> Union[GenerationResponse, Generator[GenerationResponse, None, None]]:
  """Call generation model service.

  Args:
- model (str): The requested model, such as gpt3-v2
+ model (str): The requested model, such as qwen-turbo
  prompt (Any): The input prompt.
  history (list):The user provided history, deprecated
  examples:
@@ -50,29 +63,59 @@ class Generation(BaseApi):
  [{'role': 'user',
  'content': 'The weather is fine today.'},
  {'role': 'assistant', 'content': 'Suitable for outings'}]
+ plugins (Any): The plugin config. Can be plugins config str, or dict.
  **kwargs:
  stream(bool, `optional`): Enable server-sent events
  (ref: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events) # noqa E501
- the result will back partially[qwen-v1,bailian-v1].
- max_length(int, `optional`): The maximum length of tokens to
- generate. The token count of your prompt plus max_length
- cannot exceed the model's context length. Most models
- have a context length of 2000 tokens[qwen-v1,bailian-v1]. # noqa E501
+ the result will back partially[qwen-turbo,bailian-v1].
+ temperature(float, `optional`): Used to control the degree
+ of randomness and diversity. Specifically, the temperature
+ value controls the degree to which the probability distribution
+ of each candidate word is smoothed when generating text.
+ A higher temperature value will reduce the peak value of
+ the probability, allowing more low-probability words to be
+ selected, and the generated results will be more diverse;
+ while a lower temperature value will enhance the peak value
+ of the probability, making it easier for high-probability
+ words to be selected, the generated results are more
+ deterministic, range(0, 2) .[qwen-turbo,qwen-plus].
  top_p(float, `optional`): A sampling strategy, called nucleus
  sampling, where the model considers the results of the
  tokens with top_p probability mass. So 0.1 means only
  the tokens comprising the top 10% probability mass are
- considered[qwen-v1,bailian-v1].
+ considered[qwen-turbo,bailian-v1].
+ top_k(int, `optional`): The size of the sample candidate set when generated. # noqa E501
+ For example, when the value is 50, only the 50 highest-scoring tokens # noqa E501
+ in a single generation form a randomly sampled candidate set. # noqa E501
+ The larger the value, the higher the randomness generated; # noqa E501
+ the smaller the value, the higher the certainty generated. # noqa E501
+ The default value is 0, which means the top_k policy is # noqa E501
+ not enabled. At this time, only the top_p policy takes effect. # noqa E501
  enable_search(bool, `optional`): Whether to enable web search(quark). # noqa E501
  Currently works best only on the first round of conversation.
- Default to False, support model: [qwen-v1].
+ Default to False, support model: [qwen-turbo].
  customized_model_id(str, required) The enterprise-specific
  large model id, which needs to be generated from the
  operation background of the enterprise-specific
  large model product, support model: [bailian-v1].
  result_format(str, `optional`): [message|text] Set result result format. # noqa E501
  Default result is text
-
+ incremental_output(bool, `optional`): Used to control the streaming output mode. # noqa E501
+ If true, the subsequent output will include the previously input content. # noqa E501
+ Otherwise, the subsequent output will not include the previously output # noqa E501
+ content. Default false.
+ stop(list[str] or list[list[int]], `optional`): Used to control the generation to stop # noqa E501
+ when encountering setting str or token ids, the result will not include # noqa E501
+ stop words or tokens.
+ max_tokens(int, `optional`): The maximum token num expected to be output. It should be # noqa E501
+ noted that the length generated by the model will only be less than max_tokens, # noqa E501
+ not necessarily equal to it. If max_tokens is set too large, the service will # noqa E501
+ directly prompt that the length exceeds the limit. It is generally # noqa E501
+ not recommended to set this value.
+ repetition_penalty(float, `optional`): Used to control the repeatability when generating models. # noqa E501
+ Increasing repetition_penalty can reduce the duplication of model generation. # noqa E501
+ 1.0 means no punishment.
+ workspace (str): The dashscope workspace id.
  Raises:
  InvalidInput: The history and auto_history are mutually exclusive.

@@ -87,32 +130,68 @@ class Generation(BaseApi):
  if model is None or not model:
  raise ModelRequired('Model is required!')
  task_group, function = _get_task_group_and_task(__name__)
+ if plugins is not None:
+ headers = kwargs.pop('headers', {})
+ if isinstance(plugins, str):
+ headers['X-DashScope-Plugin'] = plugins
+ else:
+ headers['X-DashScope-Plugin'] = json.dumps(plugins)
+ kwargs['headers'] = headers
  input, parameters = cls._build_input_parameters(
  model, prompt, history, messages, **kwargs)
+
+ is_stream = parameters.get('stream', False)
+ # Check if we need to merge incremental output
+ is_incremental_output = kwargs.get('incremental_output', None)
+ to_merge_incremental_output = False
+ if (ParamUtil.should_modify_incremental_output(model) and
+ is_stream and is_incremental_output is False):
+ to_merge_incremental_output = True
+ parameters['incremental_output'] = True
+
+ # Pass incremental_to_full flag via headers user-agent
+ if 'headers' not in parameters:
+ parameters['headers'] = {}
+ flag = '1' if to_merge_incremental_output else '0'
+ parameters['headers']['user-agent'] = f'incremental_to_full/{flag}'
+
  response = super().call(model=model,
  task_group=task_group,
  task=Generation.task,
  function=function,
  api_key=api_key,
  input=input,
+ workspace=workspace,
  **parameters)
- is_stream = kwargs.get('stream', False)
  if is_stream:
- return (GenerationResponse.from_api_response(rsp)
- for rsp in response)
+ if to_merge_incremental_output:
+ # Extract n parameter for merge logic
+ n = parameters.get('n', 1)
+ return cls._merge_generation_response(response, n)
+ else:
+ return (GenerationResponse.from_api_response(rsp)
+ for rsp in response)
  else:
  return GenerationResponse.from_api_response(response)

  @classmethod
  def _build_input_parameters(cls, model, prompt, history, messages,
  **kwargs):
+ if model == Generation.Models.qwen_v1:
+ logger.warning(
+ 'Model %s is deprecated, use %s instead!' %
+ (Generation.Models.qwen_v1, Generation.Models.qwen_turbo))
+ if model == Generation.Models.qwen_plus_v1:
+ logger.warning(
+ 'Model %s is deprecated, use %s instead!' %
+ (Generation.Models.qwen_plus_v1, Generation.Models.qwen_plus))
  parameters = {}
  input = {}
  if history is not None:
  logger.warning(DEPRECATED_MESSAGE)
  input[HISTORY] = history
  if prompt is not None and prompt:
- input[PROMPT] = prompt
+ input[PROMPT] = prompt
  elif messages is not None:
  msgs = copy.deepcopy(messages)
  if prompt is not None and prompt:
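The hunk above wires the new plugins/workspace parameters into the request and, when stream=True, returns a generator of GenerationResponse chunks (optionally merging incremental output). A hedged streaming example using the parameters documented in the docstring diff earlier; the values are arbitrary illustrations, not recommended defaults:

```python
from dashscope import Generation

responses = Generation.call(
    model=Generation.Models.qwen_turbo,
    messages=[{'role': 'user', 'content': 'Write a two-line poem about autumn.'}],
    result_format='message',
    stream=True,               # server-sent events; results arrive partially
    incremental_output=True,   # each chunk carries only newly generated text
    temperature=0.8,
    top_p=0.9,
    top_k=50,
    max_tokens=128,
    repetition_penalty=1.1,
    stop=['\n\n'])
for rsp in responses:
    # With result_format='message', text is under output.choices[0].message.content.
    print(rsp.output.choices[0].message.content, end='', flush=True)
```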
@@ -120,7 +199,7 @@ class Generation(BaseApi):
  input = {MESSAGES: msgs}
  else:
  input[PROMPT] = prompt
-
+
  if model.startswith('qwen'):
  enable_search = kwargs.pop('enable_search', False)
  if enable_search:
@@ -133,3 +212,196 @@ class Generation(BaseApi):
  input[CUSTOMIZED_MODEL_ID] = customized_model_id

  return input, {**parameters, **kwargs}
+
+ @classmethod
+ def _merge_generation_response(cls, response, n=1) -> Generator[GenerationResponse, None, None]:
+ """Merge incremental response chunks to simulate non-incremental output."""
+ accumulated_data = {}
+ for rsp in response:
+ parsed_response = GenerationResponse.from_api_response(rsp)
+ result = merge_single_response(parsed_response, accumulated_data, n)
+ if result is True:
+ yield parsed_response
+ elif isinstance(result, list):
+ # Multiple responses to yield (for n>1 non-stop cases)
+ for resp in result:
+ yield resp
+
+
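The `_merge_generation_response` helper above re-assembles incremental chunks via `merge_single_response` when the SDK forces `incremental_output=True` on the wire but the caller asked for full output. A conceptual, self-contained sketch of that re-assembly idea; the dict keys (`choices`, `index`, `delta`, `text`) are illustrative and this is not the SDK's actual `merge_single_response`:

```python
from typing import Dict, Iterable, Iterator


def merge_incremental(chunks: Iterable[dict]) -> Iterator[dict]:
    """Accumulate per-choice text deltas so each yielded chunk carries the
    full text generated so far, mimicking incremental_output=False."""
    accumulated: Dict[int, str] = {}             # choice index -> text so far
    for chunk in chunks:
        for choice in chunk.get('choices', []):  # hypothetical chunk layout
            idx = choice.get('index', 0)
            accumulated[idx] = accumulated.get(idx, '') + choice.get('delta', '')
            choice['text'] = accumulated[idx]    # replace delta view with cumulative view
        yield chunk
```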
+ class AioGeneration(BaseAioApi):
+ task = 'text-generation'
+ """API for AI-Generated Content(AIGC) models.
+
+ """
+ class Models:
+ """@deprecated, use qwen_turbo instead"""
+ qwen_v1 = 'qwen-v1'
+ """@deprecated, use qwen_plus instead"""
+ qwen_plus_v1 = 'qwen-plus-v1'
+
+ bailian_v1 = 'bailian-v1'
+ dolly_12b_v2 = 'dolly-12b-v2'
+ qwen_turbo = 'qwen-turbo'
+ qwen_plus = 'qwen-plus'
+ qwen_max = 'qwen-max'
+
+ @classmethod
+ async def call(
+ cls,
+ model: str,
+ prompt: Any = None,
+ history: list = None,
+ api_key: str = None,
+ messages: List[Message] = None,
+ plugins: Union[str, Dict[str, Any]] = None,
+ workspace: str = None,
+ **kwargs
+ ) -> Union[GenerationResponse, AsyncGenerator[GenerationResponse, None]]:
+ """Call generation model service.
+
+ Args:
+ model (str): The requested model, such as qwen-turbo
+ prompt (Any): The input prompt.
+ history (list):The user provided history, deprecated
+ examples:
+ [{'user':'The weather is fine today.',
+ 'bot': 'Suitable for outings'}].
+ Defaults to None.
+ api_key (str, optional): The api api_key, can be None,
+ if None, will get by default rule(TODO: api key doc).
+ messages (list): The generation messages.
+ examples:
+ [{'role': 'user',
+ 'content': 'The weather is fine today.'},
+ {'role': 'assistant', 'content': 'Suitable for outings'}]
+ plugins (Any): The plugin config. Can be plugins config str, or dict.
+ **kwargs:
+ stream(bool, `optional`): Enable server-sent events
+ (ref: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events) # noqa E501
+ the result will back partially[qwen-turbo,bailian-v1].
+ temperature(float, `optional`): Used to control the degree
+ of randomness and diversity. Specifically, the temperature
+ value controls the degree to which the probability distribution
+ of each candidate word is smoothed when generating text.
+ A higher temperature value will reduce the peak value of
+ the probability, allowing more low-probability words to be
+ selected, and the generated results will be more diverse;
+ while a lower temperature value will enhance the peak value
+ of the probability, making it easier for high-probability
+ words to be selected, the generated results are more
+ deterministic, range(0, 2) .[qwen-turbo,qwen-plus].
+ top_p(float, `optional`): A sampling strategy, called nucleus
+ sampling, where the model considers the results of the
+ tokens with top_p probability mass. So 0.1 means only
+ the tokens comprising the top 10% probability mass are
+ considered[qwen-turbo,bailian-v1].
+ top_k(int, `optional`): The size of the sample candidate set when generated. # noqa E501
+ For example, when the value is 50, only the 50 highest-scoring tokens # noqa E501
+ in a single generation form a randomly sampled candidate set. # noqa E501
+ The larger the value, the higher the randomness generated; # noqa E501
+ the smaller the value, the higher the certainty generated. # noqa E501
+ The default value is 0, which means the top_k policy is # noqa E501
+ not enabled. At this time, only the top_p policy takes effect. # noqa E501
+ enable_search(bool, `optional`): Whether to enable web search(quark). # noqa E501
+ Currently works best only on the first round of conversation.
+ Default to False, support model: [qwen-turbo].
+ customized_model_id(str, required) The enterprise-specific
+ large model id, which needs to be generated from the
+ operation background of the enterprise-specific
+ large model product, support model: [bailian-v1].
+ result_format(str, `optional`): [message|text] Set result result format. # noqa E501
+ Default result is text
+ incremental_output(bool, `optional`): Used to control the streaming output mode. # noqa E501
+ If true, the subsequent output will include the previously input content. # noqa E501
+ Otherwise, the subsequent output will not include the previously output # noqa E501
+ content. Default false.
+ stop(list[str] or list[list[int]], `optional`): Used to control the generation to stop # noqa E501
+ when encountering setting str or token ids, the result will not include # noqa E501
+ stop words or tokens.
+ max_tokens(int, `optional`): The maximum token num expected to be output. It should be # noqa E501
+ noted that the length generated by the model will only be less than max_tokens, # noqa E501
+ not necessarily equal to it. If max_tokens is set too large, the service will # noqa E501
+ directly prompt that the length exceeds the limit. It is generally # noqa E501
+ not recommended to set this value.
+ repetition_penalty(float, `optional`): Used to control the repeatability when generating models. # noqa E501
+ Increasing repetition_penalty can reduce the duplication of model generation. # noqa E501
+ 1.0 means no punishment.
+ workspace (str): The dashscope workspace id.
+ Raises:
+ InvalidInput: The history and auto_history are mutually exclusive.
+
+ Returns:
+ Union[GenerationResponse,
+ AsyncGenerator[GenerationResponse, None]]: If
+ stream is True, return AsyncGenerator, otherwise GenerationResponse.
+ """
+ if (prompt is None or not prompt) and (messages is None
+ or not messages):
+ raise InputRequired('prompt or messages is required!')
+ if model is None or not model:
+ raise ModelRequired('Model is required!')
+ task_group, function = _get_task_group_and_task(__name__)
+ if plugins is not None:
+ headers = kwargs.pop('headers', {})
+ if isinstance(plugins, str):
+ headers['X-DashScope-Plugin'] = plugins
+ else:
+ headers['X-DashScope-Plugin'] = json.dumps(plugins)
+ kwargs['headers'] = headers
+ input, parameters = Generation._build_input_parameters(
+ model, prompt, history, messages, **kwargs)
+
+ is_stream = parameters.get('stream', False)
+ # Check if we need to merge incremental output
+ is_incremental_output = kwargs.get('incremental_output', None)
+ to_merge_incremental_output = False
+ if (ParamUtil.should_modify_incremental_output(model) and
+ is_stream and is_incremental_output is False):
+ to_merge_incremental_output = True
+ parameters['incremental_output'] = True
+
+ # Pass incremental_to_full flag via headers user-agent
+ if 'headers' not in parameters:
+ parameters['headers'] = {}
+ flag = '1' if to_merge_incremental_output else '0'
+ parameters['headers']['user-agent'] = f'incremental_to_full/{flag}'
+
+ response = await super().call(model=model,
+ task_group=task_group,
+ task=Generation.task,
+ function=function,
+ api_key=api_key,
+ input=input,
+ workspace=workspace,
+ **parameters)
+ if is_stream:
+ if to_merge_incremental_output:
+ # Extract n parameter for merge logic
+ n = parameters.get('n', 1)
+ return cls._merge_generation_response(response, n)
+ else:
+ return cls._stream_responses(response)
+ else:
+ return GenerationResponse.from_api_response(response)
+
+ @classmethod
+ async def _stream_responses(cls, response) -> AsyncGenerator[GenerationResponse, None]:
+ """Convert async response stream to GenerationResponse stream."""
+ # Type hint: when stream=True, response is actually an AsyncIterable
+ async for rsp in response: # type: ignore
+ yield GenerationResponse.from_api_response(rsp)
+
+ @classmethod
+ async def _merge_generation_response(cls, response, n=1) -> AsyncGenerator[GenerationResponse, None]:
+ """Async version of merge incremental response chunks."""
+ accumulated_data = {}
+
+ async for rsp in response: # type: ignore
+ parsed_response = GenerationResponse.from_api_response(rsp)
+ result = merge_single_response(parsed_response, accumulated_data, n)
+ if result is True:
+ yield parsed_response
+ elif isinstance(result, list):
+ # Multiple responses to yield (for n>1 non-stop cases)
+ for resp in result:
+ yield resp
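The new AioGeneration class mirrors Generation.call as a coroutine and, with stream=True, resolves to an async generator of GenerationResponse objects. A hedged usage sketch, assuming the class is imported from dashscope.aigc.generation as added in this version:

```python
import asyncio

from dashscope.aigc.generation import AioGeneration


async def main():
    responses = await AioGeneration.call(
        model='qwen-turbo',
        messages=[{'role': 'user', 'content': 'Say hello in one sentence.'}],
        result_format='message',
        stream=True)
    # With stream=True, the awaited result is an AsyncGenerator of GenerationResponse.
    async for rsp in responses:
        print(rsp.output)


asyncio.run(main())
```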