dashscope 1.8.0__py3-none-any.whl → 1.25.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dashscope/__init__.py +61 -14
- dashscope/aigc/__init__.py +10 -3
- dashscope/aigc/chat_completion.py +282 -0
- dashscope/aigc/code_generation.py +145 -0
- dashscope/aigc/conversation.py +71 -12
- dashscope/aigc/generation.py +288 -16
- dashscope/aigc/image_synthesis.py +473 -31
- dashscope/aigc/multimodal_conversation.py +299 -14
- dashscope/aigc/video_synthesis.py +610 -0
- dashscope/api_entities/aiohttp_request.py +8 -5
- dashscope/api_entities/api_request_data.py +4 -2
- dashscope/api_entities/api_request_factory.py +68 -20
- dashscope/api_entities/base_request.py +20 -3
- dashscope/api_entities/chat_completion_types.py +344 -0
- dashscope/api_entities/dashscope_response.py +243 -15
- dashscope/api_entities/encryption.py +179 -0
- dashscope/api_entities/http_request.py +216 -62
- dashscope/api_entities/websocket_request.py +43 -34
- dashscope/app/__init__.py +5 -0
- dashscope/app/application.py +203 -0
- dashscope/app/application_response.py +246 -0
- dashscope/assistants/__init__.py +16 -0
- dashscope/assistants/assistant_types.py +175 -0
- dashscope/assistants/assistants.py +311 -0
- dashscope/assistants/files.py +197 -0
- dashscope/audio/__init__.py +4 -2
- dashscope/audio/asr/__init__.py +17 -1
- dashscope/audio/asr/asr_phrase_manager.py +203 -0
- dashscope/audio/asr/recognition.py +167 -27
- dashscope/audio/asr/transcription.py +107 -14
- dashscope/audio/asr/translation_recognizer.py +1006 -0
- dashscope/audio/asr/vocabulary.py +177 -0
- dashscope/audio/qwen_asr/__init__.py +7 -0
- dashscope/audio/qwen_asr/qwen_transcription.py +189 -0
- dashscope/audio/qwen_omni/__init__.py +11 -0
- dashscope/audio/qwen_omni/omni_realtime.py +524 -0
- dashscope/audio/qwen_tts/__init__.py +5 -0
- dashscope/audio/qwen_tts/speech_synthesizer.py +77 -0
- dashscope/audio/qwen_tts_realtime/__init__.py +10 -0
- dashscope/audio/qwen_tts_realtime/qwen_tts_realtime.py +355 -0
- dashscope/audio/tts/__init__.py +2 -0
- dashscope/audio/tts/speech_synthesizer.py +5 -0
- dashscope/audio/tts_v2/__init__.py +12 -0
- dashscope/audio/tts_v2/enrollment.py +179 -0
- dashscope/audio/tts_v2/speech_synthesizer.py +886 -0
- dashscope/cli.py +157 -37
- dashscope/client/base_api.py +652 -87
- dashscope/common/api_key.py +2 -0
- dashscope/common/base_type.py +135 -0
- dashscope/common/constants.py +13 -16
- dashscope/common/env.py +2 -0
- dashscope/common/error.py +58 -22
- dashscope/common/logging.py +2 -0
- dashscope/common/message_manager.py +2 -0
- dashscope/common/utils.py +276 -46
- dashscope/customize/__init__.py +0 -0
- dashscope/customize/customize_types.py +192 -0
- dashscope/customize/deployments.py +146 -0
- dashscope/customize/finetunes.py +234 -0
- dashscope/embeddings/__init__.py +5 -1
- dashscope/embeddings/batch_text_embedding.py +208 -0
- dashscope/embeddings/batch_text_embedding_response.py +65 -0
- dashscope/embeddings/multimodal_embedding.py +118 -10
- dashscope/embeddings/text_embedding.py +13 -1
- dashscope/{file.py → files.py} +19 -4
- dashscope/io/input_output.py +2 -0
- dashscope/model.py +11 -2
- dashscope/models.py +43 -0
- dashscope/multimodal/__init__.py +20 -0
- dashscope/multimodal/dialog_state.py +56 -0
- dashscope/multimodal/multimodal_constants.py +28 -0
- dashscope/multimodal/multimodal_dialog.py +648 -0
- dashscope/multimodal/multimodal_request_params.py +313 -0
- dashscope/multimodal/tingwu/__init__.py +10 -0
- dashscope/multimodal/tingwu/tingwu.py +80 -0
- dashscope/multimodal/tingwu/tingwu_realtime.py +579 -0
- dashscope/nlp/__init__.py +0 -0
- dashscope/nlp/understanding.py +64 -0
- dashscope/protocol/websocket.py +3 -0
- dashscope/rerank/__init__.py +0 -0
- dashscope/rerank/text_rerank.py +69 -0
- dashscope/resources/qwen.tiktoken +151643 -0
- dashscope/threads/__init__.py +26 -0
- dashscope/threads/messages/__init__.py +0 -0
- dashscope/threads/messages/files.py +113 -0
- dashscope/threads/messages/messages.py +220 -0
- dashscope/threads/runs/__init__.py +0 -0
- dashscope/threads/runs/runs.py +501 -0
- dashscope/threads/runs/steps.py +112 -0
- dashscope/threads/thread_types.py +665 -0
- dashscope/threads/threads.py +212 -0
- dashscope/tokenizers/__init__.py +7 -0
- dashscope/tokenizers/qwen_tokenizer.py +111 -0
- dashscope/tokenizers/tokenization.py +125 -0
- dashscope/tokenizers/tokenizer.py +45 -0
- dashscope/tokenizers/tokenizer_base.py +32 -0
- dashscope/utils/__init__.py +0 -0
- dashscope/utils/message_utils.py +838 -0
- dashscope/utils/oss_utils.py +243 -0
- dashscope/utils/param_utils.py +29 -0
- dashscope/version.py +3 -1
- {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/METADATA +53 -50
- dashscope-1.25.6.dist-info/RECORD +112 -0
- {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/WHEEL +1 -1
- {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/entry_points.txt +0 -1
- {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info/licenses}/LICENSE +2 -4
- dashscope/deployment.py +0 -129
- dashscope/finetune.py +0 -149
- dashscope-1.8.0.dist-info/RECORD +0 -49
- {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/top_level.txt +0 -0
dashscope/aigc/conversation.py
CHANGED

@@ -1,12 +1,14 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import json
 from copy import deepcopy
 from http import HTTPStatus
-from typing import Any, Generator, List, Union
+from typing import Any, Dict, Generator, List, Union
 
 from dashscope.api_entities.dashscope_response import (ConversationResponse,
                                                        Message, Role)
 from dashscope.client.base_api import BaseApi
-from dashscope.common.constants import (DEPRECATED_MESSAGE, HISTORY,
-                                        PROMPT)
+from dashscope.common.constants import DEPRECATED_MESSAGE, HISTORY, PROMPT
 from dashscope.common.error import InputRequired, InvalidInput, ModelRequired
 from dashscope.common.logging import logger
 from dashscope.common.utils import _get_task_group_and_task
@@ -93,11 +95,16 @@ class Conversation(BaseApi):
     task = 'generation'
 
     class Models:
+        """@deprecated, use qwen_turbo instead"""
         qwen_v1 = 'qwen-v1'
+        """@deprecated, use qwen_plus instead"""
         qwen_plus_v1 = 'qwen-plus-v1'
 
-
-    def __init__(self, history: History = None) -> None:
+        qwen_turbo = 'qwen-turbo'
+        qwen_plus = 'qwen-plus'
+        qwen_max = 'qwen-max'
+
+    def __init__(self, history: History = None) -> None:
         """Init a chat.
 
         Args:
@@ -121,6 +128,8 @@ class Conversation(BaseApi):
              n_history: int = -1,
              api_key: str = None,
              messages: List[Message] = None,
+             plugins: Union[str, Dict[str, Any]] = None,
+             workspace: str = None,
              **kwargs
              ) -> Union[ConversationResponse, Generator[ConversationResponse, None,
                                                         None]]:
@@ -145,25 +154,59 @@ class Conversation(BaseApi):
                    [{'role': 'user',
                    'content': 'The weather is fine today.'},
                    {'role': 'assistant', 'content': 'Suitable for outings'}]
-
+            plugins (Any): The plugin config, Can be plugins config str, or dict.
+            **kwargs(qwen-turbo, qwen-plus):
                stream(bool, `optional`): Enable server-sent events
                    (ref: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events) # noqa E501
                    the result will back partially.
-
-
-
-
+                temperature(float, `optional`): Used to control the degree
+                    of randomness and diversity. Specifically, the temperature
+                    value controls the degree to which the probability distribution
+                    of each candidate word is smoothed when generating text.
+                    A higher temperature value will reduce the peak value of
+                    the probability, allowing more low-probability words to be
+                    selected, and the generated results will be more diverse;
+                    while a lower temperature value will enhance the peak value
+                    of the probability, making it easier for high-probability
+                    words to be selected, the generated results are more
+                    deterministic,range(0, 2) .[qwen-turbo,qwen-plus].
                top_p(float, `optional`): A sampling strategy, called nucleus
                    sampling, where the model considers the results of the
                    tokens with top_p probability mass. So 0.1 means only
                    the tokens comprising the top 10% probability mass are
                    considered.
+                top_k(int, `optional`): The size of the sample candidate set when generated. # noqa E501
+                    For example, when the value is 50, only the 50 highest-scoring tokens # noqa E501
+                    in a single generation form a randomly sampled candidate set. # noqa E501
+                    The larger the value, the higher the randomness generated; # noqa E501
+                    the smaller the value, the higher the certainty generated. # noqa E501
+                    The default value is 0, which means the top_k policy is # noqa E501
+                    not enabled. At this time, only the top_p policy takes effect. # noqa E501
                enable_search(bool, `optional`): Whether to enable web search(quark). # noqa E501
                    Currently works best only on the first round of conversation.
-                   Default to False.
+                    Default to False, support model: [qwen-turbo].
+                customized_model_id(str, required) The enterprise-specific
+                    large model id, which needs to be generated from the
+                    operation background of the enterprise-specific
+                    large model product, support model: [bailian-v1].
                result_format(str, `optional`): [message|text] Set result result format. # noqa E501
                    Default result is text
-
+                incremental_output(bool, `optional`): Used to control the streaming output mode. # noqa E501
+                    If true, the subsequent output will include the previously input content. # noqa E501
+                    Otherwise, the subsequent output will not include the previously output # noqa E501
+                    content. Default false.
+                stop(list[str] or list[list[int]], `optional`): Used to control the generation to stop # noqa E501
+                    when encountering setting str or token ids, the result will not include # noqa E501
+                    stop words or tokens.
+                max_tokens(int, `optional`): The maximum token num expected to be output. It should be # noqa E501
+                    noted that the length generated by the model will only be less than max_tokens, # noqa E501
+                    not necessarily equal to it. If max_tokens is set too large, the service will # noqa E501
+                    directly prompt that the length exceeds the limit. It is generally # noqa E501
+                    not recommended to set this value.
+                repetition_penalty(float, `optional`): Used to control the repeatability when generating models. # noqa E501
+                    Increasing repetition_penalty can reduce the duplication of model generation. # noqa E501
+                    1.0 means no punishment.
+            workspace (str): The dashscope workspace id.
         Raises:
            InputRequired: The prompt cannot be empty.
            InvalidInput: The history and auto_history are mutually exclusive.
@@ -180,6 +223,13 @@ class Conversation(BaseApi):
         if model is None or not model:
             raise ModelRequired('Model is required!')
         task_group, _ = _get_task_group_and_task(__name__)
+        if plugins is not None:
+            headers = kwargs.pop('headers', {})
+            if isinstance(plugins, str):
+                headers['X-DashScope-Plugin'] = plugins
+            else:
+                headers['X-DashScope-Plugin'] = json.dumps(plugins)
+            kwargs['headers'] = headers
         input, parameters = self._build_input_parameters(
             model, prompt, history, auto_history, n_history, messages,
             **kwargs)
@@ -189,6 +239,7 @@
                                  function='generation',
                                  api_key=api_key,
                                  input=input,
+                                 workspace=workspace,
                                  **parameters)
         is_stream = kwargs.get('stream', False)
         return self._handle_response(prompt, response, is_stream)
@@ -220,6 +271,14 @@
 
     def _build_input_parameters(self, model, prompt, history, auto_history,
                                 n_history, messages, **kwargs):
+        if model == Conversation.Models.qwen_v1:
+            logger.warning(
+                'Model %s is deprecated, use %s instead!' %
+                (Conversation.Models.qwen_v1, Conversation.Models.qwen_turbo))
+        if model == Conversation.Models.qwen_plus_v1:
+            logger.warning('Model %s is deprecated, use %s instead!' %
+                           (Conversation.Models.qwen_plus_v1,
+                            Conversation.Models.qwen_plus))
         parameters = {}
         if history is not None and auto_history:
             raise InvalidInput('auto_history is True, history must None')
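The `plugins` argument added above is forwarded as an `X-DashScope-Plugin` request header (a dict is serialized with `json.dumps`), and `workspace` is passed straight through to the underlying request. A minimal usage sketch of the new surface, assuming `DASHSCOPE_API_KEY` is configured; the plugin name, its options, and the workspace id are illustrative placeholders, not documented values:

```python
from http import HTTPStatus

from dashscope import Conversation

conv = Conversation()
response = conv.call(
    model=Conversation.Models.qwen_turbo,           # replaces the deprecated qwen-v1
    messages=[{'role': 'user', 'content': 'The weather is fine today.'}],
    plugins={'example_plugin': {'enabled': True}},  # dict -> X-DashScope-Plugin header (JSON)
    workspace='ws-placeholder',                     # hypothetical workspace id
    result_format='message')

if response.status_code == HTTPStatus.OK:
    print(response.output)
else:
    print(response.code, response.message)
```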
dashscope/aigc/generation.py
CHANGED

@@ -1,15 +1,20 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
 import copy
-
+import json
+from typing import Any, Dict, Generator, List, Union, AsyncGenerator
 
 from dashscope.api_entities.dashscope_response import (GenerationResponse,
                                                        Message, Role)
-from dashscope.client.base_api import BaseApi
+from dashscope.client.base_api import BaseAioApi, BaseApi
 from dashscope.common.constants import (CUSTOMIZED_MODEL_ID,
                                         DEPRECATED_MESSAGE, HISTORY, MESSAGES,
                                         PROMPT)
 from dashscope.common.error import InputRequired, ModelRequired
 from dashscope.common.logging import logger
 from dashscope.common.utils import _get_task_group_and_task
+from dashscope.utils.param_utils import ParamUtil
+from dashscope.utils.message_utils import merge_single_response
 
 
 class Generation(BaseApi):
@@ -18,10 +23,16 @@ class Generation(BaseApi):
 
     """
     class Models:
+        """@deprecated, use qwen_turbo instead"""
         qwen_v1 = 'qwen-v1'
+        """@deprecated, use qwen_plus instead"""
         qwen_plus_v1 = 'qwen-plus-v1'
+
         bailian_v1 = 'bailian-v1'
         dolly_12b_v2 = 'dolly-12b-v2'
+        qwen_turbo = 'qwen-turbo'
+        qwen_plus = 'qwen-plus'
+        qwen_max = 'qwen-max'
 
     @classmethod
     def call(
@@ -31,12 +42,14 @@ class Generation(BaseApi):
             history: list = None,
             api_key: str = None,
             messages: List[Message] = None,
+            plugins: Union[str, Dict[str, Any]] = None,
+            workspace: str = None,
             **kwargs
     ) -> Union[GenerationResponse, Generator[GenerationResponse, None, None]]:
         """Call generation model service.
 
         Args:
-            model (str): The requested model, such as
+            model (str): The requested model, such as qwen-turbo
             prompt (Any): The input prompt.
             history (list):The user provided history, deprecated
                 examples:
@@ -50,29 +63,59 @@ class Generation(BaseApi):
                    [{'role': 'user',
                    'content': 'The weather is fine today.'},
                    {'role': 'assistant', 'content': 'Suitable for outings'}]
+            plugins (Any): The plugin config. Can be plugins config str, or dict.
             **kwargs:
                stream(bool, `optional`): Enable server-sent events
                    (ref: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events) # noqa E501
-                   the result will back partially[qwen-
-
-
-
-
+                   the result will back partially[qwen-turbo,bailian-v1].
+                temperature(float, `optional`): Used to control the degree
+                    of randomness and diversity. Specifically, the temperature
+                    value controls the degree to which the probability distribution
+                    of each candidate word is smoothed when generating text.
+                    A higher temperature value will reduce the peak value of
+                    the probability, allowing more low-probability words to be
+                    selected, and the generated results will be more diverse;
+                    while a lower temperature value will enhance the peak value
+                    of the probability, making it easier for high-probability
+                    words to be selected, the generated results are more
+                    deterministic, range(0, 2) .[qwen-turbo,qwen-plus].
                top_p(float, `optional`): A sampling strategy, called nucleus
                    sampling, where the model considers the results of the
                    tokens with top_p probability mass. So 0.1 means only
                    the tokens comprising the top 10% probability mass are
-                   considered[qwen-
+                   considered[qwen-turbo,bailian-v1].
+                top_k(int, `optional`): The size of the sample candidate set when generated. # noqa E501
+                    For example, when the value is 50, only the 50 highest-scoring tokens # noqa E501
+                    in a single generation form a randomly sampled candidate set. # noqa E501
+                    The larger the value, the higher the randomness generated; # noqa E501
+                    the smaller the value, the higher the certainty generated. # noqa E501
+                    The default value is 0, which means the top_k policy is # noqa E501
+                    not enabled. At this time, only the top_p policy takes effect. # noqa E501
                enable_search(bool, `optional`): Whether to enable web search(quark). # noqa E501
                    Currently works best only on the first round of conversation.
-                   Default to False, support model: [qwen-
+                   Default to False, support model: [qwen-turbo].
                customized_model_id(str, required) The enterprise-specific
                    large model id, which needs to be generated from the
                    operation background of the enterprise-specific
                    large model product, support model: [bailian-v1].
                result_format(str, `optional`): [message|text] Set result result format. # noqa E501
                    Default result is text
-
+                incremental_output(bool, `optional`): Used to control the streaming output mode. # noqa E501
+                    If true, the subsequent output will include the previously input content. # noqa E501
+                    Otherwise, the subsequent output will not include the previously output # noqa E501
+                    content. Default false.
+                stop(list[str] or list[list[int]], `optional`): Used to control the generation to stop # noqa E501
+                    when encountering setting str or token ids, the result will not include # noqa E501
+                    stop words or tokens.
+                max_tokens(int, `optional`): The maximum token num expected to be output. It should be # noqa E501
+                    noted that the length generated by the model will only be less than max_tokens, # noqa E501
+                    not necessarily equal to it. If max_tokens is set too large, the service will # noqa E501
+                    directly prompt that the length exceeds the limit. It is generally # noqa E501
+                    not recommended to set this value.
+                repetition_penalty(float, `optional`): Used to control the repeatability when generating models. # noqa E501
+                    Increasing repetition_penalty can reduce the duplication of model generation. # noqa E501
+                    1.0 means no punishment.
+            workspace (str): The dashscope workspace id.
         Raises:
            InvalidInput: The history and auto_history are mutually exclusive.
 
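The expanded docstring above lists the tuning parameters that `Generation.call` now accepts through `**kwargs`. A short sketch with illustrative values (the prompt, key handling, and parameter choices are examples, not recommendations):

```python
from http import HTTPStatus

import dashscope
from dashscope import Generation

dashscope.api_key = 'sk-...'  # or export DASHSCOPE_API_KEY instead

response = Generation.call(
    model=Generation.Models.qwen_turbo,
    messages=[{'role': 'user', 'content': 'Suggest an outdoor activity for a sunny day.'}],
    result_format='message',   # 'text' (default) or 'message'
    temperature=0.8,           # range (0, 2)
    top_p=0.8,
    top_k=50,
    max_tokens=256,
    stop=['Observation:'],     # output stops before any stop word
    repetition_penalty=1.1)

if response.status_code == HTTPStatus.OK:
    print(response.output)
else:
    print(response.code, response.message)
```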
@@ -87,32 +130,68 @@ class Generation(BaseApi):
         if model is None or not model:
             raise ModelRequired('Model is required!')
         task_group, function = _get_task_group_and_task(__name__)
+        if plugins is not None:
+            headers = kwargs.pop('headers', {})
+            if isinstance(plugins, str):
+                headers['X-DashScope-Plugin'] = plugins
+            else:
+                headers['X-DashScope-Plugin'] = json.dumps(plugins)
+            kwargs['headers'] = headers
         input, parameters = cls._build_input_parameters(
             model, prompt, history, messages, **kwargs)
+
+        is_stream = parameters.get('stream', False)
+        # Check if we need to merge incremental output
+        is_incremental_output = kwargs.get('incremental_output', None)
+        to_merge_incremental_output = False
+        if (ParamUtil.should_modify_incremental_output(model) and
+                is_stream and is_incremental_output is False):
+            to_merge_incremental_output = True
+            parameters['incremental_output'] = True
+
+        # Pass incremental_to_full flag via headers user-agent
+        if 'headers' not in parameters:
+            parameters['headers'] = {}
+        flag = '1' if to_merge_incremental_output else '0'
+        parameters['headers']['user-agent'] = f'incremental_to_full/{flag}'
+
         response = super().call(model=model,
                                 task_group=task_group,
                                 task=Generation.task,
                                 function=function,
                                 api_key=api_key,
                                 input=input,
+                                workspace=workspace,
                                 **parameters)
-        is_stream = kwargs.get('stream', False)
         if is_stream:
-            return (GenerationResponse.from_api_response(rsp)
-                    for rsp in response)
+            if to_merge_incremental_output:
+                # Extract n parameter for merge logic
+                n = parameters.get('n', 1)
+                return cls._merge_generation_response(response, n)
+            else:
+                return (GenerationResponse.from_api_response(rsp)
+                        for rsp in response)
         else:
             return GenerationResponse.from_api_response(response)
 
     @classmethod
     def _build_input_parameters(cls, model, prompt, history, messages,
                                 **kwargs):
+        if model == Generation.Models.qwen_v1:
+            logger.warning(
+                'Model %s is deprecated, use %s instead!' %
+                (Generation.Models.qwen_v1, Generation.Models.qwen_turbo))
+        if model == Generation.Models.qwen_plus_v1:
+            logger.warning(
+                'Model %s is deprecated, use %s instead!' %
+                (Generation.Models.qwen_plus_v1, Generation.Models.qwen_plus))
         parameters = {}
         input = {}
         if history is not None:
             logger.warning(DEPRECATED_MESSAGE)
             input[HISTORY] = history
         if prompt is not None and prompt:
-            input[PROMPT] = prompt
+            input[PROMPT] = prompt
         elif messages is not None:
             msgs = copy.deepcopy(messages)
             if prompt is not None and prompt:
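The block above changes streaming behaviour for certain models: when `ParamUtil.should_modify_incremental_output(model)` is true and the caller asked for `stream=True` with `incremental_output=False`, the request is silently switched to incremental output and `_merge_generation_response` re-accumulates the chunks, so each yielded `GenerationResponse` still carries the full text generated so far. A conceptual sketch of that accumulation using plain strings (a stand-in for the library's `merge_single_response` helper, not its actual implementation):

```python
from typing import Iterable, Iterator


def merge_incremental(deltas: Iterable[str]) -> Iterator[str]:
    """Turn delta chunks ('He', 'llo', '!') into cumulative chunks ('He', 'Hello', 'Hello!')."""
    accumulated = ''
    for delta in deltas:
        accumulated += delta
        yield accumulated


assert list(merge_incremental(['He', 'llo', '!'])) == ['He', 'Hello', 'Hello!']
```

From the caller's side nothing changes: iterating the generator returned by `Generation.call(..., stream=True, incremental_output=False)` still yields responses whose output contains everything produced up to that point.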
@@ -120,7 +199,7 @@ class Generation(BaseApi):
                 input = {MESSAGES: msgs}
             else:
                 input[PROMPT] = prompt
-
+
         if model.startswith('qwen'):
             enable_search = kwargs.pop('enable_search', False)
             if enable_search:
@@ -133,3 +212,196 @@ class Generation(BaseApi):
             input[CUSTOMIZED_MODEL_ID] = customized_model_id
 
         return input, {**parameters, **kwargs}
+
+    @classmethod
+    def _merge_generation_response(cls, response, n=1) -> Generator[GenerationResponse, None, None]:
+        """Merge incremental response chunks to simulate non-incremental output."""
+        accumulated_data = {}
+        for rsp in response:
+            parsed_response = GenerationResponse.from_api_response(rsp)
+            result = merge_single_response(parsed_response, accumulated_data, n)
+            if result is True:
+                yield parsed_response
+            elif isinstance(result, list):
+                # Multiple responses to yield (for n>1 non-stop cases)
+                for resp in result:
+                    yield resp
+
+
+class AioGeneration(BaseAioApi):
+    task = 'text-generation'
+    """API for AI-Generated Content(AIGC) models.
+
+    """
+    class Models:
+        """@deprecated, use qwen_turbo instead"""
+        qwen_v1 = 'qwen-v1'
+        """@deprecated, use qwen_plus instead"""
+        qwen_plus_v1 = 'qwen-plus-v1'
+
+        bailian_v1 = 'bailian-v1'
+        dolly_12b_v2 = 'dolly-12b-v2'
+        qwen_turbo = 'qwen-turbo'
+        qwen_plus = 'qwen-plus'
+        qwen_max = 'qwen-max'
+
+    @classmethod
+    async def call(
+            cls,
+            model: str,
+            prompt: Any = None,
+            history: list = None,
+            api_key: str = None,
+            messages: List[Message] = None,
+            plugins: Union[str, Dict[str, Any]] = None,
+            workspace: str = None,
+            **kwargs
+    ) -> Union[GenerationResponse, AsyncGenerator[GenerationResponse, None]]:
+        """Call generation model service.
+
+        Args:
+            model (str): The requested model, such as qwen-turbo
+            prompt (Any): The input prompt.
+            history (list):The user provided history, deprecated
+                examples:
+                    [{'user':'The weather is fine today.',
+                    'bot': 'Suitable for outings'}].
+                Defaults to None.
+            api_key (str, optional): The api api_key, can be None,
+                if None, will get by default rule(TODO: api key doc).
+            messages (list): The generation messages.
+                examples:
+                    [{'role': 'user',
+                    'content': 'The weather is fine today.'},
+                    {'role': 'assistant', 'content': 'Suitable for outings'}]
+            plugins (Any): The plugin config. Can be plugins config str, or dict.
+            **kwargs:
+                stream(bool, `optional`): Enable server-sent events
+                    (ref: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events) # noqa E501
+                    the result will back partially[qwen-turbo,bailian-v1].
+                temperature(float, `optional`): Used to control the degree
+                    of randomness and diversity. Specifically, the temperature
+                    value controls the degree to which the probability distribution
+                    of each candidate word is smoothed when generating text.
+                    A higher temperature value will reduce the peak value of
+                    the probability, allowing more low-probability words to be
+                    selected, and the generated results will be more diverse;
+                    while a lower temperature value will enhance the peak value
+                    of the probability, making it easier for high-probability
+                    words to be selected, the generated results are more
+                    deterministic, range(0, 2) .[qwen-turbo,qwen-plus].
+                top_p(float, `optional`): A sampling strategy, called nucleus
+                    sampling, where the model considers the results of the
+                    tokens with top_p probability mass. So 0.1 means only
+                    the tokens comprising the top 10% probability mass are
+                    considered[qwen-turbo,bailian-v1].
+                top_k(int, `optional`): The size of the sample candidate set when generated. # noqa E501
+                    For example, when the value is 50, only the 50 highest-scoring tokens # noqa E501
+                    in a single generation form a randomly sampled candidate set. # noqa E501
+                    The larger the value, the higher the randomness generated; # noqa E501
+                    the smaller the value, the higher the certainty generated. # noqa E501
+                    The default value is 0, which means the top_k policy is # noqa E501
+                    not enabled. At this time, only the top_p policy takes effect. # noqa E501
+                enable_search(bool, `optional`): Whether to enable web search(quark). # noqa E501
+                    Currently works best only on the first round of conversation.
+                    Default to False, support model: [qwen-turbo].
+                customized_model_id(str, required) The enterprise-specific
+                    large model id, which needs to be generated from the
+                    operation background of the enterprise-specific
+                    large model product, support model: [bailian-v1].
+                result_format(str, `optional`): [message|text] Set result result format. # noqa E501
+                    Default result is text
+                incremental_output(bool, `optional`): Used to control the streaming output mode. # noqa E501
+                    If true, the subsequent output will include the previously input content. # noqa E501
+                    Otherwise, the subsequent output will not include the previously output # noqa E501
+                    content. Default false.
+                stop(list[str] or list[list[int]], `optional`): Used to control the generation to stop # noqa E501
+                    when encountering setting str or token ids, the result will not include # noqa E501
+                    stop words or tokens.
+                max_tokens(int, `optional`): The maximum token num expected to be output. It should be # noqa E501
+                    noted that the length generated by the model will only be less than max_tokens, # noqa E501
+                    not necessarily equal to it. If max_tokens is set too large, the service will # noqa E501
+                    directly prompt that the length exceeds the limit. It is generally # noqa E501
+                    not recommended to set this value.
+                repetition_penalty(float, `optional`): Used to control the repeatability when generating models. # noqa E501
+                    Increasing repetition_penalty can reduce the duplication of model generation. # noqa E501
+                    1.0 means no punishment.
+            workspace (str): The dashscope workspace id.
+        Raises:
+            InvalidInput: The history and auto_history are mutually exclusive.
+
+        Returns:
+            Union[GenerationResponse,
+                  AsyncGenerator[GenerationResponse, None]]: If
+            stream is True, return AsyncGenerator, otherwise GenerationResponse.
+        """
+        if (prompt is None or not prompt) and (messages is None
+                                               or not messages):
+            raise InputRequired('prompt or messages is required!')
+        if model is None or not model:
+            raise ModelRequired('Model is required!')
+        task_group, function = _get_task_group_and_task(__name__)
+        if plugins is not None:
+            headers = kwargs.pop('headers', {})
+            if isinstance(plugins, str):
+                headers['X-DashScope-Plugin'] = plugins
+            else:
+                headers['X-DashScope-Plugin'] = json.dumps(plugins)
+            kwargs['headers'] = headers
+        input, parameters = Generation._build_input_parameters(
+            model, prompt, history, messages, **kwargs)
+
+        is_stream = parameters.get('stream', False)
+        # Check if we need to merge incremental output
+        is_incremental_output = kwargs.get('incremental_output', None)
+        to_merge_incremental_output = False
+        if (ParamUtil.should_modify_incremental_output(model) and
+                is_stream and is_incremental_output is False):
+            to_merge_incremental_output = True
+            parameters['incremental_output'] = True
+
+        # Pass incremental_to_full flag via headers user-agent
+        if 'headers' not in parameters:
+            parameters['headers'] = {}
+        flag = '1' if to_merge_incremental_output else '0'
+        parameters['headers']['user-agent'] = f'incremental_to_full/{flag}'
+
+        response = await super().call(model=model,
+                                      task_group=task_group,
+                                      task=Generation.task,
+                                      function=function,
+                                      api_key=api_key,
+                                      input=input,
+                                      workspace=workspace,
+                                      **parameters)
+        if is_stream:
+            if to_merge_incremental_output:
+                # Extract n parameter for merge logic
+                n = parameters.get('n', 1)
+                return cls._merge_generation_response(response, n)
+            else:
+                return cls._stream_responses(response)
+        else:
+            return GenerationResponse.from_api_response(response)
+
+    @classmethod
+    async def _stream_responses(cls, response) -> AsyncGenerator[GenerationResponse, None]:
+        """Convert async response stream to GenerationResponse stream."""
+        # Type hint: when stream=True, response is actually an AsyncIterable
+        async for rsp in response:  # type: ignore
+            yield GenerationResponse.from_api_response(rsp)
+
+    @classmethod
+    async def _merge_generation_response(cls, response, n=1) -> AsyncGenerator[GenerationResponse, None]:
+        """Async version of merge incremental response chunks."""
+        accumulated_data = {}
+
+        async for rsp in response:  # type: ignore
+            parsed_response = GenerationResponse.from_api_response(rsp)
+            result = merge_single_response(parsed_response, accumulated_data, n)
+            if result is True:
+                yield parsed_response
+            elif isinstance(result, list):
+                # Multiple responses to yield (for n>1 non-stop cases)
+                for resp in result:
+                    yield resp
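`AioGeneration` mirrors `Generation` with an awaitable `call`; with `stream=True` the awaited result is an async generator of `GenerationResponse` objects. A usage sketch, assuming `DASHSCOPE_API_KEY` is set and importing the class from the module defined above (prompt text is illustrative):

```python
import asyncio

from dashscope.aigc.generation import AioGeneration


async def main() -> None:
    # Non-streaming: a single GenerationResponse.
    response = await AioGeneration.call(
        model='qwen-turbo',
        messages=[{'role': 'user', 'content': 'Say hello.'}],
        result_format='message')
    print(response.output)

    # Streaming: the awaited result is an async generator of chunks.
    stream = await AioGeneration.call(
        model='qwen-turbo',
        prompt='Write one sentence about the sea.',
        stream=True,
        incremental_output=True)
    async for chunk in stream:
        print(chunk.output)


asyncio.run(main())
```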