dashscope 1.8.0__py3-none-any.whl → 1.25.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. dashscope/__init__.py +61 -14
  2. dashscope/aigc/__init__.py +10 -3
  3. dashscope/aigc/chat_completion.py +282 -0
  4. dashscope/aigc/code_generation.py +145 -0
  5. dashscope/aigc/conversation.py +71 -12
  6. dashscope/aigc/generation.py +288 -16
  7. dashscope/aigc/image_synthesis.py +473 -31
  8. dashscope/aigc/multimodal_conversation.py +299 -14
  9. dashscope/aigc/video_synthesis.py +610 -0
  10. dashscope/api_entities/aiohttp_request.py +8 -5
  11. dashscope/api_entities/api_request_data.py +4 -2
  12. dashscope/api_entities/api_request_factory.py +68 -20
  13. dashscope/api_entities/base_request.py +20 -3
  14. dashscope/api_entities/chat_completion_types.py +344 -0
  15. dashscope/api_entities/dashscope_response.py +243 -15
  16. dashscope/api_entities/encryption.py +179 -0
  17. dashscope/api_entities/http_request.py +216 -62
  18. dashscope/api_entities/websocket_request.py +43 -34
  19. dashscope/app/__init__.py +5 -0
  20. dashscope/app/application.py +203 -0
  21. dashscope/app/application_response.py +246 -0
  22. dashscope/assistants/__init__.py +16 -0
  23. dashscope/assistants/assistant_types.py +175 -0
  24. dashscope/assistants/assistants.py +311 -0
  25. dashscope/assistants/files.py +197 -0
  26. dashscope/audio/__init__.py +4 -2
  27. dashscope/audio/asr/__init__.py +17 -1
  28. dashscope/audio/asr/asr_phrase_manager.py +203 -0
  29. dashscope/audio/asr/recognition.py +167 -27
  30. dashscope/audio/asr/transcription.py +107 -14
  31. dashscope/audio/asr/translation_recognizer.py +1006 -0
  32. dashscope/audio/asr/vocabulary.py +177 -0
  33. dashscope/audio/qwen_asr/__init__.py +7 -0
  34. dashscope/audio/qwen_asr/qwen_transcription.py +189 -0
  35. dashscope/audio/qwen_omni/__init__.py +11 -0
  36. dashscope/audio/qwen_omni/omni_realtime.py +524 -0
  37. dashscope/audio/qwen_tts/__init__.py +5 -0
  38. dashscope/audio/qwen_tts/speech_synthesizer.py +77 -0
  39. dashscope/audio/qwen_tts_realtime/__init__.py +10 -0
  40. dashscope/audio/qwen_tts_realtime/qwen_tts_realtime.py +355 -0
  41. dashscope/audio/tts/__init__.py +2 -0
  42. dashscope/audio/tts/speech_synthesizer.py +5 -0
  43. dashscope/audio/tts_v2/__init__.py +12 -0
  44. dashscope/audio/tts_v2/enrollment.py +179 -0
  45. dashscope/audio/tts_v2/speech_synthesizer.py +886 -0
  46. dashscope/cli.py +157 -37
  47. dashscope/client/base_api.py +652 -87
  48. dashscope/common/api_key.py +2 -0
  49. dashscope/common/base_type.py +135 -0
  50. dashscope/common/constants.py +13 -16
  51. dashscope/common/env.py +2 -0
  52. dashscope/common/error.py +58 -22
  53. dashscope/common/logging.py +2 -0
  54. dashscope/common/message_manager.py +2 -0
  55. dashscope/common/utils.py +276 -46
  56. dashscope/customize/__init__.py +0 -0
  57. dashscope/customize/customize_types.py +192 -0
  58. dashscope/customize/deployments.py +146 -0
  59. dashscope/customize/finetunes.py +234 -0
  60. dashscope/embeddings/__init__.py +5 -1
  61. dashscope/embeddings/batch_text_embedding.py +208 -0
  62. dashscope/embeddings/batch_text_embedding_response.py +65 -0
  63. dashscope/embeddings/multimodal_embedding.py +118 -10
  64. dashscope/embeddings/text_embedding.py +13 -1
  65. dashscope/{file.py → files.py} +19 -4
  66. dashscope/io/input_output.py +2 -0
  67. dashscope/model.py +11 -2
  68. dashscope/models.py +43 -0
  69. dashscope/multimodal/__init__.py +20 -0
  70. dashscope/multimodal/dialog_state.py +56 -0
  71. dashscope/multimodal/multimodal_constants.py +28 -0
  72. dashscope/multimodal/multimodal_dialog.py +648 -0
  73. dashscope/multimodal/multimodal_request_params.py +313 -0
  74. dashscope/multimodal/tingwu/__init__.py +10 -0
  75. dashscope/multimodal/tingwu/tingwu.py +80 -0
  76. dashscope/multimodal/tingwu/tingwu_realtime.py +579 -0
  77. dashscope/nlp/__init__.py +0 -0
  78. dashscope/nlp/understanding.py +64 -0
  79. dashscope/protocol/websocket.py +3 -0
  80. dashscope/rerank/__init__.py +0 -0
  81. dashscope/rerank/text_rerank.py +69 -0
  82. dashscope/resources/qwen.tiktoken +151643 -0
  83. dashscope/threads/__init__.py +26 -0
  84. dashscope/threads/messages/__init__.py +0 -0
  85. dashscope/threads/messages/files.py +113 -0
  86. dashscope/threads/messages/messages.py +220 -0
  87. dashscope/threads/runs/__init__.py +0 -0
  88. dashscope/threads/runs/runs.py +501 -0
  89. dashscope/threads/runs/steps.py +112 -0
  90. dashscope/threads/thread_types.py +665 -0
  91. dashscope/threads/threads.py +212 -0
  92. dashscope/tokenizers/__init__.py +7 -0
  93. dashscope/tokenizers/qwen_tokenizer.py +111 -0
  94. dashscope/tokenizers/tokenization.py +125 -0
  95. dashscope/tokenizers/tokenizer.py +45 -0
  96. dashscope/tokenizers/tokenizer_base.py +32 -0
  97. dashscope/utils/__init__.py +0 -0
  98. dashscope/utils/message_utils.py +838 -0
  99. dashscope/utils/oss_utils.py +243 -0
  100. dashscope/utils/param_utils.py +29 -0
  101. dashscope/version.py +3 -1
  102. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/METADATA +53 -50
  103. dashscope-1.25.6.dist-info/RECORD +112 -0
  104. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/WHEEL +1 -1
  105. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/entry_points.txt +0 -1
  106. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info/licenses}/LICENSE +2 -4
  107. dashscope/deployment.py +0 -129
  108. dashscope/finetune.py +0 -149
  109. dashscope-1.8.0.dist-info/RECORD +0 -49
  110. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,17 @@
1
- from typing import Generator, List, Union
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
2
 
3
- from dashscope.api_entities.dashscope_response import (MultiModalConversationResponse)
4
- from dashscope.client.base_api import BaseApi
3
+ import copy
4
+ from typing import AsyncGenerator, Generator, List, Union
5
+
6
+ from dashscope.api_entities.dashscope_response import \
7
+ MultiModalConversationResponse
8
+ from dashscope.client.base_api import BaseAioApi, BaseApi
5
9
  from dashscope.common.error import InputRequired, ModelRequired
6
10
  from dashscope.common.utils import _get_task_group_and_task
11
+ from dashscope.utils.oss_utils import preprocess_message_element
12
+ from dashscope.utils.param_utils import ParamUtil
13
+ from dashscope.utils.message_utils import merge_multimodal_single_response
14
+
7
15
 
8
16
  class MultiModalConversation(BaseApi):
9
17
  """MultiModal conversational robot interface.
@@ -18,10 +26,15 @@ class MultiModalConversation(BaseApi):
18
26
  def call(
19
27
  cls,
20
28
  model: str,
21
- messages: List,
29
+ messages: List = None,
22
30
  api_key: str = None,
31
+ workspace: str = None,
32
+ text: str = None,
33
+ voice: str = None,
34
+ language_type: str = None,
23
35
  **kwargs
24
- ) -> Union[MultiModalConversationResponse, Generator[MultiModalConversationResponse, None, None]]:
36
+ ) -> Union[MultiModalConversationResponse, Generator[
37
+ MultiModalConversationResponse, None, None]]:
25
38
  """Call the conversation model service.
26
39
 
27
40
  Args:
@@ -46,20 +59,26 @@ class MultiModalConversation(BaseApi):
46
59
  api_key (str, optional): The api api_key, can be None,
47
60
  if None, will retrieve by rule [1].
48
61
  [1]: https://help.aliyun.com/zh/dashscope/developer-reference/api-key-settings. # noqa E501
62
+ workspace (str): The dashscope workspace id.
63
+ text (str): The text to generate.
64
+ voice (str): The voice name of qwen tts, include 'Cherry'/'Ethan'/'Sunny'/'Dylan' and so on,
65
+ you can get the total voice list : https://help.aliyun.com/zh/model-studio/qwen-tts.
66
+ language_type (str): The synthesized language type, default is 'auto', useful for [qwen3-tts].
49
67
  **kwargs:
50
68
  stream(bool, `optional`): Enable server-sent events
51
69
  (ref: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events) # noqa E501
52
- the result will back partially[qwen-v1,bailian-v1].
70
+ the result will back partially[qwen-turbo,bailian-v1].
53
71
  max_length(int, `optional`): The maximum length of tokens to
54
72
  generate. The token count of your prompt plus max_length
55
73
  cannot exceed the model's context length. Most models
56
- have a context length of 2000 tokens[qwen-v1,bailian-v1]. # noqa E501
74
+ have a context length of 2000 tokens[qwen-turbo,bailian-v1]. # noqa E501
57
75
  top_p(float, `optional`): A sampling strategy, called nucleus
58
76
  sampling, where the model considers the results of the
59
77
  tokens with top_p probability mass. So 0.1 means only
60
78
  the tokens comprising the top 10% probability mass are
61
- considered[qwen-v1,bailian-v1].
62
- top_k(float, `optional`):
79
+ considered[qwen-turbo,bailian-v1].
80
+ top_k(float, `optional`):
81
+
63
82
 
64
83
  Raises:
65
84
  InvalidInput: The history and auto_history are mutually exclusive.
@@ -69,22 +88,288 @@ class MultiModalConversation(BaseApi):
69
88
  Generator[MultiModalConversationResponse, None, None]]: If
70
89
  stream is True, return Generator, otherwise MultiModalConversationResponse.
71
90
  """
72
- if (messages is None or not messages):
73
- raise InputRequired('prompt or messages is required!')
74
91
  if model is None or not model:
75
92
  raise ModelRequired('Model is required!')
76
93
  task_group, _ = _get_task_group_and_task(__name__)
77
- input = {'messages': messages}
94
+ input = {}
95
+ msg_copy = None
96
+
97
+ if messages is not None and messages:
98
+ msg_copy = copy.deepcopy(messages)
99
+ has_upload = cls._preprocess_messages(model, msg_copy, api_key)
100
+ if has_upload:
101
+ headers = kwargs.pop('headers', {})
102
+ headers['X-DashScope-OssResourceResolve'] = 'enable'
103
+ kwargs['headers'] = headers
104
+
105
+ if text is not None and text:
106
+ input.update({'text': text})
107
+ if voice is not None and voice:
108
+ input.update({'voice': voice})
109
+ if language_type is not None and language_type:
110
+ input.update({'language_type': language_type})
111
+ if msg_copy is not None:
112
+ input.update({'messages': msg_copy})
113
+
114
+ # Check if we need to merge incremental output
115
+ is_incremental_output = kwargs.get('incremental_output', None)
116
+ to_merge_incremental_output = False
117
+ is_stream = kwargs.get('stream', False)
118
+ if (ParamUtil.should_modify_incremental_output(model) and
119
+ is_stream and is_incremental_output is not None and is_incremental_output is False):
120
+ to_merge_incremental_output = True
121
+ kwargs['incremental_output'] = True
122
+
123
+ # Pass incremental_to_full flag via headers user-agent
124
+ if 'headers' not in kwargs:
125
+ kwargs['headers'] = {}
126
+ flag = '1' if to_merge_incremental_output else '0'
127
+ kwargs['headers']['user-agent'] = f'incremental_to_full/{flag}'
128
+
78
129
  response = super().call(model=model,
79
130
  task_group=task_group,
80
131
  task=MultiModalConversation.task,
81
132
  function=MultiModalConversation.function,
82
133
  api_key=api_key,
83
134
  input=input,
135
+ workspace=workspace,
84
136
  **kwargs)
137
+ if is_stream:
138
+ if to_merge_incremental_output:
139
+ # Extract n parameter for merge logic
140
+ n = kwargs.get('n', 1)
141
+ return cls._merge_multimodal_response(response, n)
142
+ else:
143
+ return (MultiModalConversationResponse.from_api_response(rsp)
144
+ for rsp in response)
145
+ else:
146
+ return MultiModalConversationResponse.from_api_response(response)
147
+
148
+ @classmethod
149
+ def _preprocess_messages(cls, model: str, messages: List[dict],
150
+ api_key: str):
151
+ """
152
+ messages = [
153
+ {
154
+ "role": "user",
155
+ "content": [
156
+ {"image": ""},
157
+ {"text": ""},
158
+ ]
159
+ }
160
+ ]
161
+ """
162
+ has_upload = False
163
+ upload_certificate = None
164
+
165
+ for message in messages:
166
+ content = message['content']
167
+ for elem in content:
168
+ if not isinstance(elem,
169
+ (int, float, bool, str, bytes, bytearray)):
170
+ is_upload, upload_certificate = preprocess_message_element(
171
+ model, elem, api_key, upload_certificate)
172
+ if is_upload and not has_upload:
173
+ has_upload = True
174
+ return has_upload
175
+
176
+ @classmethod
177
+ def _merge_multimodal_response(cls, response, n=1) -> Generator[MultiModalConversationResponse, None, None]:
178
+ """Merge incremental response chunks to simulate non-incremental output."""
179
+ accumulated_data = {}
180
+
181
+ for rsp in response:
182
+ parsed_response = MultiModalConversationResponse.from_api_response(rsp)
183
+ result = merge_multimodal_single_response(parsed_response, accumulated_data, n)
184
+ if result is True:
185
+ yield parsed_response
186
+ elif isinstance(result, list):
187
+ # Multiple responses to yield (for n>1 non-stop cases)
188
+ for resp in result:
189
+ yield resp
190
+
191
+
192
+ class AioMultiModalConversation(BaseAioApi):
193
+ """Async MultiModal conversational robot interface.
194
+ """
195
+ task = 'multimodal-generation'
196
+ function = 'generation'
197
+
198
+ class Models:
199
+ qwen_vl_chat_v1 = 'qwen-vl-chat-v1'
200
+
201
+ @classmethod
202
+ async def call(
203
+ cls,
204
+ model: str,
205
+ messages: List = None,
206
+ api_key: str = None,
207
+ workspace: str = None,
208
+ text: str = None,
209
+ voice: str = None,
210
+ language_type: str = None,
211
+ **kwargs
212
+ ) -> Union[MultiModalConversationResponse, AsyncGenerator[
213
+ MultiModalConversationResponse, None]]:
214
+ """Call the conversation model service asynchronously.
215
+
216
+ Args:
217
+ model (str): The requested model, such as 'qwen-multimodal-v1'
218
+ messages (list): The generation messages.
219
+ examples:
220
+ [
221
+ {
222
+ "role": "system",
223
+ "content": [
224
+ {"text": "你是达摩院的生活助手机器人。"}
225
+ ]
226
+ },
227
+ {
228
+ "role": "user",
229
+ "content": [
230
+ {"image": "http://XXXX"},
231
+ {"text": "这个图片是哪里?"},
232
+ ]
233
+ }
234
+ ]
235
+ api_key (str, optional): The api api_key, can be None,
236
+ if None, will retrieve by rule [1].
237
+ [1]: https://help.aliyun.com/zh/dashscope/developer-reference/api-key-settings. # noqa E501
238
+ workspace (str): The dashscope workspace id.
239
+ text (str): The text to generate.
240
+ voice (str): The voice name of qwen tts, include 'Cherry'/'Ethan'/'Sunny'/'Dylan' and so on,
241
+ you can get the total voice list : https://help.aliyun.com/zh/model-studio/qwen-tts.
242
+ language_type (str): The synthesized language type, default is 'auto', useful for [qwen3-tts].
243
+ **kwargs:
244
+ stream(bool, `optional`): Enable server-sent events
245
+ (ref: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events) # noqa E501
246
+ the result will back partially[qwen-turbo,bailian-v1].
247
+ max_length(int, `optional`): The maximum length of tokens to
248
+ generate. The token count of your prompt plus max_length
249
+ cannot exceed the model's context length. Most models
250
+ have a context length of 2000 tokens[qwen-turbo,bailian-v1]. # noqa E501
251
+ top_p(float, `optional`): A sampling strategy, called nucleus
252
+ sampling, where the model considers the results of the
253
+ tokens with top_p probability mass. So 0.1 means only
254
+ the tokens comprising the top 10% probability mass are
255
+ considered[qwen-turbo,bailian-v1].
256
+ top_k(float, `optional`):
257
+
258
+ Raises:
259
+ InvalidInput: The history and auto_history are mutually exclusive.
260
+
261
+ Returns:
262
+ Union[MultiModalConversationResponse,
263
+ AsyncGenerator[MultiModalConversationResponse, None]]: If
264
+ stream is True, return AsyncGenerator, otherwise MultiModalConversationResponse.
265
+ """
266
+ if model is None or not model:
267
+ raise ModelRequired('Model is required!')
268
+ task_group, _ = _get_task_group_and_task(__name__)
269
+ input = {}
270
+ msg_copy = None
271
+
272
+ if messages is not None and messages:
273
+ msg_copy = copy.deepcopy(messages)
274
+ has_upload = cls._preprocess_messages(model, msg_copy, api_key)
275
+ if has_upload:
276
+ headers = kwargs.pop('headers', {})
277
+ headers['X-DashScope-OssResourceResolve'] = 'enable'
278
+ kwargs['headers'] = headers
279
+
280
+ if text is not None and text:
281
+ input.update({'text': text})
282
+ if voice is not None and voice:
283
+ input.update({'voice': voice})
284
+ if language_type is not None and language_type:
285
+ input.update({'language_type': language_type})
286
+ if msg_copy is not None:
287
+ input.update({'messages': msg_copy})
288
+
289
+ # Check if we need to merge incremental output
290
+ is_incremental_output = kwargs.get('incremental_output', None)
291
+ to_merge_incremental_output = False
85
292
  is_stream = kwargs.get('stream', False)
293
+ if (ParamUtil.should_modify_incremental_output(model) and
294
+ is_stream and is_incremental_output is not None and is_incremental_output is False):
295
+ to_merge_incremental_output = True
296
+ kwargs['incremental_output'] = True
297
+
298
+ # Pass incremental_to_full flag via headers user-agent
299
+ if 'headers' not in kwargs:
300
+ kwargs['headers'] = {}
301
+ flag = '1' if to_merge_incremental_output else '0'
302
+ kwargs['headers']['user-agent'] = (
303
+ kwargs['headers'].get('user-agent', '') +
304
+ f'; incremental_to_full/{flag}'
305
+ )
306
+
307
+ response = await super().call(model=model,
308
+ task_group=task_group,
309
+ task=AioMultiModalConversation.task,
310
+ function=AioMultiModalConversation.function,
311
+ api_key=api_key,
312
+ input=input,
313
+ workspace=workspace,
314
+ **kwargs)
86
315
  if is_stream:
87
- return (MultiModalConversationResponse.from_api_response(rsp)
88
- for rsp in response)
316
+ if to_merge_incremental_output:
317
+ # Extract n parameter for merge logic
318
+ n = kwargs.get('n', 1)
319
+ return cls._merge_multimodal_response(response, n)
320
+ else:
321
+ return cls._stream_responses(response)
89
322
  else:
90
323
  return MultiModalConversationResponse.from_api_response(response)
324
+
325
+ @classmethod
326
+ def _preprocess_messages(cls, model: str, messages: List[dict],
327
+ api_key: str):
328
+ """
329
+ messages = [
330
+ {
331
+ "role": "user",
332
+ "content": [
333
+ {"image": ""},
334
+ {"text": ""},
335
+ ]
336
+ }
337
+ ]
338
+ """
339
+ has_upload = False
340
+ upload_certificate = None
341
+
342
+ for message in messages:
343
+ content = message['content']
344
+ for elem in content:
345
+ if not isinstance(elem,
346
+ (int, float, bool, str, bytes, bytearray)):
347
+ is_upload, upload_certificate = preprocess_message_element(
348
+ model, elem, api_key, upload_certificate)
349
+ if is_upload and not has_upload:
350
+ has_upload = True
351
+ return has_upload
352
+
353
+ @classmethod
354
+ async def _stream_responses(cls, response) -> AsyncGenerator[MultiModalConversationResponse, None]:
355
+ """Convert async response stream to MultiModalConversationResponse stream."""
356
+ # Type hint: when stream=True, response is actually an AsyncIterable
357
+ async for rsp in response: # type: ignore
358
+ yield MultiModalConversationResponse.from_api_response(rsp)
359
+
360
+ @classmethod
361
+ async def _merge_multimodal_response(cls, response, n=1) -> AsyncGenerator[MultiModalConversationResponse, None]:
362
+ """Async version of merge incremental response chunks."""
363
+ accumulated_data = {}
364
+
365
+ async for rsp in response:
366
+ parsed_response = MultiModalConversationResponse.from_api_response(rsp)
367
+ result = merge_multimodal_single_response(parsed_response, accumulated_data, n)
368
+ if result is True:
369
+ yield parsed_response
370
+ elif isinstance(result, list):
371
+ # Multiple responses to yield (for n>1 non-stop cases)
372
+ for resp in result:
373
+ yield resp
374
+
375
+