dashscope-1.12.0-py3-none-any.whl → dashscope-1.13.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dashscope/__init__.py +8 -27
- dashscope/aigc/code_generation.py +14 -17
- dashscope/aigc/conversation.py +26 -20
- dashscope/aigc/generation.py +11 -1
- dashscope/aigc/multimodal_conversation.py +39 -4
- dashscope/api_entities/api_request_data.py +2 -2
- dashscope/api_entities/api_request_factory.py +4 -10
- dashscope/api_entities/dashscope_response.py +18 -9
- dashscope/audio/asr/__init__.py +5 -1
- dashscope/audio/asr/asr_phrase_manager.py +179 -0
- dashscope/audio/asr/recognition.py +61 -3
- dashscope/audio/asr/transcription.py +55 -2
- dashscope/client/base_api.py +13 -8
- dashscope/common/constants.py +5 -2
- dashscope/common/error.py +4 -0
- dashscope/common/utils.py +12 -2
- dashscope/embeddings/batch_text_embedding.py +3 -2
- dashscope/embeddings/multimodal_embedding.py +37 -9
- dashscope/embeddings/text_embedding.py +1 -0
- dashscope/finetune.py +2 -0
- dashscope/nlp/understanding.py +11 -16
- dashscope/tokenizers/__init__.py +1 -1
- dashscope/utils/__init__.py +0 -0
- dashscope/utils/oss_utils.py +133 -0
- dashscope/version.py +1 -1
- {dashscope-1.12.0.dist-info → dashscope-1.13.1.dist-info}/METADATA +1 -1
- dashscope-1.13.1.dist-info/RECORD +59 -0
- dashscope-1.12.0.dist-info/RECORD +0 -56
- {dashscope-1.12.0.dist-info → dashscope-1.13.1.dist-info}/LICENSE +0 -0
- {dashscope-1.12.0.dist-info → dashscope-1.13.1.dist-info}/WHEEL +0 -0
- {dashscope-1.12.0.dist-info → dashscope-1.13.1.dist-info}/entry_points.txt +0 -0
- {dashscope-1.12.0.dist-info → dashscope-1.13.1.dist-info}/top_level.txt +0 -0
dashscope/audio/asr/recognition.py
CHANGED

@@ -112,8 +112,17 @@ class Recognition(BaseApi):
         sample_rate (int): The input audio sample rate for speech recognition.

         **kwargs:
+            phrase_id (list, `optional`): The ID of phrase.
             disfluency_removal_enabled(bool, `optional`): Filter mood words,
                 turned off by default.
+            diarization_enabled (bool, `optional`): Speech auto diarization,
+                turned off by default.
+            speaker_count (int, `optional`): The number of speakers.
+            timestamp_alignment_enabled (bool, `optional`): Timestamp-alignment
+                calibration, turned off by default.
+            special_word_filter(str, `optional`): Sensitive word filter.
+            audio_event_detection_enabled(bool, `optional`):
+                Audio event detection, turned off by default.

     Raises:
         InputRequired: Input is required.

@@ -190,6 +199,15 @@ class Recognition(BaseApi):
     def __launch_request(self):
         """Initiate real-time speech recognition requests.
         """
+        resources_list: list = []
+        if self._phrase is not None and len(self._phrase) > 0:
+            item = {'resource_id': self._phrase, 'resource_type': 'asr_phrase'}
+            resources_list.append(item)
+
+        if len(resources_list) > 0:
+            self._kwargs['resources'] = resources_list
+
+        self._tidy_kwargs()
         task_name, _ = _get_task_group_and_task(__name__)
         responses = super().call(model=self.model,
                                  task_group='audio',

@@ -205,10 +223,25 @@ class Recognition(BaseApi):
                                  **self._kwargs)
         return responses

-    def start(self):
+    def start(self, phrase_id: str = None, **kwargs):
         """Real-time speech recognition in asynchronous mode.
         Please call 'stop()' after you have completed recognition.

+        Args:
+            phrase_id (str, `optional`): The ID of phrase.
+
+            **kwargs:
+                disfluency_removal_enabled(bool, `optional`):
+                    Filter mood words, turned off by default.
+                diarization_enabled (bool, `optional`):
+                    Speech auto diarization, turned off by default.
+                speaker_count (int, `optional`): The number of speakers.
+                timestamp_alignment_enabled (bool, `optional`):
+                    Timestamp-alignment calibration, turned off by default.
+                special_word_filter(str, `optional`): Sensitive word filter.
+                audio_event_detection_enabled(bool, `optional`):
+                    Audio event detection, turned off by default.
+
         Raises:
             InvalidParameter: This interface cannot be called again
                 if it has already been started.

@@ -219,6 +252,8 @@ class Recognition(BaseApi):
         if self._running:
             raise InvalidParameter('Speech recognition has started.')

+        self._phrase = phrase_id
+        self._kwargs.update(**kwargs)
         self._recognition_once = False
         self._worker = threading.Thread(target=self.__receive_worker)
         self._worker.start()

@@ -234,11 +269,27 @@ class Recognition(BaseApi):
             self._running = False
             raise InvalidTask('Invalid task, task create failed.')

-    def call(self,
+    def call(self,
+             file: str,
+             phrase_id: str = None,
+             **kwargs) -> RecognitionResult:
         """Real-time speech recognition in synchronous mode.

         Args:
             file (str): The path to the local audio file.
+            phrase_id (str, `optional`): The ID of phrase.
+
+            **kwargs:
+                disfluency_removal_enabled(bool, `optional`):
+                    Filter mood words, turned off by default.
+                diarization_enabled (bool, `optional`):
+                    Speech auto diarization, turned off by default.
+                speaker_count (int, `optional`): The number of speakers.
+                timestamp_alignment_enabled (bool, `optional`):
+                    Timestamp-alignment calibration, turned off by default.
+                special_word_filter(str, `optional`): Sensitive word filter.
+                audio_event_detection_enabled(bool, `optional`):
+                    Audio event detection, turned off by default.

         Raises:
             InvalidParameter: This interface cannot be called again

@@ -258,12 +309,14 @@ class Recognition(BaseApi):
             raise FileNotFoundError('No such file or directory: ' + file)

         self._recognition_once = True
+        self._stream_data.clear()
+        self._phrase = phrase_id
+        self._kwargs.update(**kwargs)
         error_flag: bool = False
         sentences: List[Any] = []
         usages: List[Any] = []
         response: RecognitionResponse = None
         result: RecognitionResult = None
-        self._stream_data.clear()

         try:
             audio_data: bytes = None

@@ -349,6 +402,11 @@ class Recognition(BaseApi):

         self._stream_data = self._stream_data + [buffer]

+    def _tidy_kwargs(self):
+        for k in self._kwargs.copy():
+            if self._kwargs[k] is None:
+                self._kwargs.pop(k, None)
+
     def _input_stream_cycle(self):
         while self._running:
             while len(self._stream_data) == 0:
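Taken together, these changes let callers attach a hot-phrase resource and the new tuning flags in both modes. A minimal usage sketch; the model id, file name, and phrase id below are placeholders, not values from this diff:

from dashscope.audio.asr import Recognition

# Sync mode: phrase_id and the extra kwargs are stored on the instance
# and folded into a 'resources' list by __launch_request().
recognition = Recognition(model='paraformer-realtime-v1',  # assumed model id
                          format='wav',
                          sample_rate=16000,
                          callback=None)
result = recognition.call('sample.wav',
                          phrase_id='your-phrase-id',
                          diarization_enabled=True,
                          speaker_count=2)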
dashscope/audio/asr/transcription.py
CHANGED

@@ -27,6 +27,7 @@ class Transcription(BaseAsyncApi):
     def call(cls,
              model: str,
              file_urls: List[str],
+             phrase_id: str = None,
              api_key: str = None,
              **kwargs) -> TranscriptionResponse:
         """Transcribe the given files synchronously.

@@ -34,11 +35,27 @@ class Transcription(BaseAsyncApi):
         Args:
             model (str): The requested model_id.
             file_urls (List[str]): List of stored URLs.
-
+            phrase_id (str, `optional`): The ID of phrase.
+
+            **kwargs:
+                channel_id (List[int], optional):
+                    The selected channel_id of audio file.
+                disfluency_removal_enabled(bool, `optional`):
+                    Filter mood words, turned off by default.
+                diarization_enabled (bool, `optional`):
+                    Speech auto diarization, turned off by default.
+                speaker_count (int, `optional`): The number of speakers.
+                timestamp_alignment_enabled (bool, `optional`):
+                    Timestamp-alignment calibration, turned off by default.
+                special_word_filter(str, `optional`): Sensitive word filter.
+                audio_event_detection_enabled(bool, `optional`):
+                    Audio event detection, turned off by default.

         Returns:
             TranscriptionResponse: The result of batch transcription.
         """
+        kwargs.update(cls._fill_resource_id(phrase_id, **kwargs))
+        kwargs = cls._tidy_kwargs(**kwargs)
         response = super().call(model, file_urls, api_key=api_key, **kwargs)
         return TranscriptionResponse.from_api_response(response)

@@ -46,6 +63,7 @@ class Transcription(BaseAsyncApi):
     def async_call(cls,
                    model: str,
                    file_urls: List[str],
+                   phrase_id: str = None,
                    api_key: str = None,
                    **kwargs) -> TranscriptionResponse:
         """Transcribe the given files asynchronously,

@@ -54,11 +72,27 @@ class Transcription(BaseAsyncApi):
         Args:
             model (str): The requested model, such as paraformer-16k-1
             file_urls (List[str]): List of stored URLs.
-
+            phrase_id (str, `optional`): The ID of phrase.
+
+            **kwargs:
+                channel_id (List[int], optional):
+                    The selected channel_id of audio file.
+                disfluency_removal_enabled(bool, `optional`):
+                    Filter mood words, turned off by default.
+                diarization_enabled (bool, `optional`):
+                    Speech auto diarization, turned off by default.
+                speaker_count (int, `optional`): The number of speakers.
+                timestamp_alignment_enabled (bool, `optional`):
+                    Timestamp-alignment calibration, turned off by default.
+                special_word_filter(str, `optional`): Sensitive word filter.
+                audio_event_detection_enabled(bool, `optional`):
+                    Audio event detection, turned off by default.

         Returns:
             TranscriptionResponse: The response including task_id.
         """
+        kwargs.update(cls._fill_resource_id(phrase_id, **kwargs))
+        kwargs = cls._tidy_kwargs(**kwargs)
         response = cls._launch_request(model,
                                        file_urls,
                                        api_key=api_key,

@@ -154,3 +188,22 @@ class Transcription(BaseAsyncApi):
                 break

         return response
+
+    @classmethod
+    def _fill_resource_id(cls, phrase_id: str, **kwargs):
+        resources_list: list = []
+        if phrase_id is not None and len(phrase_id) > 0:
+            item = {'resource_id': phrase_id, 'resource_type': 'asr_phrase'}
+            resources_list.append(item)
+
+        if len(resources_list) > 0:
+            kwargs['resources'] = resources_list
+
+        return kwargs
+
+    @classmethod
+    def _tidy_kwargs(cls, **kwargs):
+        for k in kwargs.copy():
+            if kwargs[k] is None:
+                kwargs.pop(k, None)
+        return kwargs
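The same phrase plumbing on the batch side: _fill_resource_id() rewrites phrase_id into kwargs['resources'] before the request is launched. A sketch with placeholder model id and URL, assuming BaseAsyncApi.wait() accepts the returned task_id:

from dashscope.audio.asr import Transcription

task = Transcription.async_call(
    model='paraformer-v1',  # assumed model id
    file_urls=['https://example.com/audio/sample.wav'],
    phrase_id='your-phrase-id',
    channel_id=[0])
# Poll until the batch task finishes.
response = Transcription.wait(task=task.output.task_id)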
dashscope/client/base_api.py
CHANGED

@@ -2,9 +2,8 @@ import time
 from http import HTTPStatus
 from typing import List, Union

-import requests
-
 import dashscope
+import requests
 from dashscope.api_entities.api_request_factory import _build_api_request
 from dashscope.api_entities.dashscope_response import DashScopeAPIResponse
 from dashscope.common.api_key import get_default_api_key

@@ -231,8 +230,10 @@ class BaseAsyncApi(AsyncTaskGetMixin):
                 for example: 20230420000000. Defaults to None.
             end_time (str, optional): The tasks end time,
                 for example: 20230420000000. Defaults to None.
-            model_name (str, optional): The tasks model name.
-
+            model_name (str, optional): The tasks model name.
+                Defaults to None.
+            api_key_id (str, optional): The tasks api-key-id.
+                Defaults to None.
             region (str, optional): The service region,
                 for example: cn-beijing. Defaults to None.
             status (str, optional): The status of tasks[PENDING,

@@ -321,9 +322,12 @@ class BaseAsyncApi(AsyncTaskGetMixin):
         step = 0
         while True:
             step += 1
-            # we start by querying once every second, and double
-            #
-            #
+            # we start by querying once every second, and double
+            # the query interval after every 3(increment_steps)
+            # intervals, until we hit the max waiting interval
+            # of 5(seconds)
+            # TODO: investigate if we can use long-poll
+            # (server side return immediately when ready)
             if wait_seconds < max_wait_seconds and step % increment_steps == 0:
                 wait_seconds = min(wait_seconds * 2, max_wait_seconds)
             rsp = cls._get(task_id, api_key)

@@ -460,6 +464,7 @@ class GetMixin():
     def get(cls,
             target,
             api_key: str = None,
+            params: dict = {},
             **kwargs) -> DashScopeAPIResponse:
         """Get object information.

@@ -473,7 +478,7 @@ class GetMixin():
         """
         url = join_url(dashscope.base_http_api_url, cls.SUB_PATH.lower(),
                        target)
-        return _get(url, api_key, **kwargs)
+        return _get(url, api_key=api_key, params=params, **kwargs)


 class GetStatusMixin():
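The restored comment describes the exponential backoff used while polling an async task. A self-contained sketch of that schedule (illustration only, not SDK code):

import time  # the real loop sleeps between polls

def poll_intervals(increment_steps=3, max_wait_seconds=5, polls=10):
    # Mirrors the BaseAsyncApi loop: start at 1s, double after every
    # `increment_steps` polls, cap at `max_wait_seconds`.
    wait_seconds, step = 1, 0
    for _ in range(polls):
        step += 1
        if wait_seconds < max_wait_seconds and step % increment_steps == 0:
            wait_seconds = min(wait_seconds * 2, max_wait_seconds)
        yield wait_seconds

print(list(poll_intervals()))  # [1, 1, 2, 2, 2, 4, 4, 4, 5, 5]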
dashscope/common/constants.py
CHANGED

@@ -28,9 +28,12 @@ SERVICE_503_MESSAGE = 'Service temporarily unavailable, possibly overloaded or n
 WEBSOCKET_ERROR_CODE = 44
 SSE_CONTENT_TYPE = 'text/event-stream'
 DEPRECATED_MESSAGE = 'history and auto_history are deprecated for qwen serial models and will be remove in future, use messages'  # noqa E501
-SCENE =
+SCENE = 'scene'
 MESSAGE = 'message'
-
+REQUEST_CONTENT_TEXT = 'text'
+REQUEST_CONTENT_IMAGE = 'image'
+REQUEST_CONTENT_AUDIO = 'audio'
+FILE_PATH_SCHEMA = 'file://'

 REPEATABLE_STATUS = [
     HTTPStatus.SERVICE_UNAVAILABLE, HTTPStatus.GATEWAY_TIMEOUT
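The new FILE_PATH_SCHEMA constant gives the SDK a marker for local files that should be uploaded (see the new dashscope/utils/oss_utils.py in the file list). A sketch of the check it enables; the usage pattern is an assumption, not code from this diff:

FILE_PATH_SCHEMA = 'file://'

path = 'file:///tmp/sample.wav'
if path.startswith(FILE_PATH_SCHEMA):
    # Strip the schema to get the path to upload.
    local_path = path[len(FILE_PATH_SCHEMA):]
    print(local_path)  # /tmp/sample.wav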
dashscope/common/error.py
CHANGED
dashscope/common/utils.py
CHANGED

@@ -8,7 +8,6 @@ from urllib.parse import urlparse

 import aiohttp
 import requests
-
 from dashscope.api_entities.dashscope_response import DashScopeAPIResponse
 from dashscope.common.api_key import get_default_api_key
 from dashscope.version import __version__

@@ -97,6 +96,16 @@ def async_to_sync(async_generator):
         yield message


+def get_user_agent():
+    ua = 'dashscope/%s; python/%s; platform/%s; processor/%s' % (
+        __version__,
+        platform.python_version(),
+        platform.platform(),
+        platform.processor(),
+    )
+    return ua
+
+
 def default_headers(api_key: str = None) -> Dict[str, str]:
     ua = 'dashscope/%s; python/%s; platform/%s; processor/%s' % (
         __version__,

@@ -117,7 +126,8 @@ def join_url(base_url, *args):
         base_url = base_url + '/'
     url = base_url
     for arg in args:
-        url += arg + '/'
+        if arg:
+            url += arg + '/'
     return url[:-1]
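The join_url fix skips falsy segments instead of producing a double slash. A quick check of the new behavior, grounded directly in the function above:

from dashscope.common.utils import join_url

print(join_url('https://dashscope.aliyuncs.com/api/v1', 'tasks', '', 'abc'))
# https://dashscope.aliyuncs.com/api/v1/tasks/abc  (the empty segment is skipped)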
dashscope/embeddings/batch_text_embedding.py
CHANGED

@@ -15,6 +15,7 @@ class BatchTextEmbedding(BaseAsyncApi):
     """
     class Models:
         text_embedding_async_v1 = 'text-embedding-async-v1'
+        text_embedding_async_v2 = 'text-embedding-async-v2'

     @classmethod
     def call(cls,

@@ -26,7 +27,7 @@ class BatchTextEmbedding(BaseAsyncApi):

         Args:
             model (str): The model, reference ``Models``.
-            url (Any): The async request file url, which contains text
+            url (Any): The async request file url, which contains text
                 to embedding line by line.
             api_key (str, optional): The api api_key. Defaults to None.
             **kwargs:

@@ -58,7 +59,7 @@ class BatchTextEmbedding(BaseAsyncApi):

         Args:
             model (str): The model, reference ``Models``.
-            url (Any): The async request file url, which contains text
+            url (Any): The async request file url, which contains text
                 to embedding line by line.
             api_key (str, optional): The api api_key. Defaults to None.
             **kwargs:
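Beyond docstring whitespace fixes, this file only gains a model constant. A sketch of selecting it; the import path and input URL are assumptions:

from dashscope import BatchTextEmbedding  # import path assumed

task = BatchTextEmbedding.async_call(
    model=BatchTextEmbedding.Models.text_embedding_async_v2,
    url='https://example.com/texts-one-per-line.txt')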
dashscope/embeddings/multimodal_embedding.py
CHANGED

@@ -1,11 +1,12 @@
 from dataclasses import dataclass
 from typing import List

-from dashscope.api_entities.dashscope_response import (DashScopeAPIResponse,
+from dashscope.api_entities.dashscope_response import (DashScopeAPIResponse,
                                                        DictMixin)
 from dashscope.client.base_api import BaseApi
 from dashscope.common.error import InputRequired, ModelRequired
 from dashscope.common.utils import _get_task_group_and_task
+from dashscope.utils.oss_utils import preprocess_message_element


 @dataclass(init=False)

@@ -15,30 +16,34 @@ class MultiModalEmbeddingItemBase(DictMixin):
     def __init__(self, factor: float, **kwargs):
         super().__init__(factor=factor, **kwargs)

+
 @dataclass(init=False)
 class MultiModalEmbeddingItemText(MultiModalEmbeddingItemBase):
     text: str
-
-    def __init__(self, text: str,
+
+    def __init__(self, text: str, factor: float, **kwargs):
         super().__init__(factor, **kwargs)
         self.text = text

+
 @dataclass(init=False)
 class MultiModalEmbeddingItemImage(MultiModalEmbeddingItemBase):
     image: str
-
+
     def __init__(self, image: str, factor: float, **kwargs):
         super().__init__(factor, **kwargs)
         self.image = image
-
+
+
 @dataclass(init=False)
 class MultiModalEmbeddingItemAudio(MultiModalEmbeddingItemBase):
     audio: str
-
+
     def __init__(self, audio: str, factor: float, **kwargs):
         super().__init__(factor, **kwargs)
         self.audio = audio
-
+
+
 class MultiModalEmbedding(BaseApi):
     task = 'multimodal-embedding'

@@ -46,7 +51,10 @@ class MultiModalEmbedding(BaseApi):
         multimodal_embedding_one_peace_v1 = 'multimodal-embedding-one-peace-v1'

     @classmethod
-    def call(cls,
+    def call(cls,
+             model: str,
+             input: List[MultiModalEmbeddingItemBase],
+             api_key: str = None,
              **kwargs) -> DashScopeAPIResponse:
         """Get embedding multimodal contents..

@@ -55,7 +63,7 @@ class MultiModalEmbedding(BaseApi):
             input (List[MultiModalEmbeddingElement]): The embedding elements,
                 every element include data, modal, factor field.
             **kwargs:
-                auto_truncation(bool, `optional`): Automatically truncate
+                auto_truncation(bool, `optional`): Automatically truncate
                     audio longer than 15 seconds or text longer than 70 words.
                     Default to false(Too long input will result in failure).

@@ -67,6 +75,11 @@ class MultiModalEmbedding(BaseApi):
         if model is None or not model:
             raise ModelRequired('Model is required!')
         embedding_input = {}
+        has_upload = cls._preprocess_message_inputs(model, input, api_key)
+        if has_upload:
+            headers = kwargs.pop('headers', {})
+            headers['X-DashScope-OssResourceResolve'] = 'enable'
+            kwargs['headers'] = headers
         embedding_input['contents'] = input
         kwargs.pop('stream', False)  # not support streaming output.
         task_group, function = _get_task_group_and_task(__name__)

@@ -76,3 +89,18 @@ class MultiModalEmbedding(BaseApi):
                             task=MultiModalEmbedding.task,
                             function=function,
                             **kwargs)
+
+    @classmethod
+    def _preprocess_message_inputs(cls, model: str, input: List[dict],
+                                   api_key: str):
+        """preprocess following inputs
+        input = [{'factor': 1, 'text': 'hello'},
+                 {'factor': 2, 'audio': ''},
+                 {'factor': 3, 'image': ''}]
+        """
+        has_upload = False
+        for elem in input:
+            is_upload = preprocess_message_element(model, elem, api_key)
+            if is_upload and not has_upload:
+                has_upload = True
+            return has_upload
dashscope/finetune.py
CHANGED

@@ -43,6 +43,8 @@ class FineTune(CreateMixin, CancelMixin, DeleteMixin, ListMixin,
         }
         if mode is not None:
             request['training_type'] = mode
+        if 'finetuned_output' in kwargs:
+            request['finetuned_output'] = kwargs['finetuned_output']
         return super().call(request, api_key, **kwargs)

     @classmethod
dashscope/nlp/understanding.py
CHANGED

@@ -10,20 +10,17 @@ class Understanding(BaseApi):
     """API for AI-Generated Content(AIGC) models.

     """
-
     class Models:
         opennlu_v1 = 'opennlu-v1'

     @classmethod
-    def call(
-            cls,
-            model: str,
-            sentence: str = None,
-            labels: str = None,
-            task: str = None,
-            api_key: str = None,
-            **kwargs
-    ) -> DashScopeAPIResponse:
+    def call(cls,
+             model: str,
+             sentence: str = None,
+             labels: str = None,
+             task: str = None,
+             api_key: str = None,
+             **kwargs) -> DashScopeAPIResponse:
         """Call generation model service.

         Args:

@@ -37,7 +34,8 @@ class Understanding(BaseApi):
         Returns:
             DashScopeAPIResponse: The understanding result.
         """
-        if (sentence is None or not sentence) or (labels is None
+        if (sentence is None or not sentence) or (labels is None
+                                                  or not labels):
             raise InputRequired('sentence and labels is required!')
         if model is None or not model:
             raise ModelRequired('Model is required!')

@@ -57,11 +55,8 @@ class Understanding(BaseApi):
     @classmethod
     def _build_input_parameters(cls, model, sentence, labels, task, **kwargs):
         parameters = {}
-        input = {
-            "sentence": sentence,
-            "labels": labels
-        }
+        input = {'sentence': sentence, 'labels': labels}
         if task is not None and task:
-            input[
+            input['task'] = task

         return input, {**parameters, **kwargs}
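With the reformatted signature, a call looks like this; the model constant comes from the diff, while the export path, sentence, labels, and task values are placeholders:

from dashscope import Understanding  # export path assumed

rsp = Understanding.call(model=Understanding.Models.opennlu_v1,
                         sentence='This phone has great battery life.',
                         labels='positive,negative',
                         task='classification')
print(rsp.output)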
dashscope/tokenizers/__init__.py
CHANGED