dashscope 1.8.0-py3-none-any.whl → 1.25.6-py3-none-any.whl
This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
- dashscope/__init__.py +61 -14
- dashscope/aigc/__init__.py +10 -3
- dashscope/aigc/chat_completion.py +282 -0
- dashscope/aigc/code_generation.py +145 -0
- dashscope/aigc/conversation.py +71 -12
- dashscope/aigc/generation.py +288 -16
- dashscope/aigc/image_synthesis.py +473 -31
- dashscope/aigc/multimodal_conversation.py +299 -14
- dashscope/aigc/video_synthesis.py +610 -0
- dashscope/api_entities/aiohttp_request.py +8 -5
- dashscope/api_entities/api_request_data.py +4 -2
- dashscope/api_entities/api_request_factory.py +68 -20
- dashscope/api_entities/base_request.py +20 -3
- dashscope/api_entities/chat_completion_types.py +344 -0
- dashscope/api_entities/dashscope_response.py +243 -15
- dashscope/api_entities/encryption.py +179 -0
- dashscope/api_entities/http_request.py +216 -62
- dashscope/api_entities/websocket_request.py +43 -34
- dashscope/app/__init__.py +5 -0
- dashscope/app/application.py +203 -0
- dashscope/app/application_response.py +246 -0
- dashscope/assistants/__init__.py +16 -0
- dashscope/assistants/assistant_types.py +175 -0
- dashscope/assistants/assistants.py +311 -0
- dashscope/assistants/files.py +197 -0
- dashscope/audio/__init__.py +4 -2
- dashscope/audio/asr/__init__.py +17 -1
- dashscope/audio/asr/asr_phrase_manager.py +203 -0
- dashscope/audio/asr/recognition.py +167 -27
- dashscope/audio/asr/transcription.py +107 -14
- dashscope/audio/asr/translation_recognizer.py +1006 -0
- dashscope/audio/asr/vocabulary.py +177 -0
- dashscope/audio/qwen_asr/__init__.py +7 -0
- dashscope/audio/qwen_asr/qwen_transcription.py +189 -0
- dashscope/audio/qwen_omni/__init__.py +11 -0
- dashscope/audio/qwen_omni/omni_realtime.py +524 -0
- dashscope/audio/qwen_tts/__init__.py +5 -0
- dashscope/audio/qwen_tts/speech_synthesizer.py +77 -0
- dashscope/audio/qwen_tts_realtime/__init__.py +10 -0
- dashscope/audio/qwen_tts_realtime/qwen_tts_realtime.py +355 -0
- dashscope/audio/tts/__init__.py +2 -0
- dashscope/audio/tts/speech_synthesizer.py +5 -0
- dashscope/audio/tts_v2/__init__.py +12 -0
- dashscope/audio/tts_v2/enrollment.py +179 -0
- dashscope/audio/tts_v2/speech_synthesizer.py +886 -0
- dashscope/cli.py +157 -37
- dashscope/client/base_api.py +652 -87
- dashscope/common/api_key.py +2 -0
- dashscope/common/base_type.py +135 -0
- dashscope/common/constants.py +13 -16
- dashscope/common/env.py +2 -0
- dashscope/common/error.py +58 -22
- dashscope/common/logging.py +2 -0
- dashscope/common/message_manager.py +2 -0
- dashscope/common/utils.py +276 -46
- dashscope/customize/__init__.py +0 -0
- dashscope/customize/customize_types.py +192 -0
- dashscope/customize/deployments.py +146 -0
- dashscope/customize/finetunes.py +234 -0
- dashscope/embeddings/__init__.py +5 -1
- dashscope/embeddings/batch_text_embedding.py +208 -0
- dashscope/embeddings/batch_text_embedding_response.py +65 -0
- dashscope/embeddings/multimodal_embedding.py +118 -10
- dashscope/embeddings/text_embedding.py +13 -1
- dashscope/{file.py → files.py} +19 -4
- dashscope/io/input_output.py +2 -0
- dashscope/model.py +11 -2
- dashscope/models.py +43 -0
- dashscope/multimodal/__init__.py +20 -0
- dashscope/multimodal/dialog_state.py +56 -0
- dashscope/multimodal/multimodal_constants.py +28 -0
- dashscope/multimodal/multimodal_dialog.py +648 -0
- dashscope/multimodal/multimodal_request_params.py +313 -0
- dashscope/multimodal/tingwu/__init__.py +10 -0
- dashscope/multimodal/tingwu/tingwu.py +80 -0
- dashscope/multimodal/tingwu/tingwu_realtime.py +579 -0
- dashscope/nlp/__init__.py +0 -0
- dashscope/nlp/understanding.py +64 -0
- dashscope/protocol/websocket.py +3 -0
- dashscope/rerank/__init__.py +0 -0
- dashscope/rerank/text_rerank.py +69 -0
- dashscope/resources/qwen.tiktoken +151643 -0
- dashscope/threads/__init__.py +26 -0
- dashscope/threads/messages/__init__.py +0 -0
- dashscope/threads/messages/files.py +113 -0
- dashscope/threads/messages/messages.py +220 -0
- dashscope/threads/runs/__init__.py +0 -0
- dashscope/threads/runs/runs.py +501 -0
- dashscope/threads/runs/steps.py +112 -0
- dashscope/threads/thread_types.py +665 -0
- dashscope/threads/threads.py +212 -0
- dashscope/tokenizers/__init__.py +7 -0
- dashscope/tokenizers/qwen_tokenizer.py +111 -0
- dashscope/tokenizers/tokenization.py +125 -0
- dashscope/tokenizers/tokenizer.py +45 -0
- dashscope/tokenizers/tokenizer_base.py +32 -0
- dashscope/utils/__init__.py +0 -0
- dashscope/utils/message_utils.py +838 -0
- dashscope/utils/oss_utils.py +243 -0
- dashscope/utils/param_utils.py +29 -0
- dashscope/version.py +3 -1
- {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/METADATA +53 -50
- dashscope-1.25.6.dist-info/RECORD +112 -0
- {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/WHEEL +1 -1
- {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/entry_points.txt +0 -1
- {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info/licenses}/LICENSE +2 -4
- dashscope/deployment.py +0 -129
- dashscope/finetune.py +0 -149
- dashscope-1.8.0.dist-info/RECORD +0 -49
- {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/top_level.txt +0 -0

dashscope/threads/threads.py
@@ -0,0 +1,212 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+from typing import Dict, List, Optional
+
+from dashscope.assistants.assistant_types import DeleteResponse
+from dashscope.client.base_api import (CreateMixin, DeleteMixin,
+                                        GetStatusMixin, UpdateMixin)
+from dashscope.common.error import InputRequired
+from dashscope.threads.thread_types import Run, Thread
+
+__all__ = ['Threads']
+
+
+class Threads(CreateMixin, DeleteMixin, GetStatusMixin, UpdateMixin):
+    SUB_PATH = 'threads'
+
+    @classmethod
+    def call(cls,
+             *,
+             messages: List[Dict] = None,
+             metadata: Dict = None,
+             workspace: str = None,
+             api_key: str = None,
+             **kwargs) -> Thread:
+        """Create a thread.
+
+        Args:
+            messages (List[Dict], optional): List of messages to start thread. Defaults to None.
+            metadata (Dict, optional): The key-value information associate with thread. Defaults to None.
+            workspace (str, optional): The DashScope workspace id. Defaults to None.
+            api_key (str, optional): Your DashScope api key. Defaults to None.
+
+        Returns:
+            Thread: The thread object.
+        """
+        return cls.create(messages=messages,
+                          metadata=metadata,
+                          workspace=workspace,
+                          api_key=api_key,
+                          **kwargs)
+
+    @classmethod
+    def create(cls,
+               *,
+               messages: List[Dict] = None,
+               metadata: Dict = None,
+               workspace: str = None,
+               api_key: str = None,
+               **kwargs) -> Thread:
+        """Create a thread.
+
+        Args:
+            messages (List[Dict], optional): List of messages to start thread. Defaults to None.
+            metadata (Dict, optional): The key-value information associate with thread. Defaults to None.
+            workspace (str, optional): The DashScope workspace id. Defaults to None.
+            api_key (str, optional): Your DashScope api key. Defaults to None.
+
+        Returns:
+            Thread: The thread object.
+        """
+        data = {}
+        if messages:
+            data['messages'] = messages
+        if metadata:
+            data['metadata'] = metadata
+        response = super().call(data=data if data else '',
+                                api_key=api_key,
+                                flattened_output=True,
+                                workspace=workspace,
+                                **kwargs)
+        return Thread(**response)
+
+    @classmethod
+    def get(cls,
+            thread_id: str,
+            *,
+            workspace: str = None,
+            api_key: str = None,
+            **kwargs) -> Thread:
+        """Retrieve the thread.
+
+        Args:
+            thread_id (str): The target thread.
+            workspace (str, optional): The DashScope workspace id. Defaults to None.
+            api_key (str, optional): Your DashScope api key. Defaults to None.
+
+        Returns:
+            Thread: The `Thread` information.
+        """
+        return cls.retrieve(thread_id,
+                            workspace=workspace,
+                            api_key=api_key,
+                            **kwargs)
+
+    @classmethod
+    def retrieve(cls,
+                 thread_id: str,
+                 *,
+                 workspace: str = None,
+                 api_key: str = None,
+                 **kwargs) -> Thread:
+        """Retrieve the thread.
+
+        Args:
+            thread_id (str): The target thread.
+            workspace (str, optional): The DashScope workspace id. Defaults to None.
+            api_key (str, optional): Your DashScope api key. Defaults to None.
+
+        Returns:
+            Thread: The `Thread` information.
+        """
+        if not thread_id:
+            raise InputRequired('thread_id is required!')
+        response = super().get(thread_id,
+                               api_key=api_key,
+                               flattened_output=True,
+                               workspace=workspace,
+                               **kwargs)
+        return Thread(**response)
+
+    @classmethod
+    def update(cls,
+               thread_id: str,
+               *,
+               metadata: Dict = None,
+               workspace: str = None,
+               api_key: str = None,
+               **kwargs) -> Thread:
+        """Update thread information.
+
+        Args:
+            thread_id (str): The thread id.
+            metadata (Dict, optional): The thread key-value information. Defaults to None.
+            workspace (str, optional): The DashScope workspace id. Defaults to None.
+            api_key (str, optional): Your DashScope api key. Defaults to None.
+
+        Returns:
+            Thread: The `Thread` information.
+        """
+        if not thread_id:
+            raise InputRequired('thread_id is required!')
+        response = super().update(thread_id,
+                                  json={'metadata': metadata},
+                                  api_key=api_key,
+                                  workspace=workspace,
+                                  flattened_output=True,
+                                  method='post',
+                                  **kwargs)
+        return Thread(**response)
+
+    @classmethod
+    def delete(cls,
+               thread_id,
+               *,
+               workspace: str = None,
+               api_key: str = None,
+               **kwargs) -> DeleteResponse:
+        """Delete thread.
+
+        Args:
+            thread_id (str): The thread id to delete.
+            workspace (str, optional): The DashScope workspace id. Defaults to None.
+            api_key (str, optional): Your DashScope api key. Defaults to None.
+
+        Returns:
+            AssistantsDeleteResponse: The deleted information.
+        """
+        if not thread_id:
+            raise InputRequired('thread_id is required!')
+        response = super().delete(thread_id,
+                                  api_key=api_key,
+                                  workspace=workspace,
+                                  flattened_output=True,
+                                  **kwargs)
+        return DeleteResponse(**response)
+
+    @classmethod
+    def create_and_run(cls,
+                       *,
+                       assistant_id: str,
+                       thread: Optional[Dict] = None,
+                       model: Optional[str] = None,
+                       instructions: Optional[str] = None,
+                       additional_instructions: Optional[str] = None,
+                       tools: Optional[List[Dict]] = None,
+                       metadata: Optional[Dict] = None,
+                       workspace: str = None,
+                       api_key: str = None,
+                       **kwargs) -> Run:
+        if not assistant_id:
+            raise InputRequired('assistant_id is required')
+        data = {'assistant_id': assistant_id}
+        if thread:
+            data['thread'] = thread
+        if model:
+            data['model'] = model
+        if instructions:
+            data['instructions'] = instructions
+        if additional_instructions:
+            data['additional_instructions'] = additional_instructions
+        if tools:
+            data['tools'] = tools
+        if metadata:
+            data['metadata'] = metadata
+
+        response = super().call(data=data,
+                                path='threads/runs',
+                                api_key=api_key,
+                                flattened_output=True,
+                                workspace=workspace,
+                                **kwargs)
+        return Run(**response)
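The hunk above adds the OpenAI-style Threads resource that the new assistants/threads/runs modules build on. A minimal usage sketch, assuming a valid DASHSCOPE_API_KEY is configured in the environment and that an assistant already exists; `asst_xxx` is a placeholder id, and the `id`/`status` attributes are assumed to come from the Thread and Run types added in dashscope/threads/thread_types.py:

from dashscope.threads.threads import Threads  # module path added in this diff

# Create a thread seeded with one user message and some metadata.
thread = Threads.create(
    messages=[{'role': 'user', 'content': 'Hello, what can you do?'}],
    metadata={'purpose': 'demo'})

# Fetch it back, then attach extra metadata.
thread = Threads.retrieve(thread.id)
thread = Threads.update(thread.id, metadata={'purpose': 'demo', 'stage': '2'})

# Create a thread and start a run against an existing assistant in one call.
run = Threads.create_and_run(
    assistant_id='asst_xxx',  # placeholder: id of a previously created assistant
    thread={'messages': [{'role': 'user', 'content': 'Summarize my notes.'}]})
print(run.status)

# Delete the thread when done.
print(Threads.delete(thread.id))

Note that Threads.call simply delegates to Threads.create, mirroring the call-style entry points used elsewhere in the SDK.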
dashscope/tokenizers/qwen_tokenizer.py
@@ -0,0 +1,111 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import base64
+import unicodedata
+from typing import Collection, Dict, List, Set, Union
+
+from .tokenizer_base import Tokenizer
+
+PAT_STR = r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+"""  # noqa E501
+ENDOFTEXT = '<|endoftext|>'
+IMSTART = '<|im_start|>'
+IMEND = '<|im_end|>'
+# as the default behavior is changed to allow special tokens in
+# regular texts, the surface forms of special tokens need to be
+# as different as possible to minimize the impact
+EXTRAS = tuple((f'<|extra_{i}|>' for i in range(205)))
+# changed to use actual index to avoid misconfiguration with vocabulary expansion
+SPECIAL_START_ID = 151643
+SPECIAL_TOKENS = tuple(
+    enumerate(
+        ((
+            ENDOFTEXT,
+            IMSTART,
+            IMEND,
+        ) + EXTRAS),
+        start=SPECIAL_START_ID,
+    ))
+SPECIAL_TOKENS_SET = set(t for i, t in SPECIAL_TOKENS)
+
+
+class QwenTokenizer(Tokenizer):
+    @staticmethod
+    def _load_tiktoken_bpe(tiktoken_bpe_file: str) -> Dict[bytes, int]:
+        with open(tiktoken_bpe_file, 'rb') as f:
+            contents = f.read()
+        return {
+            base64.b64decode(token): int(rank)
+            for token, rank in (line.split() for line in contents.splitlines()
+                                if line)
+        }
+
+    def __init__(self, vocab_file, errors='replace', extra_vocab_file=None):
+        self._errors = errors
+        self._vocab_file = vocab_file
+        self._extra_vocab_file = extra_vocab_file
+
+        self._mergeable_ranks = QwenTokenizer._load_tiktoken_bpe(
+            vocab_file)  # type: Dict[bytes, int]
+        self._special_tokens = {
+            token: index
+            for index, token in SPECIAL_TOKENS
+        }
+
+        # try load extra vocab from file
+        if extra_vocab_file is not None:
+            used_ids = set(self._mergeable_ranks.values()) | set(
+                self._special_tokens.values())
+            extra_mergeable_ranks = self._load_tiktoken_bpe(extra_vocab_file)
+            for token, index in extra_mergeable_ranks.items():
+                if token in self._mergeable_ranks:
+                    continue
+                if index in used_ids:
+                    continue
+                self._mergeable_ranks[token] = index
+            # the index may be sparse after this, but don't worry tiktoken.Encoding will handle this
+        import tiktoken
+        enc = tiktoken.Encoding(
+            'Qwen',
+            pat_str=PAT_STR,
+            mergeable_ranks=self._mergeable_ranks,
+            special_tokens=self._special_tokens,
+        )
+        assert (
+            len(self._mergeable_ranks) +
+            len(self._special_tokens) == enc.n_vocab
+        ), f'{len(self._mergeable_ranks) + len(self._special_tokens)} != {enc.n_vocab} in encoding'
+
+        self.decoder = {v: k
+                        for k, v in self._mergeable_ranks.items()
+                        }  # type: dict[int, bytes|str]
+        self.decoder.update({v: k for k, v in self._special_tokens.items()})
+
+        self._tokenizer = enc  # type: tiktoken.Encoding
+
+        self.eod_id = self._tokenizer.eot_token
+        self.im_start_id = self._special_tokens[IMSTART]
+        self.im_end_id = self._special_tokens[IMEND]
+
+    def encode(
+        self,
+        text: str,
+        allowed_special: Union[Set, str] = 'all',
+        disallowed_special: Union[Collection, str] = (),
+    ) -> Union[List[List], List]:
+        text = unicodedata.normalize('NFC', text)
+        return self._tokenizer.encode(text,
+                                      allowed_special=allowed_special,
+                                      disallowed_special=disallowed_special)
+
+    def decode(
+        self,
+        token_ids: Union[int, List[int]],
+        skip_special_tokens: bool = False,
+        errors: str = None,
+        **kwargs,
+    ) -> str:
+        if isinstance(token_ids, int):
+            token_ids = [token_ids]
+        if skip_special_tokens:
+            token_ids = [i for i in token_ids if i < self.eod_id]
+        return self._tokenizer.decode(token_ids, errors=errors or self._errors)
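QwenTokenizer wraps tiktoken around the Qwen BPE vocabulary that now ships inside the wheel (dashscope/resources/qwen.tiktoken, also listed above). A minimal local sketch, assuming the tiktoken package is installed; it locates the bundled vocabulary via the installed dashscope package and makes no API call:

import os

import dashscope  # used only to locate the bundled vocabulary file
from dashscope.tokenizers.qwen_tokenizer import QwenTokenizer

vocab_path = os.path.join(os.path.dirname(dashscope.__file__),
                          'resources', 'qwen.tiktoken')
tokenizer = QwenTokenizer(vocab_path)

ids = tokenizer.encode('DashScope makes Qwen easy to call.')
print(ids)                    # token ids from the tiktoken encoding
print(tokenizer.decode(ids))  # round-trips back to the original text

encode() keeps special tokens enabled by default (allowed_special='all'), and decode(..., skip_special_tokens=True) drops every id at or above the <|endoftext|> id, i.e. all special tokens.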
dashscope/tokenizers/tokenization.py
@@ -0,0 +1,125 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import copy
+from typing import Any, List
+
+from dashscope.api_entities.dashscope_response import (DashScopeAPIResponse,
+                                                        Message, Role)
+from dashscope.client.base_api import BaseApi
+from dashscope.common.constants import (CUSTOMIZED_MODEL_ID,
+                                         DEPRECATED_MESSAGE, HISTORY, MESSAGES,
+                                         PROMPT)
+from dashscope.common.error import InputRequired, ModelRequired
+from dashscope.common.logging import logger
+
+
+class Tokenization(BaseApi):
+    FUNCTION = 'tokenizer'
+    """API for get tokenizer result..
+
+    """
+    class Models:
+        """List of models currently supported
+        """
+        qwen_turbo = 'qwen-turbo'
+        qwen_plus = 'qwen-plus'
+        qwen_7b_chat = 'qwen-7b-chat'
+        qwen_14b_chat = 'qwen-14b-chat'
+        llama2_7b_chat_v2 = 'llama2-7b-chat-v2'
+        llama2_13b_chat_v2 = 'llama2-13b-chat-v2'
+        text_embedding_v2 = 'text-embedding-v2'
+        qwen_72b_chat = 'qwen-72b-chat'
+
+    @classmethod
+    def call(cls,
+             model: str,
+             input: Any = None,
+             prompt: Any = None,
+             history: list = None,
+             api_key: str = None,
+             messages: List[Message] = None,
+             workspace: str = None,
+             **kwargs) -> DashScopeAPIResponse:
+        """Call tokenization.
+
+        Args:
+            model (str): The requested model, such as qwen-v1
+            input: (Any): The model input body.
+            prompt (Any): The input prompt, for qwen serial model.
+            history (list):The user provided history,
+                deprecated, use messages instead.
+                examples:
+                    [{'user':'The weather is fine today.',
+                      'bot': 'Suitable for outings'}].
+                Defaults to None.
+            api_key (str, optional): The api api_key, can be None,
+                if None, will get by default rule(TODO: api key doc).
+            messages (list): The generation messages.
+                examples:
+                    [{'role': 'user',
+                      'content': 'The weather is fine today.'},
+                     {'role': 'assistant', 'content': 'Suitable for outings'}]
+            workspace (str): The dashscope workspace id.
+            **kwargs:
+                see model input.
+
+        Raises:
+            InputRequired: input is required.
+            ModelRequired: model is required.
+
+        Returns:
+            DashScopeAPIResponse: The tokenizer output.
+        """
+        if (input is None or not input) and \
+                (prompt is None or not prompt) and \
+                (messages is None or not messages):
+            raise InputRequired('prompt or messages is required!')
+        if model is None or not model:
+            raise ModelRequired('Model is required!')
+        if input is None:
+            input, parameters = cls._build_llm_parameters(
+                model, prompt, history, messages, **kwargs)
+        else:
+            parameters = kwargs
+
+        if kwargs.pop('stream', False):  # not support stream
+            logger.warning('streaming option not supported for tokenization.')
+
+        return super().call(model=model,
+                            task_group=None,
+                            function=cls.FUNCTION,
+                            api_key=api_key,
+                            input=input,
+                            is_service=False,
+                            workspace=workspace,
+                            **parameters)
+
+    @classmethod
+    def _build_llm_parameters(cls, model, prompt, history, messages, **kwargs):
+        parameters = {}
+        input = {}
+        if history is not None:
+            logger.warning(DEPRECATED_MESSAGE)
+            input[HISTORY] = history
+            if prompt is not None and prompt:
+                input[PROMPT] = prompt
+        elif messages is not None:
+            msgs = copy.deepcopy(messages)
+            if prompt is not None and prompt:
+                msgs.append({'role': Role.USER, 'content': prompt})
+            input = {MESSAGES: msgs}
+        else:
+            input[PROMPT] = prompt
+
+        if model.startswith('qwen'):
+            enable_search = kwargs.pop('enable_search', False)
+            if enable_search:
+                parameters['enable_search'] = enable_search
+        elif model.startswith('bailian'):
+            customized_model_id = kwargs.pop('customized_model_id', None)
+            if customized_model_id is None:
+                raise InputRequired('customized_model_id is required for %s' %
+                                    model)
+            input[CUSTOMIZED_MODEL_ID] = customized_model_id
+
+        return input, {**parameters, **kwargs}
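Unlike the local tokenizers, Tokenization sends the text to the service-side tokenizer endpoint and returns a DashScopeAPIResponse. A minimal sketch, assuming DASHSCOPE_API_KEY is set in the environment; the exact structure of response.output is service-defined, so the prints below are illustrative:

from http import HTTPStatus

from dashscope.api_entities.dashscope_response import Role
from dashscope.tokenizers.tokenization import Tokenization

response = Tokenization.call(
    model=Tokenization.Models.qwen_turbo,
    messages=[{'role': Role.USER, 'content': 'The weather is fine today.'}])

if response.status_code == HTTPStatus.OK:
    print(response.usage)   # token accounting reported by the service
    print(response.output)  # tokenizer result payload
else:
    print(response.code, response.message)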
dashscope/tokenizers/tokenizer.py
@@ -0,0 +1,45 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import os
+from typing import List
+
+from dashscope.common.error import UnsupportedModel
+from dashscope.tokenizers.qwen_tokenizer import QwenTokenizer
+
+from .tokenizer_base import Tokenizer
+
+QWEN_SERIALS = ['qwen-7b-chat', 'qwen-turbo', 'qwen-plus', 'qwen-max']
+current_path = os.path.dirname(os.path.abspath(__file__))
+root_path = os.path.dirname(current_path)
+
+
+def get_tokenizer(model: str) -> Tokenizer:
+    """Get a tokenizer based on model name.
+
+    Args:
+        model (str): The model name.
+
+    Raises:
+        UnsupportedModel: Not support model
+
+    Returns:
+        Tokenizer: The `Tokenizer` of the model.
+    """
+    if model in QWEN_SERIALS:
+        return QwenTokenizer(
+            os.path.join(root_path, 'resources', 'qwen.tiktoken'))
+    elif model.startswith('qwen'):
+        return QwenTokenizer(
+            os.path.join(root_path, 'resources', 'qwen.tiktoken'))
+    else:
+        raise UnsupportedModel(
+            f'Not support model: {model}, currently only support qwen models.')
+
+
+def list_tokenizers() -> List[str]:
+    """List support models
+
+    Returns:
+        List[str]: The model list.
+    """
+    return QWEN_SERIALS
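get_tokenizer is the convenience entry point for the local tokenizers: any name in QWEN_SERIALS, and in fact any name starting with 'qwen', resolves to the bundled Qwen vocabulary, while other names raise UnsupportedModel. A small sketch, again assuming tiktoken is installed (imports use the module path from this diff; the functions may also be re-exported via dashscope/tokenizers/__init__.py):

from dashscope.tokenizers.tokenizer import get_tokenizer, list_tokenizers

print(list_tokenizers())  # ['qwen-7b-chat', 'qwen-turbo', 'qwen-plus', 'qwen-max']

tokenizer = get_tokenizer('qwen-turbo')
ids = tokenizer.encode('Hello DashScope')
print(len(ids), tokenizer.decode(ids))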
dashscope/tokenizers/tokenizer_base.py
@@ -0,0 +1,32 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+from typing import List
+
+
+class Tokenizer:
+    """Base tokenizer interface for local tokenizers.
+    """
+    def __init__(self):
+        pass
+
+    def encode(self, text: str, **kwargs) -> List[int]:
+        """Encode input text string to token ids.
+
+        Args:
+            text (str): The string to be encoded.
+
+        Returns:
+            List[int]: The token ids.
+        """
+        pass
+
+    def decode(self, token_ids: List[int], **kwargs) -> str:
+        """Decode token ids to string.
+
+        Args:
+            token_ids (List[int]): The input token ids.
+
+        Returns:
+            str: The string of the token ids.
+        """
+        pass
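The Tokenizer base class above is a plain interface with no registration machinery, so adding another local tokenizer only means subclassing it and overriding encode/decode. A hypothetical sketch; the whitespace tokenizer below is illustrative and not part of the package:

from typing import Dict, List

from dashscope.tokenizers.tokenizer_base import Tokenizer


class WhitespaceTokenizer(Tokenizer):
    """Toy tokenizer that assigns one id per whitespace-separated word."""

    def __init__(self):
        super().__init__()
        self._vocab: Dict[str, int] = {}  # word -> id
        self._words: List[str] = []       # id -> word

    def encode(self, text: str, **kwargs) -> List[int]:
        ids = []
        for word in text.split():
            if word not in self._vocab:
                self._vocab[word] = len(self._words)
                self._words.append(word)
            ids.append(self._vocab[word])
        return ids

    def decode(self, token_ids: List[int], **kwargs) -> str:
        return ' '.join(self._words[i] for i in token_ids)


tok = WhitespaceTokenizer()
ids = tok.encode('hello hello dashscope')
assert tok.decode(ids) == 'hello hello dashscope'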