dashscope 1.8.0__py3-none-any.whl → 1.25.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. dashscope/__init__.py +61 -14
  2. dashscope/aigc/__init__.py +10 -3
  3. dashscope/aigc/chat_completion.py +282 -0
  4. dashscope/aigc/code_generation.py +145 -0
  5. dashscope/aigc/conversation.py +71 -12
  6. dashscope/aigc/generation.py +288 -16
  7. dashscope/aigc/image_synthesis.py +473 -31
  8. dashscope/aigc/multimodal_conversation.py +299 -14
  9. dashscope/aigc/video_synthesis.py +610 -0
  10. dashscope/api_entities/aiohttp_request.py +8 -5
  11. dashscope/api_entities/api_request_data.py +4 -2
  12. dashscope/api_entities/api_request_factory.py +68 -20
  13. dashscope/api_entities/base_request.py +20 -3
  14. dashscope/api_entities/chat_completion_types.py +344 -0
  15. dashscope/api_entities/dashscope_response.py +243 -15
  16. dashscope/api_entities/encryption.py +179 -0
  17. dashscope/api_entities/http_request.py +216 -62
  18. dashscope/api_entities/websocket_request.py +43 -34
  19. dashscope/app/__init__.py +5 -0
  20. dashscope/app/application.py +203 -0
  21. dashscope/app/application_response.py +246 -0
  22. dashscope/assistants/__init__.py +16 -0
  23. dashscope/assistants/assistant_types.py +175 -0
  24. dashscope/assistants/assistants.py +311 -0
  25. dashscope/assistants/files.py +197 -0
  26. dashscope/audio/__init__.py +4 -2
  27. dashscope/audio/asr/__init__.py +17 -1
  28. dashscope/audio/asr/asr_phrase_manager.py +203 -0
  29. dashscope/audio/asr/recognition.py +167 -27
  30. dashscope/audio/asr/transcription.py +107 -14
  31. dashscope/audio/asr/translation_recognizer.py +1006 -0
  32. dashscope/audio/asr/vocabulary.py +177 -0
  33. dashscope/audio/qwen_asr/__init__.py +7 -0
  34. dashscope/audio/qwen_asr/qwen_transcription.py +189 -0
  35. dashscope/audio/qwen_omni/__init__.py +11 -0
  36. dashscope/audio/qwen_omni/omni_realtime.py +524 -0
  37. dashscope/audio/qwen_tts/__init__.py +5 -0
  38. dashscope/audio/qwen_tts/speech_synthesizer.py +77 -0
  39. dashscope/audio/qwen_tts_realtime/__init__.py +10 -0
  40. dashscope/audio/qwen_tts_realtime/qwen_tts_realtime.py +355 -0
  41. dashscope/audio/tts/__init__.py +2 -0
  42. dashscope/audio/tts/speech_synthesizer.py +5 -0
  43. dashscope/audio/tts_v2/__init__.py +12 -0
  44. dashscope/audio/tts_v2/enrollment.py +179 -0
  45. dashscope/audio/tts_v2/speech_synthesizer.py +886 -0
  46. dashscope/cli.py +157 -37
  47. dashscope/client/base_api.py +652 -87
  48. dashscope/common/api_key.py +2 -0
  49. dashscope/common/base_type.py +135 -0
  50. dashscope/common/constants.py +13 -16
  51. dashscope/common/env.py +2 -0
  52. dashscope/common/error.py +58 -22
  53. dashscope/common/logging.py +2 -0
  54. dashscope/common/message_manager.py +2 -0
  55. dashscope/common/utils.py +276 -46
  56. dashscope/customize/__init__.py +0 -0
  57. dashscope/customize/customize_types.py +192 -0
  58. dashscope/customize/deployments.py +146 -0
  59. dashscope/customize/finetunes.py +234 -0
  60. dashscope/embeddings/__init__.py +5 -1
  61. dashscope/embeddings/batch_text_embedding.py +208 -0
  62. dashscope/embeddings/batch_text_embedding_response.py +65 -0
  63. dashscope/embeddings/multimodal_embedding.py +118 -10
  64. dashscope/embeddings/text_embedding.py +13 -1
  65. dashscope/{file.py → files.py} +19 -4
  66. dashscope/io/input_output.py +2 -0
  67. dashscope/model.py +11 -2
  68. dashscope/models.py +43 -0
  69. dashscope/multimodal/__init__.py +20 -0
  70. dashscope/multimodal/dialog_state.py +56 -0
  71. dashscope/multimodal/multimodal_constants.py +28 -0
  72. dashscope/multimodal/multimodal_dialog.py +648 -0
  73. dashscope/multimodal/multimodal_request_params.py +313 -0
  74. dashscope/multimodal/tingwu/__init__.py +10 -0
  75. dashscope/multimodal/tingwu/tingwu.py +80 -0
  76. dashscope/multimodal/tingwu/tingwu_realtime.py +579 -0
  77. dashscope/nlp/__init__.py +0 -0
  78. dashscope/nlp/understanding.py +64 -0
  79. dashscope/protocol/websocket.py +3 -0
  80. dashscope/rerank/__init__.py +0 -0
  81. dashscope/rerank/text_rerank.py +69 -0
  82. dashscope/resources/qwen.tiktoken +151643 -0
  83. dashscope/threads/__init__.py +26 -0
  84. dashscope/threads/messages/__init__.py +0 -0
  85. dashscope/threads/messages/files.py +113 -0
  86. dashscope/threads/messages/messages.py +220 -0
  87. dashscope/threads/runs/__init__.py +0 -0
  88. dashscope/threads/runs/runs.py +501 -0
  89. dashscope/threads/runs/steps.py +112 -0
  90. dashscope/threads/thread_types.py +665 -0
  91. dashscope/threads/threads.py +212 -0
  92. dashscope/tokenizers/__init__.py +7 -0
  93. dashscope/tokenizers/qwen_tokenizer.py +111 -0
  94. dashscope/tokenizers/tokenization.py +125 -0
  95. dashscope/tokenizers/tokenizer.py +45 -0
  96. dashscope/tokenizers/tokenizer_base.py +32 -0
  97. dashscope/utils/__init__.py +0 -0
  98. dashscope/utils/message_utils.py +838 -0
  99. dashscope/utils/oss_utils.py +243 -0
  100. dashscope/utils/param_utils.py +29 -0
  101. dashscope/version.py +3 -1
  102. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/METADATA +53 -50
  103. dashscope-1.25.6.dist-info/RECORD +112 -0
  104. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/WHEEL +1 -1
  105. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/entry_points.txt +0 -1
  106. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info/licenses}/LICENSE +2 -4
  107. dashscope/deployment.py +0 -129
  108. dashscope/finetune.py +0 -149
  109. dashscope-1.8.0.dist-info/RECORD +0 -49
  110. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,212 @@
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+
3
+ from typing import Dict, List, Optional
4
+
5
+ from dashscope.assistants.assistant_types import DeleteResponse
6
+ from dashscope.client.base_api import (CreateMixin, DeleteMixin,
7
+ GetStatusMixin, UpdateMixin)
8
+ from dashscope.common.error import InputRequired
9
+ from dashscope.threads.thread_types import Run, Thread
10
+
11
+ __all__ = ['Threads']
12
+
13
+
14
class Threads(CreateMixin, DeleteMixin, GetStatusMixin, UpdateMixin):
    """Client for the ``threads`` resource.

    Provides create / retrieve / update / delete operations on threads and a
    helper that creates a thread and starts a run on it in a single request.
    """
    SUB_PATH = 'threads'

    @classmethod
    def call(cls,
             *,
             messages: List[Dict] = None,
             metadata: Dict = None,
             workspace: str = None,
             api_key: str = None,
             **kwargs) -> Thread:
        """Create a thread; this simply forwards to :meth:`create`.

        Args:
            messages (List[Dict], optional): Messages the thread starts with.
                Defaults to None.
            metadata (Dict, optional): Key-value information associated with
                the thread. Defaults to None.
            workspace (str, optional): The DashScope workspace id.
                Defaults to None.
            api_key (str, optional): Your DashScope api key. Defaults to None.

        Returns:
            Thread: The thread object.
        """
        forwarded = dict(messages=messages,
                         metadata=metadata,
                         workspace=workspace,
                         api_key=api_key)
        return cls.create(**forwarded, **kwargs)

    @classmethod
    def create(cls,
               *,
               messages: List[Dict] = None,
               metadata: Dict = None,
               workspace: str = None,
               api_key: str = None,
               **kwargs) -> Thread:
        """Create a thread.

        Args:
            messages (List[Dict], optional): Messages the thread starts with.
                Defaults to None.
            metadata (Dict, optional): Key-value information associated with
                the thread. Defaults to None.
            workspace (str, optional): The DashScope workspace id.
                Defaults to None.
            api_key (str, optional): Your DashScope api key. Defaults to None.

        Returns:
            Thread: The thread object.
        """
        # Only non-empty fields go into the request body.
        candidates = (('messages', messages), ('metadata', metadata))
        payload = {name: value for name, value in candidates if value}
        # An empty body is sent as '' rather than as an empty dict.
        created = super().call(data=payload or '',
                               api_key=api_key,
                               flattened_output=True,
                               workspace=workspace,
                               **kwargs)
        return Thread(**created)

    @classmethod
    def get(cls,
            thread_id: str,
            *,
            workspace: str = None,
            api_key: str = None,
            **kwargs) -> Thread:
        """Retrieve a thread; this simply forwards to :meth:`retrieve`.

        Args:
            thread_id (str): The target thread.
            workspace (str, optional): The DashScope workspace id.
                Defaults to None.
            api_key (str, optional): Your DashScope api key. Defaults to None.

        Returns:
            Thread: The `Thread` information.
        """
        return cls.retrieve(thread_id,
                            workspace=workspace,
                            api_key=api_key,
                            **kwargs)

    @classmethod
    def retrieve(cls,
                 thread_id: str,
                 *,
                 workspace: str = None,
                 api_key: str = None,
                 **kwargs) -> Thread:
        """Retrieve a thread.

        Args:
            thread_id (str): The target thread.
            workspace (str, optional): The DashScope workspace id.
                Defaults to None.
            api_key (str, optional): Your DashScope api key. Defaults to None.

        Raises:
            InputRequired: If ``thread_id`` is empty.

        Returns:
            Thread: The `Thread` information.
        """
        if not thread_id:
            raise InputRequired('thread_id is required!')
        fetched = super().get(thread_id,
                              api_key=api_key,
                              flattened_output=True,
                              workspace=workspace,
                              **kwargs)
        return Thread(**fetched)

    @classmethod
    def update(cls,
               thread_id: str,
               *,
               metadata: Dict = None,
               workspace: str = None,
               api_key: str = None,
               **kwargs) -> Thread:
        """Update thread information.

        Args:
            thread_id (str): The thread id.
            metadata (Dict, optional): The thread key-value information.
                It is always sent in the request body, even when None.
                Defaults to None.
            workspace (str, optional): The DashScope workspace id.
                Defaults to None.
            api_key (str, optional): Your DashScope api key. Defaults to None.

        Raises:
            InputRequired: If ``thread_id`` is empty.

        Returns:
            Thread: The `Thread` information.
        """
        if not thread_id:
            raise InputRequired('thread_id is required!')
        body = {'metadata': metadata}
        # The update is issued with method='post'.
        updated = super().update(thread_id,
                                 json=body,
                                 api_key=api_key,
                                 workspace=workspace,
                                 flattened_output=True,
                                 method='post',
                                 **kwargs)
        return Thread(**updated)

    @classmethod
    def delete(cls,
               thread_id,
               *,
               workspace: str = None,
               api_key: str = None,
               **kwargs) -> DeleteResponse:
        """Delete a thread.

        Args:
            thread_id (str): The thread id to delete.
            workspace (str, optional): The DashScope workspace id.
                Defaults to None.
            api_key (str, optional): Your DashScope api key. Defaults to None.

        Raises:
            InputRequired: If ``thread_id`` is empty.

        Returns:
            DeleteResponse: The deleted information.
        """
        if not thread_id:
            raise InputRequired('thread_id is required!')
        removed = super().delete(thread_id,
                                 api_key=api_key,
                                 workspace=workspace,
                                 flattened_output=True,
                                 **kwargs)
        return DeleteResponse(**removed)

    @classmethod
    def create_and_run(cls,
                       *,
                       assistant_id: str,
                       thread: Optional[Dict] = None,
                       model: Optional[str] = None,
                       instructions: Optional[str] = None,
                       additional_instructions: Optional[str] = None,
                       tools: Optional[List[Dict]] = None,
                       metadata: Optional[Dict] = None,
                       workspace: str = None,
                       api_key: str = None,
                       **kwargs) -> Run:
        """Create a thread and start a run on it in one request.

        The request is posted to the ``threads/runs`` path. Optional fields
        are only included in the body when they are non-empty.

        Args:
            assistant_id (str): The assistant that executes the run.
            thread (Dict, optional): The thread to create. Defaults to None.
            model (str, optional): Sent as ``model`` in the request body.
                Defaults to None.
            instructions (str, optional): Sent as ``instructions`` in the
                request body. Defaults to None.
            additional_instructions (str, optional): Sent as
                ``additional_instructions`` in the request body.
                Defaults to None.
            tools (List[Dict], optional): Sent as ``tools`` in the request
                body. Defaults to None.
            metadata (Dict, optional): Sent as ``metadata`` in the request
                body. Defaults to None.
            workspace (str, optional): The DashScope workspace id.
                Defaults to None.
            api_key (str, optional): Your DashScope api key. Defaults to None.

        Raises:
            InputRequired: If ``assistant_id`` is empty.

        Returns:
            Run: The run object built from the response.
        """
        if not assistant_id:
            raise InputRequired('assistant_id is required')

        optional_fields = {
            'thread': thread,
            'model': model,
            'instructions': instructions,
            'additional_instructions': additional_instructions,
            'tools': tools,
            'metadata': metadata,
        }
        payload = {'assistant_id': assistant_id}
        payload.update({
            name: value
            for name, value in optional_fields.items() if value
        })

        started = super().call(data=payload,
                               path='threads/runs',
                               api_key=api_key,
                               flattened_output=True,
                               workspace=workspace,
                               **kwargs)
        return Run(**started)
@@ -0,0 +1,7 @@
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+
3
+ from .tokenization import Tokenization
4
+ from .tokenizer import get_tokenizer, list_tokenizers
5
+ from .tokenizer_base import Tokenizer
6
+
7
# __all__ must contain the exported names as strings. Listing the objects
# themselves makes `from dashscope.tokenizers import *` fail with TypeError.
__all__ = ['Tokenization', 'Tokenizer', 'get_tokenizer', 'list_tokenizers']
@@ -0,0 +1,111 @@
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+
3
+ import base64
4
+ import unicodedata
5
+ from typing import Collection, Dict, List, Set, Union
6
+
7
+ from .tokenizer_base import Tokenizer
8
+
9
# Regex passed to tiktoken as the pre-tokenization split pattern.
PAT_STR = r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+"""  # noqa E501

# Surface forms of the named special tokens.
ENDOFTEXT = '<|endoftext|>'
IMSTART = '<|im_start|>'
IMEND = '<|im_end|>'

# Special tokens are allowed inside regular text by default, so their surface
# forms are kept as unusual as possible to minimise accidental matches.
EXTRAS = tuple(f'<|extra_{i}|>' for i in range(205))

# Special tokens are numbered from a fixed id so that expanding the
# vocabulary cannot shift them.
SPECIAL_START_ID = 151643

# (id, token) pairs: the three named tokens first, then the extras.
_SPECIAL_SURFACE_FORMS = (ENDOFTEXT, IMSTART, IMEND) + EXTRAS
SPECIAL_TOKENS = tuple(
    enumerate(_SPECIAL_SURFACE_FORMS, start=SPECIAL_START_ID))
SPECIAL_TOKENS_SET = {token for _, token in SPECIAL_TOKENS}
29
+
30
+
31
class QwenTokenizer(Tokenizer):
    """Local Qwen tokenizer built on a ``tiktoken.Encoding``.

    The vocabulary is read from a tiktoken BPE file and combined with the
    module-level ``SPECIAL_TOKENS``.
    """
    @staticmethod
    def _load_tiktoken_bpe(tiktoken_bpe_file: str) -> Dict[bytes, int]:
        """Read a tiktoken BPE vocabulary file.

        Every non-blank line must hold two whitespace-separated fields: the
        base64-encoded token and its integer rank.

        Args:
            tiktoken_bpe_file (str): Path of the vocabulary file.

        Raises:
            ValueError: If a non-blank line does not have exactly two fields
                or the rank is not an integer.

        Returns:
            Dict[bytes, int]: Mapping from token bytes to rank.
        """
        with open(tiktoken_bpe_file, 'rb') as f:
            contents = f.read()

        ranks = {}
        for line in contents.splitlines():
            fields = line.split()
            # Skip empty and whitespace-only lines instead of failing on
            # the unpack below.
            if not fields:
                continue
            token, rank = fields
            ranks[base64.b64decode(token)] = int(rank)
        return ranks

    def __init__(self, vocab_file, errors='replace', extra_vocab_file=None):
        """Build the encoding from the vocabulary file(s).

        Args:
            vocab_file (str): Path of the main tiktoken BPE vocabulary file.
            errors (str, optional): Default ``errors`` mode used by
                :meth:`decode`. Defaults to 'replace'.
            extra_vocab_file (str, optional): Path of an additional BPE
                vocabulary file. Entries whose token already exists, or whose
                id is already used by the main vocabulary or a special
                token, are ignored. Defaults to None.
        """
        super().__init__()
        self._errors = errors
        self._vocab_file = vocab_file
        self._extra_vocab_file = extra_vocab_file

        self._mergeable_ranks = QwenTokenizer._load_tiktoken_bpe(
            vocab_file)  # type: Dict[bytes, int]
        self._special_tokens = {
            token: index
            for index, token in SPECIAL_TOKENS
        }

        # try load extra vocab from file
        if extra_vocab_file is not None:
            used_ids = set(self._mergeable_ranks.values()) | set(
                self._special_tokens.values())
            extra_mergeable_ranks = self._load_tiktoken_bpe(extra_vocab_file)
            for token, index in extra_mergeable_ranks.items():
                if token in self._mergeable_ranks:
                    continue
                if index in used_ids:
                    continue
                self._mergeable_ranks[token] = index
            # the index may be sparse after this, but don't worry
            # tiktoken.Encoding will handle this

        # Imported lazily so tiktoken is only needed when a tokenizer is
        # actually constructed.
        import tiktoken
        enc = tiktoken.Encoding(
            'Qwen',
            pat_str=PAT_STR,
            mergeable_ranks=self._mergeable_ranks,
            special_tokens=self._special_tokens,
        )
        # Sanity check: regular and special tokens must not overlap.
        assert (
            len(self._mergeable_ranks) +
            len(self._special_tokens) == enc.n_vocab
        ), f'{len(self._mergeable_ranks) + len(self._special_tokens)} != {enc.n_vocab} in encoding'

        # Reverse lookup: token id -> token bytes (or special token string).
        self.decoder = {v: k
                        for k, v in self._mergeable_ranks.items()
                        }  # type: dict[int, bytes|str]
        self.decoder.update({v: k for k, v in self._special_tokens.items()})

        self._tokenizer = enc  # type: tiktoken.Encoding

        self.eod_id = self._tokenizer.eot_token
        self.im_start_id = self._special_tokens[IMSTART]
        self.im_end_id = self._special_tokens[IMEND]

    def encode(
        self,
        text: str,
        allowed_special: Union[Set, str] = 'all',
        disallowed_special: Union[Collection, str] = (),
    ) -> List[int]:
        """Encode text to token ids.

        The text is NFC-normalized before it is handed to tiktoken.

        Args:
            text (str): The string to be encoded.
            allowed_special (Union[Set, str], optional): Passed through to
                ``tiktoken.Encoding.encode``. Defaults to 'all'.
            disallowed_special (Union[Collection, str], optional): Passed
                through to ``tiktoken.Encoding.encode``. Defaults to ().

        Returns:
            List[int]: The token ids.
        """
        text = unicodedata.normalize('NFC', text)
        return self._tokenizer.encode(text,
                                      allowed_special=allowed_special,
                                      disallowed_special=disallowed_special)

    def decode(
        self,
        token_ids: Union[int, List[int]],
        skip_special_tokens: bool = False,
        errors: str = None,
        **kwargs,
    ) -> str:
        """Decode token ids to a string.

        Args:
            token_ids (Union[int, List[int]]): A single id or a list of ids.
            skip_special_tokens (bool, optional): If True, ids greater than
                or equal to ``eod_id`` are dropped before decoding.
                Defaults to False.
            errors (str, optional): Error mode for this call; falls back to
                the mode given at construction. Defaults to None.
            **kwargs: Accepted for interface compatibility and ignored.

        Returns:
            str: The decoded text.
        """
        if isinstance(token_ids, int):
            token_ids = [token_ids]
        if skip_special_tokens:
            token_ids = [i for i in token_ids if i < self.eod_id]
        return self._tokenizer.decode(token_ids, errors=errors or self._errors)
@@ -0,0 +1,125 @@
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+
3
+ import copy
4
+ from typing import Any, List
5
+
6
+ from dashscope.api_entities.dashscope_response import (DashScopeAPIResponse,
7
+ Message, Role)
8
+ from dashscope.client.base_api import BaseApi
9
+ from dashscope.common.constants import (CUSTOMIZED_MODEL_ID,
10
+ DEPRECATED_MESSAGE, HISTORY, MESSAGES,
11
+ PROMPT)
12
+ from dashscope.common.error import InputRequired, ModelRequired
13
+ from dashscope.common.logging import logger
14
+
15
+
16
class Tokenization(BaseApi):
    """API for getting the tokenizer result of a model input."""
    FUNCTION = 'tokenizer'

    class Models:
        """List of models currently supported
        """
        qwen_turbo = 'qwen-turbo'
        qwen_plus = 'qwen-plus'
        qwen_7b_chat = 'qwen-7b-chat'
        qwen_14b_chat = 'qwen-14b-chat'
        llama2_7b_chat_v2 = 'llama2-7b-chat-v2'
        llama2_13b_chat_v2 = 'llama2-13b-chat-v2'
        text_embedding_v2 = 'text-embedding-v2'
        qwen_72b_chat = 'qwen-72b-chat'

    @classmethod
    def call(cls,
             model: str,
             input: Any = None,
             prompt: Any = None,
             history: list = None,
             api_key: str = None,
             messages: List[Message] = None,
             workspace: str = None,
             **kwargs) -> DashScopeAPIResponse:
        """Call tokenization.

        Args:
            model (str): The requested model, such as qwen-turbo.
            input (Any): The model input body. When given (not None) it is
                sent as-is and prompt/history/messages are not used to build
                the input.
            prompt (Any): The input prompt, for qwen serial model.
            history (list): The user provided history,
                deprecated, use messages instead.
                examples:
                    [{'user':'The weather is fine today.',
                      'bot': 'Suitable for outings'}].
                Defaults to None.
            api_key (str, optional): The api api_key, can be None,
                if None, will get by default rule.
            messages (list): The generation messages.
                examples:
                    [{'role': 'user',
                      'content': 'The weather is fine today.'},
                      {'role': 'assistant', 'content': 'Suitable for outings'}]
            workspace (str): The dashscope workspace id.
            **kwargs:
                see model input. A ``stream`` option is not supported; it is
                dropped with a warning.

        Raises:
            InputRequired: None of input, prompt or messages is provided.
            ModelRequired: model is required.

        Returns:
            DashScopeAPIResponse: The tokenizer output.
        """
        if not input and not prompt and not messages:
            raise InputRequired('prompt or messages is required!')
        if not model:
            raise ModelRequired('Model is required!')

        # Streaming is not supported. Drop the option before the request
        # parameters are built so it never reaches the request, whichever
        # input path is taken below.
        if kwargs.pop('stream', False):
            logger.warning('streaming option not supported for tokenization.')

        if input is None:
            input, parameters = cls._build_llm_parameters(
                model, prompt, history, messages, **kwargs)
        else:
            parameters = kwargs

        return super().call(model=model,
                            task_group=None,
                            function=cls.FUNCTION,
                            api_key=api_key,
                            input=input,
                            is_service=False,
                            workspace=workspace,
                            **parameters)

    @classmethod
    def _build_llm_parameters(cls, model, prompt, history, messages, **kwargs):
        """Build the request input and parameters from prompt/messages.

        A non-empty prompt takes precedence over messages. When messages are
        used, the input consists of the messages only (history is not
        included).

        Args:
            model (str): The requested model name.
            prompt (Any): The input prompt.
            history (list): Deprecated history; logs a deprecation warning
                when given.
            messages (list): The messages, deep-copied before use.
            **kwargs: Extra options, merged into the returned parameters.

        Raises:
            InputRequired: The model name starts with 'bailian' and
                ``customized_model_id`` is missing.

        Returns:
            tuple: ``(input, parameters)`` for the request.
        """
        parameters = {}
        input = {}
        if history is not None:
            logger.warning(DEPRECATED_MESSAGE)
            input[HISTORY] = history
        if prompt:
            input[PROMPT] = prompt
        elif messages is not None:
            # Deep copy so the request never shares objects with the
            # caller's list. This replaces the input dict built so far.
            input = {MESSAGES: copy.deepcopy(messages)}
        else:
            input[PROMPT] = prompt

        if model.startswith('qwen'):
            enable_search = kwargs.pop('enable_search', False)
            if enable_search:
                parameters['enable_search'] = enable_search
        elif model.startswith('bailian'):
            customized_model_id = kwargs.pop('customized_model_id', None)
            if customized_model_id is None:
                raise InputRequired('customized_model_id is required for %s' %
                                    model)
            input[CUSTOMIZED_MODEL_ID] = customized_model_id

        return input, {**parameters, **kwargs}
@@ -0,0 +1,45 @@
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+
3
+ import os
4
+ from typing import List
5
+
6
+ from dashscope.common.error import UnsupportedModel
7
+ from dashscope.tokenizers.qwen_tokenizer import QwenTokenizer
8
+
9
+ from .tokenizer_base import Tokenizer
10
+
11
# Model names reported by list_tokenizers().
QWEN_SERIALS = ['qwen-7b-chat', 'qwen-turbo', 'qwen-plus', 'qwen-max']
# Directory that contains this module.
current_path = os.path.split(os.path.abspath(__file__))[0]
# Parent of that directory; the bundled 'resources' folder is looked up here.
root_path = os.path.split(current_path)[0]
14
+
15
+
16
def get_tokenizer(model: str) -> Tokenizer:
    """Return the local tokenizer for a model name.

    Every name listed in ``QWEN_SERIALS`` and every name starting with
    'qwen' is served by the bundled ``qwen.tiktoken`` vocabulary.

    Args:
        model (str): The model name.

    Raises:
        UnsupportedModel: The model is not a qwen model.

    Returns:
        Tokenizer: The `Tokenizer` of the model.
    """
    is_qwen_model = model in QWEN_SERIALS or model.startswith('qwen')
    if not is_qwen_model:
        raise UnsupportedModel(
            f'Not support model: {model}, currently only support qwen models.')

    vocab_path = os.path.join(root_path, 'resources', 'qwen.tiktoken')
    return QwenTokenizer(vocab_path)
37
+
38
+
39
def list_tokenizers() -> List[str]:
    """List the supported model names.

    Returns:
        List[str]: A new list with the model names; changing it does not
            affect the module-level ``QWEN_SERIALS``.
    """
    # Return a copy so callers cannot mutate the module-level list.
    return list(QWEN_SERIALS)
@@ -0,0 +1,32 @@
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+
3
+ from typing import List
4
+
5
+
6
class Tokenizer:
    """Interface implemented by local tokenizers.

    The methods here do nothing and return None; concrete tokenizers
    override them.
    """
    def __init__(self):
        """Create the tokenizer; the base class has nothing to set up."""

    def encode(self, text: str, **kwargs) -> List[int]:
        """Encode a text string to token ids.

        Args:
            text (str): The string to be encoded.

        Returns:
            List[int]: The token ids (the base implementation returns None).
        """
        return None

    def decode(self, token_ids: List[int], **kwargs) -> str:
        """Decode token ids to a string.

        Args:
            token_ids (List[int]): The input token ids.

        Returns:
            str: The decoded string (the base implementation returns None).
        """
        return None
File without changes