dashscope 1.23.7__py3-none-any.whl → 1.23.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dashscope might be problematic. See the registry advisory for details.

dashscope/assistants/assistants.py CHANGED
@@ -23,8 +23,8 @@ class Assistants(CreateMixin, CancelMixin, DeleteMixin, ListObjectMixin,
         name: str = None,
         description: str = None,
         instructions: str = None,
-        tools: Optional[str] = [],
-        file_ids: Optional[str] = [],
+        tools: Optional[List[Dict]] = None,
+        file_ids: Optional[List[str]] = [],
         metadata: Dict = {},
     ):
         obj = {}
@@ -36,7 +36,7 @@ class Assistants(CreateMixin, CancelMixin, DeleteMixin, ListObjectMixin,
         obj['description'] = description
         if instructions:
             obj['instructions'] = instructions
-        if tools:
+        if tools is not None:
            obj['tools'] = tools
         obj['file_ids'] = file_ids
         obj['metadata'] = metadata
@@ -50,7 +50,7 @@ class Assistants(CreateMixin, CancelMixin, DeleteMixin, ListObjectMixin,
         name: str = None,
         description: str = None,
         instructions: str = None,
-        tools: Optional[List[Dict]] = [],
+        tools: Optional[List[Dict]] = None,
         file_ids: Optional[List[str]] = [],
         metadata: Dict = None,
         workspace: str = None,
@@ -93,7 +93,7 @@ class Assistants(CreateMixin, CancelMixin, DeleteMixin, ListObjectMixin,
         name: str = None,
         description: str = None,
         instructions: str = None,
-        tools: Optional[List[Dict]] = [],
+        tools: Optional[List[Dict]] = None,
         file_ids: Optional[List[str]] = [],
         metadata: Dict = None,
         workspace: str = None,
@@ -219,8 +219,8 @@ class Assistants(CreateMixin, CancelMixin, DeleteMixin, ListObjectMixin,
         name: str = None,
         description: str = None,
         instructions: str = None,
-        tools: Optional[str] = [],
-        file_ids: Optional[str] = [],
+        tools: Optional[List[Dict]] = None,
+        file_ids: Optional[List[str]] = [],
         metadata: Dict = None,
         workspace: str = None,
         api_key: str = None,
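
Every hunk above makes the same two fixes: the tools parameter's annotation is corrected from Optional[str] to Optional[List[Dict]], and its mutable default [] is replaced by None with an explicit `is not None` check. A minimal standalone sketch (not dashscope code) of why both changes matter: a default list is created once at function-definition time and shared across calls, and a truthiness test like `if tools:` silently discards an explicitly passed empty list.

    from typing import Dict, List, Optional

    def create_bad(tools: Optional[List[Dict]] = []):
        # The single default list is shared by every call that omits `tools`,
        # so one call's mutation leaks into the next.
        tools.append({'type': 'code_interpreter'})
        return tools

    def create_good(tools: Optional[List[Dict]] = None):
        # `None` sentinel: each call that omits `tools` gets a fresh list,
        # and `is not None` keeps an explicitly passed [] intact instead of
        # dropping it the way a truthiness check (`if tools:`) would.
        if tools is None:
            tools = []
        return tools

    print(create_bad())   # [{'type': 'code_interpreter'}]
    print(create_bad())   # two entries now: state leaked between calls
    print(create_good())  # []
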
dashscope/audio/__init__.py CHANGED
@@ -1,5 +1,5 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 
-from . import asr, tts, tts_v2, qwen_tts
+from . import asr, tts, tts_v2, qwen_tts, qwen_tts_realtime, qwen_omni
 
-__all__ = [asr, tts, tts_v2, qwen_tts]
+__all__ = [asr, tts, tts_v2, qwen_tts, qwen_tts_realtime, qwen_omni]
dashscope/audio/qwen_omni/__init__.py ADDED
@@ -0,0 +1,11 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+from .omni_realtime import (AudioFormat, MultiModality, OmniRealtimeCallback,
+                            OmniRealtimeConversation)
+
+__all__ = [
+    'OmniRealtimeCallback',
+    'AudioFormat',
+    'MultiModality',
+    'OmniRealtimeConversation',
+]
dashscope/audio/qwen_omni/omni_realtime.py ADDED
@@ -0,0 +1,415 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import json
+import platform
+import threading
+import time
+from typing import List
+import uuid
+from enum import Enum, unique
+
+import dashscope
+import websocket
+from dashscope.common.error import InputRequired, ModelRequired
+from dashscope.common.logging import logger
+
+
+class OmniRealtimeCallback:
+    """
+    An interface that defines callback methods for getting omni-realtime results. # noqa E501
+    Derive from this class and implement its function to provide your own data.
+    """
+    def on_open(self) -> None:
+        pass
+
+    def on_close(self, close_status_code, close_msg) -> None:
+        pass
+
+    def on_event(self, message: str) -> None:
+        pass
+
+
+@unique
+class AudioFormat(Enum):
+    # format, sample_rate, channels, bit_rate, name
+    PCM_16000HZ_MONO_16BIT = ('pcm', 16000, 'mono', '16bit', 'pcm16')
+    PCM_24000HZ_MONO_16BIT = ('pcm', 24000, 'mono', '16bit', 'pcm16')
+
+    def __init__(self, format, sample_rate, channels, bit_rate, format_str):
+        self.format = format
+        self.sample_rate = sample_rate
+        self.channels = channels
+        self.bit_rate = bit_rate
+        self.format_str = format_str
+
+    def __repr__(self):
+        return self.format_str
+
+    def __str__(self):
+        return f'{self.format.upper()} with {self.sample_rate}Hz sample rate, {self.channels} channel, {self.bit_rate} bit rate: {self.format_str}'
+
+
+class MultiModality(Enum):
+    """
+    MultiModality
+    """
+    TEXT = 'text'
+    AUDIO = 'audio'
+
+    def __str__(self):
+        return self.name
+
+
+class OmniRealtimeConversation:
+    def __init__(
+        self,
+        model,
+        callback: OmniRealtimeCallback,
+        headers=None,
+        workspace=None,
+        url=None,
+        additional_params=None,
+    ):
+        """
+        Qwen Omni Realtime SDK
+        Parameters:
+        -----------
+        model: str
+            Model name.
+        headers: Dict
+            User-defined headers.
+        callback: OmniRealtimeCallback
+            Callback to receive real-time omni results.
+        workspace: str
+            Dashscope workspace ID.
+        url: str
+            Dashscope WebSocket URL.
+        additional_params: Dict
+            Additional parameters for the Dashscope API.
+        """
+
+        if model is None:
+            raise ModelRequired('Model is required!')
+        if callback is None:
+            raise ModelRequired('Callback is required!')
+        if url is None:
+            url = f'wss://dashscope.aliyuncs.com/api-ws/v1/realtime?model={model}'
+        else:
+            url = f'{url}?model={model}'
+        self.url = url
+        self.apikey = dashscope.api_key
+        self.user_headers = headers
+        self.user_workspace = workspace
+        self.model = model
+        self.config = {}
+        self.callback = callback
+        self.ws = None
+        self.session_id = None
+        self.last_message = None
+        self.last_response_id = None
+        self.last_response_create_time = None
+        self.last_first_text_delay = None
+        self.last_first_audio_delay = None
+        self.metrics = []
+
+    def _generate_event_id(self):
+        '''
+        generate random event id: event_xxxx
+        '''
+        return 'event_' + uuid.uuid4().hex
+
+    def _get_websocket_header(self, ):
+        ua = 'dashscope/%s; python/%s; platform/%s; processor/%s' % (
+            '1.18.0',  # dashscope version
+            platform.python_version(),
+            platform.platform(),
+            platform.processor(),
+        )
+        headers = {
+            'user-agent': ua,
+            'Authorization': 'bearer ' + self.apikey,
+        }
+        if self.user_headers:
+            headers = {**self.user_headers, **headers}
+        if self.user_workspace:
+            headers = {
+                **headers,
+                'X-DashScope-WorkSpace': self.user_workspace,
+            }
+        return headers
+
+    def connect(self) -> None:
+        '''
+        connect to server, create session and return default session configuration
+        '''
+        self.ws = websocket.WebSocketApp(
+            self.url,
+            header=self._get_websocket_header(),
+            on_message=self.on_message,
+            on_error=self.on_error,
+            on_close=self.on_close,
+        )
+        self.thread = threading.Thread(target=self.ws.run_forever)
+        self.thread.daemon = True
+        self.thread.start()
+        timeout = 5  # maximum wait time in seconds
+        start_time = time.time()
+        while (not (self.ws.sock and self.ws.sock.connected)
+               and (time.time() - start_time) < timeout):
+            time.sleep(0.1)  # sleep briefly to avoid busy polling
+        if not (self.ws.sock and self.ws.sock.connected):
+            raise TimeoutError(
+                'websocket connection could not established within 5s. '
+                'Please check your network connection, firewall settings, or server status.'
+            )
+        self.callback.on_open()
+
+    def __send_str(self, data: str, enable_log: bool = True):
+        if enable_log:
+            logger.debug('[omni realtime] send string: {}'.format(data))
+        self.ws.send(data)
+
+    def update_session(self,
+                       output_modalities: List[MultiModality],
+                       voice: str,
+                       input_audio_format: AudioFormat = AudioFormat.
+                       PCM_16000HZ_MONO_16BIT,
+                       output_audio_format: AudioFormat = AudioFormat.
+                       PCM_24000HZ_MONO_16BIT,
+                       enable_input_audio_transcription: bool = True,
+                       input_audio_transcription_model: str = None,
+                       enable_turn_detection: bool = True,
+                       turn_detection_type: str = 'server_vad',
+                       prefix_padding_ms: int = 300,
+                       turn_detection_threshold: float = 0.2,
+                       turn_detection_silence_duration_ms: int = 800,
+                       turn_detection_param: dict = None,
+                       **kwargs) -> None:
+        '''
+        update session configuration, should be used before create response
+
+        Parameters
+        ----------
+        output_modalities: list[MultiModality]
+            omni output modalities to be used in session
+        voice: str
+            voice to be used in session
+        input_audio_format: AudioFormat
+            input audio format
+        output_audio_format: AudioFormat
+            output audio format
+        enable_turn_detection: bool
+            enable turn detection
+        turn_detection_threshold: float
+            turn detection threshold, range [-1, 1]
+            In a noisy environment, it may be necessary to increase the threshold to reduce false detections
+            In a quiet environment, it may be necessary to decrease the threshold to improve sensitivity
+        turn_detection_silence_duration_ms: int
+            duration of silence in milliseconds to detect turn, range [200, 6000]
+        '''
+        self.config = {
+            'modalities': [m.value for m in output_modalities],
+            'voice': voice,
+            'input_audio_format': input_audio_format.format_str,
+            'output_audio_format': output_audio_format.format_str,
+        }
+        if enable_input_audio_transcription:
+            self.config['input_audio_transcription'] = {
+                'model': input_audio_transcription_model,
+            }
+        else:
+            self.config['input_audio_transcription'] = None
+        if enable_turn_detection:
+            self.config['turn_detection'] = {
+                'type': turn_detection_type,
+                'threshold': turn_detection_threshold,
+                'prefix_padding_ms': prefix_padding_ms,
+                'silence_duration_ms': turn_detection_silence_duration_ms,
+            }
+            if turn_detection_param is not None:
+                self.config['turn_detection'].update(turn_detection_param)
+        else:
+            self.config['turn_detection'] = None
+        self.config.update(kwargs)
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'session.update',
+                'session': self.config
+            }))
+
+    def append_audio(self, audio_b64: str) -> None:
+        '''
+        send audio in base64 format
+
+        Parameters
+        ----------
+        audio_b64: str
+            base64 audio string
+        '''
+        logger.debug('[omni realtime] append audio: {}'.format(len(audio_b64)))
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'input_audio_buffer.append',
+                'audio': audio_b64
+            }), False)
+
+    def append_video(self, video_b64: str) -> None:
+        '''
+        send one image frame in video in base64 format
+
+        Parameters
+        ----------
+        video_b64: str
+            base64 image string
+        '''
+        logger.debug('[omni realtime] append video: {}'.format(len(video_b64)))
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'input_image_buffer.append',
+                'image': video_b64
+            }), False)
+
+    def commit(self, ) -> None:
+        '''
+        Commit the audio and video sent before.
+        When in Server VAD mode, the client does not need to use this method,
+        the server will commit the audio automatically after detecting vad end.
+        '''
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'input_audio_buffer.commit'
+            }))
+
+    def clear_appended_audio(self, ) -> None:
+        '''
+        clear the audio sent to server before.
+        '''
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'input_audio_buffer.clear'
+            }))
+
+    def create_response(self,
+                        instructions: str = None,
+                        output_modalities: List[MultiModality] = None) -> None:
+        '''
+        create response, use audio and video commited before to request llm.
+        When in Server VAD mode, the client does not need to use this method,
+        the server will create response automatically after detecting vad
+        and sending commit.
+
+        Parameters
+        ----------
+        instructions: str
+            instructions to llm
+        output_modalities: list[MultiModality]
+            omni output modalities to be used in session
+        '''
+        request = {
+            'event_id': self._generate_event_id(),
+            'type': 'response.create',
+            'response': {}
+        }
+        request['response']['instructions'] = instructions
+        if output_modalities:
+            request['response']['modalities'] = [
+                m.value for m in output_modalities
+            ]
+        self.__send_str(json.dumps(request))
+
+    def cancel_response(self, ) -> None:
+        '''
+        cancel the current response
+        '''
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'response.cancel'
+            }))
+
+    def send_raw(self, raw_data: str) -> None:
+        '''
+        send raw data to server
+        '''
+        self.__send_str(raw_data)
+
+    def close(self, ) -> None:
+        '''
+        close the connection to server
+        '''
+        self.ws.close()
+
+    # callback for incoming messages
+    def on_message(self, ws, message):
+        if isinstance(message, str):
+            logger.debug('[omni realtime] receive string {}'.format(
+                message[:1024]))
+            try:
+                # try to parse the message as JSON
+                json_data = json.loads(message)
+                self.last_message = json_data
+                self.callback.on_event(json_data)
+                if 'type' in message:
+                    if 'session.created' == json_data['type']:
+                        self.session_id = json_data['session']['id']
+                    if 'response.created' == json_data['type']:
+                        self.last_response_id = json_data['response']['id']
+                        self.last_response_create_time = time.time() * 1000
+                        self.last_first_audio_delay = None
+                        self.last_first_text_delay = None
+                    elif 'response.audio_transcript.delta' == json_data[
+                            'type']:
+                        if self.last_response_create_time and self.last_first_text_delay is None:
+                            self.last_first_text_delay = time.time(
+                            ) * 1000 - self.last_response_create_time
+                    elif 'response.audio.delta' == json_data['type']:
+                        if self.last_response_create_time and self.last_first_audio_delay is None:
+                            self.last_first_audio_delay = time.time(
+                            ) * 1000 - self.last_response_create_time
+                    elif 'response.done' == json_data['type']:
+                        logger.info(
+                            '[Metric] response: {}, first text delay: {}, first audio delay: {}'
+                            .format(self.last_response_id,
+                                    self.last_first_text_delay,
+                                    self.last_first_audio_delay))
+            except json.JSONDecodeError:
+                logger.error('Failed to parse message as JSON.')
+                raise Exception('Failed to parse message as JSON.')
+        elif isinstance(message, (bytes, bytearray)):
+            # otherwise treat it as a binary message
+            logger.error(
+                'should not receive binary message in omni realtime api')
+            logger.debug('[omni realtime] receive binary {} bytes'.format(
+                len(message)))
+
+    def on_close(self, ws, close_status_code, close_msg):
+        self.callback.on_close(close_status_code, close_msg)
+
+    # callback for WebSocket errors
+    def on_error(self, ws, error):
+        print(f'websocket closed due to {error}')
+        raise Exception(f'websocket closed due to {error}')
+
+    # get the taskId of the last task
+    def get_session_id(self) -> str:
+        return self.session_id
+
+    def get_last_message(self) -> str:
+        return self.last_message
+
+    def get_last_message(self) -> str:
+        return self.last_message
+
+    def get_last_response_id(self) -> str:
+        return self.last_response_id
+
+    def get_last_first_text_delay(self):
+        return self.last_first_text_delay
+
+    def get_last_first_audio_delay(self):
+        return self.last_first_audio_delay
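
The module above drives the wss://dashscope.aliyuncs.com/api-ws/v1/realtime endpoint from a background websocket thread and surfaces parsed events through the callback. A minimal usage sketch, assuming dashscope.api_key is set and a realtime-capable model; the model and voice names below are illustrative placeholders, not values confirmed by this diff:

    import base64

    from dashscope.audio.qwen_omni import (AudioFormat, MultiModality,
                                           OmniRealtimeCallback,
                                           OmniRealtimeConversation)

    class PrintEvents(OmniRealtimeCallback):
        def on_open(self):
            print('connected')

        def on_event(self, message):
            # on_message above hands the callback an already-parsed dict
            print('event:', message.get('type'))

        def on_close(self, close_status_code, close_msg):
            print('closed:', close_status_code, close_msg)

    conversation = OmniRealtimeConversation(
        model='qwen-omni-realtime',  # illustrative model name
        callback=PrintEvents())
    conversation.connect()
    conversation.update_session(
        output_modalities=[MultiModality.TEXT, MultiModality.AUDIO],
        voice='Chelsie',  # illustrative voice name
        input_audio_format=AudioFormat.PCM_16000HZ_MONO_16BIT,
        enable_turn_detection=False)  # manual commit for this sketch
    with open('question.pcm', 'rb') as f:
        conversation.append_audio(base64.b64encode(f.read()).decode('ascii'))
    conversation.commit()
    conversation.create_response()
    # ... wait for a response.done event via the callback, then:
    conversation.close()

With enable_turn_detection=True (the default, server VAD), the explicit commit() and create_response() calls are unnecessary, per the docstrings above.
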
dashscope/audio/qwen_tts_realtime/__init__.py ADDED
@@ -0,0 +1,10 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+from .qwen_tts_realtime import (AudioFormat, QwenTtsRealtimeCallback,
+                                QwenTtsRealtime)
+
+__all__ = [
+    'AudioFormat',
+    'QwenTtsRealtimeCallback',
+    'QwenTtsRealtime',
+]
dashscope/audio/qwen_tts_realtime/qwen_tts_realtime.py ADDED
@@ -0,0 +1,314 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import json
+import platform
+import threading
+import time
+import uuid
+from enum import Enum, unique
+
+import dashscope
+import websocket
+from dashscope.common.error import InputRequired, ModelRequired
+from dashscope.common.logging import logger
+
+
+class QwenTtsRealtimeCallback:
+    """
+    An interface that defines callback methods for getting omni-realtime results. # noqa E501
+    Derive from this class and implement its function to provide your own data.
+    """
+    def on_open(self) -> None:
+        pass
+
+    def on_close(self, close_status_code, close_msg) -> None:
+        pass
+
+    def on_event(self, message: str) -> None:
+        pass
+
+
+@unique
+class AudioFormat(Enum):
+    # format, sample_rate, channels, bit_rate, name
+    PCM_24000HZ_MONO_16BIT = ('pcm', 24000, 'mono', '16bit', 'pcm16')
+
+    def __init__(self, format, sample_rate, channels, bit_rate, format_str):
+        self.format = format
+        self.sample_rate = sample_rate
+        self.channels = channels
+        self.bit_rate = bit_rate
+        self.format_str = format_str
+
+    def __repr__(self):
+        return self.format_str
+
+    def __str__(self):
+        return f'{self.format.upper()} with {self.sample_rate}Hz sample rate, {self.channels} channel, {self.bit_rate} bit rate: {self.format_str}'
+
+
+class QwenTtsRealtime:
+    def __init__(
+        self,
+        model,
+        headers=None,
+        callback: QwenTtsRealtimeCallback = None,
+        workspace=None,
+        url=None,
+        additional_params=None,
+    ):
+        """
+        Qwen Tts Realtime SDK
+        Parameters:
+        -----------
+        model: str
+            Model name.
+        headers: Dict
+            User-defined headers.
+        callback: OmniRealtimeCallback
+            Callback to receive real-time omni results.
+        workspace: str
+            Dashscope workspace ID.
+        url: str
+            Dashscope WebSocket URL.
+        additional_params: Dict
+            Additional parameters for the Dashscope API.
+        """
+
+        if model is None:
+            raise ModelRequired('Model is required!')
+        if url is None:
+            url = f'wss://dashscope.aliyuncs.com/api-ws/v1/realtime?model={model}'
+        else:
+            url = f'{url}?model={model}'
+        self.url = url
+        self.apikey = dashscope.api_key
+        self.user_headers = headers
+        self.user_workspace = workspace
+        self.model = model
+        self.config = {}
+        self.callback = callback
+        self.ws = None
+        self.session_id = None
+        self.last_message = None
+        self.last_response_id = None
+        self.last_first_text_time = None
+        self.last_first_audio_delay = None
+        self.metrics = []
+
+    def _generate_event_id(self):
+        '''
+        generate random event id: event_xxxx
+        '''
+        return 'event_' + uuid.uuid4().hex
+
+    def _get_websocket_header(self, ):
+        ua = 'dashscope/%s; python/%s; platform/%s; processor/%s' % (
+            '1.18.0',  # dashscope version
+            platform.python_version(),
+            platform.platform(),
+            platform.processor(),
+        )
+        headers = {
+            'user-agent': ua,
+            'Authorization': 'bearer ' + self.apikey,
+        }
+        if self.user_headers:
+            headers = {**self.user_headers, **headers}
+        if self.user_workspace:
+            headers = {
+                **headers,
+                'X-DashScope-WorkSpace': self.user_workspace,
+            }
+        return headers
+
+    def connect(self) -> None:
+        '''
+        connect to server, create session and return default session configuration
+        '''
+        self.ws = websocket.WebSocketApp(
+            self.url,
+            header=self._get_websocket_header(),
+            on_message=self.on_message,
+            on_error=self.on_error,
+            on_close=self.on_close,
+        )
+        self.thread = threading.Thread(target=self.ws.run_forever)
+        self.thread.daemon = True
+        self.thread.start()
+        timeout = 5  # maximum wait time in seconds
+        start_time = time.time()
+        while (not (self.ws.sock and self.ws.sock.connected)
+               and (time.time() - start_time) < timeout):
+            time.sleep(0.1)  # sleep briefly to avoid busy polling
+        if not (self.ws.sock and self.ws.sock.connected):
+            raise TimeoutError(
+                'websocket connection could not established within 5s. '
+                'Please check your network connection, firewall settings, or server status.'
+            )
+        self.callback.on_open()
+
+    def __send_str(self, data: str, enable_log: bool = True):
+        if enable_log:
+            logger.debug('[qwen tts realtime] send string: {}'.format(data))
+        self.ws.send(data)
+
+    def update_session(self,
+                       voice: str,
+                       response_format: AudioFormat = AudioFormat.
+                       PCM_24000HZ_MONO_16BIT,
+                       mode: str = 'server_commit',
+                       **kwargs) -> None:
+        '''
+        update session configuration, should be used before create response
+
+        Parameters
+        ----------
+        voice: str
+            voice to be used in session
+        response_format: AudioFormat
+            output audio format
+        mode: str
+            response mode, server_commit or commit
+        '''
+        self.config = {
+            'voice': voice,
+            'mode': mode,
+            'response_format': response_format.format,
+            'sample_rate': response_format.sample_rate,
+        }
+        self.config.update(kwargs)
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'session.update',
+                'session': self.config
+            }))
+
+    def append_text(self, text: str) -> None:
+        '''
+        send text
+
+        Parameters
+        ----------
+        text: str
+            text to send
+        '''
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'input_text_buffer.append',
+                'text': text
+            }))
+        if self.last_first_text_time is None:
+            self.last_first_text_time = time.time() * 1000
+
+    def commit(self, ) -> None:
+        '''
+        commit the text sent before, create response and start synthesis audio.
+        '''
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'input_text_buffer.commit'
+            }))
+
+    def clear_appended_text(self, ) -> None:
+        '''
+        clear the text sent to server before.
+        '''
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'input_text_buffer.clear'
+            }))
+
+    def cancel_response(self, ) -> None:
+        '''
+        cancel the current response
+        '''
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'response.cancel'
+            }))
+
+    def send_raw(self, raw_data: str) -> None:
+        '''
+        send raw data to server
+        '''
+        self.__send_str(raw_data)
+
+    def finish(self, ) -> None:
+        '''
+        finish input text stream, server will synthesis all text in buffer and close the connection
+        '''
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'session.finish'
+            }))
+
+    def close(self, ) -> None:
+        '''
+        close the connection to server
+        '''
+        self.ws.close()
+
+    # callback for incoming messages
+    def on_message(self, ws, message):
+        if isinstance(message, str):
+            logger.debug('[omni realtime] receive string {}'.format(
+                message[:1024]))
+            try:
+                # try to parse the message as JSON
+                json_data = json.loads(message)
+                self.last_message = json_data
+                self.callback.on_event(json_data)
+                if 'type' in message:
+                    if 'session.created' == json_data['type']:
+                        self.session_id = json_data['session']['id']
+                    if 'response.created' == json_data['type']:
+                        self.last_response_id = json_data['response']['id']
+                    elif 'response.audio.delta' == json_data['type']:
+                        if self.last_first_text_time and self.last_first_audio_delay is None:
+                            self.last_first_audio_delay = time.time(
+                            ) * 1000 - self.last_first_text_time
+                    elif 'response.done' == json_data['type']:
+                        logger.debug(
+                            '[Metric] response: {}, first audio delay: {}'
+                            .format(self.last_response_id,
+                                    self.last_first_audio_delay))
+            except json.JSONDecodeError:
+                logger.error('Failed to parse message as JSON.')
+                raise Exception('Failed to parse message as JSON.')
+        elif isinstance(message, (bytes, bytearray)):
+            # otherwise treat it as a binary message
+            logger.error(
+                'should not receive binary message in omni realtime api')
+            logger.debug('[omni realtime] receive binary {} bytes'.format(
+                len(message)))
+
+    def on_close(self, ws, close_status_code, close_msg):
+        logger.debug(
+            '[omni realtime] connection closed with code {} and message {}'.format(
+                close_status_code, close_msg))
+        self.callback.on_close(close_status_code, close_msg)
+
+    # callback for WebSocket errors
+    def on_error(self, ws, error):
+        print(f'websocket closed due to {error}')
+        raise Exception(f'websocket closed due to {error}')
+
+    # get the taskId of the last task
+    def get_session_id(self):
+        return self.session_id
+
+    def get_last_message(self):
+        return self.last_message
+
+    def get_last_response_id(self):
+        return self.last_response_id
+
+    def get_first_audio_delay(self):
+        return self.last_first_audio_delay
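
QwenTtsRealtime mirrors the omni client but streams text in and synthesized audio out. A minimal sketch, assuming dashscope.api_key is set; the model and voice names are illustrative, and the 'delta' payload key on response.audio.delta events is an assumption inferred from the event name, not something this diff confirms:

    import base64

    from dashscope.audio.qwen_tts_realtime import (AudioFormat, QwenTtsRealtime,
                                                   QwenTtsRealtimeCallback)

    class CollectAudio(QwenTtsRealtimeCallback):
        def __init__(self):
            self.pcm = bytearray()

        def on_event(self, message):
            if message.get('type') == 'response.audio.delta':
                # assumed payload key; adjust to the server's actual schema
                self.pcm.extend(base64.b64decode(message['delta']))

    collector = CollectAudio()
    tts = QwenTtsRealtime(model='qwen-tts-realtime',  # illustrative model name
                          callback=collector)
    tts.connect()
    tts.update_session(voice='Chelsie',  # illustrative voice name
                       response_format=AudioFormat.PCM_24000HZ_MONO_16BIT,
                       mode='server_commit')
    tts.append_text('Hello from the realtime TTS sketch.')
    tts.finish()  # server synthesizes buffered text, then closes the session
    # collector.pcm then holds 24 kHz mono 16-bit PCM once response.done arrives
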
dashscope/embeddings/text_embedding.py CHANGED
@@ -15,6 +15,7 @@ class TextEmbedding(BaseApi):
     text_embedding_v1 = 'text-embedding-v1'
     text_embedding_v2 = 'text-embedding-v2'
     text_embedding_v3 = 'text-embedding-v3'
+    text_embedding_v4 = 'text-embedding-v4'
 
     @classmethod
     def call(cls,
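
The only change here registers the new 'text-embedding-v4' model name. A minimal call sketch, assuming a configured API key; the response field layout below is an assumption carried over from the v1-v3 models, not confirmed by this diff:

    import dashscope
    from dashscope import TextEmbedding

    dashscope.api_key = 'sk-...'  # placeholder, set your real key
    resp = TextEmbedding.call(model='text-embedding-v4',
                              input='hello world')
    if resp.status_code == 200:
        # assumes the v1-v3 output layout: a list of per-input embeddings
        vector = resp.output['embeddings'][0]['embedding']
        print(len(vector))
    else:
        print(resp.code, resp.message)
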
dashscope/multimodal/multimodal_request_params.py CHANGED
@@ -83,12 +83,15 @@ class Upstream:
     # sample_rate: int  # synthesized-audio sample rate
 
     def to_dict(self):
-        return {
+        upstream: dict = {
             "type": self.type,
             "mode": self.mode,
             "audio_format": self.audio_format,
             # "sample_rate": self.sample_rate
         }
+        if self.pass_through_params is not None:
+            upstream.update(self.pass_through_params)
+        return upstream
 
 
 @dataclass
@@ -105,6 +108,7 @@ class Downstream:
     volume: int = field(default=50)  # speech volume, 0-100
     pitch_rate: int = field(default=100)  # speech pitch, 50-200
     speech_rate: int = field(default=100)  # speech rate, 50-200
+    pass_through_params: dict = field(default=None)
 
     def to_dict(self):
         stream: dict = {
@@ -120,6 +124,8 @@ class Downstream:
             stream["voice"] = self.voice
         if self.sample_rate != 0:
             stream["sample_rate"] = self.sample_rate
+        if self.pass_through_params is not None:
+            stream.update(self.pass_through_params)
         return stream
 
 
@@ -199,6 +205,7 @@ class BizParams:
     user_prompt_params: dict = field(default=None)
     user_query_params: dict = field(default=None)
     videos: list = field(default=None)
+    pass_through_params: dict = field(default=None)
 
     def to_dict(self):
         params = {}
@@ -214,6 +221,8 @@ class BizParams:
             params["user_query_params"] = self.user_query_params
         if self.videos is not None:
             params["videos"] = self.videos
+        if self.pass_through_params is not None:
+            params.update(self.pass_through_params)
         return params
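The pattern added to each of these to_dict methods is the same: an optional pass_through_params dict is merged into the serialized payload, letting callers forward server-side fields the dataclasses do not model yet. A standalone sketch of the merge semantics (the field names and the 'enable_ssml' key are illustrative, not dashscope API):

    from dataclasses import dataclass, field

    @dataclass
    class Downstream:
        voice: str = 'default-voice'  # illustrative value
        volume: int = field(default=50)
        pass_through_params: dict = field(default=None)

        def to_dict(self):
            stream: dict = {'voice': self.voice, 'volume': self.volume}
            if self.pass_through_params is not None:
                # dict.update: pass-through keys win on collision, so a
                # caller can also override fields the dataclass models
                stream.update(self.pass_through_params)
            return stream

    d = Downstream(pass_through_params={'enable_ssml': True, 'volume': 80})
    print(d.to_dict())
    # {'voice': 'default-voice', 'volume': 80, 'enable_ssml': True}
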
dashscope/version.py CHANGED
@@ -1,3 +1,3 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 
-__version__ = '1.23.7'
+__version__ = '1.23.9'
dashscope-1.23.9.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dashscope
-Version: 1.23.7
+Version: 1.23.9
 Summary: dashscope client sdk library
 Home-page: https://dashscope.aliyun.com/
 Author: Alibaba Cloud
dashscope-1.23.9.dist-info/RECORD CHANGED
@@ -3,7 +3,7 @@ dashscope/cli.py,sha256=amegoTkGOs6TlHMdoo4JVOqBePo3lGs745rc7leEyrE,24020
 dashscope/files.py,sha256=vRDQygm3lOqBZR73o7KNHs1iTBVuvLncuwJNxIYjzAU,3981
 dashscope/model.py,sha256=B5v_BtYLPqj6raClejBgdKg6WTGwhH_f-20pvsQqmsk,1491
 dashscope/models.py,sha256=dE4mzXkl85G343qVylSGpURPRdA5pZSqXlx6PcxqC_Q,1275
-dashscope/version.py,sha256=HwgMDxa-K4tdaSJnt84_qDc7qTyV4eqR_QjdjT4ulxc,74
+dashscope/version.py,sha256=ACHUqhomdzZ7aih_Y368NUmXBBfj2GY5GqSf2NoTta4,74
 dashscope/aigc/__init__.py,sha256=AuRhu_vA1K0tbs_C6DgcZYhTvxMuzDgpwHJNHzEPIHg,442
 dashscope/aigc/chat_completion.py,sha256=ONlyyssIbfaKKcFo7cEKhHx5OCF2XX810HFzIExW1ho,14813
 dashscope/aigc/code_generation.py,sha256=p_mxDKJLQMW0IjFD46JRlZuEZCRESSVKEfLlAevBtqw,10936
@@ -27,17 +27,21 @@ dashscope/app/application.py,sha256=Whf_ij4RHOaY12_xdS8uj8HVNCwkTp_MRdrFTryF1Kg,
 dashscope/app/application_response.py,sha256=z9BKcb9QuV_TzHkqtwaCwap2GQ1mP48uDD7gIc2il98,7038
 dashscope/assistants/__init__.py,sha256=hjCTuv13yFaXyUqlexAU-RaO0Ahq3P7VK9_LkSbkGVU,434
 dashscope/assistants/assistant_types.py,sha256=DQ_lOust10wjiV38Nlsu3HaGYNbGlJoaLjEza82Wotk,4262
-dashscope/assistants/assistants.py,sha256=_ADX4s8Mlfhfq7kb8xMLztNX_09NAW7DouvbVLY0jpw,10884
+dashscope/assistants/assistants.py,sha256=t8BGu9K2YbNpBenUyFFDMx5WpZ5x5OtbssJkj0xfVo0,10930
 dashscope/assistants/files.py,sha256=Ol2h7L2vNV8kgWqum2B-3B9vtLEHB_2KWt0K7e96Bmg,6750
-dashscope/audio/__init__.py,sha256=RKw-A1PLA5F4jiXI3AnuRSYeHFzQl2OW2ZGW6xyHS1Q,132
+dashscope/audio/__init__.py,sha256=7e3ejVsDJxEbMHN-9E0nEDfU-CnnQ4JgtgUxqNs0IG4,192
 dashscope/audio/asr/__init__.py,sha256=JoCenJAUVOQXPmAn1toKeFYCfc8BqNn0NKpqjuJvNJc,1055
 dashscope/audio/asr/asr_phrase_manager.py,sha256=vHOLExaKCtjedkihIu7gyfQyarR9rN5JZn79LvlCpco,7693
 dashscope/audio/asr/recognition.py,sha256=b_aAPvOKjpWdSiYhM_hp30sZ06QdmNBSDJwhiv78kHM,20932
 dashscope/audio/asr/transcription.py,sha256=lYzPjh7jJQwjMoxx8-AY0YCMBKNKO0bi7xd5tZGSHPc,9094
 dashscope/audio/asr/translation_recognizer.py,sha256=JgBmhkIl_kqH8uVwop6Fba5KlXccftKFrhaygN9PKjU,39680
 dashscope/audio/asr/vocabulary.py,sha256=N0pMS2x1lDxqJ14FgTGKctfuVkR2_hlEsCNWFcgYpTY,6717
+dashscope/audio/qwen_omni/__init__.py,sha256=MEFxmyxr5H6bW22l_R9073Pl6Ka6knvhrATGT-4UBjI,298
+dashscope/audio/qwen_omni/omni_realtime.py,sha256=eBmoOxuKcfzMHuXsQWCrIIKmso9iEzYylOeYZ5upv-w,14869
 dashscope/audio/qwen_tts/__init__.py,sha256=JS3axY1grqO0aTIJufZ3KS1JsU6yf6y4K2CQlNvUK9I,132
 dashscope/audio/qwen_tts/speech_synthesizer.py,sha256=7LHR-PXhn-VE1cCOp_82Jq0zE9rMc3xy3dszUeyLLNs,2927
+dashscope/audio/qwen_tts_realtime/__init__.py,sha256=vVkmeJr_mEAn_O0Rh5AU3ICg6qIZqppUryJ5lY8VYPo,254
+dashscope/audio/qwen_tts_realtime/qwen_tts_realtime.py,sha256=8bOAMcDasTHwSLb9xAGJoj9eUPpQHh2aWvonV6Kf1U4,10367
 dashscope/audio/tts/__init__.py,sha256=xYpMFseUZGgqgj_70zcX2VsLv-L7qxJ3d-bbdj_hO0I,245
 dashscope/audio/tts/speech_synthesizer.py,sha256=vD1xQV-rew8qAsIaAGH5amsNtB0SqdtNhVHhJHGQ-xk,7622
 dashscope/audio/tts_v2/__init__.py,sha256=me9a3_7KsHQxcJ8hx4SeKlY1e_ThHVvGMw7Yn0uoscM,333
@@ -62,14 +66,14 @@ dashscope/embeddings/__init__.py,sha256=XQ7vKr8oZM2CmdOduE53BWy6_Qpn9xUPkma64yw8
 dashscope/embeddings/batch_text_embedding.py,sha256=lVhvTS8McYfXuqt_8CmmhA6bPqD0nrGv965kjYG_j0E,8842
 dashscope/embeddings/batch_text_embedding_response.py,sha256=ZfkJMUq8GRsFA6XUTsiAsIySqGJH-VPi2P9Ba1KTU-s,2056
 dashscope/embeddings/multimodal_embedding.py,sha256=NwjQsdkKgUz51ozGjqFDzVlLcZjY0m1JNdH1EyAY0a4,4109
-dashscope/embeddings/text_embedding.py,sha256=4W1V-Ggj0WJhY5MdP2xoUTteXlWk8TUtI6y2gRUVCUk,2060
+dashscope/embeddings/text_embedding.py,sha256=2MPEyMB99xueDbvFg9kKAe8bgHMDEaFLaFa6GzDWDHg,2108
 dashscope/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dashscope/io/input_output.py,sha256=0aXrRJFo1ZqYm_AJWR_w88O4-Btn9np2zUhrrUdBdfw,3992
 dashscope/multimodal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dashscope/multimodal/dialog_state.py,sha256=CtOdfGWhq0ePG3bc8-7inhespETtPD4QDli1513hd1A,1522
 dashscope/multimodal/multimodal_constants.py,sha256=z_QVq01E43FAqKQnDu9vdf89d1zuYlWyANewWTEXVJM,1282
 dashscope/multimodal/multimodal_dialog.py,sha256=HymlaQYp7SgJdoKbT27SNiviyRRoM91zklNBwTHmm1Q,23939
-dashscope/multimodal/multimodal_request_params.py,sha256=7A4UhsbYjcX7aAJwWI1xZEt0e1bSgPcu5pJAinaZyx0,7907
+dashscope/multimodal/multimodal_request_params.py,sha256=9Dlvyy0u67K5FtMfWkCRLHKsevTM8jvT2V-OljZP5sM,8350
 dashscope/nlp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dashscope/nlp/understanding.py,sha256=00ado-ibYEzBRT0DgKGd3bohQDNW73xnFhJ_1aa87lw,2880
 dashscope/protocol/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -93,9 +97,9 @@ dashscope/tokenizers/tokenizer.py,sha256=3FQVDvMNkCW9ccYeJdjrd_PIMMD3Xv7aNZkaYOE
 dashscope/tokenizers/tokenizer_base.py,sha256=5EJIFuizMWESEmLmbd38yJnfeHmPnzZPwsO4aOGjpl4,707
 dashscope/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dashscope/utils/oss_utils.py,sha256=L5LN3lN8etVxSL_jkZydstvEKpnTG9CY0zcvPGQ5LBo,7383
-dashscope-1.23.7.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-dashscope-1.23.7.dist-info/METADATA,sha256=JxFpTi5zU9f1O1uLoTH1OoV6iV3K3aAqshhD1MTDtFY,7123
-dashscope-1.23.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dashscope-1.23.7.dist-info/entry_points.txt,sha256=e9C3sOf9zDYL0O5ROEGX6FT8w-QK_kaGRWmPZDHAFys,49
-dashscope-1.23.7.dist-info/top_level.txt,sha256=woqavFJK9zas5xTqynmALqOtlafghjsk63Xk86powTU,10
-dashscope-1.23.7.dist-info/RECORD,,
+dashscope-1.23.9.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+dashscope-1.23.9.dist-info/METADATA,sha256=tQQhkm5TxNwI2TCmWzT11dwWd9UGFb7cI1cojbo9Dpw,7123
+dashscope-1.23.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dashscope-1.23.9.dist-info/entry_points.txt,sha256=e9C3sOf9zDYL0O5ROEGX6FT8w-QK_kaGRWmPZDHAFys,49
+dashscope-1.23.9.dist-info/top_level.txt,sha256=woqavFJK9zas5xTqynmALqOtlafghjsk63Xk86powTU,10
+dashscope-1.23.9.dist-info/RECORD,,