dashscope 1.8.0__py3-none-any.whl → 1.25.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. dashscope/__init__.py +61 -14
  2. dashscope/aigc/__init__.py +10 -3
  3. dashscope/aigc/chat_completion.py +282 -0
  4. dashscope/aigc/code_generation.py +145 -0
  5. dashscope/aigc/conversation.py +71 -12
  6. dashscope/aigc/generation.py +288 -16
  7. dashscope/aigc/image_synthesis.py +473 -31
  8. dashscope/aigc/multimodal_conversation.py +299 -14
  9. dashscope/aigc/video_synthesis.py +610 -0
  10. dashscope/api_entities/aiohttp_request.py +8 -5
  11. dashscope/api_entities/api_request_data.py +4 -2
  12. dashscope/api_entities/api_request_factory.py +68 -20
  13. dashscope/api_entities/base_request.py +20 -3
  14. dashscope/api_entities/chat_completion_types.py +344 -0
  15. dashscope/api_entities/dashscope_response.py +243 -15
  16. dashscope/api_entities/encryption.py +179 -0
  17. dashscope/api_entities/http_request.py +216 -62
  18. dashscope/api_entities/websocket_request.py +43 -34
  19. dashscope/app/__init__.py +5 -0
  20. dashscope/app/application.py +203 -0
  21. dashscope/app/application_response.py +246 -0
  22. dashscope/assistants/__init__.py +16 -0
  23. dashscope/assistants/assistant_types.py +175 -0
  24. dashscope/assistants/assistants.py +311 -0
  25. dashscope/assistants/files.py +197 -0
  26. dashscope/audio/__init__.py +4 -2
  27. dashscope/audio/asr/__init__.py +17 -1
  28. dashscope/audio/asr/asr_phrase_manager.py +203 -0
  29. dashscope/audio/asr/recognition.py +167 -27
  30. dashscope/audio/asr/transcription.py +107 -14
  31. dashscope/audio/asr/translation_recognizer.py +1006 -0
  32. dashscope/audio/asr/vocabulary.py +177 -0
  33. dashscope/audio/qwen_asr/__init__.py +7 -0
  34. dashscope/audio/qwen_asr/qwen_transcription.py +189 -0
  35. dashscope/audio/qwen_omni/__init__.py +11 -0
  36. dashscope/audio/qwen_omni/omni_realtime.py +524 -0
  37. dashscope/audio/qwen_tts/__init__.py +5 -0
  38. dashscope/audio/qwen_tts/speech_synthesizer.py +77 -0
  39. dashscope/audio/qwen_tts_realtime/__init__.py +10 -0
  40. dashscope/audio/qwen_tts_realtime/qwen_tts_realtime.py +355 -0
  41. dashscope/audio/tts/__init__.py +2 -0
  42. dashscope/audio/tts/speech_synthesizer.py +5 -0
  43. dashscope/audio/tts_v2/__init__.py +12 -0
  44. dashscope/audio/tts_v2/enrollment.py +179 -0
  45. dashscope/audio/tts_v2/speech_synthesizer.py +886 -0
  46. dashscope/cli.py +157 -37
  47. dashscope/client/base_api.py +652 -87
  48. dashscope/common/api_key.py +2 -0
  49. dashscope/common/base_type.py +135 -0
  50. dashscope/common/constants.py +13 -16
  51. dashscope/common/env.py +2 -0
  52. dashscope/common/error.py +58 -22
  53. dashscope/common/logging.py +2 -0
  54. dashscope/common/message_manager.py +2 -0
  55. dashscope/common/utils.py +276 -46
  56. dashscope/customize/__init__.py +0 -0
  57. dashscope/customize/customize_types.py +192 -0
  58. dashscope/customize/deployments.py +146 -0
  59. dashscope/customize/finetunes.py +234 -0
  60. dashscope/embeddings/__init__.py +5 -1
  61. dashscope/embeddings/batch_text_embedding.py +208 -0
  62. dashscope/embeddings/batch_text_embedding_response.py +65 -0
  63. dashscope/embeddings/multimodal_embedding.py +118 -10
  64. dashscope/embeddings/text_embedding.py +13 -1
  65. dashscope/{file.py → files.py} +19 -4
  66. dashscope/io/input_output.py +2 -0
  67. dashscope/model.py +11 -2
  68. dashscope/models.py +43 -0
  69. dashscope/multimodal/__init__.py +20 -0
  70. dashscope/multimodal/dialog_state.py +56 -0
  71. dashscope/multimodal/multimodal_constants.py +28 -0
  72. dashscope/multimodal/multimodal_dialog.py +648 -0
  73. dashscope/multimodal/multimodal_request_params.py +313 -0
  74. dashscope/multimodal/tingwu/__init__.py +10 -0
  75. dashscope/multimodal/tingwu/tingwu.py +80 -0
  76. dashscope/multimodal/tingwu/tingwu_realtime.py +579 -0
  77. dashscope/nlp/__init__.py +0 -0
  78. dashscope/nlp/understanding.py +64 -0
  79. dashscope/protocol/websocket.py +3 -0
  80. dashscope/rerank/__init__.py +0 -0
  81. dashscope/rerank/text_rerank.py +69 -0
  82. dashscope/resources/qwen.tiktoken +151643 -0
  83. dashscope/threads/__init__.py +26 -0
  84. dashscope/threads/messages/__init__.py +0 -0
  85. dashscope/threads/messages/files.py +113 -0
  86. dashscope/threads/messages/messages.py +220 -0
  87. dashscope/threads/runs/__init__.py +0 -0
  88. dashscope/threads/runs/runs.py +501 -0
  89. dashscope/threads/runs/steps.py +112 -0
  90. dashscope/threads/thread_types.py +665 -0
  91. dashscope/threads/threads.py +212 -0
  92. dashscope/tokenizers/__init__.py +7 -0
  93. dashscope/tokenizers/qwen_tokenizer.py +111 -0
  94. dashscope/tokenizers/tokenization.py +125 -0
  95. dashscope/tokenizers/tokenizer.py +45 -0
  96. dashscope/tokenizers/tokenizer_base.py +32 -0
  97. dashscope/utils/__init__.py +0 -0
  98. dashscope/utils/message_utils.py +838 -0
  99. dashscope/utils/oss_utils.py +243 -0
  100. dashscope/utils/param_utils.py +29 -0
  101. dashscope/version.py +3 -1
  102. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/METADATA +53 -50
  103. dashscope-1.25.6.dist-info/RECORD +112 -0
  104. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/WHEEL +1 -1
  105. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/entry_points.txt +0 -1
  106. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info/licenses}/LICENSE +2 -4
  107. dashscope/deployment.py +0 -129
  108. dashscope/finetune.py +0 -149
  109. dashscope-1.8.0.dist-info/RECORD +0 -49
  110. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/top_level.txt +0 -0
dashscope/audio/qwen_omni/omni_realtime.py
@@ -0,0 +1,524 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import json
+import platform
+import threading
+import time
+from dataclasses import field, dataclass
+from typing import List, Any, Dict
+import uuid
+from enum import Enum, unique
+
+import dashscope
+import websocket
+from dashscope.common.error import InputRequired, ModelRequired
+from dashscope.common.logging import logger
+
+
+class OmniRealtimeCallback:
+    """
+    An interface that defines callback methods for getting omni-realtime results. # noqa E501
+    Derive from this class and implement its functions to provide your own data.
+    """
+    def on_open(self) -> None:
+        pass
+
+    def on_close(self, close_status_code, close_msg) -> None:
+        pass
+
+    def on_event(self, message: str) -> None:
+        pass
+
+
+@dataclass
+class TranslationParams:
+    """
+    TranslationParams
+    """
+
+    @dataclass
+    class Corpus:
+        phrases: Dict[str, Any] = field(default=None)
+
+    language: str = field(default=None)
+    corpus: Corpus = field(default=None)
+
+
+@dataclass
+class TranscriptionParams:
+    """
+    TranscriptionParams
+    """
+    language: str = field(default=None)
+    sample_rate: int = field(default=16000)
+    input_audio_format: str = field(default="pcm")
+    corpus: Dict[str, Any] = field(default=None)
+    corpus_text: str = field(default=None)
+
+
+@unique
+class AudioFormat(Enum):
+    # format, sample_rate, channels, bit_rate, name
+    PCM_16000HZ_MONO_16BIT = ('pcm', 16000, 'mono', '16bit', 'pcm16')
+    PCM_24000HZ_MONO_16BIT = ('pcm', 24000, 'mono', '16bit', 'pcm16')
+
+    def __init__(self, format, sample_rate, channels, bit_rate, format_str):
+        self.format = format
+        self.sample_rate = sample_rate
+        self.channels = channels
+        self.bit_rate = bit_rate
+        self.format_str = format_str
+
+    def __repr__(self):
+        return self.format_str
+
+    def __str__(self):
+        return f'{self.format.upper()} with {self.sample_rate}Hz sample rate, {self.channels} channel, {self.bit_rate} bit rate: {self.format_str}'
+
+
+class MultiModality(Enum):
+    """
+    MultiModality
+    """
+    TEXT = 'text'
+    AUDIO = 'audio'
+
+    def __str__(self):
+        return self.name
+
+
+class OmniRealtimeConversation:
+    def __init__(
+        self,
+        model,
+        callback: OmniRealtimeCallback,
+        headers=None,
+        workspace=None,
+        url=None,
+        api_key: str = None,
+        additional_params=None,
+    ):
+        """
+        Qwen Omni Realtime SDK
+        Parameters:
+        -----------
+        model: str
+            Model name.
+        headers: Dict
+            User-defined headers.
+        callback: OmniRealtimeCallback
+            Callback to receive real-time omni results.
+        workspace: str
+            Dashscope workspace ID.
+        url: str
+            Dashscope WebSocket URL.
+        additional_params: Dict
+            Additional parameters for the Dashscope API.
+        """
+
+        if model is None:
+            raise ModelRequired('Model is required!')
+        if callback is None:
+            raise InputRequired('Callback is required!')
+        if url is None:
+            url = f'wss://dashscope.aliyuncs.com/api-ws/v1/realtime?model={model}'
+        else:
+            url = f'{url}?model={model}'
+        self.url = url
+        self.apikey = api_key or dashscope.api_key
+        self.user_headers = headers
+        self.user_workspace = workspace
+        self.model = model
+        self.config = {}
+        self.callback = callback
+        self.ws = None
+        self.session_id = None
+        self.last_message = None
+        self.last_response_id = None
+        self.last_response_create_time = None
+        self.last_first_text_delay = None
+        self.last_first_audio_delay = None
+        self.metrics = []
+        # event used to synchronously wait for the connection to close
+        self.disconnect_event = None
+
+    def _generate_event_id(self):
+        '''
+        generate random event id: event_xxxx
+        '''
+        return 'event_' + uuid.uuid4().hex
+
+    def _get_websocket_header(self, ):
+        ua = 'dashscope/%s; python/%s; platform/%s; processor/%s' % (
+            '1.18.0',  # dashscope version
+            platform.python_version(),
+            platform.platform(),
+            platform.processor(),
+        )
+        headers = {
+            'user-agent': ua,
+            'Authorization': 'bearer ' + self.apikey,
+        }
+        if self.user_headers:
+            headers = {**self.user_headers, **headers}
+        if self.user_workspace:
+            headers = {
+                **headers,
+                'X-DashScope-WorkSpace': self.user_workspace,
+            }
+        return headers
+
+    def connect(self) -> None:
+        '''
+        connect to server, create session and return default session configuration
+        '''
+        self.ws = websocket.WebSocketApp(
+            self.url,
+            header=self._get_websocket_header(),
+            on_message=self.on_message,
+            on_error=self.on_error,
+            on_close=self.on_close,
+        )
+        self.thread = threading.Thread(target=self.ws.run_forever)
+        self.thread.daemon = True
+        self.thread.start()
+        timeout = 5  # maximum wait time in seconds
+        start_time = time.time()
+        while (not (self.ws.sock and self.ws.sock.connected)
+               and (time.time() - start_time) < timeout):
+            time.sleep(0.1)  # sleep briefly to avoid busy polling
+        if not (self.ws.sock and self.ws.sock.connected):
+            raise TimeoutError(
+                'websocket connection could not be established within 5s. '
+                'Please check your network connection, firewall settings, or server status.'
+            )
+        self.callback.on_open()
+
+    def __send_str(self, data: str, enable_log: bool = True):
+        if enable_log:
+            logger.debug('[omni realtime] send string: {}'.format(data))
+        self.ws.send(data)
+
+    def update_session(self,
+                       output_modalities: List[MultiModality],
+                       voice: str = None,
+                       input_audio_format: AudioFormat = AudioFormat.
+                       PCM_16000HZ_MONO_16BIT,
+                       output_audio_format: AudioFormat = AudioFormat.
+                       PCM_24000HZ_MONO_16BIT,
+                       enable_input_audio_transcription: bool = True,
+                       input_audio_transcription_model: str = None,
+                       enable_turn_detection: bool = True,
+                       turn_detection_type: str = 'server_vad',
+                       prefix_padding_ms: int = 300,
+                       turn_detection_threshold: float = 0.2,
+                       turn_detection_silence_duration_ms: int = 800,
+                       turn_detection_param: dict = None,
+                       translation_params: TranslationParams = None,
+                       transcription_params: TranscriptionParams = None,
+                       **kwargs) -> None:
+        '''
+        update session configuration, should be used before create response
+
+        Parameters
+        ----------
+        output_modalities: list[MultiModality]
+            omni output modalities to be used in session
+        voice: str
+            voice to be used in session
+        input_audio_format: AudioFormat
+            input audio format
+        output_audio_format: AudioFormat
+            output audio format
+        enable_turn_detection: bool
+            enable turn detection
+        turn_detection_threshold: float
+            turn detection threshold, range [-1, 1].
+            In a noisy environment, it may be necessary to increase the threshold to reduce false detections.
+            In a quiet environment, it may be necessary to decrease the threshold to improve sensitivity.
+        turn_detection_silence_duration_ms: int
+            duration of silence in milliseconds to detect a turn, range [200, 6000]
+        translation_params: TranslationParams
+            translation params, including language. Only effective with the qwen3-livetranslate-flash-realtime model or
+            later models. Do not set this parameter for other models.
+        transcription_params: TranscriptionParams
+            transcription params, including language, sample_rate, input_audio_format, corpus.
+            Only effective with the qwen3-asr-flash-realtime model or
+            later models. Do not set this parameter for other models.
+        '''
+        self.config = {
+            'modalities': [m.value for m in output_modalities],
+            'voice': voice,
+            'input_audio_format': input_audio_format.format_str,
+            'output_audio_format': output_audio_format.format_str,
+        }
+        if enable_input_audio_transcription:
+            self.config['input_audio_transcription'] = {
+                'model': input_audio_transcription_model,
+            }
+        else:
+            self.config['input_audio_transcription'] = None
+        if enable_turn_detection:
+            self.config['turn_detection'] = {
+                'type': turn_detection_type,
+                'threshold': turn_detection_threshold,
+                'prefix_padding_ms': prefix_padding_ms,
+                'silence_duration_ms': turn_detection_silence_duration_ms,
+            }
+            if turn_detection_param is not None:
+                self.config['turn_detection'].update(turn_detection_param)
+        else:
+            self.config['turn_detection'] = None
+        if translation_params is not None:
+            self.config['translation'] = {
+                'language': translation_params.language,
+            }
+            if translation_params.corpus is not None:
+                if translation_params.corpus.phrases is not None:
+                    self.config['translation']['corpus'] = {
+                        'phrases': translation_params.corpus.phrases
+                    }
+        if transcription_params is not None:
+            self.config['input_audio_transcription'] = {}
+            if transcription_params.language is not None:
+                self.config['input_audio_transcription'].update({'language': transcription_params.language})
+            if transcription_params.corpus_text is not None:
+                transcription_params.corpus = {
+                    "text": transcription_params.corpus_text
+                }
+            if transcription_params.corpus is not None:
+                self.config['input_audio_transcription'].update({'corpus': transcription_params.corpus})
+            self.config['input_audio_format'] = transcription_params.input_audio_format
+            self.config['sample_rate'] = transcription_params.sample_rate
+        self.config.update(kwargs)
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'session.update',
+                'session': self.config
+            }))
+
+    def end_session(self, timeout: int = 20) -> None:
+        """
+        end session
+
+        Parameters:
+        -----------
+        timeout: int
+            Timeout in seconds to wait for the session to end. Default is 20 seconds.
+        """
+        if self.disconnect_event is not None:
+            # if the event is already set, do nothing
+            return
+
+        # create the event
+        self.disconnect_event = threading.Event()
+
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'session.finish'
+            }))
+
+        # wait for the event to be set
+        finish_success = self.disconnect_event.wait(timeout)
+        # clear the event
+        self.disconnect_event = None
+
+        # if the event is not set, close the connection
+        if not finish_success:
+            self.close()
+            raise TimeoutError("Session end timeout after {} seconds".format(timeout))
+
+    def end_session_async(self, ) -> None:
+        """
+        end session asynchronously. you need to close the connection manually
+        """
+        # send the session.finish message
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'session.finish'
+            }))
+
+    def append_audio(self, audio_b64: str) -> None:
+        '''
+        send audio in base64 format
+
+        Parameters
+        ----------
+        audio_b64: str
+            base64 audio string
+        '''
+        logger.debug('[omni realtime] append audio: {}'.format(len(audio_b64)))
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'input_audio_buffer.append',
+                'audio': audio_b64
+            }), False)
+
+    def append_video(self, video_b64: str) -> None:
+        '''
+        send one image frame in video in base64 format
+
+        Parameters
+        ----------
+        video_b64: str
+            base64 image string
+        '''
+        logger.debug('[omni realtime] append video: {}'.format(len(video_b64)))
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'input_image_buffer.append',
+                'image': video_b64
+            }), False)
+
+    def commit(self, ) -> None:
+        '''
+        Commit the audio and video sent before.
+        When in Server VAD mode, the client does not need to use this method;
+        the server will commit the audio automatically after detecting vad end.
+        '''
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'input_audio_buffer.commit'
+            }))
+
+    def clear_appended_audio(self, ) -> None:
+        '''
+        clear the audio sent to the server before.
+        '''
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'input_audio_buffer.clear'
+            }))
+
+    def create_response(self,
+                        instructions: str = None,
+                        output_modalities: List[MultiModality] = None) -> None:
+        '''
+        create response, using the audio and video committed before to request the llm.
+        When in Server VAD mode, the client does not need to use this method;
+        the server will create the response automatically after detecting vad
+        and sending commit.
+
+        Parameters
+        ----------
+        instructions: str
+            instructions to the llm
+        output_modalities: list[MultiModality]
+            omni output modalities to be used in session
+        '''
+        request = {
+            'event_id': self._generate_event_id(),
+            'type': 'response.create',
+            'response': {}
+        }
+        request['response']['instructions'] = instructions
+        if output_modalities:
+            request['response']['modalities'] = [
+                m.value for m in output_modalities
+            ]
+        self.__send_str(json.dumps(request))
+
+    def cancel_response(self, ) -> None:
+        '''
+        cancel the current response
+        '''
+        self.__send_str(
+            json.dumps({
+                'event_id': self._generate_event_id(),
+                'type': 'response.cancel'
+            }))
+
+    def send_raw(self, raw_data: str) -> None:
+        '''
+        send raw data to the server
+        '''
+        self.__send_str(raw_data)
+
+    def close(self, ) -> None:
+        '''
+        close the connection to the server
+        '''
+        self.ws.close()
+
+    # callback invoked for every incoming websocket message
+    def on_message(self, ws, message):
+        if isinstance(message, str):
+            logger.debug('[omni realtime] receive string {}'.format(
+                message[:1024]))
+            try:
+                # try to parse the message as JSON
+                json_data = json.loads(message)
+                self.last_message = json_data
+                self.callback.on_event(json_data)
+                if 'type' in json_data:
+                    if 'session.created' == json_data['type']:
+                        logger.info('[omni realtime] session created')
+                        self.session_id = json_data['session']['id']
+                    elif 'session.finished' == json_data['type']:
+                        # wake up the thread waiting in end_session
+                        logger.info('[omni realtime] session finished')
+                        if self.disconnect_event is not None:
+                            self.disconnect_event.set()
+                    if 'response.created' == json_data['type']:
+                        self.last_response_id = json_data['response']['id']
+                        self.last_response_create_time = time.time() * 1000
+                        self.last_first_audio_delay = None
+                        self.last_first_text_delay = None
+                    elif 'response.audio_transcript.delta' == json_data[
+                            'type']:
+                        if self.last_response_create_time and self.last_first_text_delay is None:
+                            self.last_first_text_delay = time.time(
+                            ) * 1000 - self.last_response_create_time
+                    elif 'response.audio.delta' == json_data['type']:
+                        if self.last_response_create_time and self.last_first_audio_delay is None:
+                            self.last_first_audio_delay = time.time(
+                            ) * 1000 - self.last_response_create_time
+                    elif 'response.done' == json_data['type']:
+                        logger.info(
+                            '[Metric] response: {}, first text delay: {}, first audio delay: {}'
+                            .format(self.last_response_id,
+                                    self.last_first_text_delay,
+                                    self.last_first_audio_delay))
+            except json.JSONDecodeError:
+                logger.error('Failed to parse message as JSON.')
+                raise Exception('Failed to parse message as JSON.')
+        elif isinstance(message, (bytes, bytearray)):
+            # binary frames are not expected from the omni realtime api
+            logger.error(
+                'should not receive binary message in omni realtime api')
+            logger.debug('[omni realtime] receive binary {} bytes'.format(
+                len(message)))
+
+    def on_close(self, ws, close_status_code, close_msg):
+        self.callback.on_close(close_status_code, close_msg)
+
+    # callback invoked when the websocket raises an error
+    def on_error(self, ws, error):
+        print(f'websocket closed due to {error}')
+        raise Exception(f'websocket closed due to {error}')
+
+    # get the taskId of the previous task
+    def get_session_id(self) -> str:
+        return self.session_id
+
+    def get_last_message(self) -> str:
+        return self.last_message
+
+    def get_last_response_id(self) -> str:
+        return self.last_response_id
+
+    def get_last_first_text_delay(self):
+        return self.last_first_text_delay
+
+    def get_last_first_audio_delay(self):
+        return self.last_first_audio_delay
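To make the new interface concrete, here is a minimal usage sketch for the OmniRealtimeConversation class added above. It is illustrative only: the model name qwen-omni-turbo-realtime, the voice name Chelsie, and the input.pcm file are placeholder assumptions, not values taken from this diff; note that on_event receives the parsed JSON dict that on_message forwards, despite the str annotation on the callback.

# Illustrative driver for OmniRealtimeConversation (not part of the package).
import base64
import time

import dashscope
from dashscope.audio.qwen_omni.omni_realtime import (
    AudioFormat, MultiModality, OmniRealtimeCallback,
    OmniRealtimeConversation)


class PrintingCallback(OmniRealtimeCallback):
    def on_open(self) -> None:
        print('connection opened')

    def on_event(self, message) -> None:
        # on_message forwards the parsed JSON event dict here
        print('event:', message.get('type'))

    def on_close(self, close_status_code, close_msg) -> None:
        print('connection closed:', close_status_code, close_msg)


dashscope.api_key = 'your-api-key'  # or rely on the DASHSCOPE_API_KEY env var
conversation = OmniRealtimeConversation(
    model='qwen-omni-turbo-realtime',  # placeholder model name
    callback=PrintingCallback(),
)
conversation.connect()
conversation.update_session(
    output_modalities=[MultiModality.TEXT, MultiModality.AUDIO],
    voice='Chelsie',  # placeholder voice name
    input_audio_format=AudioFormat.PCM_16000HZ_MONO_16BIT,
)
with open('input.pcm', 'rb') as f:  # placeholder 16kHz/16-bit mono PCM file
    while chunk := f.read(3200):  # ~100 ms of audio per append
        conversation.append_audio(base64.b64encode(chunk).decode('ascii'))
        time.sleep(0.1)
time.sleep(5)  # with server_vad turn detection, the server commits and responds
conversation.end_session()
conversation.close()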
dashscope/audio/qwen_tts/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+from .speech_synthesizer import SpeechSynthesizer
+
+__all__ = ['SpeechSynthesizer']
dashscope/audio/qwen_tts/speech_synthesizer.py
@@ -0,0 +1,77 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+from typing import Generator, Union
+
+from dashscope.api_entities.dashscope_response import \
+    TextToSpeechResponse
+from dashscope.client.base_api import BaseApi
+from dashscope.common.error import InputRequired, ModelRequired
+
+
+class SpeechSynthesizer(BaseApi):
+    """Text-to-speech interface.
+    """
+
+    task_group = 'aigc'
+    task = 'multimodal-generation'
+    function = 'generation'
+
+    class Models:
+        qwen_tts = 'qwen-tts'
+
+    @classmethod
+    def call(
+        cls,
+        model: str,
+        text: str,
+        api_key: str = None,
+        workspace: str = None,
+        **kwargs
+    ) -> Union[TextToSpeechResponse, Generator[
+            TextToSpeechResponse, None, None]]:
+        """Call the speech synthesis model service.
+
+        Args:
+            model (str): The requested model, such as 'qwen-tts'.
+            text (str): Text content used for speech synthesis.
+            api_key (str, optional): The API key, can be None,
+                if None, will retrieve by rule [1].
+                [1]: https://help.aliyun.com/zh/dashscope/developer-reference/api-key-settings. # noqa E501
+            workspace (str): The dashscope workspace id.
+            **kwargs:
+                stream(bool, `optional`): Enable server-sent events
+                    (ref: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events) # noqa E501
+                    so that the result is returned incrementally.
+                voice: str
+                    Voice name.
+
+        Raises:
+            InputRequired: The input must include the text parameter.
+            ModelRequired: The input must include the model parameter.
+
+        Returns:
+            Union[TextToSpeechResponse,
+                  Generator[TextToSpeechResponse, None, None]]: If
+            stream is True, return Generator, otherwise TextToSpeechResponse.
+        """
+        if not text:
+            raise InputRequired('text is required!')
+        if not model:
+            raise ModelRequired('Model is required!')
+        input = {'text': text}
+        if 'voice' in kwargs:
+            input['voice'] = kwargs.pop('voice')
+        response = super().call(model=model,
+                                task_group=SpeechSynthesizer.task_group,
+                                task=SpeechSynthesizer.task,
+                                function=SpeechSynthesizer.function,
+                                api_key=api_key,
+                                input=input,
+                                workspace=workspace,
+                                **kwargs)
+        is_stream = kwargs.get('stream', False)
+        if is_stream:
+            return (TextToSpeechResponse.from_api_response(rsp)
+                    for rsp in response)
+        else:
+            return TextToSpeechResponse.from_api_response(response)
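A minimal sketch of calling the new qwen-tts SpeechSynthesizer shown above, in both non-streaming and streaming form. The voice name Cherry is a placeholder assumption, and the structure of the returned TextToSpeechResponse beyond status_code should be checked against dashscope/api_entities/dashscope_response.py.

# Illustrative calls to the qwen-tts SpeechSynthesizer (not part of the package).
import dashscope
from dashscope.audio.qwen_tts import SpeechSynthesizer

dashscope.api_key = 'your-api-key'  # or rely on the DASHSCOPE_API_KEY env var

# Non-streaming: a single TextToSpeechResponse comes back.
response = SpeechSynthesizer.call(
    model=SpeechSynthesizer.Models.qwen_tts,
    text='Hello, world!',
    voice='Cherry',  # placeholder voice name
)
print(response.status_code)

# Streaming: stream=True yields a generator of partial responses.
for partial in SpeechSynthesizer.call(model='qwen-tts',
                                      text='Hello again!',
                                      voice='Cherry',
                                      stream=True):
    print(partial.status_code)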
dashscope/audio/qwen_tts_realtime/__init__.py
@@ -0,0 +1,10 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+from .qwen_tts_realtime import (AudioFormat, QwenTtsRealtimeCallback,
+                                QwenTtsRealtime)
+
+__all__ = [
+    'AudioFormat',
+    'QwenTtsRealtimeCallback',
+    'QwenTtsRealtime',
+]
+ ]