dashscope 1.24.2__py3-none-any.whl → 1.24.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dashscope might be problematic. Click here for more details.

@@ -0,0 +1,579 @@
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+
3
+ import json
4
+ import platform
5
+ import threading
6
+ import time
7
+ import uuid
8
+ from dataclasses import dataclass, field
9
+ from queue import Queue
10
+ import dashscope
11
+ from dashscope.client.base_api import BaseApi
12
+ from dashscope.common.error import (InvalidParameter, ModelRequired)
13
+ import websocket
14
+
15
+ from dashscope.common.logging import logger
16
+ from dashscope.protocol.websocket import ActionType
17
+
18
+
19
class TingWuRealtimeCallback:
    """Base class for receiving TingWu realtime events.

    Every hook is a no-op. Subclass it and override the hooks you need.
    """

    def on_open(self) -> None:
        """Called when the websocket connection has been opened."""

    def on_started(self, task_id: str) -> None:
        """Called when the server reports ``task-started``.

        :param task_id: task id taken from the response header
        """

    def on_speech_listen(self, result: dict):
        """Called for a ``speech-listen`` action.

        :param result: the decoded server message
        """

    def on_recognize_result(self, result: dict):
        """Called for a ``recognize-result`` action.

        :param result: the decoded server message
        """

    def on_ai_result(self, result: dict):
        """Called for an ``ai-result`` action.

        :param result: the decoded server message
        """

    def on_stopped(self) -> None:
        """Called when the server sends the ``speech-end`` action."""

    def on_error(self, error_code: str, error_msg: str) -> None:
        """Called when the server or the websocket layer reports an error.

        :param error_code: error code of the failure
        :param error_msg: human readable description
        """

    def on_close(self, close_status_code, close_msg):
        """Called when the websocket connection is closed.

        :param close_status_code: close status code
        :param close_msg: close message
        """
53
+
54
+
55
class TingWuRealtime(BaseApi):
    """Realtime TingWu client that streams audio over a websocket.

    Results are delivered through the supplied callback; the websocket
    runs on a daemon worker thread created by ``start()``.

    Args:
        model (str): The requested model_id.
        callback (TingWuRealtimeCallback): A callback that receives
            speech recognition results.
        audio_format (str): The input audio format for TingWu request.
        sample_rate (int): The input audio sample rate.
        max_end_silence (int): The maximum end silence time.
        app_id (str): The dashscope tingwu app id.
        terminology (str): The correct instruction set id.
        workspace (str): The dashscope workspace id.
        api_key (str): API key; ``dashscope.api_key`` is used when None.
        base_address (str): Websocket URL;
            ``dashscope.base_websocket_api_url`` is used when None.
        data_id (str): Data id forwarded in the start request.

        **kwargs:
            Extra parameters merged into the start request parameters.

    Raises:
        ModelRequired: ``model`` is None.
    """

    SILENCE_TIMEOUT_S = 60
    # Seconds ``_connect`` waits for the websocket handshake.
    CONNECT_TIMEOUT_S = 5
    # Heartbeat settings passed to ``WebSocketApp.run_forever``.
    PING_INTERVAL_S = 5
    PING_TIMEOUT_S = 4

    def __init__(self,
                 model: str,
                 callback: TingWuRealtimeCallback,
                 audio_format: str = "pcm",
                 sample_rate: int = 16000,
                 max_end_silence: int = None,
                 app_id: str = None,
                 terminology: str = None,
                 workspace: str = None,
                 api_key: str = None,
                 base_address: str = None,
                 data_id: str = None,
                 **kwargs):
        self.api_key = dashscope.api_key if api_key is None else api_key
        if base_address is None:
            self.base_address = dashscope.base_websocket_api_url
        else:
            self.base_address = base_address

        if model is None:
            raise ModelRequired('Model is required!')

        self.data_id = data_id
        self.max_end_silence = max_end_silence
        self.model = model
        self.audio_format = audio_format
        self.app_id = app_id
        self.terminology = terminology
        self.sample_rate = sample_rate
        # continuous recognition with start() or once recognition with call()
        self._recognition_once = False
        self._callback = callback
        self._running = False
        self._stream_data = Queue()
        self._worker = None
        self._silence_timer = None
        self._kwargs = kwargs
        self._workspace = workspace
        self._start_stream_timestamp = -1
        self._first_package_timestamp = -1
        self._stop_stream_timestamp = -1
        self._on_complete_timestamp = -1
        self.request_id_confirmed = False
        self.last_request_id = uuid.uuid4().hex
        # Created by _connect(); initialised here so stop()/close()/cleanup()
        # are safe to call before start().
        self.ws = None
        self.thread = None
        self.request = _Request()
        # self.close is handed over so the response handler can close the
        # connection once the server signals the end of speech.
        self.response = _TingWuResponse(self._callback, self.close)

    def _on_message(self, ws, message):
        """Dispatch an incoming websocket frame to the response handler."""
        logger.debug(f"<<<<<<< Received message: {message}")
        response = self.response
        if response is None:
            # cleanup() already ran; ignore late frames
            return
        if isinstance(message, str):
            response.handle_text_response(message)
        elif isinstance(message, (bytes, bytearray)):
            response.handle_binary_response(message)

    def _on_error(self, ws, error):
        """Map a websocket error to an error code and notify the callback."""
        logger.error(f"Error: {error}")
        if not self._callback:
            return
        description = str(error)
        lowered = description.lower()
        error_code = ""  # default error code
        if "connection" in lowered:
            error_code = "1001"  # connection error
        elif "timeout" in lowered:
            error_code = "1002"  # timeout error
        elif "authentication" in lowered:
            error_code = "1003"  # authentication error
        self._callback.on_error(error_code=error_code, error_msg=description)

    def _on_close(self, ws, close_status_code, close_msg):
        """Mark the client as stopped and forward the close event."""
        # Reset first so start() can be called again and send_audio_frame()
        # stops writing to a dead socket.
        self._running = False
        try:
            logger.debug(
                "WebSocket connection closed with status {} and message {}".format(close_status_code, close_msg))
            if close_status_code is None:
                close_status_code = 1000
            if close_msg is None:
                close_msg = "websocket is closed"
            if self._callback:
                self._callback.on_close(close_status_code, close_msg)
        except Exception as e:
            logger.error(f"Error: {e}")

    def _on_open(self, ws):
        """Mark the client as running and notify the callback."""
        # Set the flag before the callback so that an exception raised by
        # user code cannot leave an open connection flagged as stopped.
        self._running = True
        if self._callback:
            self._callback.on_open()

    def start(self, **kwargs):
        """Open the websocket connection and send the start request.

        Args:
            **kwargs: extra start parameters, merged over those given to
                the constructor.

        Raises:
            InvalidParameter: no callback is set, or the client is
                already running.
        """
        # Explicit raise instead of ``assert``: asserts vanish under -O.
        if self._callback is None:
            raise InvalidParameter(
                'Please set the callback to get the TingWu result.')

        if self._running:
            raise InvalidParameter('TingWu client has started.')

        # Merge unconditionally; kwargs passed here must not be dropped
        # just because the constructor received none.
        if kwargs:
            self._kwargs.update(kwargs)

        self._connect(self.api_key)
        logger.debug("connected with server.")
        self._send_start_request()

    def send_audio_data(self, speech_data: bytes):
        """Send audio data to the server; a no-op when not running."""
        if self._running:
            self.__send_binary_frame(speech_data)

    def stop(self):
        """Send the stop request, or report a close if not connected."""
        if not self._is_connected():
            if self._callback:
                self._callback.on_close(1001, "websocket is not connected")
            return
        _send_speech_json = self.request.generate_stop_request("stop")
        self._send_text_frame(_send_speech_json)

    # ---- internal helpers ----

    def _is_connected(self) -> bool:
        """Return True when a websocket exists and its socket is connected."""
        ws = self.ws
        return bool(ws is not None and ws.sock and ws.sock.connected)

    def _send_start_request(self):
        """Build the start request from the stored settings and send it."""
        _start_json = self.request.generate_start_request(
            workspace_id=self._workspace,
            direction_name="start",
            app_id=self.app_id,
            model=self.model,
            audio_format=self.audio_format,
            sample_rate=self.sample_rate,
            terminology=self.terminology,
            max_end_silence=self.max_end_silence,
            data_id=self.data_id,
            **self._kwargs
        )
        # send start request
        self._send_text_frame(_start_json)

    def _run_forever(self):
        """Worker thread body: run the websocket loop with heartbeats."""
        self.ws.run_forever(ping_interval=self.PING_INTERVAL_S,
                            ping_timeout=self.PING_TIMEOUT_S)

    def _connect(self, api_key: str):
        """Create the websocket app, start its thread and wait for it."""
        self.ws = websocket.WebSocketApp(
            self.base_address,
            header=self.request.get_websocket_header(api_key),
            on_open=self._on_open,
            on_message=self._on_message,
            on_error=self._on_error,
            on_close=self._on_close)
        # Heartbeat is configured once, in _run_forever().
        self.thread = threading.Thread(target=self._run_forever)
        self.thread.daemon = True
        self.thread.start()

        self._wait_for_connection()

    def close(self):
        """Close the websocket if it is currently connected."""
        if not self._is_connected():
            return
        self.ws.close()

    def _wait_for_connection(self):
        """Poll until the socket is connected or the timeout elapses."""
        deadline = time.monotonic() + self.CONNECT_TIMEOUT_S
        while not self._is_connected():
            if time.monotonic() >= deadline:
                logger.error('websocket not connected after %s seconds',
                             self.CONNECT_TIMEOUT_S)
                return
            time.sleep(0.1)  # short sleep to avoid a tight polling loop

    def _send_text_frame(self, text: str):
        """Send a text frame, keeping authorization data out of the log."""
        if '"Authorization"' not in text:
            logger.info('>>>>>> send text frame : %s', text)
        else:
            logger.info('>>>>>> send text frame with authorization header')
        self.ws.send(text, websocket.ABNF.OPCODE_TEXT)

    def __send_binary_frame(self, binary: bytes):
        """Send one binary frame over the websocket."""
        self.ws.send(binary, websocket.ABNF.OPCODE_BINARY)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.cleanup()
        return False

    def cleanup(self):
        """Close the connection, wait briefly for the worker, drop refs."""
        try:
            if self.ws:
                self.ws.close()
            worker = self.thread
            # A thread cannot join itself; cleanup() may be invoked from a
            # callback running on the websocket thread.
            if (worker and worker.is_alive()
                    and worker is not threading.current_thread()):
                worker.join(timeout=2)
        except Exception as e:
            logger.error(f"Error in cleanup: {e}")
        finally:
            # Always release references, even when closing failed.
            self._running = False
            self.ws = None
            self.thread = None
            self._callback = None
            self.response = None

    def send_audio_frame(self, buffer: bytes):
        """Push audio to server

        Raises:
            InvalidParameter: Cannot send data to an uninitiated recognition.
        """
        if self._running is False:
            raise InvalidParameter('TingWu client has stopped.')

        if self._start_stream_timestamp < 0:
            self._start_stream_timestamp = time.time() * 1000
        logger.debug('send_audio_frame: {}'.format(len(buffer)))
        self.__send_binary_frame(buffer)
297
+
298
+
299
class _Request:
    """Builds the websocket handshake headers and JSON command frames."""

    def __init__(self):
        # websocket header
        self.ws_headers = None
        # most recently generated request header and payload
        self.header = None
        self.payload = None
        # params remembered from the start request
        self.task_id = None
        self.app_id = None
        self.workspace_id = None

    def get_websocket_header(self, api_key):
        """Build, remember and return the websocket handshake headers.

        :param api_key: key placed in the ``Authorization`` header
        :return: dict of header name to value
        """
        ua = 'dashscope/%s; python/%s; platform/%s; processor/%s' % (
            # NOTE(review): hard-coded and differs from the package
            # version -- confirm whether it should track __version__.
            '1.18.0',  # dashscope version
            platform.python_version(),
            platform.platform(),
            platform.processor(),
        )
        self.ws_headers = {
            "User-Agent": ua,
            "Authorization": f"bearer {api_key}",
            "Accept": "application/json"
        }
        # Never write the API key to the log: log a redacted copy.
        loggable = dict(self.ws_headers, Authorization="bearer ***")
        logger.info('websocket header: {}'.format(loggable))
        return self.ws_headers

    def generate_start_request(self, direction_name: str,
                               app_id: str,
                               model: str = None,
                               workspace_id: str = None,
                               audio_format: str = None,
                               sample_rate: int = None,
                               terminology: str = None,
                               max_end_silence: int = None,
                               data_id: str = None,
                               **kwargs
                               ) -> str:
        """Build the start request.

        :param direction_name: directive name placed in the input body
        :param app_id: web console app id
        :param model: model name
        :param workspace_id: web console workspace id
        :param audio_format: audio format
        :param sample_rate: sample rate
        :param terminology: terminology id, forwarded as a parameter
        :param max_end_silence: forwarded as ``maxEndSilence``
        :param data_id: data id placed in the input body
        :param kwargs: extra parameters merged into the request parameters
        :return: the JSON-encoded command
        """
        # Remember the ids so generate_stop_request() can reuse them;
        # otherwise the stop request always carried ``appId: null``.
        self.app_id = app_id
        self.workspace_id = workspace_id

        self._get_dash_request_header(ActionType.START)
        parameters = self._get_start_parameters(
            audio_format=audio_format,
            sample_rate=sample_rate,
            max_end_silence=max_end_silence,
            terminology=terminology,
            **kwargs)
        self._get_dash_request_payload(
            direction_name=direction_name,
            app_id=app_id,
            workspace_id=workspace_id,
            model=model,
            data_id=data_id,
            request_params=parameters)

        return json.dumps({"header": self.header, "payload": self.payload})

    @staticmethod
    def _get_start_parameters(audio_format: str = None,
                              sample_rate: int = None,
                              terminology: str = None,
                              max_end_silence: int = None,
                              **kwargs):
        """Collect the start parameters that are not None.

        :param kwargs: extra parameters, applied last so they override
            the named ones on key collision
        :return: dict of request parameters
        """
        named = (
            ('format', audio_format),
            ('sampleRate', sample_rate),
            ('terminology', terminology),
            ('maxEndSilence', max_end_silence),
        )
        parameters = {key: value for key, value in named
                      if value is not None}
        parameters.update(kwargs)
        return parameters

    def generate_stop_request(self, direction_name: str) -> str:
        """Build the stop request.

        :param direction_name: directive name placed in the input body
        :return: the JSON-encoded command
        """
        self._get_dash_request_header(ActionType.FINISHED)
        self._get_dash_request_payload(direction_name, self.app_id)

        return json.dumps({"header": self.header, "payload": self.payload})

    def _get_dash_request_header(self, action: str):
        """Build ``self.header``, creating the task id on first use.

        :param action: ActionType :run-task, continue-task, finish-task
        """
        if self.task_id is None:
            self.task_id = get_random_uuid()
        self.header = DashHeader(action=action, task_id=self.task_id).to_dict()

    def _get_dash_request_payload(self, direction_name: str,
                                  app_id: str,
                                  workspace_id: str = None,
                                  custom_input=None,
                                  model: str = None,
                                  data_id: str = None,
                                  request_params=None,
                                  ):
        """Build ``self.payload`` for a request.

        :param direction_name: inner direction name
        :param app_id: web console app id
        :param workspace_id: web console workspace id
        :param custom_input: user custom input; used instead of the
            generated input body when given (must provide ``to_dict()``)
        :param model: model name
        :param data_id: data id
        :param request_params: start direction body parameters
        """
        # Local renamed from ``input`` to avoid shadowing the builtin.
        if custom_input is not None:
            body_input = custom_input
        else:
            body_input = RequestBodyInput(
                workspace_id=workspace_id,
                app_id=app_id,
                directive=direction_name,
                data_id=data_id
            )

        self.payload = DashPayload(
            model=model,
            input=body_input.to_dict(),
            parameters=request_params
        ).to_dict()
447
+
448
+
449
class _TingWuResponse:
    """Parses server messages and forwards them to the user callback."""

    def __init__(self, callback: "TingWuRealtimeCallback", close_callback=None):
        """
        :param callback: receiver of the decoded events
        :param close_callback: optional zero-argument callable invoked
            after the ``speech-end`` action has been delivered
        """
        self.task_id = None  # task id reported by the server
        self._callback = callback
        self._close_callback = close_callback

    def handle_text_response(self, response_json: str):
        """Handle a text response.

        :param response_json: json format response from server
        """
        logger.info("<<<<<< server response: %s" % response_json)
        try:
            json_data = json.loads(response_json)
        except json.JSONDecodeError:
            logger.error("Failed to parse message as JSON.")
            return
        if not isinstance(json_data, dict):
            # Valid JSON but not an object: there is no header to inspect.
            logger.error("Unexpected message format: %s" % response_json)
            return

        header = json_data.get('header') or {}
        event = header.get('event')
        if event == 'task-failed':
            logger.error('Server returned invalid message: %s' % response_json)
            if self._callback:
                self._callback.on_error(
                    error_code=header.get('error_code'),
                    error_msg=header.get('error_message'))
            return
        if event == "task-started":
            self._handle_started(header.get('task_id'))
            return

        payload = json_data.get('payload') or {}
        output = payload.get('output', {})
        if output is None:
            return
        action = output.get('action')
        logger.info("Server response action: %s" % action)
        self._handle_tingwu_agent_text_response(action=action,
                                                response_json=json_data)

    def handle_binary_response(self, response_binary: bytes):
        """Handle a binary response; only its length is logged.

        :param response_binary: server response binary
        """
        logger.info("<<<<<< server response binary length: %d" % len(response_binary))

    def _handle_tingwu_agent_text_response(self, action: str, response_json: dict):
        """Route one decoded message to the callback hook for its action.

        :param action: value of ``payload.output.action``; may be None
        :param response_json: the whole decoded message
        """
        callback = self._callback
        if callback is None:
            return
        payload = response_json.get('payload') or {}
        output = payload.get('output') or {}
        if action == "task-failed":
            callback.on_error(error_code=output.get('errorCode'),
                              error_msg=output.get('errorMessage'))
        elif action == "speech-listen":
            callback.on_speech_listen(response_json)
        elif action == "recognize-result":
            callback.on_recognize_result(response_json)
        elif action == "ai-result":
            callback.on_ai_result(response_json)
        elif action == "speech-end":
            # ai-result is always delivered before speech-end
            callback.on_stopped()
            if self._close_callback is not None:
                self._close_callback()
        else:
            # %-formatting instead of ``+``: action is None when the
            # message carries no action, and str + None raises TypeError.
            logger.info("Unknown response name:%s" % action)

    def _handle_started(self, task_id: str):
        """Remember the task id and notify the callback."""
        self.task_id = task_id
        if self._callback:
            self._callback.on_started(self.task_id)
515
+
516
+
517
def get_random_uuid() -> str:
    """Return a new random (version 4) UUID as a hex string."""
    fresh_id = uuid.uuid4()
    return fresh_id.hex
520
+
521
+
522
@dataclass
class RequestBodyInput:
    """The ``input`` section of a request payload."""
    app_id: str
    directive: str
    data_id: str = None
    workspace_id: str = None

    def to_dict(self):
        """Return the wire-format dict; ids that are None are left out."""
        body_input = {"appId": self.app_id, "directive": self.directive}
        optional_entries = (
            ("workspace_id", self.workspace_id),
            ("dataId", self.data_id),
        )
        for wire_key, value in optional_entries:
            if value is not None:
                body_input[wire_key] = value
        return body_input
539
+
540
+
541
@dataclass
class DashHeader:
    """The ``header`` section of a websocket command frame."""
    action: str
    # default_factory gives every header built without an explicit task_id
    # its own id; a plain ``default=`` call is evaluated once at class
    # creation and would be shared by all such headers.
    task_id: str = field(default_factory=lambda: uuid.uuid4().hex)
    streaming: str = field(default="duplex")  # defaults to duplex

    def to_dict(self):
        """Return the wire-format dict; ``request_id`` mirrors ``task_id``."""
        return {
            "action": self.action,
            "task_id": self.task_id,
            "request_id": self.task_id,
            "streaming": self.streaming
        }
554
+
555
+
556
@dataclass
class DashPayload:
    """The ``payload`` section of a websocket command frame."""
    task_group: str = "aigc"
    function: str = "generation"
    model: str = ""
    task: str = "multimodal-generation"
    parameters: dict = None
    input: dict = None

    def to_dict(self):
        """Return the wire-format dict.

        ``parameters`` and ``input`` are included only when not None.
        """
        payload = dict(
            task_group=self.task_group,
            function=self.function,
            model=self.model,
            task=self.task,
        )
        for section in ("parameters", "input"):
            content = getattr(self, section)
            if content is not None:
                payload[section] = content
        return payload
@@ -188,6 +188,6 @@ def check_and_upload(model, elem: dict, api_key):
188
188
  return has_upload
189
189
 
190
190
 
191
- def preprocess_message_element(model: str, elem: List[dict], api_key: str):
191
def preprocess_message_element(model: str, elem: dict, api_key: str):
    """Run ``check_and_upload`` on one message element and return its result."""
    return check_and_upload(model, elem, api_key)
dashscope/version.py CHANGED
@@ -1,3 +1,3 @@
1
1
  # Copyright (c) Alibaba, Inc. and its affiliates.
2
2
 
3
- __version__ = '1.24.2'
3
+ __version__ = '1.24.4'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dashscope
3
- Version: 1.24.2
3
+ Version: 1.24.4
4
4
  Summary: dashscope client sdk library
5
5
  Home-page: https://dashscope.aliyun.com/
6
6
  Author: Alibaba Cloud
@@ -23,6 +23,7 @@ Requires-Dist: aiohttp
23
23
  Requires-Dist: requests
24
24
  Requires-Dist: websocket-client
25
25
  Requires-Dist: cryptography
26
+ Requires-Dist: certifi
26
27
  Provides-Extra: tokenizer
27
28
  Requires-Dist: tiktoken; extra == "tokenizer"
28
29
  Dynamic: author