dashscope 1.24.3__py3-none-any.whl → 1.24.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dashscope might be problematic. Click here for more details.
- dashscope/audio/tts_v2/speech_synthesizer.py +30 -0
- dashscope/multimodal/__init__.py +20 -0
- dashscope/multimodal/tingwu/__init__.py +10 -0
- dashscope/multimodal/tingwu/tingwu_realtime.py +579 -0
- dashscope/version.py +1 -1
- {dashscope-1.24.3.dist-info → dashscope-1.24.4.dist-info}/METADATA +1 -1
- {dashscope-1.24.3.dist-info → dashscope-1.24.4.dist-info}/RECORD +11 -10
- {dashscope-1.24.3.dist-info → dashscope-1.24.4.dist-info}/WHEEL +0 -0
- {dashscope-1.24.3.dist-info → dashscope-1.24.4.dist-info}/entry_points.txt +0 -0
- {dashscope-1.24.3.dist-info → dashscope-1.24.4.dist-info}/licenses/LICENSE +0 -0
- {dashscope-1.24.3.dist-info → dashscope-1.24.4.dist-info}/top_level.txt +0 -0
|
@@ -98,6 +98,10 @@ class Request:
|
|
|
98
98
|
volume=50,
|
|
99
99
|
speech_rate=1.0,
|
|
100
100
|
pitch_rate=1.0,
|
|
101
|
+
seed=0,
|
|
102
|
+
synthesis_type=0,
|
|
103
|
+
instruction=None,
|
|
104
|
+
language_hints: list = None,
|
|
101
105
|
):
|
|
102
106
|
self.task_id = self.genUid()
|
|
103
107
|
self.apikey = apikey
|
|
@@ -109,6 +113,10 @@ class Request:
|
|
|
109
113
|
self.volume = volume
|
|
110
114
|
self.speech_rate = speech_rate
|
|
111
115
|
self.pitch_rate = pitch_rate
|
|
116
|
+
self.seed = seed
|
|
117
|
+
self.synthesis_type = synthesis_type
|
|
118
|
+
self.instruction = instruction
|
|
119
|
+
self.language_hints = language_hints
|
|
112
120
|
|
|
113
121
|
def genUid(self):
|
|
114
122
|
# 生成随机UUID
|
|
@@ -156,6 +164,8 @@ class Request:
|
|
|
156
164
|
'rate': self.speech_rate,
|
|
157
165
|
'format': self.format,
|
|
158
166
|
'pitch': self.pitch_rate,
|
|
167
|
+
'seed': self.seed,
|
|
168
|
+
'type': self.synthesis_type
|
|
159
169
|
},
|
|
160
170
|
},
|
|
161
171
|
}
|
|
@@ -163,6 +173,10 @@ class Request:
|
|
|
163
173
|
cmd['payload']['parameters']['bit_rate'] = self.bit_rate
|
|
164
174
|
if additional_params:
|
|
165
175
|
cmd['payload']['parameters'].update(additional_params)
|
|
176
|
+
if self.instruction is not None:
|
|
177
|
+
cmd['payload']['parameters']['instruction'] = self.instruction
|
|
178
|
+
if self.language_hints is not None:
|
|
179
|
+
cmd['payload']['parameters']['language_hints'] = self.language_hints
|
|
166
180
|
return json.dumps(cmd)
|
|
167
181
|
|
|
168
182
|
def getContinueRequest(self, text):
|
|
@@ -207,6 +221,10 @@ class SpeechSynthesizer:
|
|
|
207
221
|
volume=50,
|
|
208
222
|
speech_rate=1.0,
|
|
209
223
|
pitch_rate=1.0,
|
|
224
|
+
seed=0,
|
|
225
|
+
synthesis_type=0,
|
|
226
|
+
instruction=None,
|
|
227
|
+
language_hints: list = None,
|
|
210
228
|
headers=None,
|
|
211
229
|
callback: ResultCallback = None,
|
|
212
230
|
workspace=None,
|
|
@@ -237,6 +255,14 @@ class SpeechSynthesizer:
|
|
|
237
255
|
Dashscope workspace ID.
|
|
238
256
|
url: str
|
|
239
257
|
Dashscope WebSocket URL.
|
|
258
|
+
seed: int
|
|
259
|
+
The seed of the synthesizer, with a range from 0 to 65535. Default is 0.
|
|
260
|
+
synthesis_type: int
|
|
261
|
+
The type of the synthesizer, Default is 0.
|
|
262
|
+
instruction: str
|
|
263
|
+
The instruction of the synthesizer, max length is 128.
|
|
264
|
+
language_hints: list
|
|
265
|
+
The language hints of the synthesizer. supported language: zh, en.
|
|
240
266
|
additional_params: Dict
|
|
241
267
|
Additional parameters for the Dashscope API.
|
|
242
268
|
"""
|
|
@@ -271,6 +297,10 @@ class SpeechSynthesizer:
|
|
|
271
297
|
volume=volume,
|
|
272
298
|
speech_rate=speech_rate,
|
|
273
299
|
pitch_rate=pitch_rate,
|
|
300
|
+
seed=seed,
|
|
301
|
+
synthesis_type=synthesis_type,
|
|
302
|
+
instruction=instruction,
|
|
303
|
+
language_hints=language_hints
|
|
274
304
|
)
|
|
275
305
|
self.last_request_id = self.request.task_id
|
|
276
306
|
self.start_event = threading.Event()
|
dashscope/multimodal/__init__.py
CHANGED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
2
|
+
|
|
3
|
+
from .tingwu import tingwu
|
|
4
|
+
from .tingwu.tingwu import TingWu
|
|
5
|
+
from .tingwu.tingwu_realtime import TingWuRealtime, TingWuRealtimeCallback
|
|
6
|
+
|
|
7
|
+
from .multimodal_dialog import MultiModalDialog, MultiModalCallback
|
|
8
|
+
from .dialog_state import DialogState
|
|
9
|
+
from .multimodal_constants import *
|
|
10
|
+
from .multimodal_request_params import *
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
'tingwu',
|
|
14
|
+
'TingWu',
|
|
15
|
+
'TingWuRealtime',
|
|
16
|
+
'TingWuRealtimeCallback',
|
|
17
|
+
'MultiModalDialog',
|
|
18
|
+
'MultiModalCallback',
|
|
19
|
+
'DialogState'
|
|
20
|
+
]
|
|
@@ -0,0 +1,579 @@
|
|
|
1
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import platform
|
|
5
|
+
import threading
|
|
6
|
+
import time
|
|
7
|
+
import uuid
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from queue import Queue
|
|
10
|
+
import dashscope
|
|
11
|
+
from dashscope.client.base_api import BaseApi
|
|
12
|
+
from dashscope.common.error import (InvalidParameter, ModelRequired)
|
|
13
|
+
import websocket
|
|
14
|
+
|
|
15
|
+
from dashscope.common.logging import logger
|
|
16
|
+
from dashscope.protocol.websocket import ActionType
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TingWuRealtimeCallback:
    """Base class for receiving TingWu realtime events.

    Every hook below is a no-op. Subclass it and override the hooks you
    need in order to consume connection events and results.
    """

    def on_open(self) -> None:
        """Hook for the moment the WebSocket connection is opened."""

    def on_started(self, task_id: str) -> None:
        """Hook for the server's ``task-started`` event.

        :param task_id: task id reported by the server
        """

    def on_speech_listen(self, result: dict):
        """Hook for a ``speech-listen`` action.

        :param result: the full parsed server message
        """

    def on_recognize_result(self, result: dict):
        """Hook for a ``recognize-result`` action.

        :param result: the full parsed server message
        """

    def on_ai_result(self, result: dict):
        """Hook for an ``ai-result`` action.

        :param result: the full parsed server message
        """

    def on_stopped(self) -> None:
        """Hook for a ``speech-end`` action."""

    def on_error(self, error_code: str, error_msg: str) -> None:
        """Hook for a failed task or a WebSocket level error.

        :param error_code: error code string
        :param error_msg: human readable error description
        """

    def on_close(self, close_status_code, close_msg):
        """Hook for the WebSocket connection being closed.

        :param close_status_code: close status code
        :param close_msg: close message
        """
        return None
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class TingWuRealtime(BaseApi):
    """Realtime TingWu client that talks to the server over a WebSocket.

    Args:
        model (str): The requested model_id.
        callback (TingWuRealtimeCallback): A callback that receives
            connection events and speech recognition results.
        audio_format (str): The input audio format for TingWu request.
        sample_rate (int): The input audio sample rate.
        max_end_silence (int): The maximum end silence time.
        app_id (str): The dashscope tingwu app id.
        terminology (str): The correct instruction set id.
        workspace (str): The dashscope workspace id.
        api_key (str): API key; ``dashscope.api_key`` is used when None.
        base_address (str): WebSocket URL;
            ``dashscope.base_websocket_api_url`` is used when None.
        data_id (str): Sent as ``dataId`` in the start request when set.

        **kwargs:
            Extra parameters merged into the start request parameters.

    Raises:
        ModelRequired: ``model`` is None.
    """

    SILENCE_TIMEOUT_S = 60

    def __init__(self,
                 model: str,
                 callback: TingWuRealtimeCallback,
                 audio_format: str = "pcm",
                 sample_rate: int = 16000,
                 max_end_silence: int = None,
                 app_id: str = None,
                 terminology: str = None,
                 workspace: str = None,
                 api_key: str = None,
                 base_address: str = None,
                 data_id: str = None,
                 **kwargs):
        if api_key is None:
            self.api_key = dashscope.api_key
        else:
            self.api_key = api_key
        if base_address is None:
            self.base_address = dashscope.base_websocket_api_url
        else:
            self.base_address = base_address

        if model is None:
            raise ModelRequired('Model is required!')

        self.data_id = data_id
        self.max_end_silence = max_end_silence
        self.model = model
        self.audio_format = audio_format
        self.app_id = app_id
        self.terminology = terminology
        self.sample_rate = sample_rate
        # NOTE(review): not read anywhere in this class; looks like a
        # leftover from another client - confirm before removing.
        self._recognition_once = False
        self._callback = callback
        self._running = False
        self._stream_data = Queue()
        self._worker = None
        self._silence_timer = None
        self._kwargs = kwargs
        self._workspace = workspace
        self._start_stream_timestamp = -1
        self._first_package_timestamp = -1
        self._stop_stream_timestamp = -1
        self._on_complete_timestamp = -1
        self.request_id_confirmed = False
        self.last_request_id = uuid.uuid4().hex
        # Created by _connect(); defined here so stop()/close()/cleanup()
        # are safe to call before start().
        self.ws = None
        self.thread = None
        self.request = _Request()
        # self.close is handed over so the response handler can close the
        # connection once the server reports "speech-end".
        self.response = _TingWuResponse(self._callback, self.close)

    def _on_message(self, ws, message):
        """Route an incoming frame to the text or binary response handler."""
        logger.debug(f"<<<<<<< Received message: {message}")
        if isinstance(message, str):
            self.response.handle_text_response(message)
        elif isinstance(message, (bytes, bytearray)):
            self.response.handle_binary_response(message)

    def _on_error(self, ws, error):
        """Log a WebSocket error and forward it to the callback.

        The error code is derived from keywords in the error text and is
        an empty string when none of them matches.
        """
        logger.error(f"Error: {error}")
        if self._callback:
            error_code = ""  # default error code
            if "connection" in str(error).lower():
                error_code = "1001"  # connection error
            elif "timeout" in str(error).lower():
                error_code = "1002"  # timeout error
            elif "authentication" in str(error).lower():
                error_code = "1003"  # authentication error
            self._callback.on_error(error_code=error_code, error_msg=str(error))

    def _on_close(self, ws, close_status_code, close_msg):
        """Mark the client as stopped and notify the callback.

        Missing status/message values are replaced with 1000 and
        "websocket is closed" before being passed on.
        """
        # The connection is gone: allow start() again and make
        # send_audio_frame() report the stopped state.
        self._running = False
        try:
            logger.debug(
                "WebSocket connection closed with status {} and message {}".format(close_status_code, close_msg))
            if close_status_code is None:
                close_status_code = 1000
            if close_msg is None:
                close_msg = "websocket is closed"
            self._callback.on_close(close_status_code, close_msg)
        except Exception as e:
            logger.error(f"Error: {e}")

    def _on_open(self, ws):
        """Notify the callback and mark the client as running."""
        self._callback.on_open()
        self._running = True

    def start(self, **kwargs):
        """
        interface for starting TingWu connection

        :param kwargs: extra start request parameters; they are merged
            into the ones given to the constructor

        Raises:
            InvalidParameter: the client is already running.
        """
        assert self._callback is not None, 'Please set the callback to get the TingWu result.'  # noqa E501

        if self._running:
            raise InvalidParameter('TingWu client has started.')

        # Merge whenever start() received parameters, regardless of
        # whether the constructor received any.
        if kwargs:
            self._kwargs.update(kwargs)

        self._connect(self.api_key)
        logger.debug("connected with server.")
        self._send_start_request()

    def send_audio_data(self, speech_data: bytes):
        """send audio data to server; silently ignored when not running"""
        if self._running:
            self.__send_binary_frame(speech_data)

    def stop(self):
        """Send the stop request.

        When the socket is not connected, the callback's ``on_close`` is
        invoked with status 1001 instead.
        """
        if self.ws is None or not self.ws.sock or not self.ws.sock.connected:
            self._callback.on_close(1001, "websocket is not connected")
            return
        _send_speech_json = self.request.generate_stop_request("stop")
        self._send_text_frame(_send_speech_json)

    # ---- internal helpers ----

    def _send_start_request(self):
        """send start request"""
        _start_json = self.request.generate_start_request(
            workspace_id=self._workspace,
            direction_name="start",
            app_id=self.app_id,
            model=self.model,
            audio_format=self.audio_format,
            sample_rate=self.sample_rate,
            terminology=self.terminology,
            max_end_silence=self.max_end_silence,
            data_id=self.data_id,
            **self._kwargs
        )
        # send start request
        self._send_text_frame(_start_json)

    def _run_forever(self):
        """Thread target: run the WebSocket loop with keep-alive pings."""
        self.ws.run_forever(ping_interval=5, ping_timeout=4)

    def _connect(self, api_key: str):
        """init websocket connection"""
        self.ws = websocket.WebSocketApp(self.base_address, header=self.request.get_websocket_header(api_key),
                                         on_open=self._on_open,
                                         on_message=self._on_message,
                                         on_error=self._on_error,
                                         on_close=self._on_close)
        self.thread = threading.Thread(target=self._run_forever)
        # same heartbeat settings as the ones passed to run_forever()
        self.ws.ping_interval = 5
        self.ws.ping_timeout = 4
        self.thread.daemon = True
        self.thread.start()

        self._wait_for_connection()

    def close(self):
        """Close the WebSocket if it is currently connected."""
        if self.ws is None or not self.ws.sock or not self.ws.sock.connected:
            return
        self.ws.close()

    def _wait_for_connection(self):
        """Poll until the socket reports connected or 5 seconds elapse.

        Returns silently on timeout; no error is raised here.
        """
        timeout = 5
        start_time = time.time()
        while not (self.ws.sock and self.ws.sock.connected) and (time.time() - start_time) < timeout:
            time.sleep(0.1)  # short sleep to avoid tight polling

    def _send_text_frame(self, text: str):
        """Send a text frame, logging it unless it carries authorization."""
        # Avoid writing sensitive data such as the API key to the log;
        # only frames without an Authorization field are logged verbatim.
        if '"Authorization"' not in text:
            logger.info('>>>>>> send text frame : %s' % text)
        else:
            logger.info('>>>>>> send text frame with authorization header')
        self.ws.send(text, websocket.ABNF.OPCODE_TEXT)

    def __send_binary_frame(self, binary: bytes):
        """Send a binary frame."""
        self.ws.send(binary, websocket.ABNF.OPCODE_BINARY)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.cleanup()
        return False

    def cleanup(self):
        """cleanup resources"""
        try:
            if self.ws:
                self.ws.close()
            if self.thread and self.thread.is_alive():
                # give the websocket thread a moment to exit
                self.thread.join(timeout=2)
            # drop references
            self.ws = None
            self.thread = None
            self._callback = None
            self.response = None
        except Exception as e:
            logger.error(f"Error in cleanup: {e}")

    def send_audio_frame(self, buffer: bytes):
        """Push audio to server

        Raises:
            InvalidParameter: Cannot send data when the client is not running.
        """
        if self._running is False:
            raise InvalidParameter('TingWu client has stopped.')

        # remember when the first frame was sent (milliseconds)
        if self._start_stream_timestamp < 0:
            self._start_stream_timestamp = time.time() * 1000
        logger.debug('send_audio_frame: {}'.format(len(buffer)))
        self.__send_binary_frame(buffer)
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
class _Request:
|
|
300
|
+
def __init__(self):
|
|
301
|
+
# websocket header
|
|
302
|
+
self.ws_headers = None
|
|
303
|
+
# request body for voice chat
|
|
304
|
+
self.header = None
|
|
305
|
+
self.payload = None
|
|
306
|
+
# params
|
|
307
|
+
self.task_id = None
|
|
308
|
+
self.app_id = None
|
|
309
|
+
self.workspace_id = None
|
|
310
|
+
|
|
311
|
+
def get_websocket_header(self, api_key):
|
|
312
|
+
ua = 'dashscope/%s; python/%s; platform/%s; processor/%s' % (
|
|
313
|
+
'1.18.0', # dashscope version
|
|
314
|
+
platform.python_version(),
|
|
315
|
+
platform.platform(),
|
|
316
|
+
platform.processor(),
|
|
317
|
+
)
|
|
318
|
+
self.ws_headers = {
|
|
319
|
+
"User-Agent": ua,
|
|
320
|
+
"Authorization": f"bearer {api_key}",
|
|
321
|
+
"Accept": "application/json"
|
|
322
|
+
}
|
|
323
|
+
logger.info('websocket header: {}'.format(self.ws_headers))
|
|
324
|
+
return self.ws_headers
|
|
325
|
+
|
|
326
|
+
def generate_start_request(self, direction_name: str,
|
|
327
|
+
app_id: str,
|
|
328
|
+
model: str = None,
|
|
329
|
+
workspace_id: str = None,
|
|
330
|
+
audio_format: str = None,
|
|
331
|
+
sample_rate: int = None,
|
|
332
|
+
terminology: str = None,
|
|
333
|
+
max_end_silence: int = None,
|
|
334
|
+
data_id: str = None,
|
|
335
|
+
**kwargs
|
|
336
|
+
) -> str:
|
|
337
|
+
"""
|
|
338
|
+
build start request.
|
|
339
|
+
:param app_id: web console app id
|
|
340
|
+
:param direction_name:
|
|
341
|
+
:param workspace_id: web console workspace id
|
|
342
|
+
:param model: model name
|
|
343
|
+
:param audio_format: audio format
|
|
344
|
+
:param sample_rate: sample rate
|
|
345
|
+
:param terminology:
|
|
346
|
+
:param max_end_silence:
|
|
347
|
+
:param data_id:
|
|
348
|
+
:return:
|
|
349
|
+
Args:
|
|
350
|
+
:
|
|
351
|
+
"""
|
|
352
|
+
self._get_dash_request_header(ActionType.START)
|
|
353
|
+
parameters = self._get_start_parameters(audio_format=audio_format, sample_rate=sample_rate,
|
|
354
|
+
max_end_silence=max_end_silence,
|
|
355
|
+
terminology=terminology,
|
|
356
|
+
**kwargs)
|
|
357
|
+
self._get_dash_request_payload(direction_name=direction_name, app_id=app_id, workspace_id=workspace_id,
|
|
358
|
+
model=model,
|
|
359
|
+
data_id=data_id,
|
|
360
|
+
request_params=parameters)
|
|
361
|
+
|
|
362
|
+
cmd = {
|
|
363
|
+
"header": self.header,
|
|
364
|
+
"payload": self.payload
|
|
365
|
+
}
|
|
366
|
+
return json.dumps(cmd)
|
|
367
|
+
|
|
368
|
+
@staticmethod
|
|
369
|
+
def _get_start_parameters(audio_format: str = None,
|
|
370
|
+
sample_rate: int = None,
|
|
371
|
+
terminology: str = None,
|
|
372
|
+
max_end_silence: int = None,
|
|
373
|
+
**kwargs):
|
|
374
|
+
"""
|
|
375
|
+
build start request parameters inner.
|
|
376
|
+
:param kwargs: parameters
|
|
377
|
+
:return
|
|
378
|
+
"""
|
|
379
|
+
parameters = {}
|
|
380
|
+
if audio_format is not None:
|
|
381
|
+
parameters['format'] = audio_format
|
|
382
|
+
if sample_rate is not None:
|
|
383
|
+
parameters['sampleRate'] = sample_rate
|
|
384
|
+
if terminology is not None:
|
|
385
|
+
parameters['terminology'] = terminology
|
|
386
|
+
if max_end_silence is not None:
|
|
387
|
+
parameters['maxEndSilence'] = max_end_silence
|
|
388
|
+
if kwargs is not None and len(kwargs) != 0:
|
|
389
|
+
parameters.update(kwargs)
|
|
390
|
+
return parameters
|
|
391
|
+
|
|
392
|
+
def generate_stop_request(self, direction_name: str) -> str:
|
|
393
|
+
"""
|
|
394
|
+
build stop request.
|
|
395
|
+
:param direction_name
|
|
396
|
+
:return
|
|
397
|
+
"""
|
|
398
|
+
self._get_dash_request_header(ActionType.FINISHED)
|
|
399
|
+
self._get_dash_request_payload(direction_name, self.app_id)
|
|
400
|
+
|
|
401
|
+
cmd = {
|
|
402
|
+
"header": self.header,
|
|
403
|
+
"payload": self.payload
|
|
404
|
+
}
|
|
405
|
+
return json.dumps(cmd)
|
|
406
|
+
|
|
407
|
+
def _get_dash_request_header(self, action: str):
|
|
408
|
+
"""
|
|
409
|
+
:param action: ActionType :run-task, continue-task, finish-task
|
|
410
|
+
"""
|
|
411
|
+
if self.task_id is None:
|
|
412
|
+
self.task_id = get_random_uuid()
|
|
413
|
+
self.header = DashHeader(action=action, task_id=self.task_id).to_dict()
|
|
414
|
+
|
|
415
|
+
def _get_dash_request_payload(self, direction_name: str,
|
|
416
|
+
app_id: str,
|
|
417
|
+
workspace_id: str = None,
|
|
418
|
+
custom_input=None,
|
|
419
|
+
model: str = None,
|
|
420
|
+
data_id: str = None,
|
|
421
|
+
request_params=None,
|
|
422
|
+
):
|
|
423
|
+
"""
|
|
424
|
+
build start request payload inner.
|
|
425
|
+
:param direction_name: inner direction name
|
|
426
|
+
:param app_id: web console app id
|
|
427
|
+
:param request_params: start direction body parameters
|
|
428
|
+
:param custom_input: user custom input
|
|
429
|
+
:param data_id: data id
|
|
430
|
+
:param model: model name
|
|
431
|
+
"""
|
|
432
|
+
if custom_input is not None:
|
|
433
|
+
input = custom_input
|
|
434
|
+
else:
|
|
435
|
+
input = RequestBodyInput(
|
|
436
|
+
workspace_id=workspace_id,
|
|
437
|
+
app_id=app_id,
|
|
438
|
+
directive=direction_name,
|
|
439
|
+
data_id=data_id
|
|
440
|
+
)
|
|
441
|
+
|
|
442
|
+
self.payload = DashPayload(
|
|
443
|
+
model=model,
|
|
444
|
+
input=input.to_dict(),
|
|
445
|
+
parameters=request_params
|
|
446
|
+
).to_dict()
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
class _TingWuResponse:
|
|
450
|
+
def __init__(self, callback: TingWuRealtimeCallback, close_callback=None):
|
|
451
|
+
super().__init__()
|
|
452
|
+
self.task_id = None # 对话ID.
|
|
453
|
+
self._callback = callback
|
|
454
|
+
self._close_callback = close_callback # 保存关闭回调函数
|
|
455
|
+
|
|
456
|
+
def handle_text_response(self, response_json: str):
|
|
457
|
+
"""
|
|
458
|
+
handle text response.
|
|
459
|
+
:param response_json: json format response from server
|
|
460
|
+
"""
|
|
461
|
+
logger.info("<<<<<< server response: %s" % response_json)
|
|
462
|
+
try:
|
|
463
|
+
# try to parse response as json
|
|
464
|
+
json_data = json.loads(response_json)
|
|
465
|
+
header = json_data.get('header', {})
|
|
466
|
+
if header.get('event') == 'task-failed':
|
|
467
|
+
logger.error('Server returned invalid message: %s' % response_json)
|
|
468
|
+
if self._callback:
|
|
469
|
+
self._callback.on_error(error_code=header.get('error_code'),
|
|
470
|
+
error_msg=header.get('error_message'))
|
|
471
|
+
return
|
|
472
|
+
if header.get('event') == "task-started":
|
|
473
|
+
self._handle_started(header.get('task_id'))
|
|
474
|
+
return
|
|
475
|
+
|
|
476
|
+
payload = json_data.get('payload', {})
|
|
477
|
+
output = payload.get('output', {})
|
|
478
|
+
if output is not None:
|
|
479
|
+
action = output.get('action')
|
|
480
|
+
logger.info("Server response action: %s" % action)
|
|
481
|
+
self._handle_tingwu_agent_text_response(action=action, response_json=json_data)
|
|
482
|
+
|
|
483
|
+
except json.JSONDecodeError:
|
|
484
|
+
logger.error("Failed to parse message as JSON.")
|
|
485
|
+
|
|
486
|
+
def handle_binary_response(self, response_binary: bytes):
|
|
487
|
+
"""
|
|
488
|
+
handle binary response.
|
|
489
|
+
:param response_binary: server response binary。
|
|
490
|
+
"""
|
|
491
|
+
logger.info("<<<<<< server response binary length: %d" % len(response_binary))
|
|
492
|
+
|
|
493
|
+
def _handle_tingwu_agent_text_response(self, action: str, response_json: dict):
|
|
494
|
+
payload = response_json.get('payload', {})
|
|
495
|
+
output = payload.get('output', {})
|
|
496
|
+
if action == "task-failed":
|
|
497
|
+
self._callback.on_error(error_code=output.get('errorCode'),
|
|
498
|
+
error_msg=output.get('errorMessage'))
|
|
499
|
+
elif action == "speech-listen":
|
|
500
|
+
self._callback.on_speech_listen(response_json)
|
|
501
|
+
elif action == "recognize-result":
|
|
502
|
+
self._callback.on_recognize_result(response_json)
|
|
503
|
+
elif action == "ai-result":
|
|
504
|
+
self._callback.on_ai_result(response_json)
|
|
505
|
+
elif action == "speech-end": # ai-result事件永远会先于speech-end事件
|
|
506
|
+
self._callback.on_stopped()
|
|
507
|
+
if self._close_callback is not None:
|
|
508
|
+
self._close_callback()
|
|
509
|
+
else:
|
|
510
|
+
logger.info("Unknown response name:" + action)
|
|
511
|
+
|
|
512
|
+
def _handle_started(self, task_id: str):
|
|
513
|
+
self.task_id = task_id
|
|
514
|
+
self._callback.on_started(self.task_id)
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
def get_random_uuid() -> str:
    """Return a fresh random UUID (version 4) as 32 hex characters."""
    fresh = uuid.uuid4()
    return fresh.hex
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
@dataclass
class RequestBodyInput:
    """The ``input`` section of a request payload."""
    app_id: str
    directive: str
    data_id: str = None
    workspace_id: str = None

    def to_dict(self):
        """Return the wire-format dict; optional ids that are None are omitted."""
        result = {"appId": self.app_id, "directive": self.directive}
        optional_entries = (
            ("workspace_id", self.workspace_id),
            ("dataId", self.data_id),
        )
        result.update(
            (key, value) for key, value in optional_entries if value is not None
        )
        return result
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
@dataclass
class DashHeader:
    """The ``header`` section of a request frame."""
    action: str
    # default_factory gives each instance its own random id; a plain
    # default would be computed once when the class is created and then
    # shared by every header built without an explicit task_id.
    task_id: str = field(default_factory=lambda: uuid.uuid4().hex)
    streaming: str = field(default="duplex")  # defaults to duplex

    def to_dict(self):
        """Return the wire-format dict; ``request_id`` mirrors ``task_id``."""
        return {
            "action": self.action,
            "task_id": self.task_id,
            "request_id": self.task_id,
            "streaming": self.streaming
        }
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
@dataclass
class DashPayload:
    """The ``payload`` section of a request frame."""
    task_group: str = "aigc"
    function: str = "generation"
    model: str = ""
    task: str = "multimodal-generation"
    parameters: dict = None
    input: dict = None

    def to_dict(self):
        """Return the wire-format dict; ``parameters``/``input`` are omitted when None."""
        always_present = ("task_group", "function", "model", "task")
        result = {name: getattr(self, name) for name in always_present}
        for name in ("parameters", "input"):
            value = getattr(self, name)
            if value is not None:
                result[name] = value
        return result
|
dashscope/version.py
CHANGED
|
@@ -3,7 +3,7 @@ dashscope/cli.py,sha256=64oGkevgX0RHPPmMg0sevXDgaFLQNA_0vdtjQ7Z2pHM,26492
|
|
|
3
3
|
dashscope/files.py,sha256=vRDQygm3lOqBZR73o7KNHs1iTBVuvLncuwJNxIYjzAU,3981
|
|
4
4
|
dashscope/model.py,sha256=B5v_BtYLPqj6raClejBgdKg6WTGwhH_f-20pvsQqmsk,1491
|
|
5
5
|
dashscope/models.py,sha256=dE4mzXkl85G343qVylSGpURPRdA5pZSqXlx6PcxqC_Q,1275
|
|
6
|
-
dashscope/version.py,sha256
|
|
6
|
+
dashscope/version.py,sha256=-rY4TVBLCGXmd6f3VVXw5GRs4Fvfu8nGa-Yu2KG4ReM,74
|
|
7
7
|
dashscope/aigc/__init__.py,sha256=m51CHEKL3WPq-s14OF-G1Uk3rLj6B6KrU55bbCKU-Ak,500
|
|
8
8
|
dashscope/aigc/chat_completion.py,sha256=ONlyyssIbfaKKcFo7cEKhHx5OCF2XX810HFzIExW1ho,14813
|
|
9
9
|
dashscope/aigc/code_generation.py,sha256=p_mxDKJLQMW0IjFD46JRlZuEZCRESSVKEfLlAevBtqw,10936
|
|
@@ -46,7 +46,7 @@ dashscope/audio/tts/__init__.py,sha256=xYpMFseUZGgqgj_70zcX2VsLv-L7qxJ3d-bbdj_hO
|
|
|
46
46
|
dashscope/audio/tts/speech_synthesizer.py,sha256=vD1xQV-rew8qAsIaAGH5amsNtB0SqdtNhVHhJHGQ-xk,7622
|
|
47
47
|
dashscope/audio/tts_v2/__init__.py,sha256=me9a3_7KsHQxcJ8hx4SeKlY1e_ThHVvGMw7Yn0uoscM,333
|
|
48
48
|
dashscope/audio/tts_v2/enrollment.py,sha256=-nrlywYSOP73Bm9ETTSxNnlp-B8ezJcUmd59mVvyvgk,6361
|
|
49
|
-
dashscope/audio/tts_v2/speech_synthesizer.py,sha256=
|
|
49
|
+
dashscope/audio/tts_v2/speech_synthesizer.py,sha256=p764P4TYwLkvvPCpA4VnFwlNbIJbuNbp2d9mxgni7Ws,22047
|
|
50
50
|
dashscope/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
51
51
|
dashscope/client/base_api.py,sha256=znAJ65DeHiFw1H7FWK0YrkLz1CoNcyqUxF8EJ3gujeY,52523
|
|
52
52
|
dashscope/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -69,13 +69,14 @@ dashscope/embeddings/multimodal_embedding.py,sha256=NwjQsdkKgUz51ozGjqFDzVlLcZjY
|
|
|
69
69
|
dashscope/embeddings/text_embedding.py,sha256=2MPEyMB99xueDbvFg9kKAe8bgHMDEaFLaFa6GzDWDHg,2108
|
|
70
70
|
dashscope/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
71
71
|
dashscope/io/input_output.py,sha256=0aXrRJFo1ZqYm_AJWR_w88O4-Btn9np2zUhrrUdBdfw,3992
|
|
72
|
-
dashscope/multimodal/__init__.py,sha256=
|
|
72
|
+
dashscope/multimodal/__init__.py,sha256=fyqeolbDLWVn5wSpPZ3nAOnUBRF9k6mlsy6dCmgjPvI,533
|
|
73
73
|
dashscope/multimodal/dialog_state.py,sha256=CtOdfGWhq0ePG3bc8-7inhespETtPD4QDli1513hd1A,1522
|
|
74
74
|
dashscope/multimodal/multimodal_constants.py,sha256=z_QVq01E43FAqKQnDu9vdf89d1zuYlWyANewWTEXVJM,1282
|
|
75
75
|
dashscope/multimodal/multimodal_dialog.py,sha256=HymlaQYp7SgJdoKbT27SNiviyRRoM91zklNBwTHmm1Q,23939
|
|
76
76
|
dashscope/multimodal/multimodal_request_params.py,sha256=Lbxf_kLnFUkhty8AU9wL7ws9tYbmhHPVmsiXLdynlJg,8402
|
|
77
|
-
dashscope/multimodal/tingwu/__init__.py,sha256=
|
|
77
|
+
dashscope/multimodal/tingwu/__init__.py,sha256=Gi9GEM0bdeJlZpvyksSeHOc2--_tG5aF6QAx6TAS2fE,225
|
|
78
78
|
dashscope/multimodal/tingwu/tingwu.py,sha256=01d-QOeuB1QmRhiZqbXJ8pHoGqT0C-xZTjIs_ZBXOyw,2613
|
|
79
|
+
dashscope/multimodal/tingwu/tingwu_realtime.py,sha256=oBeqrZit3uBZHuyI7m9VILz2qaqJRMO0-Nm2eJ5Q63g,20215
|
|
79
80
|
dashscope/nlp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
80
81
|
dashscope/nlp/understanding.py,sha256=00ado-ibYEzBRT0DgKGd3bohQDNW73xnFhJ_1aa87lw,2880
|
|
81
82
|
dashscope/protocol/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -99,9 +100,9 @@ dashscope/tokenizers/tokenizer.py,sha256=3FQVDvMNkCW9ccYeJdjrd_PIMMD3Xv7aNZkaYOE
|
|
|
99
100
|
dashscope/tokenizers/tokenizer_base.py,sha256=5EJIFuizMWESEmLmbd38yJnfeHmPnzZPwsO4aOGjpl4,707
|
|
100
101
|
dashscope/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
101
102
|
dashscope/utils/oss_utils.py,sha256=aZIHlMN2JOfVw6kp0SVrMw_N1MfoTcR_-wiRbJ7DgHw,7501
|
|
102
|
-
dashscope-1.24.
|
|
103
|
-
dashscope-1.24.
|
|
104
|
-
dashscope-1.24.
|
|
105
|
-
dashscope-1.24.
|
|
106
|
-
dashscope-1.24.
|
|
107
|
-
dashscope-1.24.
|
|
103
|
+
dashscope-1.24.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
104
|
+
dashscope-1.24.4.dist-info/METADATA,sha256=uGVIdzKXASvLK5vUwMAFzIOS_qCj1RIGo_XCqvrjloQ,7146
|
|
105
|
+
dashscope-1.24.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
106
|
+
dashscope-1.24.4.dist-info/entry_points.txt,sha256=e9C3sOf9zDYL0O5ROEGX6FT8w-QK_kaGRWmPZDHAFys,49
|
|
107
|
+
dashscope-1.24.4.dist-info/top_level.txt,sha256=woqavFJK9zas5xTqynmALqOtlafghjsk63Xk86powTU,10
|
|
108
|
+
dashscope-1.24.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|