dashscope 1.8.0__py3-none-any.whl → 1.25.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dashscope/__init__.py +61 -14
- dashscope/aigc/__init__.py +10 -3
- dashscope/aigc/chat_completion.py +282 -0
- dashscope/aigc/code_generation.py +145 -0
- dashscope/aigc/conversation.py +71 -12
- dashscope/aigc/generation.py +288 -16
- dashscope/aigc/image_synthesis.py +473 -31
- dashscope/aigc/multimodal_conversation.py +299 -14
- dashscope/aigc/video_synthesis.py +610 -0
- dashscope/api_entities/aiohttp_request.py +8 -5
- dashscope/api_entities/api_request_data.py +4 -2
- dashscope/api_entities/api_request_factory.py +68 -20
- dashscope/api_entities/base_request.py +20 -3
- dashscope/api_entities/chat_completion_types.py +344 -0
- dashscope/api_entities/dashscope_response.py +243 -15
- dashscope/api_entities/encryption.py +179 -0
- dashscope/api_entities/http_request.py +216 -62
- dashscope/api_entities/websocket_request.py +43 -34
- dashscope/app/__init__.py +5 -0
- dashscope/app/application.py +203 -0
- dashscope/app/application_response.py +246 -0
- dashscope/assistants/__init__.py +16 -0
- dashscope/assistants/assistant_types.py +175 -0
- dashscope/assistants/assistants.py +311 -0
- dashscope/assistants/files.py +197 -0
- dashscope/audio/__init__.py +4 -2
- dashscope/audio/asr/__init__.py +17 -1
- dashscope/audio/asr/asr_phrase_manager.py +203 -0
- dashscope/audio/asr/recognition.py +167 -27
- dashscope/audio/asr/transcription.py +107 -14
- dashscope/audio/asr/translation_recognizer.py +1006 -0
- dashscope/audio/asr/vocabulary.py +177 -0
- dashscope/audio/qwen_asr/__init__.py +7 -0
- dashscope/audio/qwen_asr/qwen_transcription.py +189 -0
- dashscope/audio/qwen_omni/__init__.py +11 -0
- dashscope/audio/qwen_omni/omni_realtime.py +524 -0
- dashscope/audio/qwen_tts/__init__.py +5 -0
- dashscope/audio/qwen_tts/speech_synthesizer.py +77 -0
- dashscope/audio/qwen_tts_realtime/__init__.py +10 -0
- dashscope/audio/qwen_tts_realtime/qwen_tts_realtime.py +355 -0
- dashscope/audio/tts/__init__.py +2 -0
- dashscope/audio/tts/speech_synthesizer.py +5 -0
- dashscope/audio/tts_v2/__init__.py +12 -0
- dashscope/audio/tts_v2/enrollment.py +179 -0
- dashscope/audio/tts_v2/speech_synthesizer.py +886 -0
- dashscope/cli.py +157 -37
- dashscope/client/base_api.py +652 -87
- dashscope/common/api_key.py +2 -0
- dashscope/common/base_type.py +135 -0
- dashscope/common/constants.py +13 -16
- dashscope/common/env.py +2 -0
- dashscope/common/error.py +58 -22
- dashscope/common/logging.py +2 -0
- dashscope/common/message_manager.py +2 -0
- dashscope/common/utils.py +276 -46
- dashscope/customize/__init__.py +0 -0
- dashscope/customize/customize_types.py +192 -0
- dashscope/customize/deployments.py +146 -0
- dashscope/customize/finetunes.py +234 -0
- dashscope/embeddings/__init__.py +5 -1
- dashscope/embeddings/batch_text_embedding.py +208 -0
- dashscope/embeddings/batch_text_embedding_response.py +65 -0
- dashscope/embeddings/multimodal_embedding.py +118 -10
- dashscope/embeddings/text_embedding.py +13 -1
- dashscope/{file.py → files.py} +19 -4
- dashscope/io/input_output.py +2 -0
- dashscope/model.py +11 -2
- dashscope/models.py +43 -0
- dashscope/multimodal/__init__.py +20 -0
- dashscope/multimodal/dialog_state.py +56 -0
- dashscope/multimodal/multimodal_constants.py +28 -0
- dashscope/multimodal/multimodal_dialog.py +648 -0
- dashscope/multimodal/multimodal_request_params.py +313 -0
- dashscope/multimodal/tingwu/__init__.py +10 -0
- dashscope/multimodal/tingwu/tingwu.py +80 -0
- dashscope/multimodal/tingwu/tingwu_realtime.py +579 -0
- dashscope/nlp/__init__.py +0 -0
- dashscope/nlp/understanding.py +64 -0
- dashscope/protocol/websocket.py +3 -0
- dashscope/rerank/__init__.py +0 -0
- dashscope/rerank/text_rerank.py +69 -0
- dashscope/resources/qwen.tiktoken +151643 -0
- dashscope/threads/__init__.py +26 -0
- dashscope/threads/messages/__init__.py +0 -0
- dashscope/threads/messages/files.py +113 -0
- dashscope/threads/messages/messages.py +220 -0
- dashscope/threads/runs/__init__.py +0 -0
- dashscope/threads/runs/runs.py +501 -0
- dashscope/threads/runs/steps.py +112 -0
- dashscope/threads/thread_types.py +665 -0
- dashscope/threads/threads.py +212 -0
- dashscope/tokenizers/__init__.py +7 -0
- dashscope/tokenizers/qwen_tokenizer.py +111 -0
- dashscope/tokenizers/tokenization.py +125 -0
- dashscope/tokenizers/tokenizer.py +45 -0
- dashscope/tokenizers/tokenizer_base.py +32 -0
- dashscope/utils/__init__.py +0 -0
- dashscope/utils/message_utils.py +838 -0
- dashscope/utils/oss_utils.py +243 -0
- dashscope/utils/param_utils.py +29 -0
- dashscope/version.py +3 -1
- {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/METADATA +53 -50
- dashscope-1.25.6.dist-info/RECORD +112 -0
- {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/WHEEL +1 -1
- {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/entry_points.txt +0 -1
- {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info/licenses}/LICENSE +2 -4
- dashscope/deployment.py +0 -129
- dashscope/finetune.py +0 -149
- dashscope-1.8.0.dist-info/RECORD +0 -49
- {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,886 @@
|
|
|
1
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import platform
|
|
5
|
+
import random
|
|
6
|
+
import threading
|
|
7
|
+
import time
|
|
8
|
+
import uuid
|
|
9
|
+
from enum import Enum, unique
|
|
10
|
+
|
|
11
|
+
import websocket
|
|
12
|
+
|
|
13
|
+
import dashscope
|
|
14
|
+
from dashscope.common.error import InputRequired, InvalidTask, ModelRequired
|
|
15
|
+
from dashscope.common.logging import logger
|
|
16
|
+
from dashscope.protocol.websocket import (ACTION_KEY, EVENT_KEY, HEADER,
|
|
17
|
+
TASK_ID, ActionType, EventType,
|
|
18
|
+
WebsocketStreamingMode)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ResultCallback:
    """Callback interface for receiving speech-synthesis results.

    Derive from this class and override only the hooks you care about;
    every default implementation is a no-op, so partial overrides are safe.
    """

    def on_open(self) -> None:
        """Invoked once the synthesis task has been started."""
        pass

    def on_complete(self) -> None:
        """Invoked when the synthesis task finishes successfully."""
        pass

    def on_error(self, message) -> None:
        """Invoked when the task fails; *message* is the raw error payload."""
        pass

    def on_close(self) -> None:
        """Invoked when the session is closed (after complete or error)."""
        pass

    def on_event(self, message: str) -> None:
        """Invoked for intermediate JSON events received from the server."""
        pass

    def on_data(self, data: bytes) -> None:
        """Invoked for each chunk of synthesized audio bytes."""
        pass
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@unique
class AudioFormat(Enum):
    # Each member is (container/codec, sample rate in Hz, channel layout,
    # bit depth for wav/pcm or bitrate in kbps for mp3/opus).
    # DEFAULT uses placeholder values; they are resolved to concrete
    # defaults elsewhere (the SpeechSynthesizer maps DEFAULT to mp3/22050).
    DEFAULT = ('Default', 0, '0', 0)
    # NOTE(review): bit depth here is 0 although the name says 16BIT —
    # looks like a typo in the member value; confirm before relying on it.
    WAV_8000HZ_MONO_16BIT = ('wav', 8000, 'mono', 0)
    WAV_16000HZ_MONO_16BIT = ('wav', 16000, 'mono', 16)
    WAV_22050HZ_MONO_16BIT = ('wav', 22050, 'mono', 16)
    WAV_24000HZ_MONO_16BIT = ('wav', 24000, 'mono', 16)
    WAV_44100HZ_MONO_16BIT = ('wav', 44100, 'mono', 16)
    WAV_48000HZ_MONO_16BIT = ('wav', 48000, 'mono', 16)

    MP3_8000HZ_MONO_128KBPS = ('mp3', 8000, 'mono', 128)
    MP3_16000HZ_MONO_128KBPS = ('mp3', 16000, 'mono', 128)
    MP3_22050HZ_MONO_256KBPS = ('mp3', 22050, 'mono', 256)
    MP3_24000HZ_MONO_256KBPS = ('mp3', 24000, 'mono', 256)
    MP3_44100HZ_MONO_256KBPS = ('mp3', 44100, 'mono', 256)
    MP3_48000HZ_MONO_256KBPS = ('mp3', 48000, 'mono', 256)

    PCM_8000HZ_MONO_16BIT = ('pcm', 8000, 'mono', 16)
    PCM_16000HZ_MONO_16BIT = ('pcm', 16000, 'mono', 16)
    PCM_22050HZ_MONO_16BIT = ('pcm', 22050, 'mono', 16)
    PCM_24000HZ_MONO_16BIT = ('pcm', 24000, 'mono', 16)
    PCM_44100HZ_MONO_16BIT = ('pcm', 44100, 'mono', 16)
    PCM_48000HZ_MONO_16BIT = ('pcm', 48000, 'mono', 16)

    OGG_OPUS_8KHZ_MONO_32KBPS = ("opus", 8000, "mono", 32)
    OGG_OPUS_8KHZ_MONO_16KBPS = ("opus", 8000, "mono", 16)
    OGG_OPUS_16KHZ_MONO_16KBPS = ("opus", 16000, "mono", 16)
    OGG_OPUS_16KHZ_MONO_32KBPS = ("opus", 16000, "mono", 32)
    OGG_OPUS_16KHZ_MONO_64KBPS = ("opus", 16000, "mono", 64)
    OGG_OPUS_24KHZ_MONO_16KBPS = ("opus", 24000, "mono", 16)
    OGG_OPUS_24KHZ_MONO_32KBPS = ("opus", 24000, "mono", 32)
    OGG_OPUS_24KHZ_MONO_64KBPS = ("opus", 24000, "mono", 64)
    OGG_OPUS_48KHZ_MONO_16KBPS = ("opus", 48000, "mono", 16)
    OGG_OPUS_48KHZ_MONO_32KBPS = ("opus", 48000, "mono", 32)
    OGG_OPUS_48KHZ_MONO_64KBPS = ("opus", 48000, "mono", 64)

    def __init__(self, format, sample_rate, channels, bit_rate):
        # Enum.__init__ unpacks the member tuple into named attributes so
        # callers can read e.g. AudioFormat.PCM_16000HZ_MONO_16BIT.sample_rate.
        self.format = format
        self.sample_rate = sample_rate
        self.channels = channels
        self.bit_rate = bit_rate

    def __str__(self):
        # Human-readable description used in SpeechSynthesizer.__str__.
        return f'{self.format.upper()} with {self.sample_rate}Hz sample rate, {self.channels} channel, {self.bit_rate}'
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class Request:
    """Builds the protocol messages for one speech-synthesis session.

    A Request instance owns a single randomly generated ``task_id`` that is
    embedded in every message (run-task / continue-task / finish-task), and
    also produces the HTTP headers used when opening the websocket.
    """
    def __init__(
        self,
        apikey,
        model,
        voice,
        format='wav',
        sample_rate=16000,
        bit_rate=64000,
        volume=50,
        speech_rate=1.0,
        pitch_rate=1.0,
        seed=0,
        synthesis_type=0,
        instruction=None,
        language_hints: list = None,
    ):
        # One task id per Request; reused for every message of the session.
        self.task_id = self.genUid()
        self.apikey = apikey
        self.voice = voice
        self.model = model
        self.format = format
        self.sample_rate = sample_rate
        self.bit_rate = bit_rate
        self.volume = volume
        self.speech_rate = speech_rate
        self.pitch_rate = pitch_rate
        self.seed = seed
        self.synthesis_type = synthesis_type
        self.instruction = instruction
        self.language_hints = language_hints

    def genUid(self):
        # Generate a random UUID (hex form, no dashes) to use as the task id.
        return uuid.uuid4().hex

    def getWebsocketHeaders(self, headers, workspace):
        """Return HTTP headers for the websocket handshake.

        User-supplied *headers* override the defaults; *workspace*, when
        given, adds the X-DashScope-WorkSpace header.
        """
        # NOTE(review): the SDK version in the user-agent is hard-coded to
        # '1.18.0' — presumably it should track the installed dashscope
        # version; confirm against dashscope/version.py.
        ua = 'dashscope/%s; python/%s; platform/%s; processor/%s' % (
            '1.18.0',  # dashscope version
            platform.python_version(),
            platform.platform(),
            platform.processor(),
        )
        self.headers = {
            'user-agent': ua,
            'Authorization': 'bearer ' + self.apikey,
        }
        if headers:
            # Caller headers win on key collisions.
            self.headers = {**self.headers, **headers}
        if workspace:
            self.headers = {
                **self.headers,
                'X-DashScope-WorkSpace': workspace,
            }
        return self.headers

    def getStartRequest(self, additional_params=None):
        """Serialize the run-task (start) command as a JSON string.

        *additional_params* is merged into the parameters dict before the
        instruction/language_hints fields, so those two always win on
        key collisions with additional_params.
        """
        cmd = {
            HEADER: {
                ACTION_KEY: ActionType.START,
                TASK_ID: self.task_id,
                'streaming': WebsocketStreamingMode.DUPLEX,
            },
            'payload': {
                'model': self.model,
                'task_group': 'audio',
                'task': 'tts',
                'function': 'SpeechSynthesizer',
                'input': {},
                'parameters': {
                    'voice': self.voice,
                    'volume': self.volume,
                    'text_type': 'PlainText',
                    'sample_rate': self.sample_rate,
                    'rate': self.speech_rate,
                    'format': self.format,
                    'pitch': self.pitch_rate,
                    'seed': self.seed,
                    'type': self.synthesis_type
                },
            },
        }
        # bit_rate is only meaningful for the opus codec.
        if self.format == 'opus':
            cmd['payload']['parameters']['bit_rate'] = self.bit_rate
        if additional_params:
            cmd['payload']['parameters'].update(additional_params)
        if self.instruction is not None:
            cmd['payload']['parameters']['instruction'] = self.instruction
        if self.language_hints is not None:
            cmd['payload']['parameters']['language_hints'] = self.language_hints
        return json.dumps(cmd)

    def getContinueRequest(self, text):
        """Serialize a continue-task command carrying one chunk of *text*."""
        cmd = {
            HEADER: {
                ACTION_KEY: ActionType.CONTINUE,
                TASK_ID: self.task_id,
                'streaming': WebsocketStreamingMode.DUPLEX,
            },
            'payload': {
                'model': self.model,
                'task_group': 'audio',
                'task': 'tts',
                'function': 'SpeechSynthesizer',
                'input': {
                    'text': text
                },
            },
        }
        return json.dumps(cmd)

    def getFinishRequest(self):
        """Serialize the finish-task command (no further text will follow)."""
        cmd = {
            HEADER: {
                ACTION_KEY: ActionType.FINISHED,
                TASK_ID: self.task_id,
                'streaming': WebsocketStreamingMode.DUPLEX,
            },
            'payload': {
                'input': {},
            },
        }
        return json.dumps(cmd)
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
class SpeechSynthesizer:
    """Streaming speech-synthesis client over a websocket connection.

    Two usage modes:
      * ``call(text)`` — one-shot synthesis; blocks and returns the full
        audio bytes when no callback is set, otherwise delivers audio
        through the callback and returns None.
      * ``streaming_call(text)`` repeatedly, then ``streaming_complete()``
        (or ``async_streaming_complete()``) — incremental text input with
        real-time audio delivered via the callback.
    """
    def __init__(
        self,
        model,
        voice,
        format: AudioFormat = AudioFormat.DEFAULT,
        volume=50,
        speech_rate=1.0,
        pitch_rate=1.0,
        seed=0,
        synthesis_type=0,
        instruction=None,
        language_hints: list = None,
        headers=None,
        callback: ResultCallback = None,
        workspace=None,
        url=None,
        additional_params=None,
    ):
        """
        CosyVoice Speech Synthesis SDK
        Parameters:
        -----------
        model: str
            Model name.
        voice: str
            Voice name.
        format: AudioFormat
            Synthesis audio format.
        volume: int
            The volume of the synthesized audio, with a range from 0 to 100. Default is 50.
        speech_rate: float
            The speech rate of the synthesized audio, with a range from 0.5 to 2. Default is 1.0.
        pitch_rate: float
            The pitch of the synthesized audio, with a range from 0.5 to 2. Default is 1.0.
        headers: Dict
            User-defined headers.
        callback: ResultCallback
            Callback to receive real-time synthesis results.
        workspace: str
            Dashscope workspace ID.
        url: str
            Dashscope WebSocket URL.
        seed: int
            The seed of the synthesizer, with a range from 0 to 65535. Default is 0.
        synthesis_type: int
            The type of the synthesizer, Default is 0.
        instruction: str
            The instruction of the synthesizer, max length is 128.
        language_hints: list
            The language hints of the synthesizer. supported language: zh, en.
        additional_params: Dict
            Additional parameters for the Dashscope API.
        """
        # Websocket handle; created lazily in __connect.
        self.ws = None
        # Set when the server acknowledges task start / task completion.
        self.start_event = threading.Event()
        self.complete_event = threading.Event()
        self._stopped = threading.Event()
        # Accumulated audio bytes; only populated in blocking (no-callback) mode.
        self._audio_data: bytes = None
        self._is_started = False
        self._cancel = False
        self._cancel_lock = threading.Lock()
        self.async_call = True
        self._is_first = True
        # NOTE(review): duplicate assignment — async_call was already set
        # True just above; harmless but redundant.
        self.async_call = True
        # since dashscope sdk will send first text in run-task
        self._start_stream_timestamp = -1
        self._first_package_timestamp = -1
        # Total received audio duration in milliseconds (derived from byte
        # count assuming 16-bit mono PCM at self.sample_rate — see on_message).
        self._recv_audio_length = 0
        self.last_response = None
        self._close_ws_after_use = True
        self.__update_params(model, voice, format, volume, speech_rate,
                             pitch_rate, seed, synthesis_type, instruction,
                             language_hints, headers, callback, workspace,
                             url, additional_params)

    def __send_str(self, data: str):
        # Log then forward one text frame to the server.
        logger.debug('>>>send {}'.format(data))
        self.ws.send(data)

    def __connect(self, timeout_seconds=5) -> None:
        """
        Establish a connection to the Bailian WebSocket server,
        which can be used to pre-establish the connection and reduce interaction latency.
        If this function is not used to create the connection,
        it will be established when you first send text via call or streaming_call.
        Parameters:
        -----------
        timeout_seconds: int
            Throws TimeoutError exception if the connection is not established after times out seconds.
        """
        self.ws = websocket.WebSocketApp(
            self.url,
            header=self.request.getWebsocketHeaders(headers=self.headers,
                                                    workspace=self.workspace),
            on_message=self.on_message,
            on_error=self.on_error,
            on_close=self.on_close,
        )
        # run_forever blocks, so it runs on a daemon thread.
        self.thread = threading.Thread(target=self.ws.run_forever)
        self.thread.daemon = True
        self.thread.start()
        # Poll until the socket reports connected or the timeout elapses.
        start_time = time.time()
        while (not (self.ws.sock and self.ws.sock.connected)
               and (time.time() - start_time) < timeout_seconds):
            time.sleep(0.1)  # brief sleep to avoid busy polling
        if not (self.ws.sock and self.ws.sock.connected):
            # NOTE(review): message hard-codes "5s" although the timeout is
            # a parameter; also "could not established" is a typo.
            raise TimeoutError(
                'websocket connection could not established within 5s. '
                'Please check your network connection, firewall settings, or server status.'
            )

    def __is_connected(self) -> bool:
        """
        Returns True if the connection is established and still exists;
        otherwise, returns False.
        """
        if not self.ws:
            return False
        if not (self.ws.sock and self.ws.sock.connected):
            return False
        return True

    def __reset(self):
        # Reset per-session state so the synthesizer can be reused.
        self.start_event.clear()
        self.complete_event.clear()
        self._stopped.clear()
        self._audio_data: bytes = None
        self._is_started = False
        self._cancel = False
        self.async_call = True
        self._is_first = True
        # NOTE(review): duplicate assignment, mirrors __init__.
        self.async_call = True
        # since dashscope sdk will send first text in run-task
        self._start_stream_timestamp = -1
        self._first_package_timestamp = -1
        self._recv_audio_length = 0
        self.last_response = None

    def __update_params(
        self,
        model,
        voice,
        format: AudioFormat = AudioFormat.DEFAULT,
        volume=50,
        speech_rate=1.0,
        pitch_rate=1.0,
        seed=0,
        synthesis_type=0,
        instruction=None,
        language_hints: list = None,
        headers=None,
        callback: ResultCallback = None,
        workspace=None,
        url=None,
        additional_params=None,
        close_ws_after_use=True,
    ):
        # Validate and store all session parameters, then build the Request
        # object that serializes protocol messages.
        if model is None:
            raise ModelRequired('Model is required!')
        if format is None:
            raise InputRequired('format is required!')
        if url is None:
            url = dashscope.base_websocket_api_url
        self.url = url
        self.apikey = dashscope.api_key
        if self.apikey is None:
            raise InputRequired('apikey is required!')
        self.headers = headers
        self.workspace = workspace
        self.additional_params = additional_params
        self.model = model
        self.voice = voice
        self.aformat = format.format
        # AudioFormat.DEFAULT resolves to mp3 @ 22050 Hz.
        # NOTE(review): DEFAULT.format is 'Default' (mixed case), so this
        # comparison with 'DEFAULT' never matches — confirm intent.
        if (self.aformat == 'DEFAULT'):
            self.aformat = 'mp3'
        self.sample_rate = format.sample_rate
        if (self.sample_rate == 0):
            self.sample_rate = 22050

        self.callback = callback
        # No callback means the caller wants the blocking API: audio is
        # buffered locally and returned from call().
        if not self.callback:
            self.async_call = False
        self.request = Request(
            apikey=self.apikey,
            model=model,
            voice=voice,
            format=format.format,
            sample_rate=format.sample_rate,
            bit_rate=format.bit_rate,
            volume=volume,
            speech_rate=speech_rate,
            pitch_rate=pitch_rate,
            seed=seed,
            synthesis_type=synthesis_type,
            instruction=instruction,
            language_hints=language_hints
        )
        self.last_request_id = self.request.task_id
        self._close_ws_after_use = close_ws_after_use

    def __str__(self):
        return '[SpeechSynthesizer {} desc] model:{}, voice:{}, format:{}, sample_rate:{}, connected:{}'.format(
            self.__hash__(), self.model, self.voice, self.aformat,
            self.sample_rate, self.__is_connected())

    def __start_stream(self, ):
        # Begin a streaming session: connect (if needed), send run-task and
        # wait for the server's task-started acknowledgement.
        self._start_stream_timestamp = time.time() * 1000
        self._first_package_timestamp = -1
        self._recv_audio_length = 0
        if self.callback is None:
            raise InputRequired('callback is required!')
        # reset inner params
        self._stopped.clear()
        self._stream_data = ['']
        self._worker = None
        self._audio_data: bytes = None

        if self._is_started:
            raise InvalidTask('task has already started.')
        # Establish the websocket connection lazily.
        if self.ws is None:
            self.__connect(5)
        # Send the run-task command.
        request = self.request.getStartRequest(self.additional_params)
        self.__send_str(request)
        # NOTE(review): waits 10s but the error message says 5s.
        if not self.start_event.wait(10):
            raise TimeoutError('start speech synthesizer failed within 5s.')
        self._is_started = True
        if self.callback:
            self.callback.on_open()

    def __submit_text(self, text):
        # Send one continue-task frame carrying *text*.
        if not self._is_started:
            raise InvalidTask('speech synthesizer has not been started.')

        if self._stopped.is_set():
            raise InvalidTask('speech synthesizer task has stopped.')
        request = self.request.getContinueRequest(text)
        self.__send_str(request)

    def streaming_call(self, text: str):
        """
        Streaming input mode: You can call the stream_call function multiple times to send text.
        A session will be created on the first call.
        The session ends after calling streaming_complete.
        Parameters:
        -----------
        text: str
            utf-8 encoded text
        """
        # First call opens the session; later calls just append text.
        if self._is_first:
            self._is_first = False
            self.__start_stream()
        self.__submit_text(text)
        return None

    def streaming_complete(self, complete_timeout_millis=600000):
        """
        Synchronously stop the streaming input speech synthesis task.
        Wait for all remaining synthesized audio before returning

        Parameters:
        -----------
        complete_timeout_millis: int
            Throws TimeoutError exception if it times out. If the timeout is not None
            and greater than zero, it will wait for the corresponding number of
            milliseconds; otherwise, it will wait indefinitely.
        """
        if not self._is_started:
            raise InvalidTask('speech synthesizer has not been started.')
        if self._stopped.is_set():
            raise InvalidTask('speech synthesizer task has stopped.')
        request = self.request.getFinishRequest()
        self.__send_str(request)
        if complete_timeout_millis is not None and complete_timeout_millis > 0:
            if not self.complete_event.wait(timeout=complete_timeout_millis /
                                            1000):
                raise TimeoutError(
                    'speech synthesizer wait for complete timeout {}ms'.format(
                        complete_timeout_millis))
        else:
            self.complete_event.wait()
        if self._close_ws_after_use:
            self.close()
        self._stopped.set()
        self._is_started = False

    def __waiting_for_complete(self, timeout):
        # Background waiter used by async_streaming_complete: block until the
        # server signals completion, then tear down session state.
        if timeout is not None and timeout > 0:
            if not self.complete_event.wait(timeout=timeout / 1000):
                raise TimeoutError(
                    f'speech synthesizer wait for complete timeout {timeout}ms'
                )
        else:
            self.complete_event.wait()
        if self._close_ws_after_use:
            self.close()
        self._stopped.set()
        self._is_started = False

    def async_streaming_complete(self, complete_timeout_millis=600000):
        """
        Asynchronously stop the streaming input speech synthesis task, returns immediately.
        You need to listen and handle the STREAM_INPUT_TTS_EVENT_SYNTHESIS_COMPLETE event in the on_event callback.
        Do not destroy the object and callback before this event.

        Parameters:
        -----------
        complete_timeout_millis: int
            Throws TimeoutError exception if it times out. If the timeout is not None
            and greater than zero, it will wait for the corresponding number of
            milliseconds; otherwise, it will wait indefinitely.
        """

        if not self._is_started:
            raise InvalidTask('speech synthesizer has not been started.')
        if self._stopped.is_set():
            raise InvalidTask('speech synthesizer task has stopped.')
        request = self.request.getFinishRequest()
        self.__send_str(request)
        # The wait happens on a separate thread; any TimeoutError it raises
        # surfaces on that thread, not here.
        thread = threading.Thread(target=self.__waiting_for_complete,
                                  args=(complete_timeout_millis, ))
        thread.start()

    def streaming_cancel(self):
        """
        Immediately terminate the streaming input speech synthesis task
        and discard any remaining audio that is not yet delivered.
        """

        if not self._is_started:
            raise InvalidTask('speech synthesizer has not been started.')
        if self._stopped.is_set():
            return
        request = self.request.getFinishRequest()
        self.__send_str(request)
        # Closing the socket drops any audio still in flight; the events are
        # set so any waiters unblock.
        self.ws.close()
        self.start_event.set()
        self.complete_event.set()

    # Callback invoked for every message received on the websocket.
    def on_message(self, ws, message):
        if isinstance(message, str):
            logger.debug('<<<recv {}'.format(message))
            try:
                # Text frames are JSON protocol events.
                json_data = json.loads(message)
                self.last_response = json_data
                event = json_data['header'][EVENT_KEY]
                # Dispatch on the event type.
                if EventType.STARTED == event:
                    self.start_event.set()
                elif EventType.FINISHED == event:
                    self.complete_event.set()
                    if self.callback:
                        self.callback.on_complete()
                        self.callback.on_close()
                elif EventType.FAILED == event:
                    # Unblock any waiters before reporting the failure.
                    self.start_event.set()
                    self.complete_event.set()
                    if self.async_call:
                        self.callback.on_error(message)
                        self.callback.on_close()
                    else:
                        logger.error(f'TaskFailed: {message}')
                        # NOTE(review): raising inside a websocket-client
                        # callback propagates to on_error, not to the caller.
                        raise Exception(f'TaskFailed: {message}')
                elif EventType.GENERATED == event:
                    if self.callback:
                        self.callback.on_event(message)
                else:
                    pass
            except json.JSONDecodeError:
                logger.error('Failed to parse message as JSON.')
                raise Exception('Failed to parse message as JSON.')
        elif isinstance(message, (bytes, bytearray)):
            # Binary frames carry synthesized audio.
            logger.debug('<<<recv binary {}'.format(len(message)))
            if (self._recv_audio_length == 0):
                # First audio chunk: record first-package latency.
                self._first_package_timestamp = time.time() * 1000
                logger.debug('first package delay {}'.format(
                    self._first_package_timestamp -
                    self._start_stream_timestamp))
            # Convert bytes to milliseconds of audio.
            # NOTE(review): assumes 16-bit mono samples (2 bytes/sample); the
            # RTF metric is therefore approximate for mp3/opus formats.
            self._recv_audio_length += len(message) / (2 * self.sample_rate /
                                                       1000)
            current = time.time() * 1000
            current_rtf = (current - self._start_stream_timestamp
                           ) / self._recv_audio_length
            logger.debug('total audio {} ms, current_rtf: {}'.format(
                self._recv_audio_length, current_rtf))
            # Buffer audio locally only in blocking (non-async) mode.
            if not self.async_call:
                if self._audio_data is None:
                    self._audio_data = bytes(message)
                else:
                    self._audio_data = self._audio_data + bytes(message)
            if self.callback:
                self.callback.on_data(message)

    def call(self, text: str, timeout_millis=None):
        """
        Speech synthesis.
        If callback is set, the audio will be returned in real-time through the on_event interface.
        Otherwise, this function blocks until all audio is received and then returns the complete audio data.

        Parameters:
        -----------
        text: str
            utf-8 encoded text
        timeout_millis:
            Integer or None
        return: bytes
            If a callback is not set during initialization, the complete audio is returned
            as the function's return value. Otherwise, the return value is null.
            If the timeout is set to a value greater than zero and not None,
            it will wait for the corresponding number of milliseconds;
            otherwise, it will wait indefinitely.
        """
        # Non-streaming large-model synthesis is not supported by the SDK
        # yet, so one-shot calls are emulated through the streaming path.
        # call() always enables SSML parsing on the server side.
        if self.additional_params is None:
            self.additional_params = {"enable_ssml": True}
        else:
            self.additional_params["enable_ssml"] = True
        # In blocking mode, install a no-op callback so the streaming
        # machinery has something to invoke.
        if not self.callback:
            self.callback = ResultCallback()
        self.__start_stream()
        self.__submit_text(text)
        if self.async_call:
            self.async_streaming_complete(timeout_millis)
            return None
        else:
            self.streaming_complete(timeout_millis)
            return self._audio_data

    # Callback invoked when the websocket is closed.
    def on_close(self, ws, close_status_code, close_msg):
        pass

    # Callback invoked when a websocket error occurs.
    def on_error(self, ws, error):
        # NOTE(review): this raise happens on the websocket thread and will
        # not propagate to the caller; the print is the visible symptom.
        print(f'websocket closed due to {error}')
        raise Exception(f'websocket closed due to {error}')

    # Close the websocket connection.
    def close(self):
        self.ws.close()

    # Return the task id of the most recent request.
    def get_last_request_id(self):
        return self.last_request_id

    def get_first_package_delay(self):
        """First Package Delay is the time between start sending text and receive first audio package
        """
        return self._first_package_timestamp - self._start_stream_timestamp

    def get_response(self):
        # Last JSON protocol message received from the server (or None).
        return self.last_response
|
|
674
|
+
|
|
675
|
+
|
|
676
|
+
class SpeechSynthesizerObjectPool:
    # Lock guarding lazy creation of the singleton instance.
    _instance_lock = threading.Lock()

    def __new__(cls, *args, **kwargs):
        """Return the process-wide singleton instance.

        Uses double-checked locking: the unlocked hasattr check is a fast
        path; the check is repeated under the lock so only one thread can
        create the instance.
        """
        if not hasattr(SpeechSynthesizerObjectPool, '_instance'):
            with SpeechSynthesizerObjectPool._instance_lock:
                if not hasattr(SpeechSynthesizerObjectPool, '_instance'):
                    SpeechSynthesizerObjectPool._instance = object.__new__(cls)
        return SpeechSynthesizerObjectPool._instance
|
|
685
|
+
|
|
686
|
+
class PoolObject:
    """Pairs a pooled synthesizer with the timestamp of its last connect.

    ``connect_time`` stays at -1 until the pool stamps it with
    ``time.time()`` after establishing the connection.
    """

    def __init__(self, synthesizer):
        self.synthesizer: SpeechSynthesizer = synthesizer
        # -1 marks "never connected".
        self.connect_time = -1

    def __str__(self):
        return 'synthesizer: {}, connect_time: {}'.format(
            self.synthesizer, self.connect_time)
|
|
693
|
+
|
|
694
|
+
def __init__(self,
             max_size: int = 20,
             url=None,
             headers=None,
             workspace=None):
    """
    Speech synthesis object pool that follows the singleton pattern,
    establishes WebSocket connections in advance to avoid connection overhead.
    The connection pool will maintain a number of pre-created synthesizer objects
    up to max_size; objects taken from the pool do not need to be returned,
    and the pool will automatically replenish them.

    NOTE(review): borrow_synthesizer pops objects from the pool and only
    return_synthesizer re-adds them; "do not need to be returned" above
    looks inaccurate — confirm against intended usage.

    Parameters:
    -----------
    max_size: int
        Size of the object pool, with a value range of 1 to 100.
    url
        Optional service URL forwarded to every pooled synthesizer.
    headers
        Optional request headers forwarded to every pooled synthesizer.
    workspace
        Optional workspace forwarded to every pooled synthesizer.
    """
    self.DEFAULT_MODEL = 'cosyvoice-v1'
    self.DEFAULT_VOICE = 'longxiaochun'
    # Base interval (seconds) after which pooled connections are renewed.
    self.DEFAULT_RECONNECT_INTERVAL = 30
    self.DEFAULT_URL = url
    # NOTE(review): attribute name keeps the existing 'DEFAUTL' typo because
    # other methods of this class read it under that exact name.
    self.DEFAUTL_HEADERS = headers
    self.DEFAULT_WORKSPACE = workspace
    if max_size <= 0:
        raise ValueError('max_size must be greater than 0')
    # NOTE(review): max_size == 100 is accepted even though the message
    # says 'less than 100'.
    if max_size > 100:
        raise ValueError('max_size must be less than 100')
    self._pool = []
    # While an object is being reconnected its flag here is set to False so
    # it cannot be handed out mid-renewal.
    self._avaliable = []
    self._pool_size = max_size
    # Pre-connect max_size synthesizers so borrowers skip the handshake cost.
    for i in range(self._pool_size):
        synthesizer = self.__get_default_synthesizer()
        tmpPoolObject = self.PoolObject(synthesizer)
        tmpPoolObject.synthesizer._SpeechSynthesizer__connect()
        tmpPoolObject.connect_time = time.time()
        self._pool.append(tmpPoolObject)
        self._avaliable.append(True)
    self._borrowed_object_num = 0
    self._remain_object_num = max_size
    self._lock = threading.Lock()
    self._stop = False
    self._stop_lock = threading.Lock()
    # Background thread that pre-connects new objects and renews stale ones.
    self._working_thread = threading.Thread(target=self.__auto_reconnect,
                                            args=())
    self._working_thread.start()
|
|
740
|
+
|
|
741
|
+
def __get_default_synthesizer(self) -> SpeechSynthesizer:
    """Build a fresh SpeechSynthesizer configured with this pool's defaults."""
    default_config = {
        'model': self.DEFAULT_MODEL,
        'voice': self.DEFAULT_VOICE,
        'url': self.DEFAULT_URL,
        'headers': self.DEFAUTL_HEADERS,
        'workspace': self.DEFAULT_WORKSPACE,
    }
    return SpeechSynthesizer(**default_config)
|
|
747
|
+
|
|
748
|
+
def __get_reconnect_interval(self):
    """Return the reconnect interval with +/-5 s of random jitter, so the
    pool does not renew every connection at the same instant.
    """
    jitter = random.random() * 10 - 5
    return self.DEFAULT_RECONNECT_INTERVAL + jitter
|
|
750
|
+
|
|
751
|
+
def __auto_reconnect(self):
    """Background loop run by the pool's worker thread.

    Once per second it scans the pool under the lock, marks objects that
    need work as unavailable, then (outside the lock) pre-connects objects
    that were never connected and replaces objects whose connection is
    down or older than the jittered reconnect interval. Exits when
    ``self._stop`` is set.
    """
    logger.debug(
        'speech synthesizer object pool auto reconnect thread start')
    while True:
        objects_need_to_connect = []
        objects_need_to_renew = []
        logger.debug('scanning queue borr: {}/{} remain: {}/{}'.format(
            self._borrowed_object_num, self._pool_size,
            self._remain_object_num, self._pool_size))
        with self._lock:
            if self._stop:
                return

            current_time = time.time()
            for idx, poolObject in enumerate(self._pool):
                # Reconnect an object that has gone unused past the interval
                if poolObject.connect_time == -1:
                    # Never connected yet: connect it in place.
                    objects_need_to_connect.append(poolObject)
                    self._avaliable[idx] = False
                elif (not poolObject.synthesizer.
                      _SpeechSynthesizer__is_connected()) or (
                          current_time - poolObject.connect_time >
                          self.__get_reconnect_interval()):
                    # Connection dropped or aged out: replace the synthesizer.
                    objects_need_to_renew.append(poolObject)
                    self._avaliable[idx] = False
        # Network work happens outside the lock so borrowers are not blocked.
        for poolObject in objects_need_to_connect:
            logger.info(
                '[SpeechSynthesizerObjectPool] pre-connect new synthesizer'
            )
            poolObject.synthesizer._SpeechSynthesizer__connect()
            poolObject.connect_time = time.time()
        for poolObject in objects_need_to_renew:
            logger.info(
                '[SpeechSynthesizerObjectPool] renew synthesizer after {} s'
                .format(current_time - poolObject.connect_time))
            poolObject.synthesizer = self.__get_default_synthesizer()
            poolObject.synthesizer._SpeechSynthesizer__connect()
            poolObject.connect_time = time.time()
        with self._lock:
            # Scan finished: mark every slot usable again.
            for i in range(len(self._avaliable)):
                self._avaliable[i] = True
        time.sleep(1)
|
|
793
|
+
|
|
794
|
+
def shutdown(self):
    """
    This is a ThreadSafe Method.
    Destroy the object pool: stop the background reconnect thread and close
    every pooled WebSocket connection.
    """
    logger.debug('[SpeechSynthesizerObjectPool] start shutdown')
    # Signal the auto-reconnect thread to exit, then wait for it so it
    # cannot race with the teardown below.
    with self._lock:
        self._stop = True
    self._working_thread.join()
    with self._lock:
        pooled = self._pool
        self._pool = []
        self._avaliable = []
    # Best-effort close of the pre-established connections so the
    # underlying websockets are not leaked when the objects are dropped.
    for pool_object in pooled:
        try:
            pool_object.synthesizer.close()
        except Exception:
            logger.debug(
                '[SpeechSynthesizerObjectPool] failed to close pooled synthesizer',
                exc_info=True)
    logger.debug('[SpeechSynthesizerObjectPool] shutdown complete')
|
|
805
|
+
|
|
806
|
+
def borrow_synthesizer(
    self,
    model,
    voice,
    format: AudioFormat = AudioFormat.DEFAULT,
    volume=50,
    speech_rate=1.0,
    pitch_rate=1.0,
    seed=0,
    synthesis_type=0,
    instruction=None,
    language_hints: list = None,
    headers=None,
    callback: ResultCallback = None,
    workspace=None,
    url=None,
    additional_params=None,
):
    """
    This is a ThreadSafe Method.
    get a synthesizer object from the pool.
    objects taken from the pool need to be returned,
    and the pool will automatically replenish them.
    If there is no synthesizer object in the pool,
    a new synthesizer object will be created and returned.

    NOTE(review): the headers/workspace/url arguments are currently
    ignored; the pool defaults (DEFAUTL_HEADERS / DEFAULT_WORKSPACE /
    DEFAULT_URL) are always passed to the synthesizer instead — confirm
    this is intended.
    """
    logger.debug('[SpeechSynthesizerObjectPool] get synthesizer')
    synthesizer: SpeechSynthesizer = None
    with self._lock:
        # Scan the pool; if a pre-connected, available object exists,
        # take the first one (pop is safe here because we break at once).
        for idx, poolObject in enumerate(self._pool):
            if self._avaliable[
                    idx] and poolObject.synthesizer._SpeechSynthesizer__is_connected(
                    ):
                synthesizer = poolObject.synthesizer
                self._borrowed_object_num += 1
                self._remain_object_num -= 1
                self._pool.pop(idx)
                self._avaliable.pop(idx)
                break

    # If the pool is exhausted, fall back to a brand-new,
    # not-yet-connected object.
    if synthesizer is None:
        synthesizer = self.__get_default_synthesizer()
        logger.warning(
            '[SpeechSynthesizerObjectPool] object pool is exausted, create new synthesizer'
        )
    # Reset any previous session state, then apply the caller's parameters
    # (name-mangled access to SpeechSynthesizer's private helpers).
    synthesizer._SpeechSynthesizer__reset()
    synthesizer._SpeechSynthesizer__update_params(model, voice, format,
                                                  volume, speech_rate,
                                                  pitch_rate, seed, synthesis_type, instruction,
                                                  language_hints, self.DEFAUTL_HEADERS,
                                                  callback, self.DEFAULT_WORKSPACE, self.DEFAULT_URL,
                                                  additional_params, False)
    return synthesizer
|
|
861
|
+
|
|
862
|
+
def return_synthesizer(self, synthesizer) -> bool:
|
|
863
|
+
"""
|
|
864
|
+
This is a ThreadSafe Method.
|
|
865
|
+
return a synthesizer object back to the pool.
|
|
866
|
+
"""
|
|
867
|
+
if not isinstance(synthesizer, SpeechSynthesizer):
|
|
868
|
+
logger.error(
|
|
869
|
+
'[SpeechSynthesizerObjectPool] return_synthesizer: synthesizer is not a SpeechSynthesizer object'
|
|
870
|
+
)
|
|
871
|
+
return False
|
|
872
|
+
with self._lock:
|
|
873
|
+
if self._borrowed_object_num <= 0:
|
|
874
|
+
logger.debug(
|
|
875
|
+
'[SpeechSynthesizerObjectPool] pool is full, drop returned object'
|
|
876
|
+
)
|
|
877
|
+
return False
|
|
878
|
+
poolObject = self.PoolObject(synthesizer)
|
|
879
|
+
poolObject.connect_time = time.time()
|
|
880
|
+
self._pool.append(poolObject)
|
|
881
|
+
self._avaliable.append(True)
|
|
882
|
+
self._borrowed_object_num -= 1
|
|
883
|
+
self._remain_object_num += 1
|
|
884
|
+
logger.debug(
|
|
885
|
+
'[SpeechSynthesizerObjectPool] return synthesizer back to pool'
|
|
886
|
+
)
|