dashscope 1.24.7__py3-none-any.whl → 1.24.9__py3-none-any.whl
This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Potentially problematic release.
This version of dashscope might be problematic. Click here for more details.
- dashscope/audio/asr/recognition.py +1 -1
- dashscope/audio/qwen_omni/omni_realtime.py +45 -1
- dashscope/audio/tts_v2/enrollment.py +9 -4
- dashscope/multimodal/multimodal_request_params.py +32 -2
- dashscope/version.py +1 -1
- {dashscope-1.24.7.dist-info → dashscope-1.24.9.dist-info}/METADATA +1 -1
- {dashscope-1.24.7.dist-info → dashscope-1.24.9.dist-info}/RECORD +11 -11
- {dashscope-1.24.7.dist-info → dashscope-1.24.9.dist-info}/WHEEL +0 -0
- {dashscope-1.24.7.dist-info → dashscope-1.24.9.dist-info}/entry_points.txt +0 -0
- {dashscope-1.24.7.dist-info → dashscope-1.24.9.dist-info}/licenses/LICENSE +0 -0
- {dashscope-1.24.7.dist-info → dashscope-1.24.9.dist-info}/top_level.txt +0 -0
|
@@ -189,7 +189,7 @@ class Recognition(BaseApi):
|
|
|
189
189
|
responses = self.__launch_request()
|
|
190
190
|
for part in responses:
|
|
191
191
|
if part.status_code == HTTPStatus.OK:
|
|
192
|
-
if len(part.output) == 0:
|
|
192
|
+
if len(part.output) == 0 or ('finished' in part.output and part.output['finished'] == True):
|
|
193
193
|
self._on_complete_timestamp = time.time() * 1000
|
|
194
194
|
logger.debug('last package delay {}'.format(
|
|
195
195
|
self.get_last_package_delay()))
|
|
@@ -4,6 +4,7 @@ import json
|
|
|
4
4
|
import platform
|
|
5
5
|
import threading
|
|
6
6
|
import time
|
|
7
|
+
from dataclasses import field, dataclass
|
|
7
8
|
from typing import List
|
|
8
9
|
import uuid
|
|
9
10
|
from enum import Enum, unique
|
|
@@ -29,6 +30,26 @@ class OmniRealtimeCallback:
|
|
|
29
30
|
pass
|
|
30
31
|
|
|
31
32
|
|
|
33
|
+
@dataclass
|
|
34
|
+
class TranslationParams:
|
|
35
|
+
"""
|
|
36
|
+
TranslationParams
|
|
37
|
+
"""
|
|
38
|
+
language: str = field(default=None)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
|
|
42
|
+
class TranscriptionParams:
|
|
43
|
+
"""
|
|
44
|
+
TranscriptionParams
|
|
45
|
+
"""
|
|
46
|
+
language: str = field(default=None)
|
|
47
|
+
sample_rate: int = field(default=16000)
|
|
48
|
+
input_audio_format: str = field(default="pcm")
|
|
49
|
+
corpus: dict = field(default=None)
|
|
50
|
+
corpus_text: str = field(default=None)
|
|
51
|
+
|
|
52
|
+
|
|
32
53
|
@unique
|
|
33
54
|
class AudioFormat(Enum):
|
|
34
55
|
# format, sample_rate, channels, bit_rate, name
|
|
@@ -171,7 +192,7 @@ class OmniRealtimeConversation:
|
|
|
171
192
|
|
|
172
193
|
def update_session(self,
|
|
173
194
|
output_modalities: List[MultiModality],
|
|
174
|
-
voice: str,
|
|
195
|
+
voice: str = None,
|
|
175
196
|
input_audio_format: AudioFormat = AudioFormat.
|
|
176
197
|
PCM_16000HZ_MONO_16BIT,
|
|
177
198
|
output_audio_format: AudioFormat = AudioFormat.
|
|
@@ -184,6 +205,8 @@ class OmniRealtimeConversation:
|
|
|
184
205
|
turn_detection_threshold: float = 0.2,
|
|
185
206
|
turn_detection_silence_duration_ms: int = 800,
|
|
186
207
|
turn_detection_param: dict = None,
|
|
208
|
+
translation_params: TranslationParams = None,
|
|
209
|
+
transcription_params: TranscriptionParams = None,
|
|
187
210
|
**kwargs) -> None:
|
|
188
211
|
'''
|
|
189
212
|
update session configuration, should be used before create response
|
|
@@ -206,6 +229,13 @@ class OmniRealtimeConversation:
|
|
|
206
229
|
In a quiet environment, it may be necessary to decrease the threshold to improve sensitivity
|
|
207
230
|
turn_detection_silence_duration_ms: int
|
|
208
231
|
duration of silence in milliseconds to detect turn, range [200, 6000]
|
|
232
|
+
translation_params: TranslationParams
|
|
233
|
+
translation params, include language. Only effective with qwen3-livetranslate-flash-realtime model or
|
|
234
|
+
further models. Do not set this parameter for other models.
|
|
235
|
+
transcription_params: TranscriptionParams
|
|
236
|
+
transcription params, include language, sample_rate, input_audio_format, corpus.
|
|
237
|
+
Only effective with qwen3-asr-flash-realtime model or
|
|
238
|
+
further models. Do not set this parameter for other models.
|
|
209
239
|
'''
|
|
210
240
|
self.config = {
|
|
211
241
|
'modalities': [m.value for m in output_modalities],
|
|
@@ -230,6 +260,20 @@ class OmniRealtimeConversation:
|
|
|
230
260
|
self.config['turn_detection'].update(turn_detection_param)
|
|
231
261
|
else:
|
|
232
262
|
self.config['turn_detection'] = None
|
|
263
|
+
if translation_params is not None:
|
|
264
|
+
self.config['translation'] = {
|
|
265
|
+
'language': translation_params.language
|
|
266
|
+
}
|
|
267
|
+
if transcription_params is not None:
|
|
268
|
+
self.config['language'] = transcription_params.language
|
|
269
|
+
if transcription_params.corpus is not None:
|
|
270
|
+
self.config['corpus'] = transcription_params.corpus
|
|
271
|
+
if transcription_params.corpus_text is not None:
|
|
272
|
+
self.config['corpus'] = {
|
|
273
|
+
"text": transcription_params.corpus_text
|
|
274
|
+
}
|
|
275
|
+
self.config['input_audio_format'] = transcription_params.input_audio_format
|
|
276
|
+
self.config['sample_rate']= transcription_params.sample_rate
|
|
233
277
|
self.config.update(kwargs)
|
|
234
278
|
self.__send_str(
|
|
235
279
|
json.dumps({
|
|
@@ -68,20 +68,25 @@ class VoiceEnrollmentService(BaseApi):
|
|
|
68
68
|
logger.debug('>>>>recv', response)
|
|
69
69
|
return response
|
|
70
70
|
|
|
71
|
-
def create_voice(self, target_model: str, prefix: str, url: str) -> str:
|
|
71
|
+
def create_voice(self, target_model: str, prefix: str, url: str, language_hints: List[str] = None) -> str:
|
|
72
72
|
'''
|
|
73
73
|
创建新克隆音色
|
|
74
74
|
param: target_model 克隆音色对应的语音合成模型版本
|
|
75
75
|
param: prefix 音色自定义前缀,仅允许数字和小写字母,小于十个字符。
|
|
76
76
|
param: url 用于克隆的音频文件url
|
|
77
|
+
param: language_hints 克隆音色目标语言
|
|
77
78
|
return: voice_id
|
|
78
79
|
'''
|
|
79
|
-
|
|
80
|
+
|
|
81
|
+
input_params = {
|
|
80
82
|
'action': 'create_voice',
|
|
81
83
|
'target_model': target_model,
|
|
82
84
|
'prefix': prefix,
|
|
83
|
-
'url': url
|
|
84
|
-
}
|
|
85
|
+
'url': url
|
|
86
|
+
}
|
|
87
|
+
if language_hints is not None:
|
|
88
|
+
input_params['language_hints'] = language_hints
|
|
89
|
+
response = self.__call_with_input(input_params)
|
|
85
90
|
self._last_request_id = response.request_id
|
|
86
91
|
if response.status_code == 200:
|
|
87
92
|
return response.output['voice_id']
|
|
@@ -72,7 +72,31 @@ class RequestBodyInput(DashPayloadInput):
|
|
|
72
72
|
"directive": self.directive,
|
|
73
73
|
"dialog_id": self.dialog_id
|
|
74
74
|
}
|
|
75
|
+
@dataclass
|
|
76
|
+
class AsrPostProcessing:
|
|
77
|
+
replace_words: list = field(default=None)
|
|
78
|
+
|
|
79
|
+
def to_dict(self):
|
|
80
|
+
if self.replace_words is None:
|
|
81
|
+
return None
|
|
82
|
+
if len(self.replace_words) == 0:
|
|
83
|
+
return None
|
|
84
|
+
return {
|
|
85
|
+
"replace_words": [word.to_dict() for word in self.replace_words]
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
@dataclass
|
|
89
|
+
class ReplaceWord:
|
|
90
|
+
source: str = field(default=None)
|
|
91
|
+
target: str = field(default=None)
|
|
92
|
+
match_mode: str = field(default=None)
|
|
75
93
|
|
|
94
|
+
def to_dict(self):
|
|
95
|
+
return {
|
|
96
|
+
"source": self.source,
|
|
97
|
+
"target": self.target,
|
|
98
|
+
"match_mode": self.match_mode
|
|
99
|
+
}
|
|
76
100
|
|
|
77
101
|
@dataclass
|
|
78
102
|
class Upstream:
|
|
@@ -80,7 +104,9 @@ class Upstream:
|
|
|
80
104
|
audio_format: str = field(default="pcm") # 上行语音格式,默认pcm.支持pcm/opus
|
|
81
105
|
type: str = field(default="AudioOnly") # 上行类型:AudioOnly 仅语音通话; AudioAndVideo 上传视频
|
|
82
106
|
mode: str = field(default="tap2talk") # 客户端交互模式 push2talk/tap2talk/duplex
|
|
83
|
-
|
|
107
|
+
sample_rate: int = field(default=16000) # 音频采样率
|
|
108
|
+
vocabulary_id: str = field(default=None)
|
|
109
|
+
asr_post_processing: AsrPostProcessing = field(default=None)
|
|
84
110
|
pass_through_params: dict = field(default=None)
|
|
85
111
|
|
|
86
112
|
def to_dict(self):
|
|
@@ -88,8 +114,12 @@ class Upstream:
|
|
|
88
114
|
"type": self.type,
|
|
89
115
|
"mode": self.mode,
|
|
90
116
|
"audio_format": self.audio_format,
|
|
91
|
-
|
|
117
|
+
"sample_rate": self.sample_rate,
|
|
118
|
+
"vocabulary_id": self.vocabulary_id,
|
|
92
119
|
}
|
|
120
|
+
if self.asr_post_processing is not None:
|
|
121
|
+
upstream["asr_post_processing"] = self.asr_post_processing.to_dict()
|
|
122
|
+
|
|
93
123
|
if self.pass_through_params is not None:
|
|
94
124
|
upstream.update(self.pass_through_params)
|
|
95
125
|
return upstream
|
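dashscope/version.py
CHANGED
(No hunk for dashscope/version.py is shown here; the hunks that follow list `path,sha256=…,size` entries and belong to dashscope-1.24.9.dist-info/RECORD.)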
|
@@ -3,7 +3,7 @@ dashscope/cli.py,sha256=64oGkevgX0RHPPmMg0sevXDgaFLQNA_0vdtjQ7Z2pHM,26492
|
|
|
3
3
|
dashscope/files.py,sha256=vRDQygm3lOqBZR73o7KNHs1iTBVuvLncuwJNxIYjzAU,3981
|
|
4
4
|
dashscope/model.py,sha256=B5v_BtYLPqj6raClejBgdKg6WTGwhH_f-20pvsQqmsk,1491
|
|
5
5
|
dashscope/models.py,sha256=dE4mzXkl85G343qVylSGpURPRdA5pZSqXlx6PcxqC_Q,1275
|
|
6
|
-
dashscope/version.py,sha256=
|
|
6
|
+
dashscope/version.py,sha256=MtrJg1M58kTZJEqP85UuTy6JbT8pmsUXF5os_JzMTWI,74
|
|
7
7
|
dashscope/aigc/__init__.py,sha256=kYvYEoRK-NUHyMWpBDNQBz4fVA__uOhHRK2kDTBaWgk,617
|
|
8
8
|
dashscope/aigc/chat_completion.py,sha256=ONlyyssIbfaKKcFo7cEKhHx5OCF2XX810HFzIExW1ho,14813
|
|
9
9
|
dashscope/aigc/code_generation.py,sha256=p_mxDKJLQMW0IjFD46JRlZuEZCRESSVKEfLlAevBtqw,10936
|
|
@@ -32,12 +32,12 @@ dashscope/assistants/files.py,sha256=CaQkZK7TFeMaAxtqMi-1rBVJrlKXdehZG9plNZ6zslo
|
|
|
32
32
|
dashscope/audio/__init__.py,sha256=7e3ejVsDJxEbMHN-9E0nEDfU-CnnQ4JgtgUxqNs0IG4,192
|
|
33
33
|
dashscope/audio/asr/__init__.py,sha256=JoCenJAUVOQXPmAn1toKeFYCfc8BqNn0NKpqjuJvNJc,1055
|
|
34
34
|
dashscope/audio/asr/asr_phrase_manager.py,sha256=vHOLExaKCtjedkihIu7gyfQyarR9rN5JZn79LvlCpco,7693
|
|
35
|
-
dashscope/audio/asr/recognition.py,sha256
|
|
35
|
+
dashscope/audio/asr/recognition.py,sha256=-xMcdwHbjTV1RIQRQguzBzB5pKd1tskrDukLgzsJDi8,20999
|
|
36
36
|
dashscope/audio/asr/transcription.py,sha256=lYzPjh7jJQwjMoxx8-AY0YCMBKNKO0bi7xd5tZGSHPc,9094
|
|
37
37
|
dashscope/audio/asr/translation_recognizer.py,sha256=JgBmhkIl_kqH8uVwop6Fba5KlXccftKFrhaygN9PKjU,39680
|
|
38
38
|
dashscope/audio/asr/vocabulary.py,sha256=N0pMS2x1lDxqJ14FgTGKctfuVkR2_hlEsCNWFcgYpTY,6717
|
|
39
39
|
dashscope/audio/qwen_omni/__init__.py,sha256=MEFxmyxr5H6bW22l_R9073Pl6Ka6knvhrATGT-4UBjI,298
|
|
40
|
-
dashscope/audio/qwen_omni/omni_realtime.py,sha256=
|
|
40
|
+
dashscope/audio/qwen_omni/omni_realtime.py,sha256=b7t14nsciA8YcJ4MGr2GzmDxbgBR2wpbml9ZG_GNqiI,16722
|
|
41
41
|
dashscope/audio/qwen_tts/__init__.py,sha256=JS3axY1grqO0aTIJufZ3KS1JsU6yf6y4K2CQlNvUK9I,132
|
|
42
42
|
dashscope/audio/qwen_tts/speech_synthesizer.py,sha256=7LHR-PXhn-VE1cCOp_82Jq0zE9rMc3xy3dszUeyLLNs,2927
|
|
43
43
|
dashscope/audio/qwen_tts_realtime/__init__.py,sha256=vVkmeJr_mEAn_O0Rh5AU3ICg6qIZqppUryJ5lY8VYPo,254
|
|
@@ -45,7 +45,7 @@ dashscope/audio/qwen_tts_realtime/qwen_tts_realtime.py,sha256=uMLglxRjUZgol9Z7MT
|
|
|
45
45
|
dashscope/audio/tts/__init__.py,sha256=xYpMFseUZGgqgj_70zcX2VsLv-L7qxJ3d-bbdj_hO0I,245
|
|
46
46
|
dashscope/audio/tts/speech_synthesizer.py,sha256=vD1xQV-rew8qAsIaAGH5amsNtB0SqdtNhVHhJHGQ-xk,7622
|
|
47
47
|
dashscope/audio/tts_v2/__init__.py,sha256=me9a3_7KsHQxcJ8hx4SeKlY1e_ThHVvGMw7Yn0uoscM,333
|
|
48
|
-
dashscope/audio/tts_v2/enrollment.py,sha256
|
|
48
|
+
dashscope/audio/tts_v2/enrollment.py,sha256=ekeZJz_swhI0OwRANuUwsZjdP0rRoUergSsCUQmsh8E,6577
|
|
49
49
|
dashscope/audio/tts_v2/speech_synthesizer.py,sha256=p764P4TYwLkvvPCpA4VnFwlNbIJbuNbp2d9mxgni7Ws,22047
|
|
50
50
|
dashscope/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
51
51
|
dashscope/client/base_api.py,sha256=znAJ65DeHiFw1H7FWK0YrkLz1CoNcyqUxF8EJ3gujeY,52523
|
|
@@ -73,7 +73,7 @@ dashscope/multimodal/__init__.py,sha256=fyqeolbDLWVn5wSpPZ3nAOnUBRF9k6mlsy6dCmgj
|
|
|
73
73
|
dashscope/multimodal/dialog_state.py,sha256=CtOdfGWhq0ePG3bc8-7inhespETtPD4QDli1513hd1A,1522
|
|
74
74
|
dashscope/multimodal/multimodal_constants.py,sha256=z_QVq01E43FAqKQnDu9vdf89d1zuYlWyANewWTEXVJM,1282
|
|
75
75
|
dashscope/multimodal/multimodal_dialog.py,sha256=HymlaQYp7SgJdoKbT27SNiviyRRoM91zklNBwTHmm1Q,23939
|
|
76
|
-
dashscope/multimodal/multimodal_request_params.py,sha256=
|
|
76
|
+
dashscope/multimodal/multimodal_request_params.py,sha256=iOnATOdv4aRp5ffU8lY2Gu0UNlz-sBCEun2zFG4saFk,9356
|
|
77
77
|
dashscope/multimodal/tingwu/__init__.py,sha256=Gi9GEM0bdeJlZpvyksSeHOc2--_tG5aF6QAx6TAS2fE,225
|
|
78
78
|
dashscope/multimodal/tingwu/tingwu.py,sha256=01d-QOeuB1QmRhiZqbXJ8pHoGqT0C-xZTjIs_ZBXOyw,2613
|
|
79
79
|
dashscope/multimodal/tingwu/tingwu_realtime.py,sha256=oBeqrZit3uBZHuyI7m9VILz2qaqJRMO0-Nm2eJ5Q63g,20215
|
|
@@ -100,9 +100,9 @@ dashscope/tokenizers/tokenizer.py,sha256=3FQVDvMNkCW9ccYeJdjrd_PIMMD3Xv7aNZkaYOE
|
|
|
100
100
|
dashscope/tokenizers/tokenizer_base.py,sha256=5EJIFuizMWESEmLmbd38yJnfeHmPnzZPwsO4aOGjpl4,707
|
|
101
101
|
dashscope/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
102
102
|
dashscope/utils/oss_utils.py,sha256=aZIHlMN2JOfVw6kp0SVrMw_N1MfoTcR_-wiRbJ7DgHw,7501
|
|
103
|
-
dashscope-1.24.
|
|
104
|
-
dashscope-1.24.
|
|
105
|
-
dashscope-1.24.
|
|
106
|
-
dashscope-1.24.
|
|
107
|
-
dashscope-1.24.
|
|
108
|
-
dashscope-1.24.
|
|
103
|
+
dashscope-1.24.9.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
104
|
+
dashscope-1.24.9.dist-info/METADATA,sha256=30oaoqG5k4PuhwOh7ndPEvAYH1cFJ3JneX58N8id_qE,7146
|
|
105
|
+
dashscope-1.24.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
106
|
+
dashscope-1.24.9.dist-info/entry_points.txt,sha256=e9C3sOf9zDYL0O5ROEGX6FT8w-QK_kaGRWmPZDHAFys,49
|
|
107
|
+
dashscope-1.24.9.dist-info/top_level.txt,sha256=woqavFJK9zas5xTqynmALqOtlafghjsk63Xk86powTU,10
|
|
108
|
+
dashscope-1.24.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|