cloudlanguagetools 11.5.0__tar.gz → 11.6.0__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/PKG-INFO +1 -1
- cloudlanguagetools-11.6.0/cloudlanguagetools/alibaba.py +264 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/amazon.py +19 -8
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/constants.py +2 -1
- cloudlanguagetools-11.6.0/cloudlanguagetools/errors.py +26 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/google.py +16 -3
- cloudlanguagetools-11.6.0/cloudlanguagetools/keys.py +1 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/service.py +2 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/servicemanager.py +2 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools.egg-info/PKG-INFO +1 -1
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools.egg-info/SOURCES.txt +1 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/setup.py +1 -1
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/tests/test_audio.py +112 -40
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/tests/test_translation.py +22 -20
- cloudlanguagetools-11.5.0/cloudlanguagetools/errors.py +0 -15
- cloudlanguagetools-11.5.0/cloudlanguagetools/keys.py +0 -1
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/LICENSE +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/README.rst +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/__init__.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/argostranslate.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/audio_processing.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/azure.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/cereproc.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/chatapi.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/deepl.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/dictionarylookup.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/easypronunciation.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/elevenlabs.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/encryption.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/epitran.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/forvo.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/fptai.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/languages.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/libretranslate.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/mandarincantonese.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/naver.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/openai.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/options.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/pythainlp.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/spacy.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/test_services.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/tokenization.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/translationlanguage.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/transliterationlanguage.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/ttsvoice.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/vocalware.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/voicen.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/watson.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/wenlin.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools.egg-info/dependency_links.txt +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools.egg-info/requires.txt +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools.egg-info/top_level.txt +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/setup.cfg +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/tests/test_breakdown.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/tests/test_chatapi.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/tests/test_dictionary_lookup.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/tests/test_llm.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/tests/test_mock_services.py +0 -0
- {cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/tests/test_servicemanager.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: cloudlanguagetools
|
3
|
-
Version: 11.5.0
|
3
|
+
Version: 11.6.0
|
4
4
|
Summary: Interface with various cloud APIs for language processing such as translation, text to speech
|
5
5
|
Home-page: https://github.com/Language-Tools/cloud-language-tools-core
|
6
6
|
Author: Luc
|
@@ -0,0 +1,264 @@
|
|
1
|
+
import json
|
2
|
+
import requests
|
3
|
+
import time
|
4
|
+
import datetime
|
5
|
+
import uuid
|
6
|
+
import urllib
|
7
|
+
import hmac
|
8
|
+
import base64
|
9
|
+
import logging
|
10
|
+
import tempfile
|
11
|
+
from typing import List
|
12
|
+
|
13
|
+
import cloudlanguagetools.service
|
14
|
+
import cloudlanguagetools.options
|
15
|
+
import cloudlanguagetools.constants
|
16
|
+
import cloudlanguagetools.languages
|
17
|
+
import cloudlanguagetools.ttsvoice
|
18
|
+
import cloudlanguagetools.constants
|
19
|
+
import cloudlanguagetools.languages
|
20
|
+
import cloudlanguagetools.ttsvoice
|
21
|
+
import cloudlanguagetools.errors
|
22
|
+
from cloudlanguagetools.options import AudioFormat
|
23
|
+
|
24
|
+
logger = logging.getLogger(__name__)
|
25
|
+
|
26
|
+
ALIBABA_VOICE_SPEED_DEFAULT = 0
|
27
|
+
ALIBABA_VOICE_PITCH_DEFAULT = 0
|
28
|
+
|
29
|
+
VOICE_OPTIONS = {
|
30
|
+
'speed': {'type': 'number_int', 'min': -500, 'max': 500, 'default': ALIBABA_VOICE_SPEED_DEFAULT},
|
31
|
+
'pitch': {'type': 'number_int', 'min': -500, 'max': 500, 'default': ALIBABA_VOICE_PITCH_DEFAULT},
|
32
|
+
'volume': {'type': 'number_int', 'min': 0, 'max': 100, 'default': 50},
|
33
|
+
cloudlanguagetools.options.AUDIO_FORMAT_PARAMETER: {
|
34
|
+
'type': cloudlanguagetools.options.ParameterType.list.name,
|
35
|
+
'values': ['pcm', 'wav', 'mp3'],
|
36
|
+
'default': 'mp3'
|
37
|
+
}
|
38
|
+
}
|
39
|
+
|
40
|
+
|
41
|
+
class AlibabaService(cloudlanguagetools.service.Service):
|
42
|
+
def __init__(self):
|
43
|
+
self.access_token = None
|
44
|
+
|
45
|
+
def configure(self, config):
|
46
|
+
self.access_key_id = config['access_key_id']
|
47
|
+
self.access_key_secret = config['access_key_secret']
|
48
|
+
self.app_key = config['app_key']
|
49
|
+
|
50
|
+
def refresh_token(self):
|
51
|
+
logger.info("refreshing token")
|
52
|
+
params = {
|
53
|
+
"AccessKeyId": self.access_key_id,
|
54
|
+
"Action": "CreateToken",
|
55
|
+
"Version": "2019-07-17",
|
56
|
+
"Format": "JSON",
|
57
|
+
"RegionId": "ap-southeast-1",
|
58
|
+
"Timestamp": datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
|
59
|
+
"SignatureMethod": "HMAC-SHA1",
|
60
|
+
"SignatureVersion": "1.0",
|
61
|
+
"SignatureNonce": str(uuid.uuid4())
|
62
|
+
}
|
63
|
+
|
64
|
+
# sort by keys alphabetically
|
65
|
+
params = dict(sorted(params.items()))
|
66
|
+
# timestamp needs to be double-quoted by the end
|
67
|
+
params["Timestamp"] = urllib.parse.quote(params["Timestamp"], safe='')
|
68
|
+
|
69
|
+
# urlencode with noop lambda for no quoting - we will quote later
|
70
|
+
params_str = urllib.parse.urlencode(params, quote_via=lambda a, b, c, d: a)
|
71
|
+
|
72
|
+
# this is always /, as we always hit the path / on the API
|
73
|
+
url_encoded = urllib.parse.quote("/", safe='')
|
74
|
+
str_to_sign = f"GET&{url_encoded}&{urllib.parse.quote(params_str, safe='')}"
|
75
|
+
str_to_sign = str_to_sign.encode("utf-8")
|
76
|
+
|
77
|
+
key = self.access_key_secret + "&"
|
78
|
+
key = key.encode("utf-8")
|
79
|
+
|
80
|
+
# calculate HMAC-SHA1 digest and convert to base64
|
81
|
+
dig = hmac.new(key, str_to_sign, "sha1").digest()
|
82
|
+
dig = base64.standard_b64encode(dig).decode("utf-8")
|
83
|
+
|
84
|
+
# signature also needs to be quoted
|
85
|
+
signature = urllib.parse.quote(dig, safe='')
|
86
|
+
params_str = f"Signature={signature}&{params_str}"
|
87
|
+
|
88
|
+
response = requests.get(f"http://nlsmeta.ap-southeast-1.aliyuncs.com/?{params_str}")
|
89
|
+
|
90
|
+
if response.status_code != 200:
|
91
|
+
logger.warning(f"Token request failed: {response.text}")
|
92
|
+
raise cloudlanguagetools.errors.RequestError("Token request failed", None, response.text)
|
93
|
+
|
94
|
+
data = response.json()
|
95
|
+
self.access_token = data["Token"]
|
96
|
+
logger.info(f"Got access token: {self.access_token}")
|
97
|
+
|
98
|
+
def get_tts_audio(self, text, voice_key, options):
|
99
|
+
if not self.access_token or self.access_token["ExpireTime"] <= int(time.time()):
|
100
|
+
self.refresh_token()
|
101
|
+
|
102
|
+
speed = int(options.get('speed', ALIBABA_VOICE_SPEED_DEFAULT))
|
103
|
+
pitch = int(options.get('pitch', ALIBABA_VOICE_PITCH_DEFAULT))
|
104
|
+
voice = voice_key['name']
|
105
|
+
|
106
|
+
params = {
|
107
|
+
"format": "mp3",
|
108
|
+
"appkey": self.app_key,
|
109
|
+
"speech_rate": speed,
|
110
|
+
"pitch_rate": pitch,
|
111
|
+
"text": text,
|
112
|
+
"token": self.access_token["Id"],
|
113
|
+
"voice": voice
|
114
|
+
}
|
115
|
+
|
116
|
+
response = requests.get(
|
117
|
+
"https://nls-gateway-ap-southeast-1.aliyuncs.com/stream/v1/tts",
|
118
|
+
params=params,
|
119
|
+
timeout=cloudlanguagetools.constants.RequestTimeout
|
120
|
+
)
|
121
|
+
|
122
|
+
if response.status_code != 200:
|
123
|
+
data = response.json()
|
124
|
+
error_message = data.get('message', str(data))
|
125
|
+
logger.warning(error_message)
|
126
|
+
raise cloudlanguagetools.errors.RequestError(text, voice, error_message)
|
127
|
+
|
128
|
+
if response.headers['Content-Type'] != 'audio/mpeg':
|
129
|
+
logger.warning(f'Unexpected response type. Response as text: {response.text}')
|
130
|
+
raise cloudlanguagetools.errors.RequestError(
|
131
|
+
text, voice,
|
132
|
+
f'Got bad content type in response: {response.headers["Content-Type"]}'
|
133
|
+
)
|
134
|
+
|
135
|
+
# Create a temporary file and write the audio content to it
|
136
|
+
output_temp_file = tempfile.NamedTemporaryFile(prefix=f'cloudlanguage_tools_{self.__class__.__name__}_audio', suffix='.mp3')
|
137
|
+
output_temp_file.write(response.content)
|
138
|
+
output_temp_file.flush()
|
139
|
+
return output_temp_file
|
140
|
+
|
141
|
+
def get_tts_voice_list(self):
|
142
|
+
# returns list of TtsVoice
|
143
|
+
return []
|
144
|
+
|
145
|
+
def build_tts_voice_v3(self, name: str, voice_id: str, gender: cloudlanguagetools.constants.Gender, audio_languages: List[cloudlanguagetools.languages.AudioLanguage]):
|
146
|
+
return cloudlanguagetools.ttsvoice.TtsVoice_v3(
|
147
|
+
name=name,
|
148
|
+
voice_key={'name': voice_id},
|
149
|
+
options=VOICE_OPTIONS,
|
150
|
+
service=cloudlanguagetools.constants.Service.Alibaba,
|
151
|
+
gender=gender,
|
152
|
+
audio_languages=audio_languages,
|
153
|
+
service_fee=cloudlanguagetools.constants.ServiceFee.paid
|
154
|
+
)
|
155
|
+
|
156
|
+
def get_tts_voice_list_v3(self) -> List[cloudlanguagetools.ttsvoice.TtsVoice_v3]:
|
157
|
+
result = []
|
158
|
+
|
159
|
+
# Standard voices for all scenarios
|
160
|
+
result.extend([
|
161
|
+
# Standard Chinese voices
|
162
|
+
self.build_tts_voice_v3('Xiaoyun (Standard)', 'Xiaoyun',
|
163
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN, cloudlanguagetools.languages.AudioLanguage.en_US]),
|
164
|
+
self.build_tts_voice_v3('Xiaogang (Standard)', 'Xiaogang',
|
165
|
+
cloudlanguagetools.constants.Gender.Male, [cloudlanguagetools.languages.AudioLanguage.zh_CN, cloudlanguagetools.languages.AudioLanguage.en_US]),
|
166
|
+
self.build_tts_voice_v3('Ruoxi (Gentle)', 'Ruoxi',
|
167
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN, cloudlanguagetools.languages.AudioLanguage.en_US]),
|
168
|
+
self.build_tts_voice_v3('Siqi (Gentle)', 'Siqi',
|
169
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN, cloudlanguagetools.languages.AudioLanguage.en_US]),
|
170
|
+
self.build_tts_voice_v3('Sijia (Standard)', 'Sijia',
|
171
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN, cloudlanguagetools.languages.AudioLanguage.en_US]),
|
172
|
+
self.build_tts_voice_v3('Sicheng (Standard)', 'Sicheng',
|
173
|
+
cloudlanguagetools.constants.Gender.Male, [cloudlanguagetools.languages.AudioLanguage.zh_CN, cloudlanguagetools.languages.AudioLanguage.en_US]),
|
174
|
+
self.build_tts_voice_v3('Aiqi (Gentle)', 'Aiqi',
|
175
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN, cloudlanguagetools.languages.AudioLanguage.en_US]),
|
176
|
+
self.build_tts_voice_v3('Aijia (Standard)', 'Aijia',
|
177
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN, cloudlanguagetools.languages.AudioLanguage.en_US]),
|
178
|
+
self.build_tts_voice_v3('Aicheng (Standard)', 'Aicheng',
|
179
|
+
cloudlanguagetools.constants.Gender.Male, [cloudlanguagetools.languages.AudioLanguage.zh_CN, cloudlanguagetools.languages.AudioLanguage.en_US]),
|
180
|
+
self.build_tts_voice_v3('Aida (Standard)', 'Aida',
|
181
|
+
cloudlanguagetools.constants.Gender.Male, [cloudlanguagetools.languages.AudioLanguage.zh_CN, cloudlanguagetools.languages.AudioLanguage.en_US]),
|
182
|
+
self.build_tts_voice_v3("Ning'er", 'Ninger',
|
183
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN]),
|
184
|
+
self.build_tts_voice_v3('Ruilin', 'Ruilin',
|
185
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN]),
|
186
|
+
|
187
|
+
# Customer service voices
|
188
|
+
self.build_tts_voice_v3('Siyue (Gentle)', 'Siyue',
|
189
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN, cloudlanguagetools.languages.AudioLanguage.en_US]),
|
190
|
+
self.build_tts_voice_v3('Aiya (Harsh)', 'Aiya',
|
191
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN, cloudlanguagetools.languages.AudioLanguage.en_US]),
|
192
|
+
self.build_tts_voice_v3('Aixia (Amiable)', 'Aixia',
|
193
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN, cloudlanguagetools.languages.AudioLanguage.en_US]),
|
194
|
+
self.build_tts_voice_v3('Aimei (Sweet)', 'Aimei',
|
195
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN, cloudlanguagetools.languages.AudioLanguage.en_US]),
|
196
|
+
self.build_tts_voice_v3('Aiyu (Natural)', 'Aiyu',
|
197
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN, cloudlanguagetools.languages.AudioLanguage.en_US]),
|
198
|
+
self.build_tts_voice_v3('Aiyue (Gentle)', 'Aiyue',
|
199
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN, cloudlanguagetools.languages.AudioLanguage.en_US]),
|
200
|
+
self.build_tts_voice_v3('Aijing (Harsh)', 'Aijing',
|
201
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN, cloudlanguagetools.languages.AudioLanguage.en_US]),
|
202
|
+
self.build_tts_voice_v3('Xiaomei (Sweet)', 'Xiaomei',
|
203
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN, cloudlanguagetools.languages.AudioLanguage.en_US]),
|
204
|
+
|
205
|
+
# Regional accent voices
|
206
|
+
self.build_tts_voice_v3('Aina (Zhejiang)', 'Aina',
|
207
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN]),
|
208
|
+
self.build_tts_voice_v3('Yina (Zhejiang)', 'Yina',
|
209
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN]),
|
210
|
+
self.build_tts_voice_v3('Sijing (Harsh)', 'Sijing',
|
211
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN]),
|
212
|
+
|
213
|
+
# Child voices
|
214
|
+
self.build_tts_voice_v3('Sitong (Child)', 'Sitong',
|
215
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN]),
|
216
|
+
self.build_tts_voice_v3('Xiaobei (Little Girl)', 'Xiaobei',
|
217
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN]),
|
218
|
+
self.build_tts_voice_v3('Aitong (Child)', 'Aitong',
|
219
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN]),
|
220
|
+
self.build_tts_voice_v3('Aiwei (Little Girl)', 'Aiwei',
|
221
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN]),
|
222
|
+
self.build_tts_voice_v3('Aibao (Little Girl)', 'Aibao',
|
223
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN]),
|
224
|
+
|
225
|
+
# English voices
|
226
|
+
self.build_tts_voice_v3('Harry (British)', 'Harry',
|
227
|
+
cloudlanguagetools.constants.Gender.Male, [cloudlanguagetools.languages.AudioLanguage.en_GB]),
|
228
|
+
self.build_tts_voice_v3('Abby (American)', 'Abby',
|
229
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.en_US]),
|
230
|
+
self.build_tts_voice_v3('Andy (American)', 'Andy',
|
231
|
+
cloudlanguagetools.constants.Gender.Male, [cloudlanguagetools.languages.AudioLanguage.en_US]),
|
232
|
+
self.build_tts_voice_v3('Eric (British)', 'Eric',
|
233
|
+
cloudlanguagetools.constants.Gender.Male, [cloudlanguagetools.languages.AudioLanguage.en_GB]),
|
234
|
+
self.build_tts_voice_v3('Emily (British)', 'Emily',
|
235
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.en_GB]),
|
236
|
+
self.build_tts_voice_v3('Luna (British)', 'Luna',
|
237
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.en_GB]),
|
238
|
+
self.build_tts_voice_v3('Luca (British)', 'Luca',
|
239
|
+
cloudlanguagetools.constants.Gender.Male, [cloudlanguagetools.languages.AudioLanguage.en_GB]),
|
240
|
+
self.build_tts_voice_v3('Wendy (British)', 'Wendy',
|
241
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.en_GB]),
|
242
|
+
self.build_tts_voice_v3('William (British)', 'William',
|
243
|
+
cloudlanguagetools.constants.Gender.Male, [cloudlanguagetools.languages.AudioLanguage.en_GB]),
|
244
|
+
self.build_tts_voice_v3('Olivia (British)', 'Olivia',
|
245
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.en_GB]),
|
246
|
+
|
247
|
+
# Special dialect voices
|
248
|
+
self.build_tts_voice_v3('Shanshan (Cantonese)', 'Shanshan',
|
249
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_HK]),
|
250
|
+
self.build_tts_voice_v3('Xiaoyue (Sichuan)', 'Xiaoyue',
|
251
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN]),
|
252
|
+
self.build_tts_voice_v3('Lydia (Bilingual)', 'Lydia',
|
253
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.en_US]),
|
254
|
+
self.build_tts_voice_v3('Aishuo (Natural)', 'Aishuo',
|
255
|
+
cloudlanguagetools.constants.Gender.Male, [cloudlanguagetools.languages.AudioLanguage.zh_CN, cloudlanguagetools.languages.AudioLanguage.en_US]),
|
256
|
+
self.build_tts_voice_v3('Qingqing (Taiwanese)', 'Qingqing',
|
257
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN]),
|
258
|
+
self.build_tts_voice_v3('Cuijie (Northeastern)', 'Cuijie',
|
259
|
+
cloudlanguagetools.constants.Gender.Female, [cloudlanguagetools.languages.AudioLanguage.zh_CN]),
|
260
|
+
self.build_tts_voice_v3('Xiaoze (Hunan)', 'Xiaoze',
|
261
|
+
cloudlanguagetools.constants.Gender.Male, [cloudlanguagetools.languages.AudioLanguage.zh_CN])
|
262
|
+
])
|
263
|
+
|
264
|
+
return result
|
@@ -5,7 +5,8 @@ import os
|
|
5
5
|
import boto3
|
6
6
|
import botocore.exceptions
|
7
7
|
import contextlib
|
8
|
-
|
8
|
+
import logging
|
9
|
+
import pprint
|
9
10
|
|
10
11
|
import cloudlanguagetools.service
|
11
12
|
import cloudlanguagetools.constants
|
@@ -19,6 +20,8 @@ import cloudlanguagetools.audio_processing
|
|
19
20
|
|
20
21
|
from cloudlanguagetools.options import AudioFormat
|
21
22
|
|
23
|
+
logger = logging.getLogger(__name__)
|
24
|
+
|
22
25
|
DEFAULT_VOICE_PITCH = 0
|
23
26
|
DEFAULT_VOICE_RATE = 100
|
24
27
|
|
@@ -36,7 +39,7 @@ def get_audio_language_enum(language_code):
|
|
36
39
|
return cloudlanguagetools.languages.AudioLanguage[language_enum_name]
|
37
40
|
|
38
41
|
class AmazonVoice(cloudlanguagetools.ttsvoice.TtsVoice):
|
39
|
-
def __init__(self, voice_data):
|
42
|
+
def __init__(self, voice_data, engine: str):
|
40
43
|
# print(voice_data)
|
41
44
|
# {'Gender': 'Female', 'Id': 'Lotte', 'LanguageCode': 'nl-NL', 'LanguageName': 'Dutch', 'Name': 'Lotte', 'SupportedEngines': ['standard']}
|
42
45
|
self.service = cloudlanguagetools.constants.Service.Amazon
|
@@ -45,9 +48,7 @@ class AmazonVoice(cloudlanguagetools.ttsvoice.TtsVoice):
|
|
45
48
|
self.voice_id = voice_data['Id']
|
46
49
|
self.name = voice_data['Name']
|
47
50
|
self.audio_language = get_audio_language_enum(voice_data['LanguageCode'])
|
48
|
-
self.engine = 'standard'
|
49
|
-
if 'neural' in voice_data['SupportedEngines']:
|
50
|
-
self.engine = 'neural'
|
51
|
+
self.engine = engine
|
51
52
|
|
52
53
|
def get_voice_key(self):
|
53
54
|
return {
|
@@ -125,6 +126,8 @@ class AmazonService(cloudlanguagetools.service.Service):
|
|
125
126
|
AudioFormat.wav: 'pcm'
|
126
127
|
}, options, AudioFormat.mp3)
|
127
128
|
|
129
|
+
logger.info(f'generating audio with voice {voice_key}')
|
130
|
+
|
128
131
|
# wav, we need to convert as described here:
|
129
132
|
# https://aws.amazon.com/blogs/machine-learning/integrating-amazon-polly-with-legacy-ivr-systems-by-converting-output-to-wav-format/
|
130
133
|
|
@@ -141,13 +144,13 @@ class AmazonService(cloudlanguagetools.service.Service):
|
|
141
144
|
# pitch not supported on neural voices
|
142
145
|
prosody_tags = f'rate="{rate_str}"'
|
143
146
|
|
144
|
-
|
145
147
|
ssml_str = f"""<speak>
|
146
148
|
<prosody {prosody_tags} >
|
147
149
|
{text}
|
148
150
|
</prosody>
|
149
151
|
</speak>"""
|
150
152
|
|
153
|
+
|
151
154
|
try:
|
152
155
|
if audio_format == cloudlanguagetools.options.AudioFormat.wav:
|
153
156
|
response = self.polly_client.synthesize_speech(Text=ssml_str,
|
@@ -157,8 +160,14 @@ class AmazonService(cloudlanguagetools.service.Service):
|
|
157
160
|
Engine=voice_key['engine'],
|
158
161
|
SampleRate="16000")
|
159
162
|
else:
|
160
|
-
|
163
|
+
if voice_key['engine'] in ['generative', 'long-form']:
|
164
|
+
logger.info(f'voice: {voice_key}, generating text format: {text}')
|
165
|
+
response = self.polly_client.synthesize_speech(Text=text, TextType="text", OutputFormat=response_format_parameter, VoiceId=voice_key['voice_id'], Engine=voice_key['engine'])
|
166
|
+
else:
|
167
|
+
logger.info(f'voice: {voice_key}, generating ssml format: {ssml_str}')
|
168
|
+
response = self.polly_client.synthesize_speech(Text=ssml_str, TextType="ssml", OutputFormat=response_format_parameter, VoiceId=voice_key['voice_id'], Engine=voice_key['engine'])
|
161
169
|
except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as error:
|
170
|
+
logger.error(f'Amazon Polly exception: {type(error)}')
|
162
171
|
raise cloudlanguagetools.errors.RequestError(str(error))
|
163
172
|
|
164
173
|
if "AudioStream" in response:
|
@@ -187,7 +196,9 @@ class AmazonService(cloudlanguagetools.service.Service):
|
|
187
196
|
response = self.polly_client.describe_voices()
|
188
197
|
# print(response['Voices'])
|
189
198
|
for voice in response['Voices']:
|
190
|
-
|
199
|
+
logger.debug(f'voice: {pprint.pformat(voice)}')
|
200
|
+
for engine in voice['SupportedEngines']:
|
201
|
+
result.append(AmazonVoice(voice, engine))
|
191
202
|
return result
|
192
203
|
|
193
204
|
|
@@ -82,6 +82,7 @@ class Service(StrEnum):
|
|
82
82
|
Wenlin = 'Wenlin'
|
83
83
|
LibreTranslate = 'LibreTranslate'
|
84
84
|
ElevenLabs = 'ElevenLabs'
|
85
|
+
Alibaba = 'Alibaba'
|
85
86
|
TestServiceA = 'TestServiceA'
|
86
87
|
TestServiceB = 'TestServiceB'
|
87
88
|
|
@@ -95,4 +96,4 @@ class DictionaryLookupType(enum.Enum):
|
|
95
96
|
Definitions = enum.auto()
|
96
97
|
PartOfSpeech = enum.auto()
|
97
98
|
MeasureWord = enum.auto()
|
98
|
-
PartOfSpeechDefinitions = enum.auto()
|
99
|
+
PartOfSpeechDefinitions = enum.auto()
|
@@ -0,0 +1,26 @@
|
|
1
|
+
|
2
|
+
# these exceptions can be retried
|
3
|
+
class TransientError(Exception):
|
4
|
+
pass
|
5
|
+
|
6
|
+
# no need to retry, something is wrong which won't be fixed on retry
|
7
|
+
class PermanentError(Exception):
|
8
|
+
pass
|
9
|
+
|
10
|
+
class InputError(PermanentError):
|
11
|
+
pass
|
12
|
+
|
13
|
+
class ApiKeyNotFoundError(PermanentError):
|
14
|
+
pass
|
15
|
+
|
16
|
+
class RequestError(TransientError):
|
17
|
+
pass
|
18
|
+
|
19
|
+
class TimeoutError(TransientError):
|
20
|
+
pass
|
21
|
+
|
22
|
+
class NotFoundError(PermanentError):
|
23
|
+
pass
|
24
|
+
|
25
|
+
class OverQuotaError(PermanentError):
|
26
|
+
pass
|
@@ -3,6 +3,7 @@ import tempfile
|
|
3
3
|
import html
|
4
4
|
import base64
|
5
5
|
import logging
|
6
|
+
import pprint
|
6
7
|
import google.cloud.texttospeech
|
7
8
|
import google.cloud.translate_v2
|
8
9
|
import google.api_core.exceptions
|
@@ -37,6 +38,7 @@ GENDER_MAP = {
|
|
37
38
|
class GoogleVoice(cloudlanguagetools.ttsvoice.TtsVoice):
|
38
39
|
def __init__(self, voice_data):
|
39
40
|
logger.debug(f'processing voice {voice_data}')
|
41
|
+
logger.debug(pprint.pformat(voice_data))
|
40
42
|
self.service = cloudlanguagetools.constants.Service.Google
|
41
43
|
self.service_fee = cloudlanguagetools.constants.ServiceFee.paid
|
42
44
|
self.name = voice_data.name
|
@@ -154,8 +156,6 @@ class GoogleService(cloudlanguagetools.service.Service):
|
|
154
156
|
try:
|
155
157
|
client = self.get_client()
|
156
158
|
|
157
|
-
ssml_text = '<speak>' + text + '</speak>'
|
158
|
-
input_text = google.cloud.texttospeech.SynthesisInput(ssml=ssml_text)
|
159
159
|
|
160
160
|
# Note: the voice can also be specified by name.
|
161
161
|
# Names of voices can be retrieved with client.list_voices().
|
@@ -171,8 +171,21 @@ class GoogleService(cloudlanguagetools.service.Service):
|
|
171
171
|
pitch=options.get('pitch', 0.0)
|
172
172
|
)
|
173
173
|
|
174
|
+
|
175
|
+
# prepare speech request
|
176
|
+
ssml_text = '<speak>' + text + '</speak>'
|
177
|
+
input_text = google.cloud.texttospeech.SynthesisInput(ssml=ssml_text)
|
178
|
+
|
179
|
+
# some voices don't support SSML and it's weirdly not documented
|
180
|
+
non_ssml_voices = ['Chirp', 'Journey']
|
181
|
+
non_ssml_voice_found = any(s in text for s in voice_key['name'])
|
182
|
+
if non_ssml_voice_found:
|
183
|
+
logger.info(f'with voice {voice_key}, use non-ssml input')
|
184
|
+
input_text = google.cloud.texttospeech.SynthesisInput(text=text)
|
185
|
+
|
186
|
+
speech_request = {"input": input_text, "voice": voice, "audio_config": audio_config}
|
174
187
|
response = client.synthesize_speech(
|
175
|
-
request={"input": input_text, "voice": voice, "audio_config": audio_config}
|
188
|
+
request=speech_request
|
176
189
|
)
|
177
190
|
|
178
191
|
# The response's audio_content is binary.
|
@@ -0,0 +1 @@
|
|
1
|
+
KEYS='gAAAAABnrQvMFfc-BMsxOuIQoYBOy1DUx6trTwT-GWVije9JGGDmwhFHPGpxLGLH1V-CyDelTiHazv7IFK1mffZn6Zqk_Pd7QWPVd1PvOCxRuQdjRfI-FcEw_ORxbs2JMMc5LVkad7OMNgH-eEDJTQ8GNLD2PpoOpK1MVUv7_m612S-Hg3DeO44fZ4QYTGhsl12sSTTVRZCbK8Uy2URwc4uaeSRPRhlKJeZ1Ed3jMRI0slDZLYwUVmerdyWxDtZaDJS2n8AZ5Dd6QC4cswBigNtuCgmtZ4lG_2j6mN8Gc5jVsXFosCmewzgcrZ4E1BHYg3q7LZXzh1YydM_SmHHt95akzfIFVbuxSOila0jbiGuxDK0RKqj3O-ut1N0ZZBtxYlE_6VrBgCVHNllcjMxbDfpmTi0f6OzpYItquyDhV93xoGlOltRNh5_NvbIhd-L9NoqGvEEI7am5-_sHpGPlSHUAfCymgjc0e2ScvsT_kAdPkQfq3aUfN-ic-LwW7Zm0Yft_gpgNQr88l6O33sd3XLDKHhFFne_Hj45rEZxYscUzqjersnnFGAVLQ9M8LwVIXKEV3GiNsIJRnDW8E70LjE-oIj7AjI8hhoN49aCsS9E8Rc3y1LecwppZ0DWuunaKRHfC-o5z-EpFuDiJg-G3cTw0TvNFSAJrpMuZTo34JyXjccyIl9_-Ie86h0DRlO6dbW5wdpFcj4YTcSklhFQ9rPBMXaTR72l1W2Eq6YckeMu437HswlBggIY1jzjlXDMAhTbIUzOwSjFh-2ydANqtdueMwUce9LHnc6gIWap1gaPIWe1iRbrOyTpziFXNkTEmFwaVVhFitWxf7V0yP26QeedFVscv2F9STeq5haLtVUPDKYwjkjQVnzcSVRV3IQpPVTeqfn60QYisZHD7kZt3NX3i-Tk9lAR5PGYqmO6FLPeGQFLrG6lauuYkas2qQaP89XX0f__9geXw59T7Mip3Pipm8Cq_YUcqeqqItos2HsUW8VKxPZeeBAjqspUgywzJ7NbPBb7qVs-zAQZ_S75DGoPOIcZ4YhKXKAecZEv8A4dbCCeoJ3e8yJexKFkz10DTd6qDLYs1ncHFystCgm2bEAH9RamktCBGjEyGUpMSTv7l6WSe7kQZ9xUyutp9v2qsMGubvuDWPQy22HIpyw59M72M7uUDNh3tNYtriTliztHl3ZnSPazQOIkXBEPiHdSHaVaGuQXgN_0aUHMy3YTpNNHz3OMk2rhdzOD-CRBNKuMk6AT7f1TsOKzUwnfsiUv05VmQ9dZTbZRUCUl98oarNRLV3XDgbcwo6iCD5pXeCf0WkVbxJg6t9vvg7LP0vdalLgUO3tdYOaIXG1e8M6onk4wK5F3gJhFxjewqEf8PrFpNALxZegXymq-x9qGlmFYnXtbecSzcPryzNUIZ5S0DEBlCVSeg2b1n60WMLNpzyhoUs-pWgRl4UaHs9Y69Z-QbmhnMYYMfM9X99FxN0Bcfm0EU3TlBcdZIF3NQ_r3c3RNPdSRrbHayI9_J8ScPthBqSYvo0grstUJckELTr3TVhMccJtAcq7nS5zRRdJRmoQD93u_GntR2EjCxHVBOuxxd6HdPT0xOQReguJsEpyCta5RhJ9qytmL4eN8TirkXSDe27rKSQnWKNfE7vVpqs6Wz7cR_FH3lnUjkDQFhlqV29pojoWHqNwTf1ZX5lZJIaPwIbLW3JyxmYpFF1Gl3yOVEwXRaHWJrCtoFTcI6qUpnTf1d04ZPG160AtlITd4Zq6LwBB9nX3GdqNBDIz9hDSb4F7F5lEh4H7vfcfhtOzKzCyx2vcNi6ZClu23GyeO1CU39FkF2apC7XIs3GguId2e7z5mprkLcyoJMrfCdtJ65GmXVVMDKEePoDY3KtUXNMvsjEZhIjaRuYe_njHGQ6iGpdT-pq7U_AGYaWq0yur86wWcIPM593-9xEgdQEF1zlMJP-NGmvygWLJn2WFnnzBmMWEUL6hCoNY
xfdk8x56cE9Le9FH3LFcnvQJxtOIE8xEfx_cf72qpMbuqTx92vMOt1sq0hPH3szbj1ZRGyoZ1xyJxhvrVJykqcJdef1jkRQs0-fRB4rUBh9YmshbaZh-DhJE_W7dRxSt0IgYR8avtrUb13i-somxs0MTaHevdRuX-jHgWoT97Tx4yS4dIkJuJnQzw54M1z-X5vckUTa7zbXXwSNZOLJQhTc7ttekRmW_qDtKRZzmGIl3xPRFIyaKXUW8k5W9igc5y8pr63ED8fJ5YcV__s7gv_3UbT2hWdKr4xX1dlktRnHJTYX9-hVgmmEh9NIWE-w2_sxL4oIBKbC-U5D9P0UdYxw02fe0XMaYxZKHIBzmyYFXdBAFZb1VbLhGZEv6V3vV1JIRo8uxzA-XK0xC8HOkAuKYGj9XEB-1SRMZAfZi1EGRiPwz23aXelargPrwbxMNKb0hPV0Vu4mYJy-E7Aq3BsV4dVA75vbXh1K-XpXfNwwsOohIUsdkLHqCn0t2qYe0oEMqMrHtP47H3I9FnS6ucDA_v2Cj47vSet8np5iIHM1npyYwlsxTHWYZ-ShWzg0f2CZ9HZft0Rv0voo4SS966bzfG8oKiB7-QgV79oI98pVM3lIBbL5Xy--cAaPfIg4Rs2eIy0e48IbYGffiCie12FSV18Wtz6MPDpCYOag4xRsJQAxHc-ixq-QfFXEe1lsA_1YkbL13xF2OtvEauZXUr3GuZNCwJA3uHGwME4A_HqJDXn9HKg7bvxE2S7klYRQD0Hp8tTGX2ACmPbmD_VB-0c3h8Bh5nehClOh2aC54xgaCGj9U6ZP39skE5kd8kYFHRH751Hg6IKl3ZI1GhWopmHGl1rN84t20R8tjlTX_b8r5M-xrgAEDD05Qo3CMBZSSSlGWbyOHXdJLJAu6wbbhmmdMYPnHEZLmRGuJerP9iO5BdrwABvMi0nmvbMakiiLlA2MAo9mF-UoAzGGcvJXlJrMaOTbuOz2z-pgDQRgRaZlurf7kcc8fkHrOu1OkEOdZUsbPsJHux-JX36J4qq6DZzFWF30fCIgEIHK2m24O-qz9yD7QCo7aU8lO0T-M2gX7BmRAl76_IcZHod2McFVBXC5GVJoxXNiBdfi6JGeDY3sI2vkheKVSYecmRziHingUfXNDO8exX9z-heXlh7cRbeFX3XYTWrh_Koz-fQaKjpPHncB9_ox6_230UKupnyLfPNgnBbjJFI6vbSHjkJD5S8o7q1gvcysj_qvznGIxbd5gCeaZG-zAqHxSCLcQPQShmuADE_OXyM9ZkFBod4QF8EN1m-WCUEzhii4dVqWwGZQAuA-MTVlVBxvZM2k8xoFhFkD45uWu6V2mln14hvxfSuYUZkeDAhHEVZfe8Le-8oaCT2IU8rJhzF8I-gEagcDRZANr4CfsRKHiWrp8blm_UJgFDEzhR7aUJjALDUHqynNcP9re2OAv5X2R6s3nVG-gJ4eu0EW1HnX9tJtxH5E8ehSbx0mGQzZuP5fYcJ3RRYIxk_i4PDCx7QRaawv9oi7E-UEcTDDLp2TUkytBNOlUJcNs3kqfmjqQV-GA5c50apyMcHwAvMCXbMAshLDMc8sH0VHDMejZHspBRyvnZXLAhdQ0CVa9Lde_BRv5hPxslqfHiUCSm6x9BcCbXqER0MRfm65TMK3so28pGtIMl8QhzFaIBHUN0SkxX1tM3DzIhNmXEp0BLF-gHRyUCGjLaUPCUtV8efwqQcv8hB3Vm1almBIIGX2co0KO79qaWfZHAhXH2SEecYLPns2wb4jMouCcJB91mpzUz-vod54Ks1C-wR4M7uKQF0q9_luWuOjDcfwVELNTdiEyuroum347H_S0b0-mksmjCEVzjrpfyrajVCIgaUOx6RbrW6dxbfR15nSssrdY-I8XbeHCccu1f5qdq1t8vx7lzrOXMrML9T04iNFF132tkG5y_SVnN0ZXpvrEAxd0IuMAEFvIujovs6IAoy37xBX-vAOP_KV7
4MscDxJiAroiRCab7d3YI5j8vStdgEEurFGQ-3DLRbggW5O3kM1FJygHF0givqqvo1S4br9uG1drgl9U2JuKPsB2s8QpGIUUi9g1VJ3k8AIg5sBeAlM5SEtrXsXoWMhd-JywBSZmpBpdRDPdazxANwC968B1ol2IApTBVnmFuXHtEKyhqM8mlpAq6jkyG0LZ41wgrHNtrGcjY9DCxP9JpmToGitZLxvyVP1A9uPhK5d6raIGJUNCcQvcNc5HUFkNxrSTMmFQ5VbHh-GqiTF8gDboPpr9jBTSNECYUk9N_jEZIrL1x4rbYyDZKLAws_7QBkln6h20X8VLd1l1HvgSaWC-nhpvvRsOjLblTM1HRy7h9ZZV7zeBGw5IKYH5ZdXqx46kgazJcoE6Ap-IZpmhZEj9gP_VA-SxFkec1gMH3awk68igpKVGaHgtWyLDBW62o4QwCkbN3PX7kKNxHSAkD0AtS6Uu6B6pcqGwdSidr1SPkQukygWIXdrLlWyjbnnKxax0BVbYfuBIYuZd0I-WQx4eZbTXRBMbgmlljMu_AlzfMsLy9fQhEUpr_J6ujJmCPbkGBC6FEzMNGmnDMZ5tk3D_QiWPPNJXAN-_WkriJOTXxvnPDtOX4qn_nnFBV6W7g4q_PV4Q_gle3H_yGsw8GkrSSW5AB6jZV774yLfANxWiEXtD_gpaBHm3rCl1LiXrWraA5f8iE4W-ZFqFslXzd9f72mTyBjbiTwHftLJppr_SbWZ04N4IIiOZbE7bxlAuq0INlOVpu8EoJcEgSSF0SuZrunW8s2qUSuNivnUeZoq2fMsfH4sZZ9OSO1xJQRRUjPPKS_97eK3_V-OxSwVcag2qD2RXWvPaAKJGVuJifOfskD_OtFcX6O1Jts0NfqB6hwFrNhs9wtulpsk90-zAF4LckzhDSEIjGEn5tozAVLvqkZpv8l-dc8X0Yb_T4ieWabHor3G778-ATP4B1xcCa4grJjSYLNhAljVKB5O_Wmc2PBYLu3BZYvZdnuyjge7YeuRHowOXlhGDUcO42ZzxFhD2sesIU2oXUKFg8Xj4Wiq1bHtd7PiRJ2M6KLfma8JXpfoKwqOFxnpM5ojlFwlKFbCBy4vRMwLeq6vw_iBfNK8xtgk4Aw6EE7XRYBCO98No4FTNixyRLfCqq9LAWI_Z1C0FvOMTkagdTmrhCafWvxKZPBqv7klfTS3biHzyqBhEWzVQBeMQYOC88QaXZ0iPPs1-pKW8ttpkj38WuhpCrDGR21KxeibHk6vlT9ajBysbkH_blDfUgAEHucIAdn-WkWzN_K18kSpLtoMaptqEF39DAlqmA80ox1-TQDaRc6cyPqzKuPVpFF2ly7mEa2yI5ifAIMmu-Ohi7XmXM57XZHXCz1GZSlxkN48hwU5Say8qUqV05djwmRgLaXYhcVdEbBv9Xjob31RpOEXQTRHQyZtz7C5VjfFcxYdrigsUrc6xH8pbNoDZiPMuDU8XWGcctCdBH2tzhmbRnayOGwkyt8q4xPMWbl3XtgHNLb_8OLfwSNQs-8cXHhaj_e2dDvRtLJqcM8WUJVSriW6Ra_q9OVdz2v4-utgJ0agcMEmuF6EogvlTF9vLVmBrPqrkboTaa35wnYR0mGFcnxnZ076DK7MxCIhSXxwYWMxsCvc_2FytMWQQpyqh9Pb7PputSHB5VIgVxWGAVPfNk_uGBoguRiDh7bYEevktSyGAyXWvUi7vc5terqztSFa4XCtrvonnHVxnNFEYX3LoqCnnZA3qDIRdcej-uJRGq3jdZlNVsml7Xhw9K4ABqEULHThFGdf3cDuewYt00ZTKxeTDbG5xcRW7D3f7PqgQZk2vj3jIdZZgROQmjHzNPdqMBsg8U_IapLkU63ZftxqldhgMza3jHayaAvFxLuEBX1YnZcBnXbVv2WEZGayhC1NaO6wQq4gT71mRF7ROq-1dKfEbRDmc26EehW20d6Lh0XPIxMquMwJZG-xD_hy40xmxlg9f
-WvJU13OMVNkk-LC4GwBTLvOQ3aD0Kc0wgX0clZ08gxDIaFWZz33Qed5On__Ip7BQEsFqBkuEvSm-N_7_rPtyhIJj8QInn-9CTPLCHOSAjCUz01lheM5NBZcudmHhplVRJnl3Qj0_2aRGu6UjZJ-jX3tC-Par1GIZ3eK218iYKo5BOW23B1MIhbXkxLbraLO_0dUkSI_1qeq2OOfeQ0fAZ3Nbw5_UD6QHLDZOaB-8n5zAFxUZQ5WuwQBTnDoy7iMjUpBksmP02A72yR1Ia-DTxlA_7IwXa2q4gSH4feyEcejO3tYdDFs38UBP6Xlv9pJZDQ65lCAd0bp3yiVyuQtkcdvS0Xn5Xp-Nc43Z4Xl36FlVmll4zV6DEk69ome75QrMe1gIOQfo796-RrywrEho-KfOXSeSj9UG8OC8fXE4uR2KnVOa7yuVw_4gEM9aUheHFW9_6zJPb03__Ca5tSKJq6OEndhyWHWcjwIo7f2fTI7t_sMUQfXgnfXZYm7iHh4etwhoqUS94VOwyAV4VmDuhU7EsSIglPVvm'
|
@@ -24,6 +24,8 @@ class Service():
|
|
24
24
|
try:
|
25
25
|
kwargs['timeout'] = cloudlanguagetools.constants.RequestTimeout
|
26
26
|
response = self.post_request(url, **kwargs)
|
27
|
+
if response.status_code >= 400:
|
28
|
+
logger.error(f'{self.get_service_name()} audio request failed with status code {response.status_code}: {response.content}')
|
27
29
|
response.raise_for_status()
|
28
30
|
output_temp_file = tempfile.NamedTemporaryFile(prefix='clt_audio_')
|
29
31
|
output_temp_filename = output_temp_file.name
|
{cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/servicemanager.py
RENAMED
@@ -28,6 +28,7 @@ import cloudlanguagetools.spacy
|
|
28
28
|
import cloudlanguagetools.wenlin
|
29
29
|
import cloudlanguagetools.libretranslate
|
30
30
|
import cloudlanguagetools.openai
|
31
|
+
import cloudlanguagetools.alibaba
|
31
32
|
import cloudlanguagetools.encryption
|
32
33
|
import cloudlanguagetools.translationlanguage
|
33
34
|
|
@@ -63,6 +64,7 @@ class ServiceManager():
|
|
63
64
|
self.services[cloudlanguagetools.constants.Service.MandarinCantonese] = cloudlanguagetools.mandarincantonese.MandarinCantoneseService()
|
64
65
|
self.services[cloudlanguagetools.constants.Service.Wenlin] = cloudlanguagetools.wenlin.WenlinService()
|
65
66
|
self.services[cloudlanguagetools.constants.Service.OpenAI] = cloudlanguagetools.openai.OpenAIService()
|
67
|
+
self.services[cloudlanguagetools.constants.Service.Alibaba] = cloudlanguagetools.alibaba.AlibabaService()
|
66
68
|
|
67
69
|
def configure_default(self):
|
68
70
|
# use the stored keys to configure services
|
{cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools.egg-info/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: cloudlanguagetools
|
3
|
-
Version: 11.5.0
|
3
|
+
Version: 11.6.0
|
4
4
|
Summary: Interface with various cloud APIs for language processing such as translation, text to speech
|
5
5
|
Home-page: https://github.com/Language-Tools/cloud-language-tools-core
|
6
6
|
Author: Luc
|
@@ -6,7 +6,7 @@ from setuptools.command.install import install
|
|
6
6
|
# twine upload dist/*
|
7
7
|
|
8
8
|
setup(name='cloudlanguagetools',
|
9
|
-
version='11.5.0',
|
9
|
+
version='11.6.0',
|
10
10
|
description='Interface with various cloud APIs for language processing such as translation, text to speech',
|
11
11
|
long_description=open('README.rst', encoding='utf-8').read(),
|
12
12
|
url='https://github.com/Language-Tools/cloud-language-tools-core',
|
@@ -12,6 +12,8 @@ import json
|
|
12
12
|
import time
|
13
13
|
import pprint
|
14
14
|
import functools
|
15
|
+
import tempfile
|
16
|
+
import backoff
|
15
17
|
|
16
18
|
import audio_utils
|
17
19
|
|
@@ -22,6 +24,7 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')
|
|
22
24
|
import cloudlanguagetools
|
23
25
|
import cloudlanguagetools.servicemanager
|
24
26
|
import cloudlanguagetools.options
|
27
|
+
import cloudlanguagetools.errors
|
25
28
|
from cloudlanguagetools.languages import Language
|
26
29
|
from cloudlanguagetools.languages import AudioLanguage
|
27
30
|
from cloudlanguagetools.constants import Service
|
@@ -34,7 +37,7 @@ def get_manager():
|
|
34
37
|
|
35
38
|
return manager
|
36
39
|
|
37
|
-
|
40
|
+
BACKOFF_MAX_TIME=30
|
38
41
|
|
39
42
|
def skip_unreliable_clt_test():
|
40
43
|
def decorator(func):
|
@@ -47,6 +50,18 @@ def skip_unreliable_clt_test():
|
|
47
50
|
return decorator
|
48
51
|
|
49
52
|
|
53
|
+
@backoff.on_exception(backoff.expo,
|
54
|
+
requests.exceptions.RequestException,
|
55
|
+
max_time=BACKOFF_MAX_TIME)
|
56
|
+
def get_tts_voice_list_json_with_retry(manager):
|
57
|
+
return manager.get_tts_voice_list_json()
|
58
|
+
|
59
|
+
@backoff.on_exception(backoff.expo,
|
60
|
+
requests.exceptions.RequestException,
|
61
|
+
max_time=BACKOFF_MAX_TIME)
|
62
|
+
def get_tts_voice_list_v3_with_retry(manager):
|
63
|
+
return manager.get_tts_voice_list_v3()
|
64
|
+
|
50
65
|
class TestAudio(unittest.TestCase):
|
51
66
|
|
52
67
|
ENGLISH_INPUT_TEXT = 'This is the best restaurant in town.'
|
@@ -59,17 +74,8 @@ class TestAudio(unittest.TestCase):
|
|
59
74
|
def setUpClass(cls):
|
60
75
|
cls.manager = get_manager()
|
61
76
|
cls.language_list = cls.manager.get_language_list()
|
62
|
-
|
63
|
-
|
64
|
-
while success == False and num_tries >= 0:
|
65
|
-
num_tries -= 1
|
66
|
-
try:
|
67
|
-
cls.voice_list = cls.manager.get_tts_voice_list_json()
|
68
|
-
cls.voice_list_v3 = cls.manager.get_tts_voice_list_v3()
|
69
|
-
success = True
|
70
|
-
except requests.exceptions.ReadTimeout as e:
|
71
|
-
logging.exception(f'could not get voice list, timeout')
|
72
|
-
time.sleep(1)
|
77
|
+
cls.voice_list = get_tts_voice_list_json_with_retry(cls.manager)
|
78
|
+
cls.voice_list_v3 = get_tts_voice_list_v3_with_retry(cls.manager)
|
73
79
|
|
74
80
|
import http.client as http_client
|
75
81
|
http_client.HTTPConnection.debuglevel = 1
|
@@ -89,7 +95,8 @@ class TestAudio(unittest.TestCase):
|
|
89
95
|
|
90
96
|
def get_voice_by_service_and_name(self, service: Service, voice_name) -> cloudlanguagetools.ttsvoice.TtsVoice_v3:
|
91
97
|
subset = [x for x in self.voice_list_v3 if voice_name in x.name and x.service == service]
|
92
|
-
|
98
|
+
num_voices = len(subset)
|
99
|
+
self.assertEqual(num_voices, 1, msg=f'found {num_voices} voices for {service} and {voice_name}, expected 1')
|
93
100
|
return subset[0]
|
94
101
|
|
95
102
|
def get_voice_by_lambda(self, service: Service, filter_func, assert_unique=True):
|
@@ -107,23 +114,16 @@ class TestAudio(unittest.TestCase):
|
|
107
114
|
voice_service = voice.service.name
|
108
115
|
return self.verify_voice_internal(voice_key, voice_service, text, recognition_language)
|
109
116
|
|
110
|
-
|
117
|
+
@backoff.on_exception(backoff.expo,
|
118
|
+
cloudlanguagetools.errors.TransientError,
|
119
|
+
max_time=BACKOFF_MAX_TIME)
|
120
|
+
def get_tts_audio_with_retry(self, text, voice_service, voice_key):
|
121
|
+
audio_temp_file = self.manager.get_tts_audio(text, voice_service, voice_key, {})
|
122
|
+
return audio_temp_file
|
111
123
|
|
112
|
-
|
113
|
-
num_tries = max_tries
|
114
|
-
get_tts_audio_success = False
|
115
|
-
|
116
|
-
while get_tts_audio_success != True and num_tries >= 0:
|
117
|
-
num_tries -= 1
|
118
|
-
try:
|
119
|
-
logging.info(f"attempting to retrieve audio from {voice_service}, attempts: {num_tries}")
|
120
|
-
audio_temp_file = self.manager.get_tts_audio(text, voice_service, voice_key, {})
|
121
|
-
get_tts_audio_success = True
|
122
|
-
except cloudlanguagetools.errors.TimeoutError as exception:
|
123
|
-
time.sleep(1) # allow retry
|
124
|
+
def verify_voice_internal(self, voice_key, voice_service, text, recognition_language):
|
124
125
|
|
125
|
-
|
126
|
-
raise Exception(f"could not retrieve audio from {voice_service} after {max_tries} tries")
|
126
|
+
audio_temp_file = self.get_tts_audio_with_retry(text, voice_service, voice_key)
|
127
127
|
|
128
128
|
# check file format
|
129
129
|
is_mp3 = audio_utils.is_mp3_format(audio_temp_file.name)
|
@@ -135,11 +135,39 @@ class TestAudio(unittest.TestCase):
|
|
135
135
|
print(f.read())
|
136
136
|
|
137
137
|
self.assertTrue(is_mp3)
|
138
|
-
|
139
|
-
|
140
|
-
|
138
|
+
|
139
|
+
audio_format = cloudlanguagetools.options.AudioFormat.mp3
|
140
|
+
|
141
|
+
self.recognize_and_verify_text(audio_temp_file, text, recognition_language, audio_format)
|
142
|
+
|
143
|
+
|
144
|
+
def recognize_and_verify_text(self,
|
145
|
+
audio_temp_file: tempfile.NamedTemporaryFile,
|
146
|
+
expected_text: str,
|
147
|
+
recognition_language: str,
|
148
|
+
audio_format: cloudlanguagetools.options.AudioFormat):
|
149
|
+
# recognize text
|
150
|
+
# ==============
|
151
|
+
sanitized_expected_text = audio_utils.sanitize_recognized_text(expected_text)
|
152
|
+
# first, try openwhisper
|
153
|
+
logger.info(f'attempting to recognize text using OpenAI. expected text: {sanitized_expected_text}')
|
154
|
+
audio_text_openai = audio_utils.speech_to_text_openai(self.manager, audio_temp_file, audio_format)
|
155
|
+
sanitized_openai_text = audio_utils.sanitize_recognized_text(audio_text_openai)
|
156
|
+
if sanitized_expected_text == sanitized_openai_text:
|
157
|
+
# openai text matches
|
158
|
+
logger.info(f'found a match on OpenAI with {sanitized_openai_text}=={sanitized_expected_text}')
|
159
|
+
return
|
160
|
+
else:
|
161
|
+
logger.warning(f'failed to recognize text using OpenAI. expected text: {sanitized_expected_text} got: {sanitized_openai_text}')
|
162
|
+
|
163
|
+
# second, try azure
|
164
|
+
logger.info(f'attempting to recognize text using Azure. expected text: {sanitized_expected_text}')
|
165
|
+
audio_text = audio_utils.speech_to_text_azure_wav(self.manager, audio_temp_file, recognition_language, audio_format)
|
166
|
+
sanitized_azure_text = audio_utils.sanitize_recognized_text(audio_text)
|
167
|
+
self.assertEqual(sanitized_expected_text, sanitized_azure_text)
|
141
168
|
|
142
169
|
def verify_service_audio_language(self, text, service, audio_language, recognition_language):
|
170
|
+
"""Legacy version using voice_list"""
|
143
171
|
# logging.info(f'verify_service_audio: service: {service} audio_language: {audio_language}')
|
144
172
|
voices = self.get_voice_list_service_audio_language(service, audio_language)
|
145
173
|
self.assertGreaterEqual(len(voices), 1, f'at least one voice for service {service}, language {audio_language}')
|
@@ -184,8 +212,9 @@ class TestAudio(unittest.TestCase):
|
|
184
212
|
# pprint.pprint(mandarin_azure_voices)
|
185
213
|
|
186
214
|
xiaochen = [x for x in mandarin_azure_voices if 'Xiaochen' in x.name]
|
187
|
-
|
188
|
-
#
|
215
|
+
logger.debug(f'xiaochen voices: {pprint.pformat(xiaochen)}')
|
216
|
+
self.assertEqual(len(xiaochen), 4, str(xiaochen)) # there is a regular and a multilingual
|
217
|
+
# and also a DragonHD, and DragonHD V1
|
189
218
|
|
190
219
|
xiaochen_single_language = [x for x in xiaochen if len(x.audio_languages) == 1][0]
|
191
220
|
self.assertEquals(xiaochen_single_language.audio_languages, [AudioLanguage.zh_CN])
|
@@ -263,7 +292,8 @@ class TestAudio(unittest.TestCase):
|
|
263
292
|
mandarin_azure_voices = [x for x in azure_voices if AudioLanguage.zh_CN in x.audio_languages]
|
264
293
|
|
265
294
|
xiaochen = [x for x in mandarin_azure_voices if 'Xiaochen' in x.name]
|
266
|
-
self.assertEqual(len(xiaochen),
|
295
|
+
self.assertEqual(len(xiaochen), 4) # there is a regular and a multilingual, and dragonhd,
|
296
|
+
# and dragonhd latest
|
267
297
|
|
268
298
|
xiaochen_single_language = [x for x in xiaochen if len(x.audio_languages) == 1][0]
|
269
299
|
xiaochen_multilingual = [x for x in xiaochen if len(x.audio_languages) > 1][0]
|
@@ -317,6 +347,16 @@ class TestAudio(unittest.TestCase):
|
|
317
347
|
source_text = '老人家'
|
318
348
|
self.verify_service_audio_language(source_text, Service.CereProc, AudioLanguage.zh_CN, 'zh-CN')
|
319
349
|
|
350
|
+
def test_mandarin_alibaba(self):
|
351
|
+
# pytest test_audio.py -k test_mandarin_alibaba
|
352
|
+
source_text = '老人家'
|
353
|
+
self.verify_service_audio_language_v3(source_text, Service.Alibaba, AudioLanguage.zh_CN, 'zh-CN')
|
354
|
+
|
355
|
+
def test_english_alibaba(self):
|
356
|
+
# pytest test_audio.py -k test_english_alibaba
|
357
|
+
source_text = 'I am not interested.'
|
358
|
+
self.verify_service_audio_language_v3(source_text, Service.Alibaba, AudioLanguage.en_GB, 'en-GB')
|
359
|
+
|
320
360
|
@skip_unreliable_clt_test()
|
321
361
|
def test_mandarin_vocalware(self):
|
322
362
|
# pytest test_audio.py -k test_mandarin_vocalware
|
@@ -393,8 +433,17 @@ class TestAudio(unittest.TestCase):
|
|
393
433
|
|
394
434
|
def test_ssml_english_amazon(self):
|
395
435
|
# pytest test_audio.py -k test_ssml_english_amazon
|
436
|
+
|
437
|
+
us_standard_neural_voices = [x for x in self.voice_list_v3 if
|
438
|
+
x.service == Service.Amazon and
|
439
|
+
x.voice_key['engine'] in ['standard', 'neural'] and
|
440
|
+
AudioLanguage.en_US in x.audio_languages]
|
441
|
+
|
442
|
+
# choose random voice from standard_neural_voices
|
443
|
+
standard_neural_voice = random.choice(us_standard_neural_voices)
|
396
444
|
source_text = 'hello <break time="200ms"/>world'
|
397
|
-
self.
|
445
|
+
self.verify_voice_v3(standard_neural_voice, source_text, 'en-US')
|
446
|
+
|
398
447
|
|
399
448
|
def test_ssml_english_watson(self):
|
400
449
|
# pytest test_audio.py -k test_ssml_english_watson
|
@@ -487,8 +536,9 @@ class TestAudio(unittest.TestCase):
|
|
487
536
|
options = {'rate': 0.8, 'pitch': -10}
|
488
537
|
|
489
538
|
audio_temp_file = self.manager.get_tts_audio(source_text, service, voice_key, options)
|
490
|
-
|
491
|
-
self.
|
539
|
+
|
540
|
+
self.recognize_and_verify_text(
|
541
|
+
audio_temp_file, source_text, 'fr-FR', cloudlanguagetools.options.AudioFormat.mp3)
|
492
542
|
|
493
543
|
def test_azure_format_ogg(self):
|
494
544
|
service = 'Azure'
|
@@ -507,6 +557,18 @@ class TestAudio(unittest.TestCase):
|
|
507
557
|
audio_text = audio_utils.speech_to_text(self.manager, audio_temp_file, 'fr-FR', audio_format=cloudlanguagetools.options.AudioFormat.ogg_opus)
|
508
558
|
self.assertEqual(audio_utils.sanitize_recognized_text(source_text), audio_utils.sanitize_recognized_text(audio_text))
|
509
559
|
|
560
|
+
def verify_service_audio_language_v3(self, text, service, audio_language, recognition_language):
|
561
|
+
"""Version using voice_list_v3"""
|
562
|
+
voices = [x for x in self.voice_list_v3 if x.service == service and audio_language in x.audio_languages]
|
563
|
+
self.assertGreaterEqual(len(voices), 1, f'at least one voice for service {service}, language {audio_language}')
|
564
|
+
|
565
|
+
# pick 3 random voices
|
566
|
+
max_voices = 3
|
567
|
+
if len(voices) > max_voices:
|
568
|
+
voices = random.sample(voices, max_voices)
|
569
|
+
for voice in voices:
|
570
|
+
self.verify_voice_v3(voice, text, recognition_language)
|
571
|
+
|
510
572
|
def verify_wav_voice(self, voice: cloudlanguagetools.ttsvoice.TtsVoice_v3, text: str, recognition_language: str):
|
511
573
|
# assert that the wav format is in the list of supported formats
|
512
574
|
self.assertTrue(cloudlanguagetools.options.AudioFormat.wav.name in
|
@@ -514,8 +576,12 @@ class TestAudio(unittest.TestCase):
|
|
514
576
|
options = {cloudlanguagetools.options.AUDIO_FORMAT_PARAMETER: cloudlanguagetools.options.AudioFormat.wav.name}
|
515
577
|
audio_temp_file = self.manager.get_tts_audio(text, voice.service, voice.voice_key, options)
|
516
578
|
audio_utils.assert_is_wav_format(self, audio_temp_file.name)
|
517
|
-
|
518
|
-
self.
|
579
|
+
|
580
|
+
self.recognize_and_verify_text(
|
581
|
+
audio_temp_file,
|
582
|
+
text,
|
583
|
+
recognition_language,
|
584
|
+
cloudlanguagetools.options.AudioFormat.wav)
|
519
585
|
|
520
586
|
def test_azure_format_wav(self):
|
521
587
|
fr_voice = self.get_voice_by_service_and_name(Service.Azure, 'Denise')
|
@@ -525,6 +591,11 @@ class TestAudio(unittest.TestCase):
|
|
525
591
|
fr_voice = self.get_voice_by_service_and_name(Service.Amazon, 'Mathieu')
|
526
592
|
self.verify_wav_voice(fr_voice, self.FRENCH_INPUT_TEXT, 'fr-FR')
|
527
593
|
|
594
|
+
def test_amazon_generative(self):
|
595
|
+
amy_generative_voice = self.get_voice_by_lambda(Service.Amazon,
|
596
|
+
lambda x: x.voice_key['engine'] == 'generative' and 'Amy' in x.name)
|
597
|
+
self.verify_voice_v3(amy_generative_voice, self.ENGLISH_INPUT_TEXT, 'en-GB')
|
598
|
+
|
528
599
|
def test_elevenlabs_format_wav(self):
|
529
600
|
fr_voice = self.get_voice_by_lambda(Service.ElevenLabs,
|
530
601
|
lambda x: 'Charlotte' in x.name and x.voice_key['model_id'] == 'eleven_multilingual_v2')
|
@@ -551,7 +622,8 @@ class TestAudio(unittest.TestCase):
|
|
551
622
|
self.verify_wav_voice(en_voice, self.ENGLISH_INPUT_TEXT, 'en-US')
|
552
623
|
|
553
624
|
|
554
|
-
|
625
|
+
@pytest.mark.skip(reason="journey voice seems to be gone")
|
626
|
+
def test_google_voice_journey_old(self):
|
555
627
|
service = 'Google'
|
556
628
|
source_text = self.ENGLISH_INPUT_TEXT
|
557
629
|
|
@@ -7,6 +7,7 @@ import json
|
|
7
7
|
import pprint
|
8
8
|
import time
|
9
9
|
import requests
|
10
|
+
import backoff
|
10
11
|
|
11
12
|
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
12
13
|
|
@@ -22,25 +23,21 @@ def get_manager():
|
|
22
23
|
return manager
|
23
24
|
|
24
25
|
class TestTranslation(unittest.TestCase):
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
time.sleep(retry_delay)
|
41
|
-
else:
|
42
|
-
logging.error(f"Max retries reached. Unable to set up the test environment.")
|
43
|
-
raise # Re-raise the last exception if all retries failed
|
26
|
+
|
27
|
+
@classmethod
|
28
|
+
@backoff.on_exception(backoff.expo,
|
29
|
+
requests.exceptions.RequestException,
|
30
|
+
max_time=60)
|
31
|
+
def get_all_language_data(cls):
|
32
|
+
cls.language_list = cls.manager.get_language_list()
|
33
|
+
cls.translation_language_list = cls.manager.get_translation_language_list_json()
|
34
|
+
cls.transliteration_language_list = cls.manager.get_transliteration_language_list_json()
|
35
|
+
cls.tokenization_options = cls.manager.get_tokenization_options_json()
|
36
|
+
|
37
|
+
@classmethod
|
38
|
+
def setUpClass(cls):
|
39
|
+
cls.manager = get_manager()
|
40
|
+
cls.get_all_language_data()
|
44
41
|
|
45
42
|
def test_language_list(self):
|
46
43
|
self.assertTrue(len(self.language_list) > 0)
|
@@ -119,7 +116,11 @@ class TestTranslation(unittest.TestCase):
|
|
119
116
|
'delivery people'])
|
120
117
|
self.translate_text(Service.Google, '中国有很多外国人', Language.zh_cn, Language.en, 'There are many foreigners in China')
|
121
118
|
self.translate_text(Service.Azure, '成本很低', Language.zh_cn, Language.fr, 'Le coût est faible')
|
122
|
-
self.translate_text(Service.Google, '换登机牌', Language.zh_cn, Language.fr,
|
119
|
+
self.translate_text(Service.Google, '换登机牌', Language.zh_cn, Language.fr,
|
120
|
+
["Changer la carte d'embarquement",
|
121
|
+
"changer de carte d'embarquement",
|
122
|
+
"changer la carte d'embarquement",
|
123
|
+
"échanger la carte d'embarquement"])
|
123
124
|
self.translate_text(Service.Amazon, '换登机牌', Language.zh_cn, Language.fr,
|
124
125
|
["utilisez votre carte d'embarquement", # seems wrong, but amazon returns this occasionally
|
125
126
|
"modifier la carte d'embar", # seems wrong, but amazon returns this occasionally
|
@@ -197,6 +198,7 @@ class TestTranslation(unittest.TestCase):
|
|
197
198
|
self.assertTrue('Watson' in result)
|
198
199
|
|
199
200
|
possible_french_translations = [
|
201
|
+
'Très faible coût',
|
200
202
|
'Le coût est faible',
|
201
203
|
'Le coût est très faible',
|
202
204
|
'à bas prix',
|
@@ -1 +0,0 @@
|
|
1
|
-
KEYS='gAAAAABnCcYzERlg56peIVaJoWPgT7m06v9-gZ-xuDco6PWxZEu-BRxgHR9UaqN3g8HzKcfVSS-j1rBf-x0SHZlFQwcRlIQIY1S9mE3Wm8I6_xCGkFfqeuLkc8c9ScDIFnMt5MtLevVchJWklkl86HH6qoUZyzmzpQm5ihwdShaCwqryQZ95UDzuFis0rPDLX63gvmdWvXSiYRjXb4hO53ajE380bmjjp9E9mbH01Y6aKAz1hBpGWjEmepnajkkopM3uw-mCz0ABq9Ysdhs0yd9UklwvbOgUgdKL1BC_st0xt2piEShHtu0lQL5LOB1ZXBYHzFhk8wLlpftvDhYcFXapoZYBlg6d3tTKoOonPumN78CSIjQdxvTcG6mcA0p1VlAurcN4-YgtbDvtObsN5clpmDXXPupwMwf7Wddmut4sGGvMJXEba4EP5G6R_3Se8F6GRfawOgLVBAJ-SSpdKszJBu9w-BPIOzXkZ0D_ddQwC1CKURM4Kpjv0phHbqulykHgHe1l8mDr8ycC87647jowIz55An2ZttpRo9mdJztPGeuEjoz5RvQHJQvjmhAT7vajWnXv7sB2y50qUnWTF5ip1rde3GfDgmNzrOd7jSe8dxonAtFziKBCiCqc3pFfh7YgeHzWCR7lJcF7wIZvGJ-goLFiUT9uayXXVAVbPFku6et1DnUpDU48nyrO7lUvmwqRprTM706UWbbdZk8dBn7n91KzzjR4AbQhQ3hl6QuvA4WoJYdpzAaUN_4LUiWHNFWtd3uBLjHqTBLOHUlAETkkPkssm48pBqqALSJBWOrNSBIcLhAQI-JcJoer_fgH4KG4pDaeKgmGF7X26xi4rE48GsYwd7Tf2kRMQrHfyLefYDocMTCqH2I7NHs4i66LDvj_Zn9nmkSyuAbO1o-cnWvGmmrEymA39Jn1oiEG-f4gTv7kTkURSZu-FtBie9dKXTWG-EHy8qr7acFitEyuLnBfUpA75OWCEJG7wB4D5DfNti3zagUTq_DSjTBmTGSLRIlS0TVBEiyQ9Vjfpy4QaCd5AFXSXSI-sUzH_wIbY5kDysM8OJ_8rD1MeTDuXRJWLMJmokcOkjR8AXpVPxWCBvDmFnYhwjyA1AkvqpIUCw0H2W59ZQFG-k_AmKJXc3ZGMwEQQuE-fSlZr9jbwAGNTWJPvOVvZWil_yoT8Rb6P_KuA5px6CtfIERCEf-lpag_j6Rn9dGXAFtPrY1kCFCL6pFG9pgi8pHzSHpIkzCfYN1sGytUz6iiWeEV_ua9OCOyoTmuwHJBmMzD2SNMXaNmH3tcniIalbG817u0RUTgsgc79FlMnJV8fgub5kxw4Kedh9S8J6P4P2Z7JyF7x44IQxc9nGPLfT37wb8LibO_esJ-d8hI4XMyApoupt-hzCRNvPbmHRGyzoty9ubtkx_1mGLOMLjCDAEZqBpI1utKYL3j-U1eJDOJGBBugkvTCok9HkbMwWqsEUwMITfwpgz1Oc2ce-DK65Hg75KO55larQlJUU5QkjU65MRduSreCNT-8kLdWTSpyeReUGA1YhbmPhGNd3yzmEVnJV0VDxhrVjljabRpQIbYJ-13OXyRsrNWv386BDW5Wt8RH3MgyluTPh3azuaQkVn3u9C37cy1CgV67BaW0VPevpKFzAclVTQHcFI2zT4byccJSS8sOztnQ2Gzwl1ACFMGwAoso68JENQhEE8jMBLbI6Q-xENbSEaXj7E24oiTFTKsI--b8e1-dyLlTYl1aa-sdj6KKyx0Yr1MTWTZfzqo0Ep2-9JRJAeOBR_5_QMQBoRRwjJXmUeQGMSDynXaExHdn-lkggGxF6Rx_1iXwsvY_7n6uSM_Qg96KHS1eDFhyev1vBZ34MIgRFiYo8Ehr4uGJeXZqwAwFBQERkYR_xFPPgJ1ESk3J5xDa-WtvTgr7iEIulNfvbmx8rCZxvDwh0t6GhUmuLDf2xV0JS_hi7wRGSOLZ1Q4VtbDUr3q11FbKY
IvoD_pHYKdqUovVn0oYUmwysWIKaZ-05GUF7Tu9SYluv5JB4cKf8r-Wi2GUQBC-KUnlfYsrGcLlje_puy8XGsU09NUzDDYeRkfeB3M6ZEk3agyfRf15_3813L05MoswWpQ2Kt5UUgY1SVgwxKAJltwN4tHzO2SUrxxalf7eBNV8PO8bPcIXMJwNh_jq6jPqzYbSBk2LRSuUWz_oQl2yUo2uBoL8kPAcR8WMp2uGymgHwPz9-msey2Vk1rfYcwK3yQY3P9KYN55heTdEua-QnS9d_rREqtsdToUH6WtPzMB9zwTrXe7wThP6u9FbqYkgzlCEqzdG-5OnOTZdRcQGhroxpL6fy17_B5RsTvBcjqvBiuR9X4C0mVgae1lwMQWF95ljH3j81oeUhFESdWUy94bluNAL-dHfTRoBwJQ9oaSWAT84sS6GyW98TUIM1M0pOphxLkFKndaD0lsHVxKQObKML0xb3QAACwt3lbcCAqA6_r33zBxreL4_GUGcPkG3eu-dTTztT-aQeL4DqgaOskOPdPuSi3ygUYMBLZ1slWpq6R9sT-pk9LtRMl192vy77fenhW2EEaPp3lgbwAzTsSmwL8Abtd_vEQ9gIAKHdZq0wBy0KUKQjPmho4Ry6tfsjJs2X-qpX0c-fsP_CCqZ8EMuMFVLd1xqtlA59Utp7lj9K-AObYX2ohAALIO4X146csvOyV0BgUoHu5ycNk6T93JcyeocSvOlgLjgn9YgAvgHw0tFSq8EyYGUb9ewrKyjNHuzFffjzvwgfupVJ1PiA8tO7TZuLMSY1NYnSQE3yLluKQNlH2oq9JN2XQ4wtr71yQq2R47GvcqpVqPn_VPCdcKnWqNRo4qlB8dRtfzyACWYRkLzeqWtQ5gEU5vipViECyBnQurpCBNVuMOckfff_dnBhMSkuhsvGbnecX7ThH5As8gDQG23TWb1agL-cnOKS6OPvZty1MiL0lB0jcvTNSHsgE2X1c9d_B_-yII9_EKnEoOh36y7YEWHN66sZE0qG2muawG31EnaYCtBU8FBUILcOI7xKwLZfZm2LxIdSd4oNd6fEWaRtYD-z4V0MkOhpWh2oUuxtpeB9tVMKv8UHBFifRrR87-3xykOEUSBDvTUb33tahvfSeDsEk24FEwa0ZJfq0NXma4a-MdpnvHDSblDPCrpw7AKY1wWWg-oEy4vMsO14qAIcFKgl6A4vWlGPO_rkunrgm9JlTXISRUqHDkgjxdFxdwlsDu-x1oIXvLpTbWveFM9E066Z7Rk0VKvaw8EPOw6FgqYH81yaHhhMzY8BMYiPgzteqtEwUceGoU2H3LXOK5-xw6_24c14nftqLjk0ujhY23HC8I3C8gA42LZsX6gD21rTVafSaS-XD4x2RMOfuhqsKvwODTu7tBx-HRBEekZRhVe_HNYHTi24XyDhQsCfLRZ4OtkI3j_-uGL34Am_qtapr3aQLTs0jLtBkeqmtpRVhw6Z3t2jcoHAzjhzlPUyTn_Pjc9RXWZ_mBNk_Fjl-waZZNii0kRqqvr99yjNZqopeweKfLtVLjHObpyL5DC9KTw7VUIAQgywFxFj7pgWmlPIdjQsBBIsqGFElOZmS-7UZ-ti79A2PEboqJYmDKOQXLMAdOt07O0GPornqeh-IYmaXWQ-BWdaMuaXVdq0Z1aRdi-mryOpcvy02N3L__vUdLXBQZIXG7iPA-Lu2t3S1ixn5ytVLPvCTmWPCl9BmCSZzy5vJK2BvSTF8spGW4sO2EAS_JktsvdnqH2RbTtenf7QcZP1c6aqyQTz44aW0KDnEMyTDBrwGwGesA4iVAZv6oxwSmD-d075paAXb5F9lu_9h8wkjllZyVDsYxpu2tRXNBynROQzSF4nvhdy3PGGSXKz0bPf5Ia9iZuUBg5ndH2SXgtFzW23lgbjNujgFfFi5bVqikyh8xJuIhMeRio8YwZP8IqSZwnezsIdoDEvihcclssrsyK4D9z9jx4UEHgK3dYTc5Ip
INmv4AnzKCEmMYgeHPnpTNWE7z7wk42rTQ--DuDe3mdwvAdqYfxSYMFu8MM-N1ZOqG2f_UljIbYnFQpYCiiXt8peffWYNx0NvKgRTuBzPZDJlZwGoi5H5Do_sht5kxX3fY4K9UegHcnnUPYvSm6aXOTNkHBC4fy3KGw-qxCrFpW8tx8twc0m2aYMXTR2XmHReLWOjrM6wFvGqf6JyDAieQEAJLCmAFjAgmeDHPpUymRvs4Cd9Xj821aCXoqDbE1MKNmr5OtIx8l8ktJzzzArrnfebu-S138_yD5ZtxddIx4D6jrUrdeTAb_IpSq5Ov9R4u7xZLhl19WhXDUQFcafVS19eyAZkh-JWkQKuVHuXg66rPCw4zFxeAHGnmu-Y7p7nsN303DGMD5b1ug4ameHGc9JpvGnfNEpw7dbbevmFwuAjkBOkutC1v2kVvmO4tnR5t3kEgJyBbgRMT4IldfFhWpMO0fdVjLAJCcTuJqm1jtE9xwbzL-OVmnNpjrG4Nh-kuOFEww8MjF9OgH8I1cGUqZpd9QSi6QfoVDA7Rb6DYcpRjgeSM2LVxD1nZT01ta1mD7AW-aprakQWk7MqVtU9RUbprmkNnL1td5AT7y9JoRukC0QhB-QtM8z2VwrX5k92bk7SoXXbmBpVysm-FKrzuY8yhYXCuyK996Pp7orkuZLbtAma1mRVOboRxx25GDKwPwZQN3vzBGxgcrTmr5Bw7uzaRIkPIO6kCLrRfgPHE0BPJcAI8jlDws1SSGrLZ1kpeOMwpoVtXij9jUFttwQWx0RGfXLYKK30H9cvIxrgaAwkIaq7J5-BeUmzPvZxZENTgyAgqFQWf_CbKgCDr8BTCtjCCsXsxk0QdgRL4B7ceBZ4orFjeCgWh2fEHg9YVNgegTL5vR5YH8hCnz4vBjA2diOU4ZMUvMdHQsIIbSK4Y29Lz3L27qsK8V55Go16-1u_ExHC5o5cV25cce4gzzJUugoHctrLWa73C8K5dlqh16W9gP_jekpfUPj52sfoi-aF9CteSvB8gyBjRtYNCEBjxNgAx_rTNZHL1jhEnZiGUmvs2B8ZSzdX8eWzr-NL5WN5xrysnMgPqCMBwnIRZrqgaccUGsuYJ9qag5WUcvhVS2QIwUj7_be41soNj1_5tA6Q-3dGdVNS8P7iPi79tSgP3FVcGCnW9-P2t8b6nBSn7FsbHLH-ytSNOVRcKyCt0jEa8x0-PRsnza67tWz_logEmPrRPrSaC4r7eCD04IIoZuSM4EcqaFWMIXEm_BNU9vcHSTRfZioxafIQAR7sMjZy0vxUZPIAsaodSu-QfmqJfH0eSXaEpfCngFiidD0YUz2pB8lQMRAvl_eXPynXccgyk7PsjtrMAjh-l48_C3t7hw1BYAPl8BLQzvOBB5__GOAofxItQa50VIx_FK2aT95n_6u-CcYSYP37y_y84AzuTEmPafqN0Y-0m-pjlq56P63RJXmH84VOHv1cfxV-5v4wf7vbrpU2WZOVrgudO0uvsU4dDjuzMkjiTSvqGNSnvp4HUrpZ0YSq8pwegHYTx8TpTFA8oo5UsM0jJY4nTDk9zPe2hRH_RDomNCJKYoTKNjcWaTn0apqq-0LyNtOBx2qAtYN9HKny-WMlI4EGIO4ZoU2j_Krln9NGXSo_CwuW58NBirfQ7RkMmByJFml7KymuhuCaN5SzKb3rTaOgU7Y-RI16c14YgC__8QDzDJeXKNDnlKs83qQWoFijrBAuSFw49G-OUpYEpx3pt7bmYpirL_NOaKfJ7_simblVpMJp6B3bbBnmg5YcnTNHwLOcW99CwTIximeASfGYuMqELYKnLw1nPTFOuZbw8M5IjacSwMiIWD0Oj0ZgiRaNzUWpubLXpRsk5Ap8fzVmIuQrN2BoyKdmvrvN3Ibwx8LolfWjE0I72R2bliQndIDbYkMVKUsZidigbCszLeQTm2wb8DOi4sryp3m7o6R-kEcw0aoFHnWezRqjA-h7oLUzwyM
-Vd7o33HwxrxCiq5LbGvNmtQ4Y0uLndqWy4BaiIPKLXPx8hicexxyG_nb0TZ7MX2iDl0nmYvwe_-owxmw-0GQ7dVZfeLCJhjcR6R1ltjSe6KAeoOWBPymHwf2y7O_K31vw1VXfBG_Z8IP9LjMtNz9xNbDMat0V_MTCxDCUNLpKfRjjCr3jQOZQ8cEaBnIUC7ZXuqI6uZVNrAbc0zNSyC_PESFDV2-f1aQ426GpMZqXX2yFI5ieF2G9cPbpWgDez_J12EHTBFiUPoHxFi_0dSVz3qCWOgUCnbgBlQqyy1VXl38gJnKGSED6knhOrNZz0Y3CT1b_B1110EScTeePvzQRIx7R39ob1sxN'
|
File without changes
|
File without changes
|
File without changes
|
{cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/argostranslate.py
RENAMED
File without changes
|
{cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/audio_processing.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/dictionarylookup.py
RENAMED
File without changes
|
{cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/easypronunciation.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/libretranslate.py
RENAMED
File without changes
|
{cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/mandarincantonese.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools/translationlanguage.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools.egg-info/requires.txt
RENAMED
File without changes
|
{cloudlanguagetools-11.5.0 → cloudlanguagetools-11.6.0}/cloudlanguagetools.egg-info/top_level.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|