cloudlanguagetools 11.2.1__tar.gz → 11.3.0__tar.gz

Files changed (47)
  1. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/PKG-INFO +1 -1
  2. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/azure.py +98 -23
  3. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/constants.py +5 -0
  4. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/elevenlabs.py +96 -14
  5. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/forvo.py +11 -4
  6. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/languages.py +3 -0
  7. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/openai.py +107 -79
  8. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/service.py +16 -0
  9. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/servicemanager.py +8 -0
  10. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/ttsvoice.py +17 -1
  11. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools.egg-info/PKG-INFO +1 -1
  12. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/setup.py +1 -1
  13. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/LICENSE +0 -0
  14. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/README.rst +0 -0
  15. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/__init__.py +0 -0
  16. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/amazon.py +0 -0
  17. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/argostranslate.py +0 -0
  18. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/cereproc.py +0 -0
  19. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/chatapi.py +0 -0
  20. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/deepl.py +0 -0
  21. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/dictionarylookup.py +0 -0
  22. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/easypronunciation.py +0 -0
  23. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/encryption.py +0 -0
  24. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/epitran.py +0 -0
  25. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/errors.py +0 -0
  26. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/fptai.py +0 -0
  27. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/google.py +0 -0
  28. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/keys.py +0 -0
  29. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/libretranslate.py +0 -0
  30. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/mandarincantonese.py +0 -0
  31. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/naver.py +0 -0
  32. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/options.py +0 -0
  33. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/pythainlp.py +0 -0
  34. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/spacy.py +0 -0
  35. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/test_services.py +0 -0
  36. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/tokenization.py +0 -0
  37. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/translationlanguage.py +0 -0
  38. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/transliterationlanguage.py +0 -0
  39. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/vocalware.py +0 -0
  40. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/voicen.py +0 -0
  41. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/watson.py +0 -0
  42. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools/wenlin.py +0 -0
  43. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools.egg-info/SOURCES.txt +0 -0
  44. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools.egg-info/dependency_links.txt +0 -0
  45. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools.egg-info/requires.txt +0 -0
  46. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/cloudlanguagetools.egg-info/top_level.txt +0 -0
  47. {cloudlanguagetools-11.2.1 → cloudlanguagetools-11.3.0}/setup.cfg +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: cloudlanguagetools
- Version: 11.2.1
+ Version: 11.3.0
  Summary: Interface with various cloud APIs for language processing such as translation, text to speech
  Home-page: https://github.com/Language-Tools/cloud-language-tools-core
  Author: Luc
cloudlanguagetools/azure.py
@@ -6,6 +6,7 @@ import operator
  import pydub
  import logging
  import pprint
+ from typing import List
  
  import cloudlanguagetools.service
  import cloudlanguagetools.constants
@@ -33,6 +34,29 @@ GENDER_MAP = {
      'Neutral': cloudlanguagetools.constants.Gender.Any,
  }
  
+ VOICE_OPTIONS = {
+     'rate' : {
+         'type': cloudlanguagetools.options.ParameterType.number.name,
+         'min': 0.5,
+         'max': 3.0,
+         'default': 1.0
+     },
+     'pitch': {
+         'type': cloudlanguagetools.options.ParameterType.number.name,
+         'min': -100,
+         'max': 100,
+         'default': 0
+     },
+     cloudlanguagetools.options.AUDIO_FORMAT_PARAMETER: {
+         'type': cloudlanguagetools.options.ParameterType.list.name,
+         'values': [
+             cloudlanguagetools.options.AudioFormat.mp3.name,
+             cloudlanguagetools.options.AudioFormat.ogg_opus.name,
+         ],
+         'default': cloudlanguagetools.options.AudioFormat.mp3.name
+     }
+ }
+ 
  class AzureVoice(cloudlanguagetools.ttsvoice.TtsVoice):
      def __init__(self, voice_data):
          # print(voice_data)
@@ -64,28 +88,52 @@ class AzureVoice(cloudlanguagetools.ttsvoice.TtsVoice):
          return f'{self.display_name} ({self.voice_type})'
  
      def get_options(self):
-         return {
-             'rate' : {
-                 'type': cloudlanguagetools.options.ParameterType.number.name,
-                 'min': 0.5,
-                 'max': 3.0,
-                 'default': 1.0
-             },
-             'pitch': {
-                 'type': cloudlanguagetools.options.ParameterType.number.name,
-                 'min': -100,
-                 'max': 100,
-                 'default': 0
-             },
-             cloudlanguagetools.options.AUDIO_FORMAT_PARAMETER: {
-                 'type': cloudlanguagetools.options.ParameterType.list.name,
-                 'values': [
-                     cloudlanguagetools.options.AudioFormat.mp3.name,
-                     cloudlanguagetools.options.AudioFormat.ogg_opus.name,
-                 ],
-                 'default': cloudlanguagetools.options.AudioFormat.mp3.name
-             }
-         }
+         return VOICE_OPTIONS
+ 
+ def locale_to_audio_language(locale: str) -> cloudlanguagetools.languages.AudioLanguage:
+     locale = AUDIO_LOCALE_OVERRIDE_MAP.get(locale, locale)
+     language_enum_name = locale.replace('-', '_')
+     audio_language = cloudlanguagetools.languages.AudioLanguage[language_enum_name]
+     return audio_language
+ 
+ def build_tts_voice_v3(voice_data) -> cloudlanguagetools.ttsvoice.TtsVoice_v3:
+     local_name = voice_data['LocalName']
+     display_name = voice_data['DisplayName']
+     voice_type = voice_data['VoiceType']
+ 
+     # build all attributes required for TtsVoice_v3
+     # name
+     if local_name != display_name:
+         voice_name = f"{display_name} {local_name} ({voice_type})"
+     else:
+         voice_name = f'{display_name} ({voice_type})'
+     voice_key = {
+         'name': voice_data['Name']
+     }
+     options = VOICE_OPTIONS
+     service = cloudlanguagetools.constants.Service.Azure
+     gender = GENDER_MAP[voice_data['Gender']]
+     service_fee = cloudlanguagetools.constants.ServiceFee.paid
+ 
+     azure_locale_list = [voice_data['Locale']]
+     if 'SecondaryLocaleList' in voice_data:
+         azure_locale_list = voice_data['SecondaryLocaleList']
+         # ensure the main locale is present
+         azure_locale_list.append(voice_data['Locale'])
+         # unique array
+         azure_locale_list = list(set(azure_locale_list))
+ 
+     audio_languages = [locale_to_audio_language(locale) for locale in azure_locale_list]
+ 
+     return cloudlanguagetools.ttsvoice.TtsVoice_v3(
+         name=voice_name,
+         voice_key=voice_key,
+         options=options,
+         service=service,
+         gender=gender,
+         audio_languages=audio_languages,
+         service_fee=service_fee)
+ 
  
  def get_translation_language_enum(language_id):
      # print(f'language_id: {language_id}')
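To illustrate the new multilingual handling in build_tts_voice_v3: when Azure reports secondary locales for a voice, the primary locale is merged back in, the list is de-duplicated, and every locale is mapped to an AudioLanguage, so a single Azure voice can now carry several audio languages. A minimal sketch, with purely hypothetical field values:

    # hedged example; the voice entry below is illustrative, not taken from the Azure API
    from cloudlanguagetools.azure import build_tts_voice_v3

    voice_data = {
        'Name': 'Microsoft Server Speech Text to Speech Voice (fr-FR, ExampleMultilingualNeural)',
        'DisplayName': 'Example Multilingual',
        'LocalName': 'Example Multilingual',
        'VoiceType': 'Neural',
        'Gender': 'Female',
        'Locale': 'fr-FR',
        'SecondaryLocaleList': ['en-US', 'de-DE'],
    }
    voice = build_tts_voice_v3(voice_data)
    # voice.audio_languages now holds AudioLanguage entries for fr_FR, en_US and de_DE
    # (order is not guaranteed, since the locale list is de-duplicated through a set)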
@@ -250,7 +298,11 @@ class AzureService(cloudlanguagetools.service.Service):
  
          result = synthesizer.speak_ssml(ssml_str)
          if result.reason != azure.cognitiveservices.speech.ResultReason.SynthesizingAudioCompleted:
-             error_message = f'Could not generate audio: {result.cancellation_details.reason} {result.cancellation_details.error_details}'
+             # special case errors:
+             if 'standard voices will no longer be supported' in result.cancellation_details.error_details:
+                 error_message = 'Azure Standard voices are not supported anymore, please switch to Neural voices.'
+             else:
+                 error_message = f'Could not generate audio: {result.cancellation_details.reason} {result.cancellation_details.error_details}'
              raise cloudlanguagetools.errors.RequestError(error_message)
  
          stream = azure.cognitiveservices.speech.AudioDataStream(result)
@@ -281,6 +333,29 @@ class AzureService(cloudlanguagetools.service.Service):
                  logging.error(f'could not process voice for {voice_data}', exc_info=True)
          return result
  
+     def get_tts_voice_list_v3(self) -> List[cloudlanguagetools.ttsvoice.TtsVoice_v3]:
+         # returns list of TtsVoice_v3
+ 
+         token = self.get_token()
+ 
+         base_url = f'https://{self.region}.tts.speech.microsoft.com/'
+         path = 'cognitiveservices/voices/list'
+         constructed_url = base_url + path
+         headers = {
+             'Authorization': 'Bearer ' + token,
+         }
+         response = requests.get(constructed_url, headers=headers)
+         if response.status_code == 200:
+             voice_list = json.loads(response.content)
+             result = []
+             for voice_data in voice_list:
+                 # print(voice_data['Status'])
+                 try:
+                     result.append(build_tts_voice_v3(voice_data))
+                 except:
+                     logger.exception(f'could not process voice for {voice_data}')
+             return result
+ 
      def get_translation(self, text, from_language_key, to_language_key):
          base_url = f'{self.url_translator_base}/translate?api-version=3.0'
          params = f'&to={to_language_key}&from={from_language_key}'
cloudlanguagetools/constants.py
@@ -46,6 +46,11 @@ class RequestMode(enum.Enum):
      dynamic = enum.auto()
      edit = enum.auto()
  
+ class APIVersion(enum.Enum):
+     v1 = enum.auto()
+     v2 = enum.auto()
+     v3 = enum.auto()
+ 
  # service and language related constants
  # ======================================
  
cloudlanguagetools/elevenlabs.py
@@ -27,6 +27,21 @@ GENDER_MAP = {
      'female': cloudlanguagetools.constants.Gender.Female,
  }
  
+ VOICE_OPTIONS = {
+     'stability' : {
+         'type': cloudlanguagetools.options.ParameterType.number.name,
+         'min': 0.0,
+         'max': 1.0,
+         'default': DEFAULT_STABILITY
+     },
+     'similarity_boost' : {
+         'type': cloudlanguagetools.options.ParameterType.number.name,
+         'min': 0.0,
+         'max': 1.0,
+         'default': DEFAULT_SIMILARITY_BOOST
+     },
+ }
+ 
  class ElevenLabsVoice(cloudlanguagetools.ttsvoice.TtsVoice):
      def __init__(self, voice_data, language: cloudlanguagetools.languages.AudioLanguage, model_id, model_short_name):
          # pprint.pprint(voice_data)
@@ -50,20 +65,7 @@ class ElevenLabsVoice(cloudlanguagetools.ttsvoice.TtsVoice):
          return f'{self.name} ({self.model_short_name})'
  
      def get_options(self):
-         return {
-             'stability' : {
-                 'type': cloudlanguagetools.options.ParameterType.number.name,
-                 'min': 0.0,
-                 'max': 1.0,
-                 'default': DEFAULT_STABILITY
-             },
-             'similarity_boost' : {
-                 'type': cloudlanguagetools.options.ParameterType.number.name,
-                 'min': 0.0,
-                 'max': 1.0,
-                 'default': DEFAULT_SIMILARITY_BOOST
-             },
-         }
+         return VOICE_OPTIONS
  
  class ElevenLabsService(cloudlanguagetools.service.Service):
      def __init__(self):
@@ -187,3 +189,83 @@ class ElevenLabsService(cloudlanguagetools.service.Service):
  
          return result
  
+ 
+     def get_tts_voice_list_v3(self) -> List[cloudlanguagetools.ttsvoice.TtsVoice_v3]:
+         result = []
+ 
+         # first, get all models to get list of languages
+         url = "https://api.elevenlabs.io/v1/models"
+         response = requests.get(url, headers=self.get_headers(), timeout=cloudlanguagetools.constants.RequestTimeout)
+         response.raise_for_status()
+         model_data = response.json()
+ 
+         # restrict to models that can do text to speech (elevenlabs introduced voice conversion)
+         model_data = [model for model in model_data if model['can_do_text_to_speech']]
+ 
+         # pprint.pprint(model_data)
+         # model_data:
+         # [{'can_be_finetuned': True,
+         #   'can_do_text_to_speech': True,
+         #   'can_do_voice_conversion': False,
+         #   'description': 'Use our standard English language model to generate speech '
+         #                  'in a variety of voices, styles and moods.',
+         #   'languages': [{'language_id': 'en', 'name': 'English'}],
+         #   'model_id': 'eleven_monolingual_v1',
+         #   'name': 'Eleven Monolingual v1',
+         #   'token_cost_factor': 1.0},
+         #  {'can_be_finetuned': True,
+         #   'can_do_text_to_speech': True,
+         #   'can_do_voice_conversion': True,
+         #   'description': 'Generate lifelike speech in multiple languages and create '
+         #                  'content that resonates with a broader audience. ',
+         #   'languages': [{'language_id': 'en', 'name': 'English'},
+         #                 {'language_id': 'de', 'name': 'German'},
+         #                 {'language_id': 'pl', 'name': 'Polish'},
+         #                 {'language_id': 'es', 'name': 'Spanish'},
+         #                 {'language_id': 'it', 'name': 'Italian'},
+         #                 {'language_id': 'fr', 'name': 'French'},
+         #                 {'language_id': 'pt', 'name': 'Portuguese'},
+         #                 {'language_id': 'hi', 'name': 'Hindi'}],
+         #   'model_id': 'eleven_multilingual_v1',
+         #   'name': 'Eleven Multilingual v1',
+         #   'token_cost_factor': 1.0}]
+         #
+ 
+ 
+         # now, retrieve voice list
+         # call elevenlabs API to list TTS voices
+         url = "https://api.elevenlabs.io/v1/voices"
+ 
+         response = requests.get(url, headers=self.get_headers(), timeout=cloudlanguagetools.constants.RequestTimeout)
+         response.raise_for_status()
+ 
+         data = response.json()
+ 
+         for model in model_data:
+             model_id = model['model_id']
+             model_name = model['name']
+             model_short_name = model_name.replace('Eleven ', '').strip()
+             # for language_record in model['languages']:
+             for voice_data in data['voices']:
+                 try:
+                     languages = model['languages']
+                     language_id_list = [language_record['language_id'] for language_record in languages]
+                     audio_language_enum_list = [self.get_audio_language(language_id) for language_id in language_id_list]
+                     voice = cloudlanguagetools.ttsvoice.TtsVoice_v3(
+                         name=voice_data['name'],
+                         voice_key={
+                             'voice_id': voice_data['voice_id'],
+                             'model_id': model_id,
+                         },
+                         options=VOICE_OPTIONS,
+                         service=cloudlanguagetools.constants.Service.ElevenLabs,
+                         gender=GENDER_MAP.get(voice_data['labels']['gender'], cloudlanguagetools.constants.Gender.Male),
+                         audio_languages=audio_language_enum_list,
+                         service_fee=cloudlanguagetools.constants.ServiceFee.paid
+                     )
+                     result.append(voice)
+                 except Exception as e:
+                     logger.exception(f'ElevenLabs: error processing voice_data: {voice_data}')
+ 
+         return result
+ 
cloudlanguagetools/forvo.py
@@ -46,7 +46,7 @@ class ForvoVoice(cloudlanguagetools.ttsvoice.TtsVoice):
          return f'{self.get_audio_language_name()}, {self.get_gender().name}, {self.service.name}'
  
      def get_voice_shortname(self):
-         return None
+         return f'{self.language_code}-{self.country_code}'
  
      def get_options(self):
          return {}
@@ -57,6 +57,10 @@ class ForvoService(cloudlanguagetools.service.Service):
      def __init__(self):
          self.url_base = 'https://apicommercial.forvo.com'
          self.build_audio_language_map()
+ 
+         # on 2024/07, forvo started throwing some errors with SSL verification, suspect an incorrect
+         # setup on their side but they are taking too long to fix it.
+         self.verify_ssl = False
  
      def configure(self, config):
          self.key = config['key']
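Note that requests issued with verify=False will emit urllib3's InsecureRequestWarning on every Forvo call. If that log noise is a problem, it can be silenced explicitly; a small sketch of one way to do it (this suppression is not part of the change above):

    import urllib3

    # verify=False skips certificate checks, so urllib3 warns on each request;
    # suppress only that specific warning class rather than all warnings
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)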
@@ -87,7 +91,8 @@ class ForvoService(cloudlanguagetools.service.Service):
          url = f'{self.url_base}/key/{self.key}/format/json/action/word-pronunciations/word/{encoded_text}/language/{language}{sex_param}{username_param}/order/rate-desc/limit/1{country_code}'
  
          try:
-             response = requests.get(url, headers=self.get_headers(), timeout=cloudlanguagetools.constants.RequestTimeout)
+             response = requests.get(url, headers=self.get_headers(), timeout=cloudlanguagetools.constants.RequestTimeout,
+                 verify=self.verify_ssl)
              response.raise_for_status()
  
              data = response.json()
@@ -98,7 +103,8 @@ class ForvoService(cloudlanguagetools.service.Service):
              audio_url = items[0]['pathmp3']
              output_temp_file = tempfile.NamedTemporaryFile()
              output_temp_filename = output_temp_file.name
-             audio_request = requests.get(audio_url, headers=self.get_headers(), timeout=cloudlanguagetools.constants.RequestTimeout)
+             audio_request = requests.get(audio_url, headers=self.get_headers(), timeout=cloudlanguagetools.constants.RequestTimeout,
+                 verify=self.verify_ssl)
              open(output_temp_filename, 'wb').write(audio_request.content)
              return output_temp_file
          except requests.exceptions.ReadTimeout as exception:
@@ -275,7 +281,8 @@ class ForvoService(cloudlanguagetools.service.Service):
  
          # https://api.forvo.com/documentation/word-pronunciations/
          url = f'{self.url_base}/key/{self.key}/format/json/action/language-list/min-pronunciations/5000'
-         response = requests.get(url, headers=self.get_headers(), timeout=cloudlanguagetools.constants.RequestTimeout)
+         response = requests.get(url, headers=self.get_headers(), timeout=cloudlanguagetools.constants.RequestTimeout,
+             verify=self.verify_ssl)
          if response.status_code == 200:
              data = response.json()
              languages = data['items']
cloudlanguagetools/languages.py
@@ -332,6 +332,8 @@ class AudioLanguage(enum.Enum):
      id_ID = (Language.id_, "Indonesian")
      is_IS = (Language.is_, "Icelandic")
      it_IT = (Language.it, "Italian")
+     iu_Latn_CA = (Language.iu_latn, "Inuktitut (Latin, Canada)")
+     iu_Cans_CA = (Language.iu, "Inuktitut (Syllabics, Canada)")
      ja_JP = (Language.ja, "Japanese")
      jv_ID = (Language.jw, "Javanese (Indonesia)")
      ka_GE = (Language.ka, "Georgian (Georgia)")
@@ -354,6 +356,7 @@ class AudioLanguage(enum.Enum):
      ne_NP = (Language.ne, "Nepali (Nepal)")
      nl_BE = (Language.nl, "Dutch (Belgium)")
      nl_NL = (Language.nl, "Dutch (Netherlands)")
+     or_IN = (Language.or_, "Oriya (India)")
      pa_IN = (Language.pa, "Punjabi (India)")
      pl_PL = (Language.pl, "Polish")
      ps_AF = (Language.ps, "Pashto (Afghanistan)")
cloudlanguagetools/openai.py
@@ -17,27 +17,7 @@ logger = logging.getLogger(__name__)
  
  DEFAULT_TTS_SPEED = 1.0
  
- class OpenAIVoice(cloudlanguagetools.ttsvoice.TtsVoice):
-     def __init__(self, name: str,
-                  audio_language: cloudlanguagetools.languages.AudioLanguage,
-                  gender: cloudlanguagetools.constants.Gender):
-         self.name = name
-         self.gender = gender
-         self.audio_language = audio_language
-         self.service = cloudlanguagetools.constants.Service.OpenAI
-         self.service_fee = cloudlanguagetools.constants.ServiceFee.paid
- 
-     def get_voice_key(self):
-         return {
-             'name': self.name,
-             'language': self.audio_language.name
-         }
- 
-     def get_voice_shortname(self):
-         return self.name
- 
-     def get_options(self):
-         return {
+ VOICE_OPTIONS = {
      'speed' : {
          'type': cloudlanguagetools.options.ParameterType.number.name,
          'min': 0.25,
@@ -52,63 +32,9 @@ class OpenAIVoice(cloudlanguagetools.ttsvoice.TtsVoice):
          ],
          'default': cloudlanguagetools.options.AudioFormat.mp3.name
      }
-         }
- 
- class OpenAIService(cloudlanguagetools.service.Service):
-     def __init__(self):
-         self.chatbot_model = "gpt-3.5-turbo"
- 
-     def configure(self, config):
-         self.api_key = config['api_key']
-         self.client = OpenAI(api_key=self.api_key)
- 
-     def single_prompt(self, prompt, max_tokens):
-         messages = [
-             {'role': 'user', 'content': prompt}
-         ]
- 
-         if max_tokens != None:
-             response = self.client.chat.completions.create(model=self.chatbot_model,
-                 messages=messages,
-                 max_tokens=max_tokens)
-         else:
-             response = self.client.chat.completions.create(model=self.chatbot_model,
-                 messages=messages)
-         logger.debug(pprint.pformat(response))
-         tokens_used = response.usage.total_tokens
-         response_text = response.choices[0].message.content
-         return response_text, tokens_used
- 
-     def full_query(self, messages, max_tokens):
-         if max_tokens != None:
-             response = self.client.chat.completions.create(model=self.chatbot_model,
-                 messages=messages,
-                 max_tokens=max_tokens)
-         else:
-             response = self.client.chat.completions.create(model=self.chatbot_model,
-                 messages=messages)
-         logger.debug(pprint.pformat(response))
-         return response
- 
-     def speech_to_text(self, filepath, audio_format: cloudlanguagetools.options.AudioFormat):
+ }
  
-         if audio_format in [cloudlanguagetools.options.AudioFormat.ogg_opus, cloudlanguagetools.options.AudioFormat.ogg_vorbis]:
-             # need to convert to wav first
-             sound = pydub.AudioSegment.from_ogg(filepath)
-             wav_tempfile = tempfile.NamedTemporaryFile(prefix='cloudlanguagetools_OpenAI_speech_to_text', suffix='.wav')
-             sound.export(wav_tempfile.name, format="wav")
-             filepath = wav_tempfile.name
- 
-         logger.debug(f'opening file {filepath}')
-         audio_file= open(filepath, "rb")
-         transcript = self.client.audio.transcriptions.create(model="whisper-1", file=audio_file)
-         return transcript.text
- 
- 
-     def get_tts_voice_list(self) -> List[OpenAIVoice]:
-         result = []
- 
-         supported_languages = [
+ TTS_SUPPORTED_LANGUAGES = [
      AudioLanguage.af_ZA,
      AudioLanguage.ar_XA,
      AudioLanguage.hy_AM,
@@ -169,9 +95,85 @@ class OpenAIService(cloudlanguagetools.service.Service):
      AudioLanguage.ur_IN,
      AudioLanguage.vi_VN,
      AudioLanguage.cy_GB
-         ]
+ ]
  
-         for audio_language in supported_languages:
+ class OpenAIVoice(cloudlanguagetools.ttsvoice.TtsVoice):
+     def __init__(self, name: str,
+                  audio_language: cloudlanguagetools.languages.AudioLanguage,
+                  gender: cloudlanguagetools.constants.Gender):
+         self.name = name
+         self.gender = gender
+         self.audio_language = audio_language
+         self.service = cloudlanguagetools.constants.Service.OpenAI
+         self.service_fee = cloudlanguagetools.constants.ServiceFee.paid
+ 
+     def get_voice_key(self):
+         return {
+             'name': self.name,
+             'language': self.audio_language.name
+         }
+ 
+     def get_voice_shortname(self):
+         return self.name
+ 
+     def get_options(self):
+         return VOICE_OPTIONS
+ 
+ class OpenAIService(cloudlanguagetools.service.Service):
+     def __init__(self):
+         self.chatbot_model = "gpt-3.5-turbo"
+ 
+     def configure(self, config):
+         self.api_key = config['api_key']
+         self.client = OpenAI(api_key=self.api_key)
+ 
+     def single_prompt(self, prompt, max_tokens):
+         messages = [
+             {'role': 'user', 'content': prompt}
+         ]
+ 
+         if max_tokens != None:
+             response = self.client.chat.completions.create(model=self.chatbot_model,
+                 messages=messages,
+                 max_tokens=max_tokens)
+         else:
+             response = self.client.chat.completions.create(model=self.chatbot_model,
+                 messages=messages)
+         logger.debug(pprint.pformat(response))
+         tokens_used = response.usage.total_tokens
+         response_text = response.choices[0].message.content
+         return response_text, tokens_used
+ 
+     def full_query(self, messages, max_tokens):
+         if max_tokens != None:
+             response = self.client.chat.completions.create(model=self.chatbot_model,
+                 messages=messages,
+                 max_tokens=max_tokens)
+         else:
+             response = self.client.chat.completions.create(model=self.chatbot_model,
+                 messages=messages)
+         logger.debug(pprint.pformat(response))
+         return response
+ 
+     def speech_to_text(self, filepath, audio_format: cloudlanguagetools.options.AudioFormat):
+ 
+         if audio_format in [cloudlanguagetools.options.AudioFormat.ogg_opus, cloudlanguagetools.options.AudioFormat.ogg_vorbis]:
+             # need to convert to wav first
+             sound = pydub.AudioSegment.from_ogg(filepath)
+             wav_tempfile = tempfile.NamedTemporaryFile(prefix='cloudlanguagetools_OpenAI_speech_to_text', suffix='.wav')
+             sound.export(wav_tempfile.name, format="wav")
+             filepath = wav_tempfile.name
+ 
+         logger.debug(f'opening file {filepath}')
+         audio_file= open(filepath, "rb")
+         transcript = self.client.audio.transcriptions.create(model="whisper-1", file=audio_file)
+         return transcript.text
+ 
+ 
+     def get_tts_voice_list(self) -> List[OpenAIVoice]:
+         result = []
+ 
+         for audio_language in TTS_SUPPORTED_LANGUAGES:
              result.extend([
                  OpenAIVoice('alloy', audio_language, cloudlanguagetools.constants.Gender.Female),
                  OpenAIVoice('echo', audio_language, cloudlanguagetools.constants.Gender.Male),
@@ -182,6 +184,32 @@ class OpenAIService(cloudlanguagetools.service.Service):
              ])
          return result
  
+     def build_tts_voice_v3(self, voice_name, gender):
+         return cloudlanguagetools.ttsvoice.TtsVoice_v3(
+             name=voice_name,
+             voice_key={
+                 'name': voice_name
+             },
+             options=VOICE_OPTIONS,
+             service=cloudlanguagetools.constants.Service.OpenAI,
+             gender=gender,
+             audio_languages=TTS_SUPPORTED_LANGUAGES,
+             service_fee=cloudlanguagetools.constants.ServiceFee.paid
+         )
+ 
+     def get_tts_voice_list_v3(self) -> List[cloudlanguagetools.ttsvoice.TtsVoice_v3]:
+         # returns list of TtsVoice_v3
+ 
+         result = [
+             self.build_tts_voice_v3('alloy', cloudlanguagetools.constants.Gender.Female),
+             self.build_tts_voice_v3('echo', cloudlanguagetools.constants.Gender.Male),
+             self.build_tts_voice_v3('fable', cloudlanguagetools.constants.Gender.Female),
+             self.build_tts_voice_v3('onyx', cloudlanguagetools.constants.Gender.Male),
+             self.build_tts_voice_v3('nova', cloudlanguagetools.constants.Gender.Female),
+             self.build_tts_voice_v3('shimmer', cloudlanguagetools.constants.Gender.Female)
+         ]
+         return result
+ 
      def get_tts_audio(self, text, voice_key, options):
          # https://platform.openai.com/docs/guides/text-to-speech
          # https://platform.openai.com/docs/api-reference/audio/createSpeech?lang=python
cloudlanguagetools/service.py
@@ -1,8 +1,10 @@
  import requests
  import tempfile
  import logging
+ from typing import List
  
  import cloudlanguagetools.constants
+ import cloudlanguagetools.ttsvoice
  
  logger = logging.getLogger(__name__)
  
@@ -40,6 +42,20 @@ class Service():
      def get_tts_voice_list(self):
          return []
  
+     def get_tts_voice_list_v3(self) -> List[cloudlanguagetools.ttsvoice.TtsVoice_v3]:
+         # the default implementation will convert list of voices to list of TtsVoice_v3
+         voices = self.get_tts_voice_list()
+         voices_v3 = [cloudlanguagetools.ttsvoice.TtsVoice_v3(
+             name=voice.get_voice_shortname(),
+             voice_key=voice.get_voice_key(),
+             options=voice.get_options(),
+             service=voice.service,
+             gender=voice.get_gender(),
+             audio_languages=[voice.audio_language],
+             service_fee=voice.service_fee
+         ) for voice in voices]
+         return voices_v3
+ 
      def get_translation_language_list(self):
          return []
  
cloudlanguagetools/servicemanager.py
@@ -152,6 +152,14 @@ class ServiceManager():
          tts_voice_list = self.get_tts_voice_list()
          return [voice.json_obj() for voice in tts_voice_list]
  
+     @cachetools.cached(cache=cachetools.TTLCache(maxsize=1024, ttl=cloudlanguagetools.constants.TTLCacheTimeout))
+     def get_tts_voice_list_v3(self):
+         result = []
+         for key, service in self.services.items():
+             logging.info(f'retrieving voice list from {key}')
+             result.extend(service.get_tts_voice_list_v3())
+         return result
+ 
      @cachetools.cached(cache=cachetools.TTLCache(maxsize=1024, ttl=cloudlanguagetools.constants.TTLCacheTimeout))
      def get_translation_language_list(self) -> List[cloudlanguagetools.translationlanguage.TranslationLanguage]:
          result = []
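Putting the pieces together, the aggregated v3 list can be consumed directly by callers. A hedged usage sketch, assuming `manager` is an already-configured ServiceManager instance:

    import cloudlanguagetools.languages

    # every v3 voice able to speak French, across all configured services
    voices = manager.get_tts_voice_list_v3()
    french_voices = [voice for voice in voices
                     if cloudlanguagetools.languages.AudioLanguage.fr_FR in voice.audio_languages]
    for voice in french_voices:
        print(voice.service.name, voice.name)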
cloudlanguagetools/ttsvoice.py
@@ -1,4 +1,8 @@
  import json
+ import dataclasses
+ from typing import List, Dict, Any
+ import cloudlanguagetools.constants
+ import cloudlanguagetools.languages
  
  class TtsVoice():
      def __init__(self):
@@ -34,4 +38,16 @@ class TtsVoice():
          }
  
      def __repr__(self):
-         return json.dumps(self.json_obj(), indent=4, sort_keys=True, ensure_ascii=False)
+         return json.dumps(self.json_obj(), indent=4, sort_keys=True, ensure_ascii=False)
+ 
+ # this class is used with API version 3
+ # support for multilingual voices
+ @dataclasses.dataclass
+ class TtsVoice_v3:
+     name: str
+     voice_key: Dict[str, Any]
+     options: Dict[str, Dict[str, Any]]
+     service: cloudlanguagetools.constants.Service
+     gender: cloudlanguagetools.constants.Gender
+     audio_languages: List[cloudlanguagetools.languages.AudioLanguage]
+     service_fee: cloudlanguagetools.constants.ServiceFee
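Because TtsVoice_v3 is a plain dataclass, serialization is left to the caller; one possible approach is dataclasses.asdict plus explicit enum-to-name conversion. A minimal sketch (this helper is an assumption for illustration, not part of the release):

    import dataclasses
    import json
    import cloudlanguagetools.ttsvoice

    def voice_v3_to_json(voice: cloudlanguagetools.ttsvoice.TtsVoice_v3) -> str:
        # asdict leaves enum members untouched, so map them to their .name values
        data = dataclasses.asdict(voice)
        data['service'] = voice.service.name
        data['gender'] = voice.gender.name
        data['service_fee'] = voice.service_fee.name
        data['audio_languages'] = [language.name for language in voice.audio_languages]
        return json.dumps(data, indent=4, ensure_ascii=False)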
cloudlanguagetools.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: cloudlanguagetools
- Version: 11.2.1
+ Version: 11.3.0
  Summary: Interface with various cloud APIs for language processing such as translation, text to speech
  Home-page: https://github.com/Language-Tools/cloud-language-tools-core
  Author: Luc
setup.py
@@ -6,7 +6,7 @@ from setuptools.command.install import install
  # twine upload dist/*
  
  setup(name='cloudlanguagetools',
-     version='11.2.1',
+     version='11.3.0',
      description='Interface with various cloud APIs for language processing such as translation, text to speech',
      long_description=open('README.rst', encoding='utf-8').read(),
      url='https://github.com/Language-Tools/cloud-language-tools-core',