phoonnx 0.1.1a1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
phoonnx/config.py CHANGED
@@ -45,6 +45,7 @@ class PhonemeType(str, Enum):
45
45
  MISAKI = "misaki"
46
46
  ESPEAK = "espeak"
47
47
  GRUUT = "gruut"
48
+ GORUUT = "goruut"
48
49
  EPITRAN = "epitran"
49
50
  BYT5 = "byt5"
50
51
  CHARSIU = "charsiu" # technically same as byt5, but needs special handling for whitespace
@@ -394,7 +395,7 @@ def get_phonemizer(phoneme_type: PhonemeType,
394
395
  from phoonnx.phonemizers import (EpitranPhonemizer, EspeakPhonemizer, OpenPhonemizer, OpenJTaklPhonemizer,
395
396
  ByT5Phonemizer, CharsiuPhonemizer, DeepPhonemizer, PersianPhonemizer,
396
397
  G2pCPhonemizer, G2pMPhonemizer, G2PKPhonemizer, G2PEnPhonemizer,
397
- TransphonePhonemizer, MirandesePhonemizer,
398
+ TransphonePhonemizer, MirandesePhonemizer, GoruutPhonemizer,
398
399
  GruutPhonemizer, GraphemePhonemizer, MantoqPhonemizer, MisakiPhonemizer,
399
400
  KoG2PPhonemizer, PypinyinPhonemizer, PyKakasiPhonemizer, CotoviaPhonemizer,
400
401
  CutletPhonemizer, PhonikudPhonemizer, VIPhonemePhonemizer, XpinyinPhonemizer,
@@ -407,6 +408,8 @@ def get_phonemizer(phoneme_type: PhonemeType,
407
408
  phonemizer = CharsiuPhonemizer(model)
408
409
  elif phoneme_type == PhonemeType.GRUUT:
409
410
  phonemizer = GruutPhonemizer()
411
+ elif phoneme_type == PhonemeType.GORUUT:
412
+ phonemizer = GoruutPhonemizer()
410
413
  elif phoneme_type == PhonemeType.EPITRAN:
411
414
  phonemizer = EpitranPhonemizer()
412
415
  elif phoneme_type == PhonemeType.MISAKI:
@@ -11,7 +11,7 @@ from phoonnx.phonemizers.ja import PyKakasiPhonemizer, CutletPhonemizer, OpenJTa
11
11
  from phoonnx.phonemizers.ko import KoG2PPhonemizer, G2PKPhonemizer
12
12
  from phoonnx.phonemizers.zh import (G2pCPhonemizer, G2pMPhonemizer, PypinyinPhonemizer,
13
13
  XpinyinPhonemizer, JiebaPhonemizer)
14
- from phoonnx.phonemizers.mul import (EspeakPhonemizer, EpitranPhonemizer, MisakiPhonemizer,
14
+ from phoonnx.phonemizers.mul import (EspeakPhonemizer, EpitranPhonemizer, MisakiPhonemizer, GoruutPhonemizer,
15
15
  GruutPhonemizer, ByT5Phonemizer, CharsiuPhonemizer, TransphonePhonemizer)
16
16
  from phoonnx.phonemizers.mwl import MirandesePhonemizer
17
17
 
@@ -22,6 +22,7 @@ Phonemizer = Union[
22
22
  CharsiuPhonemizer,
23
23
  EspeakPhonemizer,
24
24
  GruutPhonemizer,
25
+ GoruutPhonemizer,
25
26
  EpitranPhonemizer,
26
27
  TransphonePhonemizer,
27
28
  MirandesePhonemizer,
@@ -436,6 +436,187 @@ class GruutPhonemizer(BasePhonemizer):
436
436
  return pho.strip()
437
437
 
438
438
 
439
class GoruutPhonemizer(BasePhonemizer):
    """
    A phonemizer class that uses the pygoruut library to convert text into phonemes.
    https://github.com/neurlang/pygoruut/

    Language selection works on goruut language *names* (e.g. ``"English"``),
    not on language codes.  ``get_lang`` translates a caller-supplied code (or
    an already valid non-standard goruut name) into such a name before the
    text is handed to pygoruut.
    """
    # goruut language names that have no entry in ISO639 below; they are
    # accepted verbatim (case-sensitive) by get_lang.
    # NOTE(review): spellings such as 'MinnanTawianese' are reproduced as-is
    # because they are runtime identifiers passed to pygoruut - confirm against
    # the names pygoruut actually accepts before "correcting" them.
    GORUUT_LANGS_NON_STD = [
        'BengaliDhaka', 'BengaliRahr', 'MalayArab', 'VietnameseCentral', 'VietnameseSouthern',
        'EnglishAmerican', 'EnglishBritish', 'NahuatlClassical', 'Hebrew2', 'Hebrew3',
        'MinnanTawianese', 'MinnanHokkien', 'MinnanTawianese2', 'MinnanHokkien2']

    # Language code -> goruut language name.  Keys are mostly ISO 639-1
    # two-letter codes, with three-letter codes for some languages and a few
    # keys (e.g. "cyw"/"cys") that appear to be project-specific - TODO confirm.
    ISO639 = {
        "af": "Afrikaans",
        "am": "Amharic",
        "ar": "Arabic",
        "az": "Azerbaijani",
        "be": "Belarusian",
        "bn": "Bengali",
        "my": "Burmese",
        "ceb": "Cebuano",
        "ce": "Chechen",
        "zh": "ChineseMandarin",
        "cs": "Czech",
        "da": "Danish",
        "nl": "Dutch",
        "dz": "Dzongkha",
        "en": "English",
        "eo": "Esperanto",
        "fa": "Farsi",
        "fi": "Finnish",
        "fr": "French",
        "de": "German",
        "el": "Greek",
        "gu": "Gujarati",
        "ha": "Hausa",
        "he": "Hebrew",
        "hi": "Hindi",
        "hu": "Hungarian",
        "is": "Icelandic",
        "id": "Indonesian",
        "tts": "Isan",
        "it": "Italian",
        "jam": "Jamaican",
        "ja": "Japanese",
        "jv": "Javanese",
        "kk": "Kazakh",
        "ko": "Korean",
        "lb": "Luxembourgish",
        "mk": "Macedonian",
        "ml": "Malayalam",
        "ms": "MalayLatin",
        "mt": "Maltese",
        "mr": "Marathi",
        "mn": "Mongolian",
        "ne": "Nepali",
        "no": "Norwegian",
        "ps": "Pashto",
        "pl": "Polish",
        "pt": "Portuguese",
        "pa": "Punjabi",
        "ro": "Romanian",
        "ru": "Russian",
        "sk": "Slovak",
        "es": "Spanish",
        "sw": "Swahili",
        "sv": "Swedish",
        "ta": "Tamil",
        "te": "Telugu",
        "th": "Thai",
        "bo": "Tibetan",
        "tr": "Turkish",
        "uk": "Ukrainian",
        "ur": "Urdu",
        "ug": "Uyghur",
        "vi": "VietnameseNorthern",
        "zu": "Zulu",
        "hy": "Armenian",
        "eu": "Basque",
        "bg": "Bulgarian",
        "ca": "Catalan",
        "ny": "Chichewa",
        "hr": "Croatian",
        "et": "Estonian",
        "gl": "Galician",
        "ka": "Georgian",
        "km": "KhmerCentral",
        "lo": "Lao",
        "lv": "Latvian",
        "lt": "Lithuanian",
        "sr": "Serbian",
        "tl": "Tagalog",
        "yo": "Yoruba",
        "sq": "Albanian",
        "an": "Aragonese",
        "as": "Assamese",
        "ba": "Bashkir",
        "bpy": "BishnupriyaManipuri",
        "bs": "Bosnian",
        "chr": "Cherokee",
        # NOTE: "cu" is the ISO 639-1 code for Church Slavic, not Chuvash.
        # It is kept only so existing callers keep working; prefer "cv".
        "cu": "Chuvash",
        "gla": "GaelicScottish",
        "gle": "GaelicIrish",
        "kl": "Greenlandic",
        "gn": "Guarani",
        "ht": "HaitianCreole",
        "haw": "Hawaiian",
        "io": "Ido",
        "ia": "Interlingua",
        "kn": "Kannada",
        "quc": "Kiche",
        "kok": "Konkani",
        "ku": "Kurdish",
        "ky": "Kyrgyz",
        "qdb": "LangBelta",
        "ltg": "Latgalian",
        "la": "LatinClassical",
        "lat": "LatinEcclesiastical",
        "lfn": "LinguaFrancaNova",
        "jbo": "Lojban",
        "smj": "LuleSaami",
        "mi": "Maori",
        "nah": "NahuatlCentral",
        "nci": "NahuatlMecayapan",
        "ncz": "NahuatlTetelcingo",
        "nog": "Nogai",
        "om": "Oromo",
        "pap": "Papiamento",
        "qu": "Quechua",
        "qya": "Quenya",
        "tn": "Setswana",
        "shn": "ShanTaiYai",
        "sjn": "Sindarin",
        "sd": "Sindhi",
        "si": "Sinhala",
        "sl": "Slovenian",
        "tt": "Tatar",
        "tk": "Turkmen",
        "uz": "Uzbek",
        "cyw": "WelshNorth",
        "cys": "WelshSouth",
        "yue": "Cantonese",
        # Standard ISO 639-1 codes that were missing from the table.  They are
        # appended (rather than inserted next to their siblings) so the
        # relative order of the pre-existing keys is untouched.
        "cv": "Chuvash",
        "ga": "GaelicIrish",
        "gd": "GaelicScottish",
    }

    def __init__(self, remote_url=None):
        """
        Create the pygoruut backend.

        Args:
            remote_url: Optional value forwarded to ``Pygoruut(api=...)``,
                e.g. ``'https://hashtron.cloud'``.  When ``None`` (default),
                ``Pygoruut()`` is constructed without arguments.
        """
        super().__init__(Alphabet.IPA)
        # Imported here rather than at module level, so pygoruut only has to
        # be installed when this phonemizer is actually instantiated.
        from pygoruut.pygoruut import Pygoruut
        from pygoruut.pygoruut_languages import PygoruutLanguages

        self.pygoruut_langs = PygoruutLanguages()
        if remote_url is not None:
            self.pygoruut = Pygoruut(api=remote_url)
        else:
            self.pygoruut = Pygoruut()

    @classmethod
    def get_lang(cls, target_lang: str) -> str:
        """
        Resolve a language code to the goruut language name used for phonemization.

        Resolution order:
          1. a name listed in ``GORUUT_LANGS_NON_STD`` is returned unchanged
             (exact, case-sensitive match);
          2. ``en-us`` maps to ``EnglishAmerican`` and ``en-gb`` / ``en-uk``
             map to ``EnglishBritish`` (case-insensitive);
          3. anything else is matched against the ``ISO639`` keys via
             ``cls.match_lang`` and the corresponding name is returned.

        Args:
            target_lang (str): A language code, or a non-standard goruut
                language name.

        Returns:
            str: The goruut language name (not a language code).

        Raises:
            ValueError: If the language code is unsupported - presumably
                raised by ``match_lang``, which is defined outside this
                class; verify there.
        """
        if target_lang in cls.GORUUT_LANGS_NON_STD:
            return target_lang

        normalized = target_lang.lower()
        if normalized == "en-us":
            return 'EnglishAmerican'
        if normalized in ("en-gb", "en-uk"):
            return 'EnglishBritish'

        lang = cls.match_lang(target_lang, list(cls.ISO639))
        return cls.ISO639[lang]

    def phonemize_string(self, text: str, lang: str) -> str:
        """
        Phonemize ``text`` with pygoruut.

        Args:
            text (str): The sentence to phonemize.
            lang (str): Language code or goruut language name; resolved
                through ``get_lang`` first.

        Returns:
            str: ``str()`` of whatever ``Pygoruut.phonemize`` returns.
        """
        lang = self.get_lang(lang)
        return str(self.pygoruut.phonemize(language=lang, sentence=text))
618
+
619
+
439
620
  class EpitranPhonemizer(BasePhonemizer):
440
621
  """
441
622
  """
@@ -1178,6 +1359,7 @@ if __name__ == "__main__":
1178
1359
  byt5 = ByT5Phonemizer()
1179
1360
  espeak = EspeakPhonemizer()
1180
1361
  gruut = GruutPhonemizer()
1362
+ goruut = GoruutPhonemizer(remote_url='https://hashtron.cloud')
1181
1363
  epitr = EpitranPhonemizer()
1182
1364
  charsiu = CharsiuPhonemizer()
1183
1365
  misaki = MisakiPhonemizer()
@@ -1194,6 +1376,7 @@ if __name__ == "__main__":
1194
1376
  phonemes1e = charsiu.phonemize(text1, lang)
1195
1377
  phonemes1f = misaki.phonemize(text1, lang)
1196
1378
  phonemes1g = tphone.phonemize(text1, lang)
1379
+ phonemes1h = goruut.phonemize(text1, lang)
1197
1380
  print(f" Espeak Phonemes: {phonemes1}")
1198
1381
  print(f" Gruut Phonemes: {phonemes1b}")
1199
1382
  print(f" byt5 Phonemes: {phonemes1c}")
@@ -1201,6 +1384,7 @@ if __name__ == "__main__":
1201
1384
  print(f" Charsiu Phonemes: {phonemes1e}")
1202
1385
  print(f" Misaki Phonemes: {phonemes1f}")
1203
1386
  print(f" Transphone Phonemes: {phonemes1g}")
1387
+ print(f" Goruut Phonemes: {phonemes1h}")
1204
1388
 
1205
1389
  lang = "nl"
1206
1390
  sentence = "DJ's en bezoekers van Tomorrowland waren woensdagavond dolblij toen het paradepaardje van het festival alsnog opende in Oostenrijk op de Mainstage.\nWant het optreden van Metallica, waar iedereen zo blij mee was, zou hoe dan ook doorgaan, aldus de DJ die het nieuws aankondigde."
@@ -1,5 +1,4 @@
1
1
  from phoonnx.phonemizers.base import BasePhonemizer, Alphabet
2
- from mwl_phonemizer import CRFOrthoCorrector
3
2
 
4
3
 
5
4
  class MirandesePhonemizer(BasePhonemizer):
@@ -7,6 +6,7 @@ class MirandesePhonemizer(BasePhonemizer):
7
6
 
8
7
  def __init__(self):
9
8
  super().__init__(Alphabet.IPA)
9
+ from mwl_phonemizer import CRFOrthoCorrector
10
10
  self.pho = CRFOrthoCorrector()
11
11
 
12
12
  @classmethod
phoonnx/version.py CHANGED
@@ -1,8 +1,8 @@
1
1
  # START_VERSION_BLOCK
2
2
  VERSION_MAJOR = 0
3
- VERSION_MINOR = 1
4
- VERSION_BUILD = 1
5
- VERSION_ALPHA = 1
3
+ VERSION_MINOR = 2
4
+ VERSION_BUILD = 0
5
+ VERSION_ALPHA = 0
6
6
  # END_VERSION_BLOCK
7
7
 
8
8
  VERSION_STR = f"{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_BUILD}"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phoonnx
3
- Version: 0.1.1a1
3
+ Version: 0.2.0
4
4
  Home-page: https://github.com/TigreGotico/phoonnx
5
5
  Author: JarbasAi
6
6
  Author-email: jarbasai@mailfence.com
@@ -1,14 +1,14 @@
1
1
  phoonnx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- phoonnx/config.py,sha256=aGX7PWq3_nLdn3zCp0orMsdNQaiBfjZZmvGWd_VU2KQ,19858
2
+ phoonnx/config.py,sha256=DKgsU03g8jrAuMcVqbu-w3MWPXOUihFtRnavg6WGQ1Y,19983
3
3
  phoonnx/phoneme_ids.py,sha256=FiNgZwV6naEsBh6XwFLh3_FyOgPiCsK9qo7S0v-CmI4,13667
4
4
  phoonnx/util.py,sha256=XSjFEoqSFcujFTHxednacgC9GrSYyF-Il5L6Utmxmu4,25909
5
- phoonnx/version.py,sha256=Ll52GTfYXTiW5lVvWRVNSo3-ay5hISE7zYWp2aFzXvQ,237
5
+ phoonnx/version.py,sha256=PnyMSsZaXZdPjuXrw8HLZfaosKeET2xuCUkvdWi6vRI,237
6
6
  phoonnx/voice.py,sha256=JXjmbrhJd4mmTiLgz4O_Pa5_rKGUC9xzuBfqxYDw3Mg,19420
7
7
  phoonnx/locale/ca/phonetic_spellings.txt,sha256=igv3t7jxLSRE5GHsdn57HOpxiWNcEmECPql6m02wbO0,47
8
8
  phoonnx/locale/en/phonetic_spellings.txt,sha256=xGQlWOABLzbttpQvopl9CU-NnwEJRqKx8iuylsdUoQA,27
9
9
  phoonnx/locale/gl/phonetic_spellings.txt,sha256=igv3t7jxLSRE5GHsdn57HOpxiWNcEmECPql6m02wbO0,47
10
10
  phoonnx/locale/pt/phonetic_spellings.txt,sha256=KntS8QMynEJ5A3Clvcjq4qlmL-ThSbhfD6v0nKSrlqs,49
11
- phoonnx/phonemizers/__init__.py,sha256=NcI_iwtWBD2hIFTfEzLo-XOhVvmZpwAVFC3sqIFS1MY,1776
11
+ phoonnx/phonemizers/__init__.py,sha256=Ryregys3nWJrXJrOEJA-6xzOiXZWOCIwtYgd4KLW6UE,1816
12
12
  phoonnx/phonemizers/ar.py,sha256=xxILq5iyH0kcI-NqFfRK4abGtpdUbykBjt_dZmPuO2w,3216
13
13
  phoonnx/phonemizers/base.py,sha256=FHvAsvSjAl_oSa1GoeEi96CQ_JO_xkKXWq0ukuMxiuo,8660
14
14
  phoonnx/phonemizers/en.py,sha256=N2SVoVhplQao7Ej5TXbxJU-YkAgkY0Fr9iYBFnsjFSE,9271
@@ -17,8 +17,8 @@ phoonnx/phonemizers/gl.py,sha256=jEFKJJViHufZtB7lGNwWQCdWGiNKDCVZ_GRYXTaw_2c,661
17
17
  phoonnx/phonemizers/he.py,sha256=49OFS34wSFvvR9B3z2bGSzSLmlIvnn2HtkHBOkHS9Ns,1383
18
18
  phoonnx/phonemizers/ja.py,sha256=Xojsrt715ihnIiEk9K6giYqDo9Iykw-SHfIidrHtHSU,3834
19
19
  phoonnx/phonemizers/ko.py,sha256=kwWoOFqanCB8kv2JRx17A0hP78P1wbXlX6e8VBn1ezQ,2989
20
- phoonnx/phonemizers/mul.py,sha256=-h6uN_laUD-unNRGThzjyiOZpN6pSl4uinCndg5-0TA,94184
21
- phoonnx/phonemizers/mwl.py,sha256=9bwKmKQ-fXQQKK04fmKbT9QiraD0r3rKdNFZkWZP-eI,999
20
+ phoonnx/phonemizers/mul.py,sha256=Y_M5BUY4Yka6Ba62Eea1HvgC6FTrrigaulo4KNRi1vE,99580
21
+ phoonnx/phonemizers/mwl.py,sha256=xAOB1Bz_uVO14WbYlSFgvPxsezxzUKFwy6GT2mDgP2w,1007
22
22
  phoonnx/phonemizers/vi.py,sha256=_XJc-Xeawr1Lxr7o8mE_hJao1aGcj4g01XYAOxC_Scg,1311
23
23
  phoonnx/phonemizers/zh.py,sha256=88Ywq8h9LDanlyz8RHjRSCY_PRK_Dq808tBADyrgaP8,9657
24
24
  phoonnx/thirdparty/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -83,7 +83,7 @@ phoonnx_train/vits/utils.py,sha256=exiyrtPHbnnGvcHWSbaH9-gR6srH5ZPHlKiqV2IHUrQ,4
83
83
  phoonnx_train/vits/wavfile.py,sha256=oQZiTIrdw0oLTbcVwKfGXye1WtKte6qK_52qVwiMvfc,26396
84
84
  phoonnx_train/vits/monotonic_align/__init__.py,sha256=5IdAOD1Z7UloMb6d_9NRFsXoNIjEQ3h9mvOSh_AtO3k,636
85
85
  phoonnx_train/vits/monotonic_align/setup.py,sha256=0K5iJJ2mKIklx6ncEfCQS34skm5hHPiz9vRlQEvevvY,266
86
- phoonnx-0.1.1a1.dist-info/METADATA,sha256=HkE1TTQPlxfTGOzlG55_lO4BH_4mjNlhL7vr_UrAwyI,8250
87
- phoonnx-0.1.1a1.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
88
- phoonnx-0.1.1a1.dist-info/top_level.txt,sha256=ZrnHXe-4HqbOSX6fbdY-JiP7YEu2Bok9T0ji351MrmM,22
89
- phoonnx-0.1.1a1.dist-info/RECORD,,
86
+ phoonnx-0.2.0.dist-info/METADATA,sha256=dusaBmyR3gLPkJJ4ZHCre1N_8AWsY6W-Q3kNEpImGjA,8248
87
+ phoonnx-0.2.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
88
+ phoonnx-0.2.0.dist-info/top_level.txt,sha256=ZrnHXe-4HqbOSX6fbdY-JiP7YEu2Bok9T0ji351MrmM,22
89
+ phoonnx-0.2.0.dist-info/RECORD,,