phoonnx-0.1.0a3-py3-none-any.whl → phoonnx-0.2.0a1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
phoonnx/config.py CHANGED
@@ -45,9 +45,12 @@ class PhonemeType(str, Enum):
45
45
  MISAKI = "misaki"
46
46
  ESPEAK = "espeak"
47
47
  GRUUT = "gruut"
48
+ GORUUT = "goruut"
48
49
  EPITRAN = "epitran"
49
50
  BYT5 = "byt5"
50
51
  CHARSIU = "charsiu" # technically same as byt5, but needs special handling for whitespace
52
+ TRANSPHONE = "transphone"
53
+ MIRANDESE = "mwl_phonemizer"
51
54
 
52
55
  DEEPPHONEMIZER = "deepphonemizer" # en
53
56
  OPENPHONEMIZER = "openphonemizer" # en
@@ -392,6 +395,7 @@ def get_phonemizer(phoneme_type: PhonemeType,
392
395
  from phoonnx.phonemizers import (EpitranPhonemizer, EspeakPhonemizer, OpenPhonemizer, OpenJTaklPhonemizer,
393
396
  ByT5Phonemizer, CharsiuPhonemizer, DeepPhonemizer, PersianPhonemizer,
394
397
  G2pCPhonemizer, G2pMPhonemizer, G2PKPhonemizer, G2PEnPhonemizer,
398
+ TransphonePhonemizer, MirandesePhonemizer, GoruutPhonemizer,
395
399
  GruutPhonemizer, GraphemePhonemizer, MantoqPhonemizer, MisakiPhonemizer,
396
400
  KoG2PPhonemizer, PypinyinPhonemizer, PyKakasiPhonemizer, CotoviaPhonemizer,
397
401
  CutletPhonemizer, PhonikudPhonemizer, VIPhonemePhonemizer, XpinyinPhonemizer,
@@ -404,10 +408,16 @@ def get_phonemizer(phoneme_type: PhonemeType,
404
408
  phonemizer = CharsiuPhonemizer(model)
405
409
  elif phoneme_type == PhonemeType.GRUUT:
406
410
  phonemizer = GruutPhonemizer()
411
+ elif phoneme_type == PhonemeType.GORUUT:
412
+ phonemizer = GoruutPhonemizer()
407
413
  elif phoneme_type == PhonemeType.EPITRAN:
408
414
  phonemizer = EpitranPhonemizer()
409
415
  elif phoneme_type == PhonemeType.MISAKI:
410
416
  phonemizer = MisakiPhonemizer()
417
+ elif phoneme_type == PhonemeType.TRANSPHONE:
418
+ phonemizer = TransphonePhonemizer()
419
+ elif phoneme_type == PhonemeType.MIRANDESE:
420
+ phonemizer = MirandesePhonemizer()
411
421
  elif phoneme_type == PhonemeType.DEEPPHONEMIZER:
412
422
  phonemizer = DeepPhonemizer(model)
413
423
  elif phoneme_type == PhonemeType.OPENPHONEMIZER:
@@ -11,8 +11,9 @@ from phoonnx.phonemizers.ja import PyKakasiPhonemizer, CutletPhonemizer, OpenJTa
11
11
  from phoonnx.phonemizers.ko import KoG2PPhonemizer, G2PKPhonemizer
12
12
  from phoonnx.phonemizers.zh import (G2pCPhonemizer, G2pMPhonemizer, PypinyinPhonemizer,
13
13
  XpinyinPhonemizer, JiebaPhonemizer)
14
- from phoonnx.phonemizers.mul import (EspeakPhonemizer, EpitranPhonemizer, MisakiPhonemizer,
15
- GruutPhonemizer, ByT5Phonemizer, CharsiuPhonemizer)
14
+ from phoonnx.phonemizers.mul import (EspeakPhonemizer, EpitranPhonemizer, MisakiPhonemizer, GoruutPhonemizer,
15
+ GruutPhonemizer, ByT5Phonemizer, CharsiuPhonemizer, TransphonePhonemizer)
16
+ from phoonnx.phonemizers.mwl import MirandesePhonemizer
16
17
 
17
18
  Phonemizer = Union[
18
19
  MisakiPhonemizer,
@@ -21,7 +22,10 @@ Phonemizer = Union[
21
22
  CharsiuPhonemizer,
22
23
  EspeakPhonemizer,
23
24
  GruutPhonemizer,
25
+ GoruutPhonemizer,
24
26
  EpitranPhonemizer,
27
+ TransphonePhonemizer,
28
+ MirandesePhonemizer,
25
29
  OpenJTaklPhonemizer,
26
30
  CutletPhonemizer,
27
31
  PyKakasiPhonemizer,
@@ -436,6 +436,187 @@ class GruutPhonemizer(BasePhonemizer):
436
436
  return pho.strip()
437
437
 
438
438
 
439
class GoruutPhonemizer(BasePhonemizer):
    """
    A phonemizer class that uses the pygoruut library to convert text into phonemes.
    https://github.com/neurlang/pygoruut/

    Language codes are translated to the language names passed to pygoruut
    through the ``ISO639`` table; the names listed in ``GORUUT_LANGS_NON_STD``
    have no code in that table and are accepted verbatim.
    """
    # pygoruut language names without an entry in ISO639; callers pass these
    # names directly instead of a language code.
    GORUUT_LANGS_NON_STD = [
        'BengaliDhaka', 'BengaliRahr', 'MalayArab', 'VietnameseCentral', 'VietnameseSouthern',
        'EnglishAmerican', 'EnglishBritish', 'NahuatlClassical', 'Hebrew2', 'Hebrew3',
        'MinnanTawianese', 'MinnanHokkien', 'MinnanTawianese2', 'MinnanHokkien2']
    # language code -> pygoruut language name
    ISO639 = {
        "af": "Afrikaans",
        "am": "Amharic",
        "ar": "Arabic",
        "az": "Azerbaijani",
        "be": "Belarusian",
        "bn": "Bengali",
        "my": "Burmese",
        "ceb": "Cebuano",
        "ce": "Chechen",
        "zh": "ChineseMandarin",
        "cs": "Czech",
        "da": "Danish",
        "nl": "Dutch",
        "dz": "Dzongkha",
        "en": "English",
        "eo": "Esperanto",
        "fa": "Farsi",
        "fi": "Finnish",
        "fr": "French",
        "de": "German",
        "el": "Greek",
        "gu": "Gujarati",
        "ha": "Hausa",
        "he": "Hebrew",
        "hi": "Hindi",
        "hu": "Hungarian",
        "is": "Icelandic",
        "id": "Indonesian",
        "tts": "Isan",
        "it": "Italian",
        "jam": "Jamaican",
        "ja": "Japanese",
        "jv": "Javanese",
        "kk": "Kazakh",
        "ko": "Korean",
        "lb": "Luxembourgish",
        "mk": "Macedonian",
        "ml": "Malayalam",
        "ms": "MalayLatin",
        "mt": "Maltese",
        "mr": "Marathi",
        "mn": "Mongolian",
        "ne": "Nepali",
        "no": "Norwegian",
        "ps": "Pashto",
        "pl": "Polish",
        "pt": "Portuguese",
        "pa": "Punjabi",
        "ro": "Romanian",
        "ru": "Russian",
        "sk": "Slovak",
        "es": "Spanish",
        "sw": "Swahili",
        "sv": "Swedish",
        "ta": "Tamil",
        "te": "Telugu",
        "th": "Thai",
        "bo": "Tibetan",
        "tr": "Turkish",
        "uk": "Ukrainian",
        "ur": "Urdu",
        "ug": "Uyghur",
        "vi": "VietnameseNorthern",
        "zu": "Zulu",
        "hy": "Armenian",
        "eu": "Basque",
        "bg": "Bulgarian",
        "ca": "Catalan",
        "ny": "Chichewa",
        "hr": "Croatian",
        "et": "Estonian",
        "gl": "Galician",
        "ka": "Georgian",
        "km": "KhmerCentral",
        "lo": "Lao",
        "lv": "Latvian",
        "lt": "Lithuanian",
        "sr": "Serbian",
        "tl": "Tagalog",
        "yo": "Yoruba",
        "sq": "Albanian",
        "an": "Aragonese",
        "as": "Assamese",
        "ba": "Bashkir",
        "bpy": "BishnupriyaManipuri",
        "bs": "Bosnian",
        "chr": "Cherokee",
        # "cv" is the ISO 639-1 code for Chuvash.
        "cv": "Chuvash",
        # NOTE(review): "cu" is the ISO 639-1 code for Church Slavic, not
        # Chuvash; kept only so callers already passing "cu" keep working.
        "cu": "Chuvash",
        "gla": "GaelicScottish",
        "gle": "GaelicIrish",
        "kl": "Greenlandic",
        "gn": "Guarani",
        "ht": "HaitianCreole",
        "haw": "Hawaiian",
        "io": "Ido",
        "ia": "Interlingua",
        "kn": "Kannada",
        "quc": "Kiche",
        "kok": "Konkani",
        "ku": "Kurdish",
        "ky": "Kyrgyz",
        "qdb": "LangBelta",
        "ltg": "Latgalian",
        "la": "LatinClassical",
        "lat": "LatinEcclesiastical",
        "lfn": "LinguaFrancaNova",
        "jbo": "Lojban",
        "smj": "LuleSaami",
        "mi": "Maori",
        "nah": "NahuatlCentral",
        "nci": "NahuatlMecayapan",
        "ncz": "NahuatlTetelcingo",
        "nog": "Nogai",
        "om": "Oromo",
        "pap": "Papiamento",
        "qu": "Quechua",
        "qya": "Quenya",
        "tn": "Setswana",
        "shn": "ShanTaiYai",
        "sjn": "Sindarin",
        "sd": "Sindhi",
        "si": "Sinhala",
        "sl": "Slovenian",
        "tt": "Tatar",
        "tk": "Turkmen",
        "uz": "Uzbek",
        "cyw": "WelshNorth",
        "cys": "WelshSouth",
        "yue": "Cantonese"
    }

    def __init__(self, remote_url=None):
        """
        Create the pygoruut client.

        Args:
            remote_url: Optional URL forwarded to ``Pygoruut`` as its ``api``
                argument (e.g. 'https://hashtron.cloud'). When None,
                ``Pygoruut`` is constructed with no arguments.
        """
        super().__init__(Alphabet.IPA)
        # Imported here rather than at module level so that importing this
        # module does not itself require pygoruut.
        from pygoruut.pygoruut import Pygoruut
        from pygoruut.pygoruut_languages import PygoruutLanguages

        self.pygoruut_langs = PygoruutLanguages()
        if remote_url is not None:
            self.pygoruut = Pygoruut(api=remote_url)
        else:
            self.pygoruut = Pygoruut()

    @classmethod
    def get_lang(cls, target_lang: str) -> str:
        """
        Validates a language code and returns the matching pygoruut language name.

        Args:
            target_lang (str): A language code (e.g. "pt", "en-US") or one of
                the names in ``GORUUT_LANGS_NON_STD``.

        Returns:
            str: The pygoruut language name for ``target_lang``.

        Raises:
            ValueError: If the language code is unsupported (presumably raised
                by ``match_lang`` -- confirm against the base class).
        """
        # Non-standard pygoruut names are passed through untouched.
        if target_lang in cls.GORUUT_LANGS_NON_STD:
            return target_lang
        # Accept both "en-US" and "en_US" spellings for the regional variants.
        normalized = target_lang.lower().replace("_", "-")
        if normalized == "en-us":
            return 'EnglishAmerican'
        if normalized in ("en-gb", "en-uk"):
            return 'EnglishBritish'
        lang = cls.match_lang(target_lang, list(cls.ISO639))
        return cls.ISO639[lang]

    def phonemize_string(self, text: str, lang: str) -> str:
        """
        Phonemize ``text`` with pygoruut.

        Args:
            text (str): The sentence to phonemize.
            lang (str): Language code or pygoruut language name, resolved
                through ``get_lang``.

        Returns:
            str: ``str()`` of the object returned by ``Pygoruut.phonemize``.
        """
        lang = self.get_lang(lang)
        return str(self.pygoruut.phonemize(language=lang, sentence=text))
618
+
619
+
439
620
  class EpitranPhonemizer(BasePhonemizer):
440
621
  """
441
622
  """
@@ -1178,6 +1359,7 @@ if __name__ == "__main__":
1178
1359
  byt5 = ByT5Phonemizer()
1179
1360
  espeak = EspeakPhonemizer()
1180
1361
  gruut = GruutPhonemizer()
1362
+ goruut = GoruutPhonemizer(remote_url='https://hashtron.cloud')
1181
1363
  epitr = EpitranPhonemizer()
1182
1364
  charsiu = CharsiuPhonemizer()
1183
1365
  misaki = MisakiPhonemizer()
@@ -1194,6 +1376,7 @@ if __name__ == "__main__":
1194
1376
  phonemes1e = charsiu.phonemize(text1, lang)
1195
1377
  phonemes1f = misaki.phonemize(text1, lang)
1196
1378
  phonemes1g = tphone.phonemize(text1, lang)
1379
+ phonemes1h = goruut.phonemize(text1, lang)
1197
1380
  print(f" Espeak Phonemes: {phonemes1}")
1198
1381
  print(f" Gruut Phonemes: {phonemes1b}")
1199
1382
  print(f" byt5 Phonemes: {phonemes1c}")
@@ -1201,6 +1384,7 @@ if __name__ == "__main__":
1201
1384
  print(f" Charsiu Phonemes: {phonemes1e}")
1202
1385
  print(f" Misaki Phonemes: {phonemes1f}")
1203
1386
  print(f" Transphone Phonemes: {phonemes1g}")
1387
+ print(f" Goruut Phonemes: {phonemes1h}")
1204
1388
 
1205
1389
  lang = "nl"
1206
1390
  sentence = "DJ's en bezoekers van Tomorrowland waren woensdagavond dolblij toen het paradepaardje van het festival alsnog opende in Oostenrijk op de Mainstage.\nWant het optreden van Metallica, waar iedereen zo blij mee was, zou hoe dan ook doorgaan, aldus de DJ die het nieuws aankondigde."
@@ -0,0 +1,36 @@
1
from phoonnx.phonemizers.base import BasePhonemizer, Alphabet


class MirandesePhonemizer(BasePhonemizer):
    """
    Phonemizer for Mirandese ("mwl") backed by the mwl_phonemizer package.

    The base class is initialised with ``Alphabet.IPA``.
    """
    _LANGS = ["mwl"]

    def __init__(self):
        """
        Build the underlying ``CRFOrthoCorrector``.

        Raises:
            ImportError: If the mwl_phonemizer package is not installed.
        """
        super().__init__(Alphabet.IPA)
        # Imported lazily: mwl-phonemizer is only pulled in by the optional
        # "mwl" extra, while this module is imported unconditionally by
        # phoonnx.phonemizers. A module-level import would make the whole
        # phonemizers package unimportable when the extra is absent.
        from mwl_phonemizer import CRFOrthoCorrector
        self.pho = CRFOrthoCorrector()

    @classmethod
    def get_lang(cls, target_lang: str) -> str:
        """
        Validates and returns the closest supported language code.

        Args:
            target_lang (str): The language code to validate.

        Returns:
            str: The validated language code.

        Raises:
            ValueError: If the language code is unsupported (presumably raised
                by ``match_lang`` -- confirm against the base class).
        """
        return cls.match_lang(target_lang, cls._LANGS)

    def phonemize_string(self, text: str, lang: str) -> str:
        """
        Phonemize a Mirandese sentence.

        Args:
            text (str): The sentence to phonemize.
            lang (str): Language code; only validated, since the backend is
                called with the text alone.

        Returns:
            str: The result of ``CRFOrthoCorrector.phonemize_sentence``.
        """
        # Validate language is supported; the matched code itself is not needed.
        self.get_lang(lang)
        return self.pho.phonemize_sentence(text)


if __name__ == "__main__":
    pho = MirandesePhonemizer()
    print(pho.phonemize_string("ls", "mwl"))
phoonnx/version.py CHANGED
@@ -1,10 +1,10 @@
1
1
  # START_VERSION_BLOCK
2
2
  VERSION_MAJOR = 0
3
- VERSION_MINOR = 1
3
+ VERSION_MINOR = 2
4
4
  VERSION_BUILD = 0
5
- VERSION_ALPHA = 3
5
+ VERSION_ALPHA = 1
6
6
  # END_VERSION_BLOCK
7
7
 
8
8
  VERSION_STR = f"{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_BUILD}"
9
9
  if VERSION_ALPHA:
10
- VERSION_STR += f"a{VERSION_ALPHA}"
10
+ VERSION_STR += f"a{VERSION_ALPHA}"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phoonnx
3
- Version: 0.1.0a3
3
+ Version: 0.2.0a1
4
4
  Home-page: https://github.com/TigreGotico/phoonnx
5
5
  Author: JarbasAi
6
6
  Author-email: jarbasai@mailfence.com
@@ -150,6 +150,8 @@ Provides-Extra: ms
150
150
  Requires-Dist: epitran; extra == "ms"
151
151
  Provides-Extra: mt
152
152
  Requires-Dist: epitran; extra == "mt"
153
+ Provides-Extra: mwl
154
+ Requires-Dist: mwl-phonemizer; extra == "mwl"
153
155
  Provides-Extra: my
154
156
  Requires-Dist: epitran; extra == "my"
155
157
  Provides-Extra: nan
@@ -1,14 +1,14 @@
1
1
  phoonnx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- phoonnx/config.py,sha256=IYhC-kYjLgYmBroId6YeOE2Vp7SMNGtiGqIIe_09NJk,19531
2
+ phoonnx/config.py,sha256=DKgsU03g8jrAuMcVqbu-w3MWPXOUihFtRnavg6WGQ1Y,19983
3
3
  phoonnx/phoneme_ids.py,sha256=FiNgZwV6naEsBh6XwFLh3_FyOgPiCsK9qo7S0v-CmI4,13667
4
4
  phoonnx/util.py,sha256=XSjFEoqSFcujFTHxednacgC9GrSYyF-Il5L6Utmxmu4,25909
5
- phoonnx/version.py,sha256=WnY5J2wtSTore9QbKwfk04gQhBsYq4HVmV5CBjEhGnk,236
5
+ phoonnx/version.py,sha256=pjMhhxCQpOnjMJmb_1XE4wD6sGrE3QOU71jk4mAIZTQ,237
6
6
  phoonnx/voice.py,sha256=JXjmbrhJd4mmTiLgz4O_Pa5_rKGUC9xzuBfqxYDw3Mg,19420
7
7
  phoonnx/locale/ca/phonetic_spellings.txt,sha256=igv3t7jxLSRE5GHsdn57HOpxiWNcEmECPql6m02wbO0,47
8
8
  phoonnx/locale/en/phonetic_spellings.txt,sha256=xGQlWOABLzbttpQvopl9CU-NnwEJRqKx8iuylsdUoQA,27
9
9
  phoonnx/locale/gl/phonetic_spellings.txt,sha256=igv3t7jxLSRE5GHsdn57HOpxiWNcEmECPql6m02wbO0,47
10
10
  phoonnx/locale/pt/phonetic_spellings.txt,sha256=KntS8QMynEJ5A3Clvcjq4qlmL-ThSbhfD6v0nKSrlqs,49
11
- phoonnx/phonemizers/__init__.py,sha256=QGBZk0QUgJdg2MwUWY9Kpk6ucwrEJYtHb07YcNvXCV4,1647
11
+ phoonnx/phonemizers/__init__.py,sha256=Ryregys3nWJrXJrOEJA-6xzOiXZWOCIwtYgd4KLW6UE,1816
12
12
  phoonnx/phonemizers/ar.py,sha256=xxILq5iyH0kcI-NqFfRK4abGtpdUbykBjt_dZmPuO2w,3216
13
13
  phoonnx/phonemizers/base.py,sha256=FHvAsvSjAl_oSa1GoeEi96CQ_JO_xkKXWq0ukuMxiuo,8660
14
14
  phoonnx/phonemizers/en.py,sha256=N2SVoVhplQao7Ej5TXbxJU-YkAgkY0Fr9iYBFnsjFSE,9271
@@ -17,7 +17,8 @@ phoonnx/phonemizers/gl.py,sha256=jEFKJJViHufZtB7lGNwWQCdWGiNKDCVZ_GRYXTaw_2c,661
17
17
  phoonnx/phonemizers/he.py,sha256=49OFS34wSFvvR9B3z2bGSzSLmlIvnn2HtkHBOkHS9Ns,1383
18
18
  phoonnx/phonemizers/ja.py,sha256=Xojsrt715ihnIiEk9K6giYqDo9Iykw-SHfIidrHtHSU,3834
19
19
  phoonnx/phonemizers/ko.py,sha256=kwWoOFqanCB8kv2JRx17A0hP78P1wbXlX6e8VBn1ezQ,2989
20
- phoonnx/phonemizers/mul.py,sha256=-h6uN_laUD-unNRGThzjyiOZpN6pSl4uinCndg5-0TA,94184
20
+ phoonnx/phonemizers/mul.py,sha256=Y_M5BUY4Yka6Ba62Eea1HvgC6FTrrigaulo4KNRi1vE,99580
21
+ phoonnx/phonemizers/mwl.py,sha256=9bwKmKQ-fXQQKK04fmKbT9QiraD0r3rKdNFZkWZP-eI,999
21
22
  phoonnx/phonemizers/vi.py,sha256=_XJc-Xeawr1Lxr7o8mE_hJao1aGcj4g01XYAOxC_Scg,1311
22
23
  phoonnx/phonemizers/zh.py,sha256=88Ywq8h9LDanlyz8RHjRSCY_PRK_Dq808tBADyrgaP8,9657
23
24
  phoonnx/thirdparty/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -82,7 +83,7 @@ phoonnx_train/vits/utils.py,sha256=exiyrtPHbnnGvcHWSbaH9-gR6srH5ZPHlKiqV2IHUrQ,4
82
83
  phoonnx_train/vits/wavfile.py,sha256=oQZiTIrdw0oLTbcVwKfGXye1WtKte6qK_52qVwiMvfc,26396
83
84
  phoonnx_train/vits/monotonic_align/__init__.py,sha256=5IdAOD1Z7UloMb6d_9NRFsXoNIjEQ3h9mvOSh_AtO3k,636
84
85
  phoonnx_train/vits/monotonic_align/setup.py,sha256=0K5iJJ2mKIklx6ncEfCQS34skm5hHPiz9vRlQEvevvY,266
85
- phoonnx-0.1.0a3.dist-info/METADATA,sha256=3U1Ea0g2HxtWPsIs7NCxzPdo7ZTr4s_lRs9gIOC6MWY,8184
86
- phoonnx-0.1.0a3.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
87
- phoonnx-0.1.0a3.dist-info/top_level.txt,sha256=ZrnHXe-4HqbOSX6fbdY-JiP7YEu2Bok9T0ji351MrmM,22
88
- phoonnx-0.1.0a3.dist-info/RECORD,,
86
+ phoonnx-0.2.0a1.dist-info/METADATA,sha256=YzTNDisiyAKoRj_Ig13nUXwtcL0mV4AaABva5c7OYOo,8250
87
+ phoonnx-0.2.0a1.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
88
+ phoonnx-0.2.0a1.dist-info/top_level.txt,sha256=ZrnHXe-4HqbOSX6fbdY-JiP7YEu2Bok9T0ji351MrmM,22
89
+ phoonnx-0.2.0a1.dist-info/RECORD,,