phoonnx 0.1.0a3__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/PKG-INFO +2 -1
  2. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/README.md +1 -0
  3. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/config.py +10 -0
  4. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/phonemizers/__init__.py +6 -2
  5. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/phonemizers/mul.py +184 -0
  6. phoonnx-0.2.0/phoonnx/phonemizers/mwl.py +36 -0
  7. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/version.py +3 -3
  8. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx.egg-info/PKG-INFO +2 -1
  9. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx.egg-info/SOURCES.txt +2 -0
  10. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx.egg-info/requires.txt +3 -0
  11. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/__init__.py +0 -0
  12. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/locale/ca/phonetic_spellings.txt +0 -0
  13. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/locale/en/phonetic_spellings.txt +0 -0
  14. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/locale/gl/phonetic_spellings.txt +0 -0
  15. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/locale/pt/phonetic_spellings.txt +0 -0
  16. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/phoneme_ids.py +0 -0
  17. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/phonemizers/ar.py +0 -0
  18. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/phonemizers/base.py +0 -0
  19. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/phonemizers/en.py +0 -0
  20. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/phonemizers/fa.py +0 -0
  21. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/phonemizers/gl.py +0 -0
  22. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/phonemizers/he.py +0 -0
  23. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/phonemizers/ja.py +0 -0
  24. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/phonemizers/ko.py +0 -0
  25. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/phonemizers/vi.py +0 -0
  26. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/phonemizers/zh.py +0 -0
  27. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/__init__.py +0 -0
  28. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/arpa2ipa.py +0 -0
  29. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/bw2ipa.py +0 -0
  30. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/cotovia/cotovia_aarch64 +0 -0
  31. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/cotovia/cotovia_x86_64 +0 -0
  32. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/hangul2ipa.py +0 -0
  33. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/ko_tables/aspiration.csv +0 -0
  34. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/ko_tables/assimilation.csv +0 -0
  35. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/ko_tables/double_coda.csv +0 -0
  36. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/ko_tables/hanja.tsv +0 -0
  37. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/ko_tables/ipa.csv +0 -0
  38. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/ko_tables/neutralization.csv +0 -0
  39. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/ko_tables/tensification.csv +0 -0
  40. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/ko_tables/yale.csv +0 -0
  41. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/kog2p/__init__.py +0 -0
  42. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/kog2p/rulebook.txt +0 -0
  43. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/mantoq/__init__.py +0 -0
  44. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/mantoq/buck/__init__.py +0 -0
  45. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/mantoq/buck/phonetise_buckwalter.py +0 -0
  46. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/mantoq/buck/symbols.py +0 -0
  47. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/mantoq/buck/tokenization.py +0 -0
  48. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/mantoq/num2words.py +0 -0
  49. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/mantoq/pyarabic/__init__.py +0 -0
  50. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/mantoq/pyarabic/arabrepr.py +0 -0
  51. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/mantoq/pyarabic/araby.py +0 -0
  52. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/mantoq/pyarabic/named_const.py +0 -0
  53. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/mantoq/pyarabic/normalize.py +0 -0
  54. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/mantoq/pyarabic/number.py +0 -0
  55. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/mantoq/pyarabic/number_const.py +0 -0
  56. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/mantoq/pyarabic/stack.py +0 -0
  57. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/mantoq/pyarabic/trans.py +0 -0
  58. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/mantoq/unicode_symbol2label.py +0 -0
  59. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/phonikud/__init__.py +0 -0
  60. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/tashkeel/LICENSE +0 -0
  61. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/tashkeel/SOURCE +0 -0
  62. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/tashkeel/__init__.py +0 -0
  63. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/tashkeel/hint_id_map.json +0 -0
  64. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/tashkeel/input_id_map.json +0 -0
  65. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/tashkeel/model.onnx +0 -0
  66. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/tashkeel/target_id_map.json +0 -0
  67. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/thirdparty/zh_num.py +0 -0
  68. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/util.py +0 -0
  69. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx/voice.py +0 -0
  70. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx.egg-info/dependency_links.txt +0 -0
  71. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx.egg-info/top_level.txt +0 -0
  72. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx_train/__main__.py +0 -0
  73. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx_train/export_onnx.py +0 -0
  74. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx_train/norm_audio/__init__.py +0 -0
  75. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx_train/norm_audio/trim.py +0 -0
  76. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx_train/norm_audio/vad.py +0 -0
  77. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx_train/preprocess.py +0 -0
  78. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx_train/vits/__init__.py +0 -0
  79. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx_train/vits/attentions.py +0 -0
  80. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx_train/vits/commons.py +0 -0
  81. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx_train/vits/config.py +0 -0
  82. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx_train/vits/dataset.py +0 -0
  83. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx_train/vits/lightning.py +0 -0
  84. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx_train/vits/losses.py +0 -0
  85. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx_train/vits/mel_processing.py +0 -0
  86. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx_train/vits/models.py +0 -0
  87. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx_train/vits/modules.py +0 -0
  88. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx_train/vits/monotonic_align/__init__.py +0 -0
  89. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx_train/vits/monotonic_align/setup.py +0 -0
  90. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx_train/vits/transforms.py +0 -0
  91. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx_train/vits/utils.py +0 -0
  92. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/phoonnx_train/vits/wavfile.py +0 -0
  93. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/setup.cfg +0 -0
  94. {phoonnx-0.1.0a3 → phoonnx-0.2.0}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phoonnx
3
- Version: 0.1.0a3
3
+ Version: 0.2.0
4
4
  Home-page: https://github.com/TigreGotico/phoonnx
5
5
  Author: JarbasAi
6
6
  Author-email: jarbasai@mailfence.com
@@ -41,6 +41,7 @@ Provides-Extra: jam
41
41
  Provides-Extra: hsn
42
42
  Provides-Extra: fil
43
43
  Provides-Extra: ml
44
+ Provides-Extra: mwl
44
45
  Provides-Extra: lez
45
46
  Provides-Extra: si
46
47
  Provides-Extra: kk
@@ -89,6 +89,7 @@ wrap, and the output alphabets they can generate.
89
89
  | **Multilingual** | `MisakiPhonemizer` | [misaki](https://github.com/hexgrad/misaki) | IPA | Misaki is a G2P engine designed for Kokoro models. |
90
90
  | **Multilingual** | `TransphonePhonemizer` | [transphone](https://github.com/xinjli/transphone) | IPA | It provides approximated phoneme tokenizers and G2P model for 7546 languages registered in the Glottolog database. |
91
91
  | **Multilingual** | `EpitranPhonemizer` | [epitran](https://github.com/dmort27/epitran) | IPA | A tool for transcribing orthographic text as IPA |
92
+ | **Mirandese (mwl)** | `MirandesePhonemizer` | [mwl_phonemizer](https://github.com/TigreGotico/mwl_phonemizer) | IPA | A tool for transcribing orthographic text as IPA |
92
93
  | **Arabic (ar)** | `MantoqPhonemizer` | [mantoq](https://github.com/mush42/mantoq) | BUCKWALTER, IPA | Translates unvoweled Arabic to phonemes, with optional conversion to IPA. |
93
94
  | **Chinese (zh)** | `JiebaPhonemizer` | [jieba](https://github.com/fxsjy/jieba) | HANZI | Segments Chinese text into words with spaces; useful for pre-processing. |
94
95
  | **Chinese (zh)** | `G2pMPhonemizer` | [g2pC](https://github.com/Kyubyong/g2pC) | IPA, Pinyin | CRF-based Grapheme-to-Phoneme converter |
@@ -45,9 +45,12 @@ class PhonemeType(str, Enum):
45
45
  MISAKI = "misaki"
46
46
  ESPEAK = "espeak"
47
47
  GRUUT = "gruut"
48
+ GORUUT = "goruut"
48
49
  EPITRAN = "epitran"
49
50
  BYT5 = "byt5"
50
51
  CHARSIU = "charsiu" # technically same as byt5, but needs special handling for whitespace
52
+ TRANSPHONE = "transphone"
53
+ MIRANDESE = "mwl_phonemizer"
51
54
 
52
55
  DEEPPHONEMIZER = "deepphonemizer" # en
53
56
  OPENPHONEMIZER = "openphonemizer" # en
@@ -392,6 +395,7 @@ def get_phonemizer(phoneme_type: PhonemeType,
392
395
  from phoonnx.phonemizers import (EpitranPhonemizer, EspeakPhonemizer, OpenPhonemizer, OpenJTaklPhonemizer,
393
396
  ByT5Phonemizer, CharsiuPhonemizer, DeepPhonemizer, PersianPhonemizer,
394
397
  G2pCPhonemizer, G2pMPhonemizer, G2PKPhonemizer, G2PEnPhonemizer,
398
+ TransphonePhonemizer, MirandesePhonemizer, GoruutPhonemizer,
395
399
  GruutPhonemizer, GraphemePhonemizer, MantoqPhonemizer, MisakiPhonemizer,
396
400
  KoG2PPhonemizer, PypinyinPhonemizer, PyKakasiPhonemizer, CotoviaPhonemizer,
397
401
  CutletPhonemizer, PhonikudPhonemizer, VIPhonemePhonemizer, XpinyinPhonemizer,
@@ -404,10 +408,16 @@ def get_phonemizer(phoneme_type: PhonemeType,
404
408
  phonemizer = CharsiuPhonemizer(model)
405
409
  elif phoneme_type == PhonemeType.GRUUT:
406
410
  phonemizer = GruutPhonemizer()
411
+ elif phoneme_type == PhonemeType.GORUUT:
412
+ phonemizer = GoruutPhonemizer()
407
413
  elif phoneme_type == PhonemeType.EPITRAN:
408
414
  phonemizer = EpitranPhonemizer()
409
415
  elif phoneme_type == PhonemeType.MISAKI:
410
416
  phonemizer = MisakiPhonemizer()
417
+ elif phoneme_type == PhonemeType.TRANSPHONE:
418
+ phonemizer = TransphonePhonemizer()
419
+ elif phoneme_type == PhonemeType.MIRANDESE:
420
+ phonemizer = MirandesePhonemizer()
411
421
  elif phoneme_type == PhonemeType.DEEPPHONEMIZER:
412
422
  phonemizer = DeepPhonemizer(model)
413
423
  elif phoneme_type == PhonemeType.OPENPHONEMIZER:
@@ -11,8 +11,9 @@ from phoonnx.phonemizers.ja import PyKakasiPhonemizer, CutletPhonemizer, OpenJTa
11
11
  from phoonnx.phonemizers.ko import KoG2PPhonemizer, G2PKPhonemizer
12
12
  from phoonnx.phonemizers.zh import (G2pCPhonemizer, G2pMPhonemizer, PypinyinPhonemizer,
13
13
  XpinyinPhonemizer, JiebaPhonemizer)
14
- from phoonnx.phonemizers.mul import (EspeakPhonemizer, EpitranPhonemizer, MisakiPhonemizer,
15
- GruutPhonemizer, ByT5Phonemizer, CharsiuPhonemizer)
14
+ from phoonnx.phonemizers.mul import (EspeakPhonemizer, EpitranPhonemizer, MisakiPhonemizer, GoruutPhonemizer,
15
+ GruutPhonemizer, ByT5Phonemizer, CharsiuPhonemizer, TransphonePhonemizer)
16
+ from phoonnx.phonemizers.mwl import MirandesePhonemizer
16
17
 
17
18
  Phonemizer = Union[
18
19
  MisakiPhonemizer,
@@ -21,7 +22,10 @@ Phonemizer = Union[
21
22
  CharsiuPhonemizer,
22
23
  EspeakPhonemizer,
23
24
  GruutPhonemizer,
25
+ GoruutPhonemizer,
24
26
  EpitranPhonemizer,
27
+ TransphonePhonemizer,
28
+ MirandesePhonemizer,
25
29
  OpenJTaklPhonemizer,
26
30
  CutletPhonemizer,
27
31
  PyKakasiPhonemizer,
@@ -436,6 +436,187 @@ class GruutPhonemizer(BasePhonemizer):
436
436
  return pho.strip()
437
437
 
438
438
 
439
+ class GoruutPhonemizer(BasePhonemizer):
440
+ """
441
+ A phonemizer class that uses the pygoruut library to convert text into phonemes.
442
+ https://github.com/neurlang/pygoruut/
443
+ """
444
+ GORUUT_LANGS_NON_STD = [
445
+ 'BengaliDhaka', 'BengaliRahr', 'MalayArab', 'VietnameseCentral', 'VietnameseSouthern',
446
+ 'EnglishAmerican', 'EnglishBritish', 'NahuatlClassical', 'Hebrew2', 'Hebrew3',
447
+ 'MinnanTawianese', 'MinnanHokkien', 'MinnanTawianese2', 'MinnanHokkien2']
448
+ ISO639 = {
449
+ "af": "Afrikaans",
450
+ "am": "Amharic",
451
+ "ar": "Arabic",
452
+ "az": "Azerbaijani",
453
+ "be": "Belarusian",
454
+ "bn": "Bengali",
455
+ "my": "Burmese",
456
+ "ceb": "Cebuano",
457
+ "ce": "Chechen",
458
+ "zh": "ChineseMandarin",
459
+ "cs": "Czech",
460
+ "da": "Danish",
461
+ "nl": "Dutch",
462
+ "dz": "Dzongkha",
463
+ "en": "English",
464
+ "eo": "Esperanto",
465
+ "fa": "Farsi",
466
+ "fi": "Finnish",
467
+ "fr": "French",
468
+ "de": "German",
469
+ "el": "Greek",
470
+ "gu": "Gujarati",
471
+ "ha": "Hausa",
472
+ "he": "Hebrew",
473
+ "hi": "Hindi",
474
+ "hu": "Hungarian",
475
+ "is": "Icelandic",
476
+ "id": "Indonesian",
477
+ "tts": "Isan",
478
+ "it": "Italian",
479
+ "jam": "Jamaican",
480
+ "ja": "Japanese",
481
+ "jv": "Javanese",
482
+ "kk": "Kazakh",
483
+ "ko": "Korean",
484
+ "lb": "Luxembourgish",
485
+ "mk": "Macedonian",
486
+ "ml": "Malayalam",
487
+ "ms": "MalayLatin",
488
+ "mt": "Maltese",
489
+ "mr": "Marathi",
490
+ "mn": "Mongolian",
491
+ "ne": "Nepali",
492
+ "no": "Norwegian",
493
+ "ps": "Pashto",
494
+ "pl": "Polish",
495
+ "pt": "Portuguese",
496
+ "pa": "Punjabi",
497
+ "ro": "Romanian",
498
+ "ru": "Russian",
499
+ "sk": "Slovak",
500
+ "es": "Spanish",
501
+ "sw": "Swahili",
502
+ "sv": "Swedish",
503
+ "ta": "Tamil",
504
+ "te": "Telugu",
505
+ "th": "Thai",
506
+ "bo": "Tibetan",
507
+ "tr": "Turkish",
508
+ "uk": "Ukrainian",
509
+ "ur": "Urdu",
510
+ "ug": "Uyghur",
511
+ "vi": "VietnameseNorthern",
512
+ "zu": "Zulu",
513
+ "hy": "Armenian",
514
+ "eu": "Basque",
515
+ "bg": "Bulgarian",
516
+ "ca": "Catalan",
517
+ "ny": "Chichewa",
518
+ "hr": "Croatian",
519
+ "et": "Estonian",
520
+ "gl": "Galician",
521
+ "ka": "Georgian",
522
+ "km": "KhmerCentral",
523
+ "lo": "Lao",
524
+ "lv": "Latvian",
525
+ "lt": "Lithuanian",
526
+ "sr": "Serbian",
527
+ "tl": "Tagalog",
528
+ "yo": "Yoruba",
529
+ "sq": "Albanian",
530
+ "an": "Aragonese",
531
+ "as": "Assamese",
532
+ "ba": "Bashkir",
533
+ "bpy": "BishnupriyaManipuri",
534
+ "bs": "Bosnian",
535
+ "chr": "Cherokee",
536
+ "cu": "Chuvash",
537
+ "gla": "GaelicScottish",
538
+ "gle": "GaelicIrish",
539
+ "kl": "Greenlandic",
540
+ "gn": "Guarani",
541
+ "ht": "HaitianCreole",
542
+ "haw": "Hawaiian",
543
+ "io": "Ido",
544
+ "ia": "Interlingua",
545
+ "kn": "Kannada",
546
+ "quc": "Kiche",
547
+ "kok": "Konkani",
548
+ "ku": "Kurdish",
549
+ "ky": "Kyrgyz",
550
+ "qdb": "LangBelta",
551
+ "ltg": "Latgalian",
552
+ "la": "LatinClassical",
553
+ "lat": "LatinEcclesiastical",
554
+ "lfn": "LinguaFrancaNova",
555
+ "jbo": "Lojban",
556
+ "smj": "LuleSaami",
557
+ "mi": "Maori",
558
+ "nah": "NahuatlCentral",
559
+ "nci": "NahuatlMecayapan",
560
+ "ncz": "NahuatlTetelcingo",
561
+ "nog": "Nogai",
562
+ "om": "Oromo",
563
+ "pap": "Papiamento",
564
+ "qu": "Quechua",
565
+ "qya": "Quenya",
566
+ "tn": "Setswana",
567
+ "shn": "ShanTaiYai",
568
+ "sjn": "Sindarin",
569
+ "sd": "Sindhi",
570
+ "si": "Sinhala",
571
+ "sl": "Slovenian",
572
+ "tt": "Tatar",
573
+ "tk": "Turkmen",
574
+ "uz": "Uzbek",
575
+ "cyw": "WelshNorth",
576
+ "cys": "WelshSouth",
577
+ "yue": "Cantonese"
578
+ }
579
+
580
+ def __init__(self, remote_url=None):
581
+ super().__init__(Alphabet.IPA)
582
+ from pygoruut.pygoruut import Pygoruut
583
+ from pygoruut.pygoruut_languages import PygoruutLanguages
584
+
585
+ self.pygoruut_langs = PygoruutLanguages()
586
+ if remote_url is not None:
587
+ # 'https://hashtron.cloud'
588
+ self.pygoruut = Pygoruut(api=remote_url)
589
+ else:
590
+ self.pygoruut = Pygoruut()
591
+
592
+ @classmethod
593
+ def get_lang(cls, target_lang: str) -> str:
594
+ """
595
+ Validates and returns the closest supported language code.
596
+
597
+ Args:
598
+ target_lang (str): The language code to validate.
599
+
600
+ Returns:
601
+ str: The validated language code.
602
+
603
+ Raises:
604
+ ValueError: If the language code is unsupported.
605
+ """
606
+ if target_lang in cls.GORUUT_LANGS_NON_STD:
607
+ return target_lang
608
+ if target_lang.lower() == "en-us":
609
+ return 'EnglishAmerican'
610
+ if target_lang.lower() == "en-gb" or target_lang.lower() == "en-uk":
611
+ return 'EnglishBritish'
612
+ lang = cls.match_lang(target_lang, list(cls.ISO639))
613
+ return cls.ISO639[lang]
614
+
615
+ def phonemize_string(self, text: str, lang: str) -> str:
616
+ lang = self.get_lang(lang)
617
+ return str(self.pygoruut.phonemize(language=lang, sentence=text))
618
+
619
+
439
620
  class EpitranPhonemizer(BasePhonemizer):
440
621
  """
441
622
  """
@@ -1178,6 +1359,7 @@ if __name__ == "__main__":
1178
1359
  byt5 = ByT5Phonemizer()
1179
1360
  espeak = EspeakPhonemizer()
1180
1361
  gruut = GruutPhonemizer()
1362
+ goruut = GoruutPhonemizer(remote_url='https://hashtron.cloud')
1181
1363
  epitr = EpitranPhonemizer()
1182
1364
  charsiu = CharsiuPhonemizer()
1183
1365
  misaki = MisakiPhonemizer()
@@ -1194,6 +1376,7 @@ if __name__ == "__main__":
1194
1376
  phonemes1e = charsiu.phonemize(text1, lang)
1195
1377
  phonemes1f = misaki.phonemize(text1, lang)
1196
1378
  phonemes1g = tphone.phonemize(text1, lang)
1379
+ phonemes1h = goruut.phonemize(text1, lang)
1197
1380
  print(f" Espeak Phonemes: {phonemes1}")
1198
1381
  print(f" Gruut Phonemes: {phonemes1b}")
1199
1382
  print(f" byt5 Phonemes: {phonemes1c}")
@@ -1201,6 +1384,7 @@ if __name__ == "__main__":
1201
1384
  print(f" Charsiu Phonemes: {phonemes1e}")
1202
1385
  print(f" Misaki Phonemes: {phonemes1f}")
1203
1386
  print(f" Transphone Phonemes: {phonemes1g}")
1387
+ print(f" Goruut Phonemes: {phonemes1h}")
1204
1388
 
1205
1389
  lang = "nl"
1206
1390
  sentence = "DJ's en bezoekers van Tomorrowland waren woensdagavond dolblij toen het paradepaardje van het festival alsnog opende in Oostenrijk op de Mainstage.\nWant het optreden van Metallica, waar iedereen zo blij mee was, zou hoe dan ook doorgaan, aldus de DJ die het nieuws aankondigde."
@@ -0,0 +1,36 @@
1
+ from phoonnx.phonemizers.base import BasePhonemizer, Alphabet
2
+
3
+
4
+ class MirandesePhonemizer(BasePhonemizer):
5
+ _LANGS = ["mwl"]
6
+
7
+ def __init__(self):
8
+ super().__init__(Alphabet.IPA)
9
+ from mwl_phonemizer import CRFOrthoCorrector
10
+ self.pho = CRFOrthoCorrector()
11
+
12
+ @classmethod
13
+ def get_lang(cls, target_lang: str) -> str:
14
+ """
15
+ Validates and returns the closest supported language code.
16
+
17
+ Args:
18
+ target_lang (str): The language code to validate.
19
+
20
+ Returns:
21
+ str: The validated language code.
22
+
23
+ Raises:
24
+ ValueError: If the language code is unsupported.
25
+ """
26
+ return cls.match_lang(target_lang, cls._LANGS)
27
+
28
+ def phonemize_string(self, text: str, lang: str) -> str:
29
+ # Validate language is supported
30
+ lang = self.get_lang(lang)
31
+ return self.pho.phonemize_sentence(text)
32
+
33
+
34
+ if __name__ == "__main__":
35
+ pho = MirandesePhonemizer()
36
+ print(pho.phonemize_string("ls", "mwl"))
@@ -1,10 +1,10 @@
1
1
  # START_VERSION_BLOCK
2
2
  VERSION_MAJOR = 0
3
- VERSION_MINOR = 1
3
+ VERSION_MINOR = 2
4
4
  VERSION_BUILD = 0
5
- VERSION_ALPHA = 3
5
+ VERSION_ALPHA = 0
6
6
  # END_VERSION_BLOCK
7
7
 
8
8
  VERSION_STR = f"{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_BUILD}"
9
9
  if VERSION_ALPHA:
10
- VERSION_STR += f"a{VERSION_ALPHA}"
10
+ VERSION_STR += f"a{VERSION_ALPHA}"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phoonnx
3
- Version: 0.1.0a3
3
+ Version: 0.2.0
4
4
  Home-page: https://github.com/TigreGotico/phoonnx
5
5
  Author: JarbasAi
6
6
  Author-email: jarbasai@mailfence.com
@@ -41,6 +41,7 @@ Provides-Extra: jam
41
41
  Provides-Extra: hsn
42
42
  Provides-Extra: fil
43
43
  Provides-Extra: ml
44
+ Provides-Extra: mwl
44
45
  Provides-Extra: lez
45
46
  Provides-Extra: si
46
47
  Provides-Extra: kk
@@ -25,6 +25,7 @@ phoonnx/phonemizers/he.py
25
25
  phoonnx/phonemizers/ja.py
26
26
  phoonnx/phonemizers/ko.py
27
27
  phoonnx/phonemizers/mul.py
28
+ phoonnx/phonemizers/mwl.py
28
29
  phoonnx/phonemizers/vi.py
29
30
  phoonnx/phonemizers/zh.py
30
31
  phoonnx/thirdparty/__init__.py
@@ -91,6 +92,7 @@ phoonnx_train/../phoonnx/phonemizers/he.py
91
92
  phoonnx_train/../phoonnx/phonemizers/ja.py
92
93
  phoonnx_train/../phoonnx/phonemizers/ko.py
93
94
  phoonnx_train/../phoonnx/phonemizers/mul.py
95
+ phoonnx_train/../phoonnx/phonemizers/mwl.py
94
96
  phoonnx_train/../phoonnx/phonemizers/vi.py
95
97
  phoonnx_train/../phoonnx/phonemizers/zh.py
96
98
  phoonnx_train/../phoonnx/thirdparty/__init__.py
@@ -206,6 +206,9 @@ epitran
206
206
  [mt]
207
207
  epitran
208
208
 
209
+ [mwl]
210
+ mwl_phonemizer
211
+
209
212
  [my]
210
213
  epitran
211
214
 
File without changes
File without changes
File without changes
File without changes
File without changes