phoonnx 0.0.0a2__py3-none-any.whl → 0.0.2a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- phoonnx/util.py +35 -27
- phoonnx/version.py +2 -2
- {phoonnx-0.0.0a2.dist-info → phoonnx-0.0.2a1.dist-info}/METADATA +1 -1
- {phoonnx-0.0.0a2.dist-info → phoonnx-0.0.2a1.dist-info}/RECORD +6 -6
- {phoonnx-0.0.0a2.dist-info → phoonnx-0.0.2a1.dist-info}/WHEEL +0 -0
- {phoonnx-0.0.0a2.dist-info → phoonnx-0.0.2a1.dist-info}/top_level.txt +0 -0
phoonnx/util.py
CHANGED
@@ -257,9 +257,9 @@ UNITS = {
|
|
257
257
|
"en": {
|
258
258
|
"€": "euros",
|
259
259
|
"%": "per cent",
|
260
|
-
"
|
261
|
-
"
|
262
|
-
"
|
260
|
+
"°C": "degrees celsius",
|
261
|
+
"°F": "degrees fahrenheit",
|
262
|
+
"°K": "degrees kelvin",
|
263
263
|
"°": "degrees",
|
264
264
|
"$": "dollars",
|
265
265
|
"£": "pounds",
|
@@ -287,9 +287,9 @@ UNITS = {
|
|
287
287
|
"pt": {
|
288
288
|
"€": "euros",
|
289
289
|
"%": "por cento",
|
290
|
-
"
|
291
|
-
"
|
292
|
-
"
|
290
|
+
"°C": "graus celsius",
|
291
|
+
"°F": "graus fahrenheit",
|
292
|
+
"°K": "graus kelvin",
|
293
293
|
"°": "graus",
|
294
294
|
"$": "dólares",
|
295
295
|
"£": "libras",
|
@@ -308,9 +308,9 @@ UNITS = {
|
|
308
308
|
"es": {
|
309
309
|
"€": "euros",
|
310
310
|
"%": "por ciento",
|
311
|
-
"
|
312
|
-
"
|
313
|
-
"
|
311
|
+
"°C": "grados celsius",
|
312
|
+
"°F": "grados fahrenheit",
|
313
|
+
"°K": "grados kelvin",
|
314
314
|
"°": "grados",
|
315
315
|
"$": "dólares",
|
316
316
|
"£": "libras",
|
@@ -325,9 +325,9 @@ UNITS = {
|
|
325
325
|
"fr": {
|
326
326
|
"€": "euros",
|
327
327
|
"%": "pour cent",
|
328
|
-
"
|
329
|
-
"
|
330
|
-
"
|
328
|
+
"°C": "degrés celsius",
|
329
|
+
"°F": "degrés fahrenheit",
|
330
|
+
"°K": "degrés kelvin",
|
331
331
|
"°": "degrés",
|
332
332
|
"$": "dollars",
|
333
333
|
"£": "livres",
|
@@ -342,9 +342,9 @@ UNITS = {
|
|
342
342
|
"de": {
|
343
343
|
"€": "Euro",
|
344
344
|
"%": "Prozent",
|
345
|
-
"
|
346
|
-
"
|
347
|
-
"
|
345
|
+
"°C": "Grad Celsius",
|
346
|
+
"°F": "Grad Fahrenheit",
|
347
|
+
"°K": "Grad Kelvin",
|
348
348
|
"°": "Grad",
|
349
349
|
"$": "Dollar",
|
350
350
|
"£": "Pfund",
|
@@ -548,6 +548,7 @@ def _normalize_units(text: str, full_lang: str) -> str:
|
|
548
548
|
This function handles symbolic and alphanumeric units separately
|
549
549
|
to avoid issues with word boundaries.
|
550
550
|
"""
|
551
|
+
text = text.replace("º", "°") # these characters look the same... but...
|
551
552
|
lang_code = full_lang.split("-")[0]
|
552
553
|
if lang_code in UNITS:
|
553
554
|
# Determine number separators for the language
|
@@ -566,15 +567,18 @@ def _normalize_units(text: str, full_lang: str) -> str:
|
|
566
567
|
symbolic_pattern = re.compile(number_pattern_str + r"\s*(" + symbolic_pattern_str + r")", re.IGNORECASE)
|
567
568
|
|
568
569
|
def replace_symbolic(match):
|
569
|
-
|
570
|
+
number = match.group(1)
|
570
571
|
# Remove thousands separator and replace decimal separator for parsing
|
571
|
-
number
|
572
|
+
if thousands_separator in number and decimal_separator in number:
|
573
|
+
number = number.replace(thousands_separator, "").replace(decimal_separator, ".")
|
574
|
+
elif decimal_separator != "." and decimal_separator in number:
|
575
|
+
number = number.replace(decimal_separator, ".")
|
572
576
|
unit_symbol = match.group(2)
|
573
577
|
unit_word = symbolic_units[unit_symbol]
|
574
578
|
try:
|
575
|
-
return f"{pronounce_number(float(number), full_lang)} {unit_word}"
|
579
|
+
return f"{pronounce_number(float(number) if '.' in number else int(number), full_lang)} {unit_word}"
|
576
580
|
except Exception as e:
|
577
|
-
LOG.error(f"Failed to pronounce number with unit: {
|
581
|
+
LOG.error(f"Failed to pronounce number with unit: {number}{unit_symbol} - ({e})")
|
578
582
|
return match.group(0)
|
579
583
|
text = symbolic_pattern.sub(replace_symbolic, text)
|
580
584
|
|
@@ -587,12 +591,15 @@ def _normalize_units(text: str, full_lang: str) -> str:
|
|
587
591
|
re.IGNORECASE)
|
588
592
|
|
589
593
|
def replace_alphanumeric(match):
|
590
|
-
|
594
|
+
number = match.group(1)
|
591
595
|
# Remove thousands separator and replace decimal separator for parsing
|
592
|
-
number
|
596
|
+
if thousands_separator in number and decimal_separator in number:
|
597
|
+
number = number.replace(thousands_separator, "").replace(decimal_separator, ".")
|
598
|
+
elif decimal_separator != "." and decimal_separator in number:
|
599
|
+
number = number.replace(decimal_separator, ".")
|
593
600
|
unit_symbol = match.group(2)
|
594
601
|
unit_word = alphanumeric_units[unit_symbol]
|
595
|
-
return f"{pronounce_number(float(number), full_lang)} {unit_word}"
|
602
|
+
return f"{pronounce_number(float(number) if '.' in number else int(number), full_lang)} {unit_word}"
|
596
603
|
|
597
604
|
text = alphanumeric_pattern.sub(replace_alphanumeric, text)
|
598
605
|
return text
|
@@ -666,7 +673,8 @@ if __name__ == "__main__":
|
|
666
673
|
|
667
674
|
# General normalization examples
|
668
675
|
print("General English example: " + normalize('I\'m Dr. Prof. 3/3 0.5% of 12345€, 5ft, and 10kg', 'en'))
|
669
|
-
print(f"
|
676
|
+
print(f"Word Salad Portuguese (Dr. Prof. 3/3 0,5% de 12345€, 5m, e 10kg): {normalize('Dr. Prof. 3/3 0,5% de 12345€, 5m, e 10kg', 'pt')}")
|
677
|
+
print(f"Word Salad Portuguese (Dr. Prof. 3/3 0.5% de 12345€, 5m, e 10kg): {normalize('Dr. Prof. 3/3 0.5% de 12345€, 5m, e 10kg', 'pt')}")
|
670
678
|
|
671
679
|
# Portuguese examples with comma decimal separator
|
672
680
|
print("\n--- Portuguese Decimal Separator Examples ---")
|
@@ -690,10 +698,10 @@ if __name__ == "__main__":
|
|
690
698
|
|
691
699
|
# Portuguese dates and times
|
692
700
|
print("\n--- Portuguese Date & Time Examples ---")
|
693
|
-
print(f"Portuguese date (
|
694
|
-
print(f"Portuguese ambiguous date (
|
695
|
-
print(f"Portuguese date with dashes: {normalize('O evento é no dia 25-10-2024', 'pt')}")
|
696
|
-
print(f"Portuguese military time: {normalize('O encontro é às 14h30', 'pt')}")
|
701
|
+
print(f"Portuguese date (A data é 03/08/2025): {normalize('A data é 03/08/2025', 'pt')}")
|
702
|
+
print(f"Portuguese ambiguous date (O relatório é para 15/05/2025): {normalize('O relatório é para 15/05/2025', 'pt')}")
|
703
|
+
print(f"Portuguese date with dashes (O evento é no dia 25-10-2024): {normalize('O evento é no dia 25-10-2024', 'pt')}")
|
704
|
+
print(f"Portuguese military time (O encontro é às 14h30): {normalize('O encontro é às 14h30', 'pt')}")
|
697
705
|
|
698
706
|
# Other examples
|
699
707
|
print(f"\n--- Other Examples ---")
|
phoonnx/version.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
phoonnx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
2
|
phoonnx/config.py,sha256=bO7dx2tfLotkohict3UKlCEVm-BRFB1feYYR1HarUkk,19382
|
3
3
|
phoonnx/phoneme_ids.py,sha256=FiNgZwV6naEsBh6XwFLh3_FyOgPiCsK9qo7S0v-CmI4,13667
|
4
|
-
phoonnx/util.py,sha256=
|
5
|
-
phoonnx/version.py,sha256=
|
4
|
+
phoonnx/util.py,sha256=XSjFEoqSFcujFTHxednacgC9GrSYyF-Il5L6Utmxmu4,25909
|
5
|
+
phoonnx/version.py,sha256=tnwL6H7F2rKiYfzZqG80Wt82MT0mHbzZ38RjwaJcsgs,114
|
6
6
|
phoonnx/voice.py,sha256=FR_LafK1vSi_anPERJjZBuH3Bb9vUIof0MAW6TnALlA,20024
|
7
7
|
phoonnx/locale/ca/phonetic_spellings.txt,sha256=igv3t7jxLSRE5GHsdn57HOpxiWNcEmECPql6m02wbO0,47
|
8
8
|
phoonnx/locale/en/phonetic_spellings.txt,sha256=xGQlWOABLzbttpQvopl9CU-NnwEJRqKx8iuylsdUoQA,27
|
@@ -80,7 +80,7 @@ phoonnx_train/vits/utils.py,sha256=exiyrtPHbnnGvcHWSbaH9-gR6srH5ZPHlKiqV2IHUrQ,4
|
|
80
80
|
phoonnx_train/vits/wavfile.py,sha256=oQZiTIrdw0oLTbcVwKfGXye1WtKte6qK_52qVwiMvfc,26396
|
81
81
|
phoonnx_train/vits/monotonic_align/__init__.py,sha256=5IdAOD1Z7UloMb6d_9NRFsXoNIjEQ3h9mvOSh_AtO3k,636
|
82
82
|
phoonnx_train/vits/monotonic_align/setup.py,sha256=0K5iJJ2mKIklx6ncEfCQS34skm5hHPiz9vRlQEvevvY,266
|
83
|
-
phoonnx-0.0.
|
84
|
-
phoonnx-0.0.
|
85
|
-
phoonnx-0.0.
|
86
|
-
phoonnx-0.0.
|
83
|
+
phoonnx-0.0.2a1.dist-info/METADATA,sha256=i-4sLAwReU6JT44xM2JVBcAfKP0Rv9mMpWpiEdNUX7U,7868
|
84
|
+
phoonnx-0.0.2a1.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
85
|
+
phoonnx-0.0.2a1.dist-info/top_level.txt,sha256=ZrnHXe-4HqbOSX6fbdY-JiP7YEu2Bok9T0ji351MrmM,22
|
86
|
+
phoonnx-0.0.2a1.dist-info/RECORD,,
|
File without changes
|
File without changes
|