phoonnx 0.0.0a2__py3-none-any.whl → 0.0.2a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
phoonnx/util.py CHANGED
@@ -257,9 +257,9 @@ UNITS = {
257
257
  "en": {
258
258
  "€": "euros",
259
259
  "%": "per cent",
260
- "ºC": "degrees celsius",
261
- "ºF": "degrees fahrenheit",
262
- "ºK": "degrees kelvin",
260
+ "°C": "degrees celsius",
261
+ "°F": "degrees fahrenheit",
262
+ "°K": "degrees kelvin",
263
263
  "°": "degrees",
264
264
  "$": "dollars",
265
265
  "£": "pounds",
@@ -287,9 +287,9 @@ UNITS = {
287
287
  "pt": {
288
288
  "€": "euros",
289
289
  "%": "por cento",
290
- "ºC": "graus celsius",
291
- "ºF": "graus fahrenheit",
292
- "ºK": "graus kelvin",
290
+ "°C": "graus celsius",
291
+ "°F": "graus fahrenheit",
292
+ "°K": "graus kelvin",
293
293
  "°": "graus",
294
294
  "$": "dólares",
295
295
  "£": "libras",
@@ -308,9 +308,9 @@ UNITS = {
308
308
  "es": {
309
309
  "€": "euros",
310
310
  "%": "por ciento",
311
- "ºC": "grados celsius",
312
- "ºF": "grados fahrenheit",
313
- "ºK": "grados kelvin",
311
+ "°C": "grados celsius",
312
+ "°F": "grados fahrenheit",
313
+ "°K": "grados kelvin",
314
314
  "°": "grados",
315
315
  "$": "dólares",
316
316
  "£": "libras",
@@ -325,9 +325,9 @@ UNITS = {
325
325
  "fr": {
326
326
  "€": "euros",
327
327
  "%": "pour cent",
328
- "ºC": "degrés celsius",
329
- "ºF": "degrés fahrenheit",
330
- "ºK": "degrés kelvin",
328
+ "°C": "degrés celsius",
329
+ "°F": "degrés fahrenheit",
330
+ "°K": "degrés kelvin",
331
331
  "°": "degrés",
332
332
  "$": "dollars",
333
333
  "£": "livres",
@@ -342,9 +342,9 @@ UNITS = {
342
342
  "de": {
343
343
  "€": "Euro",
344
344
  "%": "Prozent",
345
- "ºC": "Grad Celsius",
346
- "ºF": "Grad Fahrenheit",
347
- "ºK": "Grad Kelvin",
345
+ "°C": "Grad Celsius",
346
+ "°F": "Grad Fahrenheit",
347
+ "°K": "Grad Kelvin",
348
348
  "°": "Grad",
349
349
  "$": "Dollar",
350
350
  "£": "Pfund",
@@ -548,6 +548,7 @@ def _normalize_units(text: str, full_lang: str) -> str:
548
548
  This function handles symbolic and alphanumeric units separately
549
549
  to avoid issues with word boundaries.
550
550
  """
551
+ text = text.replace("º", "°") # these characters look the same... but...
551
552
  lang_code = full_lang.split("-")[0]
552
553
  if lang_code in UNITS:
553
554
  # Determine number separators for the language
@@ -566,15 +567,18 @@ def _normalize_units(text: str, full_lang: str) -> str:
566
567
  symbolic_pattern = re.compile(number_pattern_str + r"\s*(" + symbolic_pattern_str + r")", re.IGNORECASE)
567
568
 
568
569
  def replace_symbolic(match):
569
- number_str = match.group(1)
570
+ number = match.group(1)
570
571
  # Remove thousands separator and replace decimal separator for parsing
571
- number = number_str.replace(thousands_separator, "").replace(decimal_separator, ".")
572
+ if thousands_separator in number and decimal_separator in number:
573
+ number = number.replace(thousands_separator, "").replace(decimal_separator, ".")
574
+ elif decimal_separator != "." and decimal_separator in number:
575
+ number = number.replace(decimal_separator, ".")
572
576
  unit_symbol = match.group(2)
573
577
  unit_word = symbolic_units[unit_symbol]
574
578
  try:
575
- return f"{pronounce_number(float(number), full_lang)} {unit_word}"
579
+ return f"{pronounce_number(float(number) if '.' in number else int(number), full_lang)} {unit_word}"
576
580
  except Exception as e:
577
- LOG.error(f"Failed to pronounce number with unit: {number_str}{unit_symbol} - ({e})")
581
+ LOG.error(f"Failed to pronounce number with unit: {number}{unit_symbol} - ({e})")
578
582
  return match.group(0)
579
583
  text = symbolic_pattern.sub(replace_symbolic, text)
580
584
 
@@ -587,12 +591,15 @@ def _normalize_units(text: str, full_lang: str) -> str:
587
591
  re.IGNORECASE)
588
592
 
589
593
  def replace_alphanumeric(match):
590
- number_str = match.group(1)
594
+ number = match.group(1)
591
595
  # Remove thousands separator and replace decimal separator for parsing
592
- number = number_str.replace(thousands_separator, "").replace(decimal_separator, ".")
596
+ if thousands_separator in number and decimal_separator in number:
597
+ number = number.replace(thousands_separator, "").replace(decimal_separator, ".")
598
+ elif decimal_separator != "." and decimal_separator in number:
599
+ number = number.replace(decimal_separator, ".")
593
600
  unit_symbol = match.group(2)
594
601
  unit_word = alphanumeric_units[unit_symbol]
595
- return f"{pronounce_number(float(number), full_lang)} {unit_word}"
602
+ return f"{pronounce_number(float(number) if '.' in number else int(number), full_lang)} {unit_word}"
596
603
 
597
604
  text = alphanumeric_pattern.sub(replace_alphanumeric, text)
598
605
  return text
@@ -666,7 +673,8 @@ if __name__ == "__main__":
666
673
 
667
674
  # General normalization examples
668
675
  print("General English example: " + normalize('I\'m Dr. Prof. 3/3 0.5% of 12345€, 5ft, and 10kg', 'en'))
669
- print(f"General Portuguese example: {normalize('Dr. Prof. 3/3 0.5% de 12345€, 5m, e 10kg', 'pt')}")
676
+ print(f"Word Salad Portuguese (Dr. Prof. 3/3 0,5% de 12345€, 5m, e 10kg): {normalize('Dr. Prof. 3/3 0,5% de 12345€, 5m, e 10kg', 'pt')}")
677
+ print(f"Word Salad Portuguese (Dr. Prof. 3/3 0.5% de 12345€, 5m, e 10kg): {normalize('Dr. Prof. 3/3 0.5% de 12345€, 5m, e 10kg', 'pt')}")
670
678
 
671
679
  # Portuguese examples with comma decimal separator
672
680
  print("\n--- Portuguese Decimal Separator Examples ---")
@@ -690,10 +698,10 @@ if __name__ == "__main__":
690
698
 
691
699
  # Portuguese dates and times
692
700
  print("\n--- Portuguese Date & Time Examples ---")
693
- print(f"Portuguese date (DMY format): {normalize('A data é 03/08/2025', 'pt')}")
694
- print(f"Portuguese ambiguous date (DMY assumed): {normalize('O relatório é para 15/05/2025', 'pt')}")
695
- print(f"Portuguese date with dashes: {normalize('O evento é no dia 25-10-2024', 'pt')}")
696
- print(f"Portuguese military time: {normalize('O encontro é às 14h30', 'pt')}")
701
+ print(f"Portuguese date (A data é 03/08/2025): {normalize('A data é 03/08/2025', 'pt')}")
702
+ print(f"Portuguese ambiguous date (O relatório é para 15/05/2025): {normalize('O relatório é para 15/05/2025', 'pt')}")
703
+ print(f"Portuguese date with dashes (O evento é no dia 25-10-2024): {normalize('O evento é no dia 25-10-2024', 'pt')}")
704
+ print(f"Portuguese military time (O encontro é às 14h30): {normalize('O encontro é às 14h30', 'pt')}")
697
705
 
698
706
  # Other examples
699
707
  print(f"\n--- Other Examples ---")
phoonnx/version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # START_VERSION_BLOCK
2
2
  VERSION_MAJOR = 0
3
3
  VERSION_MINOR = 0
4
- VERSION_BUILD = 0
5
- VERSION_ALPHA = 2
4
+ VERSION_BUILD = 2
5
+ VERSION_ALPHA = 1
6
6
  # END_VERSION_BLOCK
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phoonnx
3
- Version: 0.0.0a2
3
+ Version: 0.0.2a1
4
4
  Home-page: https://github.com/TigreGotico/phoonnx
5
5
  Author: JarbasAi
6
6
  Author-email: jarbasai@mailfence.com
@@ -1,8 +1,8 @@
1
1
  phoonnx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  phoonnx/config.py,sha256=bO7dx2tfLotkohict3UKlCEVm-BRFB1feYYR1HarUkk,19382
3
3
  phoonnx/phoneme_ids.py,sha256=FiNgZwV6naEsBh6XwFLh3_FyOgPiCsK9qo7S0v-CmI4,13667
4
- phoonnx/util.py,sha256=3DqSRwsF498oP_uFVZnriP9PyjXhCfEy97Ey0ePTEqA,25042
5
- phoonnx/version.py,sha256=dR5gxHjK1GpeSbtoOSuQmJ4k9oUkkoIYYAwIJlvkQAk,114
4
+ phoonnx/util.py,sha256=XSjFEoqSFcujFTHxednacgC9GrSYyF-Il5L6Utmxmu4,25909
5
+ phoonnx/version.py,sha256=tnwL6H7F2rKiYfzZqG80Wt82MT0mHbzZ38RjwaJcsgs,114
6
6
  phoonnx/voice.py,sha256=FR_LafK1vSi_anPERJjZBuH3Bb9vUIof0MAW6TnALlA,20024
7
7
  phoonnx/locale/ca/phonetic_spellings.txt,sha256=igv3t7jxLSRE5GHsdn57HOpxiWNcEmECPql6m02wbO0,47
8
8
  phoonnx/locale/en/phonetic_spellings.txt,sha256=xGQlWOABLzbttpQvopl9CU-NnwEJRqKx8iuylsdUoQA,27
@@ -80,7 +80,7 @@ phoonnx_train/vits/utils.py,sha256=exiyrtPHbnnGvcHWSbaH9-gR6srH5ZPHlKiqV2IHUrQ,4
80
80
  phoonnx_train/vits/wavfile.py,sha256=oQZiTIrdw0oLTbcVwKfGXye1WtKte6qK_52qVwiMvfc,26396
81
81
  phoonnx_train/vits/monotonic_align/__init__.py,sha256=5IdAOD1Z7UloMb6d_9NRFsXoNIjEQ3h9mvOSh_AtO3k,636
82
82
  phoonnx_train/vits/monotonic_align/setup.py,sha256=0K5iJJ2mKIklx6ncEfCQS34skm5hHPiz9vRlQEvevvY,266
83
- phoonnx-0.0.0a2.dist-info/METADATA,sha256=6Vy9IlAd8I2pFPa-NyzdYCNTyUTDxfShyd1DVLrMzpQ,7868
84
- phoonnx-0.0.0a2.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
85
- phoonnx-0.0.0a2.dist-info/top_level.txt,sha256=ZrnHXe-4HqbOSX6fbdY-JiP7YEu2Bok9T0ji351MrmM,22
86
- phoonnx-0.0.0a2.dist-info/RECORD,,
83
+ phoonnx-0.0.2a1.dist-info/METADATA,sha256=i-4sLAwReU6JT44xM2JVBcAfKP0Rv9mMpWpiEdNUX7U,7868
84
+ phoonnx-0.0.2a1.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
85
+ phoonnx-0.0.2a1.dist-info/top_level.txt,sha256=ZrnHXe-4HqbOSX6fbdY-JiP7YEu2Bok9T0ji351MrmM,22
86
+ phoonnx-0.0.2a1.dist-info/RECORD,,