batchalign 0.7.10.post2__tar.gz → 0.7.10.post5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. {batchalign-0.7.10.post2/batchalign.egg-info → batchalign-0.7.10.post5}/PKG-INFO +2 -1
  2. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/asr/utils.py +15 -0
  3. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/morphosyntax/ud.py +2 -1
  4. batchalign-0.7.10.post5/batchalign/version +3 -0
  5. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5/batchalign.egg-info}/PKG-INFO +2 -1
  6. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign.egg-info/requires.txt +1 -0
  7. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/setup.py +1 -0
  8. batchalign-0.7.10.post2/batchalign/version +0 -3
  9. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/LICENSE +0 -0
  10. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/MANIFEST.in +0 -0
  11. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/README.md +0 -0
  12. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/__init__.py +0 -0
  13. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/__main__.py +0 -0
  14. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/cli/__init__.py +0 -0
  15. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/cli/cli.py +0 -0
  16. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/cli/dispatch.py +0 -0
  17. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/constants.py +0 -0
  18. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/document.py +0 -0
  19. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/errors.py +0 -0
  20. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/formats/__init__.py +0 -0
  21. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/formats/base.py +0 -0
  22. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/formats/chat/__init__.py +0 -0
  23. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/formats/chat/file.py +0 -0
  24. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/formats/chat/generator.py +0 -0
  25. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/formats/chat/lexer.py +0 -0
  26. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/formats/chat/parser.py +0 -0
  27. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/formats/chat/utils.py +0 -0
  28. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/formats/textgrid/__init__.py +0 -0
  29. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/formats/textgrid/file.py +0 -0
  30. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/formats/textgrid/generator.py +0 -0
  31. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/formats/textgrid/parser.py +0 -0
  32. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/__init__.py +0 -0
  33. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/resolve.py +0 -0
  34. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/speaker/__init__.py +0 -0
  35. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/speaker/config.yaml +0 -0
  36. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/speaker/infer.py +0 -0
  37. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/speaker/utils.py +0 -0
  38. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/training/__init__.py +0 -0
  39. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/training/run.py +0 -0
  40. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/training/utils.py +0 -0
  41. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/utils.py +0 -0
  42. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/utterance/__init__.py +0 -0
  43. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/utterance/dataset.py +0 -0
  44. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/utterance/execute.py +0 -0
  45. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/utterance/infer.py +0 -0
  46. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/utterance/prep.py +0 -0
  47. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/utterance/train.py +0 -0
  48. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/whisper/__init__.py +0 -0
  49. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/whisper/infer_asr.py +0 -0
  50. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/whisper/infer_fa.py +0 -0
  51. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/__init__.py +0 -0
  52. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/analysis/__init__.py +0 -0
  53. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/analysis/eval.py +0 -0
  54. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/asr/__init__.py +0 -0
  55. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/asr/rev.py +0 -0
  56. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/asr/whisper.py +0 -0
  57. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/asr/whisperx.py +0 -0
  58. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/base.py +0 -0
  59. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/cleanup/__init__.py +0 -0
  60. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  61. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  62. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  63. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/cleanup/retrace.py +0 -0
  64. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  65. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  66. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/cleanup/support/test.test +0 -0
  67. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/dispatch.py +0 -0
  68. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/fa/__init__.py +0 -0
  69. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  70. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  71. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  72. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  73. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  74. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  75. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  76. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  77. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/pipeline.py +0 -0
  78. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/speaker/__init__.py +0 -0
  79. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  80. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/utr/__init__.py +0 -0
  81. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/utr/rev_utr.py +0 -0
  82. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/utr/utils.py +0 -0
  83. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  84. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/utterance/__init__.py +0 -0
  85. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  86. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/__init__.py +0 -0
  87. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/conftest.py +0 -0
  88. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  89. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  90. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  91. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  92. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  93. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  94. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  95. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  96. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  97. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  98. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  99. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  100. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/pipelines/fixures.py +0 -0
  101. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  102. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  103. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/test_document.py +0 -0
  104. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/utils/__init__.py +0 -0
  105. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/utils/config.py +0 -0
  106. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/utils/dp.py +0 -0
  107. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/utils/utils.py +0 -0
  108. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign.egg-info/SOURCES.txt +0 -0
  109. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign.egg-info/dependency_links.txt +0 -0
  110. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign.egg-info/entry_points.txt +0 -0
  111. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign.egg-info/top_level.txt +0 -0
  112. {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.10.post2
3
+ Version: 0.7.10.post5
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -32,6 +32,7 @@ Requires-Dist: setuptools
32
32
  Requires-Dist: soundfile~=0.12.0
33
33
  Requires-Dist: rich-click>=1.7.0
34
34
  Requires-Dist: typing-extensions
35
+ Requires-Dist: num2words
35
36
  Provides-Extra: dev
36
37
  Requires-Dist: pytest; extra == "dev"
37
38
  Provides-Extra: train
@@ -4,6 +4,10 @@ from batchalign.utils import *
4
4
 
5
5
  from batchalign.constants import ENDING_PUNCT
6
6
 
7
+ from num2words import num2words
8
+ import pycountry
9
+
10
+
7
11
  def retokenize(intermediate_output):
8
12
  """Retokenize the output of the ASR system from one giant blob to utterances
9
13
 
@@ -153,6 +157,17 @@ def process_generation(output, lang="eng", utterance_engine=None):
153
157
  for part in word_parts:
154
158
  final_words.append([part.strip(), [cur, cur+div]])
155
159
  cur += div
160
+
161
+ lang_2 = pycountry.languages.get(alpha_3=lang).alpha_2
162
+ def catched_num2words(i):
163
+ if not i.isdigit():
164
+ return i
165
+ try:
166
+ return num2words(i, lang=lang_2)
167
+ except NotImplementedError:
168
+ return i
169
+ final_words = [[catched_num2words(i), j] for i,j in final_words]
170
+
156
171
  # if the final words is > 300, split into n parts
157
172
  if len(final_words) > 300:
158
173
  # for each group, append
@@ -990,7 +990,8 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
990
990
  content.dependency = form.dependency
991
991
 
992
992
  except Exception as e:
993
- warnings.warn(f"Utterance failed parsing, skipping ud tagging... line='{line}', error='{e}'.\n")
993
+ pass
994
+ # warnings.warn(f"Utterance failed parsing, skipping ud tagging... line='{line}', error='{e}'.\n")
994
995
 
995
996
  L.debug("Stanza done.")
996
997
  return doc
@@ -0,0 +1,3 @@
1
+ 0.7.10-post.5
2
+ January 29th, 2025
3
+ skipping warnings
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.10.post2
3
+ Version: 0.7.10.post5
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -32,6 +32,7 @@ Requires-Dist: setuptools
32
32
  Requires-Dist: soundfile~=0.12.0
33
33
  Requires-Dist: rich-click>=1.7.0
34
34
  Requires-Dist: typing-extensions
35
+ Requires-Dist: num2words
35
36
  Provides-Extra: dev
36
37
  Requires-Dist: pytest; extra == "dev"
37
38
  Provides-Extra: train
@@ -22,6 +22,7 @@ setuptools
22
22
  soundfile~=0.12.0
23
23
  rich-click>=1.7.0
24
24
  typing-extensions
25
+ num2words
25
26
 
26
27
  [dev]
27
28
  pytest
@@ -51,6 +51,7 @@ setup(
51
51
  "soundfile~=0.12.0",
52
52
  "rich-click>=1.7.0",
53
53
  "typing-extensions",
54
+ "num2words",
54
55
  ],
55
56
  extras_require={
56
57
  'dev': [
@@ -1,3 +0,0 @@
1
- 0.7.10-post.2
2
- January 23rd, 2025
3
- output Benchmark