batchalign 0.7.19.post11__tar.gz → 0.7.19.post12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of batchalign might be problematic. Click here for more details.

Files changed (124)
  1. {batchalign-0.7.19.post11/batchalign.egg-info → batchalign-0.7.19.post12}/PKG-INFO +2 -3
  2. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/analysis/eval.py +8 -3
  3. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/utils/dp.py +6 -5
  4. batchalign-0.7.19.post12/batchalign/version +3 -0
  5. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12/batchalign.egg-info}/PKG-INFO +2 -3
  6. batchalign-0.7.19.post11/batchalign/version +0 -3
  7. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/LICENSE +0 -0
  8. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/MANIFEST.in +0 -0
  9. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/README.md +0 -0
  10. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/__init__.py +0 -0
  11. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/__main__.py +0 -0
  12. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/cli/__init__.py +0 -0
  13. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/cli/cli.py +0 -0
  14. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/cli/dispatch.py +0 -0
  15. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/constants.py +0 -0
  16. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/document.py +0 -0
  17. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/errors.py +0 -0
  18. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/formats/__init__.py +0 -0
  19. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/formats/base.py +0 -0
  20. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/formats/chat/__init__.py +0 -0
  21. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/formats/chat/file.py +0 -0
  22. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/formats/chat/generator.py +0 -0
  23. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/formats/chat/lexer.py +0 -0
  24. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/formats/chat/parser.py +0 -0
  25. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/formats/chat/utils.py +0 -0
  26. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/formats/textgrid/__init__.py +0 -0
  27. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/formats/textgrid/file.py +0 -0
  28. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/formats/textgrid/generator.py +0 -0
  29. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/formats/textgrid/parser.py +0 -0
  30. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/models/__init__.py +0 -0
  31. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/models/resolve.py +0 -0
  32. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/models/speaker/__init__.py +0 -0
  33. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/models/speaker/config.yaml +0 -0
  34. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/models/speaker/infer.py +0 -0
  35. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/models/speaker/utils.py +0 -0
  36. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/models/training/__init__.py +0 -0
  37. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/models/training/run.py +0 -0
  38. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/models/training/utils.py +0 -0
  39. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/models/utils.py +0 -0
  40. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/models/utterance/__init__.py +0 -0
  41. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/models/utterance/cantonese_infer.py +0 -0
  42. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/models/utterance/dataset.py +0 -0
  43. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/models/utterance/execute.py +0 -0
  44. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/models/utterance/infer.py +0 -0
  45. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/models/utterance/prep.py +0 -0
  46. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/models/utterance/train.py +0 -0
  47. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/models/wave2vec/__init__.py +0 -0
  48. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/models/wave2vec/infer_fa.py +0 -0
  49. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/models/whisper/__init__.py +0 -0
  50. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/models/whisper/infer_asr.py +0 -0
  51. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/models/whisper/infer_fa.py +0 -0
  52. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/__init__.py +0 -0
  53. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/analysis/__init__.py +0 -0
  54. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/asr/__init__.py +0 -0
  55. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/asr/num2chinese.py +0 -0
  56. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/asr/oai_whisper.py +0 -0
  57. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/asr/rev.py +0 -0
  58. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/asr/utils.py +0 -0
  59. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/asr/whisper.py +0 -0
  60. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/asr/whisperx.py +0 -0
  61. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/base.py +0 -0
  62. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/cleanup/__init__.py +0 -0
  63. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  64. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  65. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  66. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/cleanup/retrace.py +0 -0
  67. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  68. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  69. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/cleanup/support/test.test +0 -0
  70. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/dispatch.py +0 -0
  71. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/fa/__init__.py +0 -0
  72. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  73. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  74. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  75. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  76. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  77. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  78. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  79. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  80. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  81. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  82. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/pipeline.py +0 -0
  83. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/speaker/__init__.py +0 -0
  84. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  85. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/translate/__init__.py +0 -0
  86. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/translate/gtrans.py +0 -0
  87. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/translate/seamless.py +0 -0
  88. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/translate/utils.py +0 -0
  89. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/utr/__init__.py +0 -0
  90. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/utr/rev_utr.py +0 -0
  91. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/utr/utils.py +0 -0
  92. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  93. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/utterance/__init__.py +0 -0
  94. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  95. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/tests/__init__.py +0 -0
  96. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/tests/conftest.py +0 -0
  97. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  98. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  99. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  100. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  101. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  102. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  103. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  104. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  105. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  106. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  107. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  108. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  109. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/tests/pipelines/fixures.py +0 -0
  110. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  111. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  112. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/tests/test_document.py +0 -0
  113. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/utils/__init__.py +0 -0
  114. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/utils/abbrev.py +0 -0
  115. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/utils/config.py +0 -0
  116. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/utils/names.py +0 -0
  117. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign/utils/utils.py +0 -0
  118. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign.egg-info/SOURCES.txt +0 -0
  119. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign.egg-info/dependency_links.txt +0 -0
  120. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign.egg-info/entry_points.txt +0 -0
  121. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign.egg-info/requires.txt +0 -0
  122. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/batchalign.egg-info/top_level.txt +0 -0
  123. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/setup.cfg +0 -0
  124. {batchalign-0.7.19.post11 → batchalign-0.7.19.post12}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.4
1
+ Metadata-Version: 2.2
2
2
  Name: batchalign
3
- Version: 0.7.19.post11
3
+ Version: 0.7.19.post12
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -47,7 +47,6 @@ Dynamic: author-email
47
47
  Dynamic: classifier
48
48
  Dynamic: description
49
49
  Dynamic: description-content-type
50
- Dynamic: license-file
51
50
  Dynamic: provides-extra
52
51
  Dynamic: requires-dist
53
52
  Dynamic: summary
@@ -95,6 +95,11 @@ def conform(x):
95
95
 
96
96
  return result
97
97
 
98
+ def match_fn(x,y):
99
+ return (y == x or
100
+ y.replace("(", "").replace(")", "") == x.replace("(", "").replace(")", "") or
101
+ re.sub(r"\((.*)\)",r"", y) == x or re.sub(r"\((.*)\)",r"", x) == y)
102
+
98
103
  class EvaluationEngine(BatchalignEngine):
99
104
  tasks = [ Task.WER ]
100
105
 
@@ -107,8 +112,8 @@ class EvaluationEngine(BatchalignEngine):
107
112
  forms = [i.replace("-", "") for i in forms if i.strip() not in MOR_PUNCT+ENDING_PUNCT]
108
113
  gold_forms = [i.replace("-", "") for i in gold_forms if i.strip() not in MOR_PUNCT+ENDING_PUNCT]
109
114
 
110
- forms = [re.sub(r"\((.*)\)",r"", i) for i in forms]
111
- gold_forms = [re.sub(r"\((.*)\)",r"", i) for i in gold_forms]
115
+ # forms = [re.sub(r"\((.*)\)",r"", i) for i in forms]
116
+ # gold_forms = [re.sub(r"\((.*)\)",r"", i) for i in gold_forms]
112
117
 
113
118
  # if there are single letter frames, we combine them together
114
119
  # until the utterance is done or there isn't any left
@@ -149,7 +154,7 @@ class EvaluationEngine(BatchalignEngine):
149
154
  forms_final = conform(forms_final)
150
155
 
151
156
  # dp!
152
- alignment = align(forms_final, gold_final, False)
157
+ alignment = align(forms_final, gold_final, False, match_fn)
153
158
 
154
159
  # calculate each type of error
155
160
  sub = 0
@@ -76,7 +76,7 @@ def __serialize_arr(src, tgt):
76
76
 
77
77
  return src_serialized, tgt_serialized
78
78
 
79
- def __dp(payload, reference, t):
79
+ def __dp(payload, reference, t, match_fn):
80
80
  """Performs bottom-up dynamic programming alignment
81
81
 
82
82
  Parameters
@@ -149,7 +149,7 @@ def __dp(payload, reference, t):
149
149
  # get a match.
150
150
 
151
151
  # recall 1 indexing
152
- is_match = (reference[i-1].key == payload[j-1].key)
152
+ is_match = match_fn(reference[i-1].key, payload[j-1].key)
153
153
 
154
154
  # calculate new distances
155
155
  new_dist1 = dist1+(0 if is_match else 2)
@@ -209,15 +209,16 @@ def __dp(payload, reference, t):
209
209
 
210
210
  def align(source_payload_sequence,
211
211
  target_reference_sequence,
212
- tqdm=True):
212
+ tqdm=True,
213
+ match_fn=lambda x,y: x==y):
213
214
  """Align two sequences"""
214
215
 
215
216
  if (len(source_payload_sequence) > 0 and
216
217
  type(source_payload_sequence[0]) == PayloadTarget):
217
- return __dp(source_payload_sequence, target_reference_sequence, tqdm)
218
+ return __dp(source_payload_sequence, target_reference_sequence, tqdm, match_fn)
218
219
  else:
219
220
  return __dp(*__serialize_arr(source_payload_sequence,
220
- target_reference_sequence), tqdm)
221
+ target_reference_sequence), tqdm, match_fn)
221
222
 
222
223
  # align([1,2,3,4,4,5,5,5], [1,1,3,4,4,12,5,5,18])
223
224
 
@@ -0,0 +1,3 @@
1
+ 0.7.19-post.12
2
+ July 10th, 2025
3
+ Fix ASR evals paren matching
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.4
1
+ Metadata-Version: 2.2
2
2
  Name: batchalign
3
- Version: 0.7.19.post11
3
+ Version: 0.7.19.post12
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -47,7 +47,6 @@ Dynamic: author-email
47
47
  Dynamic: classifier
48
48
  Dynamic: description
49
49
  Dynamic: description-content-type
50
- Dynamic: license-file
51
50
  Dynamic: provides-extra
52
51
  Dynamic: requires-dist
53
52
  Dynamic: summary
@@ -1,3 +0,0 @@
1
- 0.7.19-post.11
2
- July 8th, 2025
3
- benchmarking changes