batchalign 0.7.19.post10__tar.gz → 0.7.19.post12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of batchalign might be problematic. Click here for more details.

Files changed (124) hide show
  1. {batchalign-0.7.19.post10/batchalign.egg-info → batchalign-0.7.19.post12}/PKG-INFO +1 -1
  2. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/analysis/eval.py +46 -3
  3. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/utils/dp.py +6 -5
  4. batchalign-0.7.19.post12/batchalign/version +3 -0
  5. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12/batchalign.egg-info}/PKG-INFO +1 -1
  6. batchalign-0.7.19.post10/batchalign/version +0 -3
  7. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/LICENSE +0 -0
  8. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/MANIFEST.in +0 -0
  9. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/README.md +0 -0
  10. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/__init__.py +0 -0
  11. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/__main__.py +0 -0
  12. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/cli/__init__.py +0 -0
  13. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/cli/cli.py +0 -0
  14. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/cli/dispatch.py +0 -0
  15. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/constants.py +0 -0
  16. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/document.py +0 -0
  17. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/errors.py +0 -0
  18. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/formats/__init__.py +0 -0
  19. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/formats/base.py +0 -0
  20. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/formats/chat/__init__.py +0 -0
  21. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/formats/chat/file.py +0 -0
  22. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/formats/chat/generator.py +0 -0
  23. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/formats/chat/lexer.py +0 -0
  24. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/formats/chat/parser.py +0 -0
  25. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/formats/chat/utils.py +0 -0
  26. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/formats/textgrid/__init__.py +0 -0
  27. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/formats/textgrid/file.py +0 -0
  28. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/formats/textgrid/generator.py +0 -0
  29. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/formats/textgrid/parser.py +0 -0
  30. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/models/__init__.py +0 -0
  31. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/models/resolve.py +0 -0
  32. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/models/speaker/__init__.py +0 -0
  33. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/models/speaker/config.yaml +0 -0
  34. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/models/speaker/infer.py +0 -0
  35. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/models/speaker/utils.py +0 -0
  36. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/models/training/__init__.py +0 -0
  37. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/models/training/run.py +0 -0
  38. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/models/training/utils.py +0 -0
  39. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/models/utils.py +0 -0
  40. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/models/utterance/__init__.py +0 -0
  41. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/models/utterance/cantonese_infer.py +0 -0
  42. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/models/utterance/dataset.py +0 -0
  43. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/models/utterance/execute.py +0 -0
  44. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/models/utterance/infer.py +0 -0
  45. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/models/utterance/prep.py +0 -0
  46. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/models/utterance/train.py +0 -0
  47. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/models/wave2vec/__init__.py +0 -0
  48. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/models/wave2vec/infer_fa.py +0 -0
  49. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/models/whisper/__init__.py +0 -0
  50. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/models/whisper/infer_asr.py +0 -0
  51. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/models/whisper/infer_fa.py +0 -0
  52. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/__init__.py +0 -0
  53. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/analysis/__init__.py +0 -0
  54. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/asr/__init__.py +0 -0
  55. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/asr/num2chinese.py +0 -0
  56. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/asr/oai_whisper.py +0 -0
  57. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/asr/rev.py +0 -0
  58. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/asr/utils.py +0 -0
  59. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/asr/whisper.py +0 -0
  60. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/asr/whisperx.py +0 -0
  61. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/base.py +0 -0
  62. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/cleanup/__init__.py +0 -0
  63. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  64. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  65. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  66. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/cleanup/retrace.py +0 -0
  67. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  68. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  69. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/cleanup/support/test.test +0 -0
  70. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/dispatch.py +0 -0
  71. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/fa/__init__.py +0 -0
  72. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  73. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  74. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  75. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  76. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  77. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  78. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  79. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  80. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  81. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  82. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/pipeline.py +0 -0
  83. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/speaker/__init__.py +0 -0
  84. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  85. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/translate/__init__.py +0 -0
  86. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/translate/gtrans.py +0 -0
  87. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/translate/seamless.py +0 -0
  88. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/translate/utils.py +0 -0
  89. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/utr/__init__.py +0 -0
  90. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/utr/rev_utr.py +0 -0
  91. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/utr/utils.py +0 -0
  92. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  93. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/utterance/__init__.py +0 -0
  94. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  95. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/tests/__init__.py +0 -0
  96. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/tests/conftest.py +0 -0
  97. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  98. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  99. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  100. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  101. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  102. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  103. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  104. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  105. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  106. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  107. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  108. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  109. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/tests/pipelines/fixures.py +0 -0
  110. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  111. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  112. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/tests/test_document.py +0 -0
  113. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/utils/__init__.py +0 -0
  114. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/utils/abbrev.py +0 -0
  115. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/utils/config.py +0 -0
  116. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/utils/names.py +0 -0
  117. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign/utils/utils.py +0 -0
  118. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign.egg-info/SOURCES.txt +0 -0
  119. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign.egg-info/dependency_links.txt +0 -0
  120. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign.egg-info/entry_points.txt +0 -0
  121. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign.egg-info/requires.txt +0 -0
  122. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/batchalign.egg-info/top_level.txt +0 -0
  123. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/setup.cfg +0 -0
  124. {batchalign-0.7.19.post10 → batchalign-0.7.19.post12}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: batchalign
3
- Version: 0.7.19.post10
3
+ Version: 0.7.19.post12
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -38,9 +38,47 @@ def conform(x):
38
38
  elif "wanna" == i.strip():
39
39
  result.append("want")
40
40
  result.append("to")
41
+ elif "ii" == i.strip():
42
+ result.append("i")
43
+ result.append("i")
44
+ elif "i'd" == i.strip():
45
+ result.append("i")
46
+ result.append("had")
47
+ elif "tshirts" == i.strip():
48
+ result.append("t")
49
+ result.append("shirts")
50
+ elif "tshirts" == i.strip():
51
+ result.append("t")
52
+ result.append("shirts")
53
+ elif "anytime" == i.strip():
54
+ result.append("any")
55
+ result.append("time")
56
+ elif "alright" == i.strip():
57
+ result.append("all")
58
+ result.append("right")
59
+ elif "sorta" == i.strip():
60
+ result.append("sort")
61
+ result.append("of")
62
+ elif "alrightie" == i.strip():
63
+ result.append("all")
64
+ result.append("right")
65
+ elif "mm" == i.strip():
66
+ result.append("hm")
67
+ elif "ai" == i.strip():
68
+ result.append("a")
69
+ result.append("i")
70
+ elif "this'll" == i.strip():
71
+ result.append("this")
72
+ result.append("will")
41
73
  elif "gotta" == i.strip():
42
74
  result.append("got")
43
75
  result.append("to")
76
+ elif "eh" == i.strip():
77
+ result.append("uh")
78
+ elif "kinda" == i.strip():
79
+ result.append("a")
80
+ result.append("kind")
81
+ result.append("of")
44
82
  elif "farmhouse" == i.strip():
45
83
  result.append("farm")
46
84
  result.append("house")
@@ -57,6 +95,11 @@ def conform(x):
57
95
 
58
96
  return result
59
97
 
98
+ def match_fn(x,y):
99
+ return (y == x or
100
+ y.replace("(", "").replace(")", "") == x.replace("(", "").replace(")", "") or
101
+ re.sub(r"\((.*)\)",r"", y) == x or re.sub(r"\((.*)\)",r"", x) == y)
102
+
60
103
  class EvaluationEngine(BatchalignEngine):
61
104
  tasks = [ Task.WER ]
62
105
 
@@ -69,8 +112,8 @@ class EvaluationEngine(BatchalignEngine):
69
112
  forms = [i.replace("-", "") for i in forms if i.strip() not in MOR_PUNCT+ENDING_PUNCT]
70
113
  gold_forms = [i.replace("-", "") for i in gold_forms if i.strip() not in MOR_PUNCT+ENDING_PUNCT]
71
114
 
72
- forms = [re.sub(r"\((.*)\)",r"", i) for i in forms]
73
- gold_forms = [re.sub(r"\((.*)\)",r"", i) for i in gold_forms]
115
+ # forms = [re.sub(r"\((.*)\)",r"", i) for i in forms]
116
+ # gold_forms = [re.sub(r"\((.*)\)",r"", i) for i in gold_forms]
74
117
 
75
118
  # if there are single letter frames, we combine them together
76
119
  # until the utterance is done or there isn't any left
@@ -111,7 +154,7 @@ class EvaluationEngine(BatchalignEngine):
111
154
  forms_final = conform(forms_final)
112
155
 
113
156
  # dp!
114
- alignment = align(forms_final, gold_final, False)
157
+ alignment = align(forms_final, gold_final, False, match_fn)
115
158
 
116
159
  # calculate each type of error
117
160
  sub = 0
@@ -76,7 +76,7 @@ def __serialize_arr(src, tgt):
76
76
 
77
77
  return src_serialized, tgt_serialized
78
78
 
79
- def __dp(payload, reference, t):
79
+ def __dp(payload, reference, t, match_fn):
80
80
  """Performs bottom-up dynamic programming alignment
81
81
 
82
82
  Parameters
@@ -149,7 +149,7 @@ def __dp(payload, reference, t):
149
149
  # get a match.
150
150
 
151
151
  # recall 1 indexing
152
- is_match = (reference[i-1].key == payload[j-1].key)
152
+ is_match = match_fn(reference[i-1].key, payload[j-1].key)
153
153
 
154
154
  # calculate new distances
155
155
  new_dist1 = dist1+(0 if is_match else 2)
@@ -209,15 +209,16 @@ def __dp(payload, reference, t):
209
209
 
210
210
  def align(source_payload_sequence,
211
211
  target_reference_sequence,
212
- tqdm=True):
212
+ tqdm=True,
213
+ match_fn=lambda x,y: x==y):
213
214
  """Align two sequences"""
214
215
 
215
216
  if (len(source_payload_sequence) > 0 and
216
217
  type(source_payload_sequence[0]) == PayloadTarget):
217
- return __dp(source_payload_sequence, target_reference_sequence, tqdm)
218
+ return __dp(source_payload_sequence, target_reference_sequence, tqdm, match_fn)
218
219
  else:
219
220
  return __dp(*__serialize_arr(source_payload_sequence,
220
- target_reference_sequence), tqdm)
221
+ target_reference_sequence), tqdm, match_fn)
221
222
 
222
223
  # align([1,2,3,4,4,5,5,5], [1,1,3,4,4,12,5,5,18])
223
224
 
@@ -0,0 +1,3 @@
1
+ 0.7.19-post.12
2
+ July 10th, 2025
3
+ Fix ASR evals paren matching
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: batchalign
3
- Version: 0.7.19.post10
3
+ Version: 0.7.19.post12
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,3 +0,0 @@
1
- 0.7.19-post.10
2
- July 1st, 2025
3
- Whoops, fixed ASR regression.