batchalign 0.7.20.post4__tar.gz → 0.7.20.post6__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of batchalign might be problematic. Click here for more details.

Files changed (125)
  1. {batchalign-0.7.20.post4/batchalign.egg-info → batchalign-0.7.20.post6}/PKG-INFO +1 -1
  2. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/document.py +2 -2
  3. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/formats/chat/lexer.py +11 -10
  4. batchalign-0.7.20.post6/batchalign/version +3 -0
  5. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6/batchalign.egg-info}/PKG-INFO +1 -1
  6. batchalign-0.7.20.post4/batchalign/version +0 -3
  7. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/LICENSE +0 -0
  8. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/MANIFEST.in +0 -0
  9. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/README.md +0 -0
  10. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/__init__.py +0 -0
  11. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/__main__.py +0 -0
  12. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/cli/__init__.py +0 -0
  13. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/cli/cli.py +0 -0
  14. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/cli/dispatch.py +0 -0
  15. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/constants.py +0 -0
  16. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/errors.py +0 -0
  17. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/formats/__init__.py +0 -0
  18. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/formats/base.py +0 -0
  19. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/formats/chat/__init__.py +0 -0
  20. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/formats/chat/file.py +0 -0
  21. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/formats/chat/generator.py +0 -0
  22. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/formats/chat/parser.py +0 -0
  23. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/formats/chat/utils.py +0 -0
  24. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/formats/textgrid/__init__.py +0 -0
  25. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/formats/textgrid/file.py +0 -0
  26. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/formats/textgrid/generator.py +0 -0
  27. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/formats/textgrid/parser.py +0 -0
  28. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/models/__init__.py +0 -0
  29. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/models/resolve.py +0 -0
  30. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/models/speaker/__init__.py +0 -0
  31. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/models/speaker/config.yaml +0 -0
  32. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/models/speaker/infer.py +0 -0
  33. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/models/speaker/utils.py +0 -0
  34. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/models/training/__init__.py +0 -0
  35. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/models/training/run.py +0 -0
  36. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/models/training/utils.py +0 -0
  37. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/models/utils.py +0 -0
  38. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/models/utterance/__init__.py +0 -0
  39. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/models/utterance/cantonese_infer.py +0 -0
  40. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/models/utterance/dataset.py +0 -0
  41. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/models/utterance/execute.py +0 -0
  42. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/models/utterance/infer.py +0 -0
  43. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/models/utterance/prep.py +0 -0
  44. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/models/utterance/train.py +0 -0
  45. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/models/wave2vec/__init__.py +0 -0
  46. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/models/wave2vec/infer_fa.py +0 -0
  47. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/models/whisper/__init__.py +0 -0
  48. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/models/whisper/infer_asr.py +0 -0
  49. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/models/whisper/infer_fa.py +0 -0
  50. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/__init__.py +0 -0
  51. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/analysis/__init__.py +0 -0
  52. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/analysis/eval.py +0 -0
  53. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/asr/__init__.py +0 -0
  54. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/asr/num2chinese.py +0 -0
  55. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/asr/oai_whisper.py +0 -0
  56. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/asr/rev.py +0 -0
  57. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/asr/utils.py +0 -0
  58. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/asr/whisper.py +0 -0
  59. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/asr/whisperx.py +0 -0
  60. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/base.py +0 -0
  61. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/cleanup/__init__.py +0 -0
  62. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  63. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  64. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  65. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/cleanup/retrace.py +0 -0
  66. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  67. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  68. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/cleanup/support/test.test +0 -0
  69. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/dispatch.py +0 -0
  70. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/fa/__init__.py +0 -0
  71. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  72. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  73. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  74. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  75. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  76. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  77. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  78. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  79. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  80. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  81. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/pipeline.py +0 -0
  82. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/speaker/__init__.py +0 -0
  83. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  84. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/translate/__init__.py +0 -0
  85. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/translate/gtrans.py +0 -0
  86. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/translate/seamless.py +0 -0
  87. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/translate/utils.py +0 -0
  88. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/utr/__init__.py +0 -0
  89. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/utr/rev_utr.py +0 -0
  90. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/utr/utils.py +0 -0
  91. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  92. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/utterance/__init__.py +0 -0
  93. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  94. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/tests/__init__.py +0 -0
  95. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/tests/conftest.py +0 -0
  96. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  97. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  98. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  99. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  100. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  101. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  102. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  103. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  104. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  105. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  106. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  107. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  108. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/tests/pipelines/fixures.py +0 -0
  109. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  110. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  111. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/tests/test_document.py +0 -0
  112. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/utils/__init__.py +0 -0
  113. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/utils/abbrev.py +0 -0
  114. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/utils/compounds.py +0 -0
  115. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/utils/config.py +0 -0
  116. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/utils/dp.py +0 -0
  117. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/utils/names.py +0 -0
  118. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign/utils/utils.py +0 -0
  119. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign.egg-info/SOURCES.txt +0 -0
  120. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign.egg-info/dependency_links.txt +0 -0
  121. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign.egg-info/entry_points.txt +0 -0
  122. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign.egg-info/requires.txt +0 -0
  123. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/batchalign.egg-info/top_level.txt +0 -0
  124. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/setup.cfg +0 -0
  125. {batchalign-0.7.20.post4 → batchalign-0.7.20.post6}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: batchalign
3
- Version: 0.7.20.post4
3
+ Version: 0.7.20.post6
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -225,9 +225,9 @@ class Utterance(BaseModel):
225
225
  t = re.sub(r" +", " ", t.strip()).strip()
226
226
  t = t.replace("+ ,", "+,").strip()
227
227
 
228
- abbrevs = [" " .join(list(i)) for i in abbrev]
228
+ abbrevs = [" "+" " .join(list(i))+" " for i in abbrev]
229
229
  for i in abbrevs:
230
- t = t.replace(i, i.replace(" ", ""))
230
+ t = t.replace(i, " "+i.replace(" ", "")+" ")
231
231
 
232
232
  return t
233
233
 
@@ -47,7 +47,7 @@ class UtteranceLexer:
47
47
  decoded.append((i, type))
48
48
  return decoded
49
49
 
50
- def __get_until(self, end_tokens=[' ']):
50
+ def _get_until(self, end_tokens=[' ']):
51
51
  has_read_nonempty = False
52
52
  tokens = []
53
53
  while True:
@@ -79,7 +79,10 @@ class UtteranceLexer:
79
79
  # self.__clauses.append((form.strip(), TokenType.FEAT))
80
80
  elif form.strip() in NORMAL_GROUP_MARKS:
81
81
  # basically ignore the form
82
- pass
82
+ popped = self.__clauses.pop(-1)[0]
83
+ if not popped.startswith("&="):
84
+ self.__clauses.append((popped, TokenType.REGULAR))
85
+ # pass
83
86
  # self.__clauses.append((form.strip(), TokenType.FEAT))
84
87
  elif form[0] == "[" and form[:2] != "[:":
85
88
  # we ignore all other things which are simple annotations
@@ -97,13 +100,13 @@ class UtteranceLexer:
97
100
  self.__clauses.append((annotation_clean(form).strip(), TokenType.REGULAR))
98
101
 
99
102
  def __pull(self):
100
- form, num, delim = self.__get_until()
103
+ form, num, delim = self._get_until()
101
104
 
102
105
  self.__handle(form, num, delim)
103
106
 
104
107
  return form
105
108
 
106
- def __get_group(self, form, type):
109
+ def _get_group(self, form, type):
107
110
  text = ""
108
111
  group = [form]
109
112
 
@@ -113,11 +116,10 @@ class UtteranceLexer:
113
116
 
114
117
  # scan forward until we have the first actual form, if
115
118
  # its a selection group
116
- if type == ">" and annotation_clean(form, special=True) == "":
117
- form, num, delim = self.__get_until()
119
+ if type == ">" and annotation_clean(form, special=True) == "" and form != "<":
120
+ form, num, delim = self._get_until()
118
121
  group = [group.pop(0).strip()+annotation_clean(form)]
119
122
 
120
-
121
123
  # decrement nesting first
122
124
  if form not in REPEAT_GROUP_MARKS and form not in NORMAL_GROUP_MARKS:
123
125
  if type == ">" and ">" in form:
@@ -127,7 +129,7 @@ class UtteranceLexer:
127
129
 
128
130
  # grab forward the entire group
129
131
  while (type not in form) or (nesting != -1):
130
- form, num, delim = self.__get_until()
132
+ form, num, delim = self._get_until()
131
133
 
132
134
  sform = copy.deepcopy(form)
133
135
  for i in REPEAT_GROUP_MARKS + NORMAL_GROUP_MARKS:
@@ -157,7 +159,6 @@ class UtteranceLexer:
157
159
  words = [re.compile(r"[^A-Za-zÀ-ÖØ-öø-ÿ']").sub("", i).strip() for i in group
158
160
  if re.compile(r"[^A-Za-zÀ-ÖØ-öø-ÿ']").sub("", i).strip()!= ""]
159
161
 
160
-
161
162
  if type == "]":
162
163
  return words, special[0], text
163
164
  else:
@@ -167,7 +168,7 @@ class UtteranceLexer:
167
168
  orig_form = form
168
169
 
169
170
  # scan the group
170
- words, special, text = self.__get_group(form, type)
171
+ words, special, text = self._get_group(form, type)
171
172
  text = form + text
172
173
 
173
174
  if len(text.strip()) == 0:
@@ -0,0 +1,3 @@
1
+ 0.7.20-post.6
2
+ July 27th, 2025
3
+ Various parsing errors
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: batchalign
3
- Version: 0.7.20.post4
3
+ Version: 0.7.20.post6
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,3 +0,0 @@
1
- 0.7.20-post.4
2
- July 23th, 2025
3
- bump dependencies