batchalign 0.7.15__tar.gz → 0.7.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. {batchalign-0.7.15/batchalign.egg-info → batchalign-0.7.16}/PKG-INFO +1 -1
  2. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/fa/wave2vec_fa.py +1 -1
  3. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/morphosyntax/ud.py +9 -1
  4. batchalign-0.7.16/batchalign/version +3 -0
  5. {batchalign-0.7.15 → batchalign-0.7.16/batchalign.egg-info}/PKG-INFO +1 -1
  6. batchalign-0.7.15/batchalign/version +0 -3
  7. {batchalign-0.7.15 → batchalign-0.7.16}/LICENSE +0 -0
  8. {batchalign-0.7.15 → batchalign-0.7.16}/MANIFEST.in +0 -0
  9. {batchalign-0.7.15 → batchalign-0.7.16}/README.md +0 -0
  10. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/__init__.py +0 -0
  11. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/__main__.py +0 -0
  12. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/cli/__init__.py +0 -0
  13. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/cli/cli.py +0 -0
  14. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/cli/dispatch.py +0 -0
  15. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/constants.py +0 -0
  16. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/document.py +0 -0
  17. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/errors.py +0 -0
  18. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/formats/__init__.py +0 -0
  19. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/formats/base.py +0 -0
  20. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/formats/chat/__init__.py +0 -0
  21. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/formats/chat/file.py +0 -0
  22. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/formats/chat/generator.py +0 -0
  23. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/formats/chat/lexer.py +0 -0
  24. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/formats/chat/parser.py +0 -0
  25. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/formats/chat/utils.py +0 -0
  26. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/formats/textgrid/__init__.py +0 -0
  27. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/formats/textgrid/file.py +0 -0
  28. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/formats/textgrid/generator.py +0 -0
  29. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/formats/textgrid/parser.py +0 -0
  30. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/models/__init__.py +0 -0
  31. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/models/resolve.py +0 -0
  32. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/models/speaker/__init__.py +0 -0
  33. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/models/speaker/config.yaml +0 -0
  34. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/models/speaker/infer.py +0 -0
  35. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/models/speaker/utils.py +0 -0
  36. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/models/training/__init__.py +0 -0
  37. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/models/training/run.py +0 -0
  38. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/models/training/utils.py +0 -0
  39. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/models/utils.py +0 -0
  40. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/models/utterance/__init__.py +0 -0
  41. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/models/utterance/cantonese_infer.py +0 -0
  42. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/models/utterance/dataset.py +0 -0
  43. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/models/utterance/execute.py +0 -0
  44. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/models/utterance/infer.py +0 -0
  45. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/models/utterance/prep.py +0 -0
  46. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/models/utterance/train.py +0 -0
  47. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/models/wave2vec/__init__.py +0 -0
  48. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/models/wave2vec/infer_fa.py +0 -0
  49. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/models/whisper/__init__.py +0 -0
  50. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/models/whisper/infer_asr.py +0 -0
  51. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/models/whisper/infer_fa.py +0 -0
  52. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/__init__.py +0 -0
  53. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/analysis/__init__.py +0 -0
  54. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/analysis/eval.py +0 -0
  55. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/asr/__init__.py +0 -0
  56. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/asr/num2chinese.py +0 -0
  57. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/asr/rev.py +0 -0
  58. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/asr/utils.py +0 -0
  59. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/asr/whisper.py +0 -0
  60. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/asr/whisperx.py +0 -0
  61. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/base.py +0 -0
  62. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/cleanup/__init__.py +0 -0
  63. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  64. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  65. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  66. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/cleanup/retrace.py +0 -0
  67. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  68. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  69. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/cleanup/support/test.test +0 -0
  70. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/dispatch.py +0 -0
  71. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/fa/__init__.py +0 -0
  72. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  73. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  74. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  75. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  76. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  77. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  78. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  79. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  80. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/pipeline.py +0 -0
  81. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/speaker/__init__.py +0 -0
  82. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  83. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/translate/__init__.py +0 -0
  84. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/translate/seamless.py +0 -0
  85. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/utr/__init__.py +0 -0
  86. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/utr/rev_utr.py +0 -0
  87. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/utr/utils.py +0 -0
  88. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  89. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/utterance/__init__.py +0 -0
  90. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  91. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/tests/__init__.py +0 -0
  92. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/tests/conftest.py +0 -0
  93. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  94. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  95. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  96. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  97. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  98. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  99. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  100. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  101. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  102. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  103. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  104. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  105. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/tests/pipelines/fixures.py +0 -0
  106. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  107. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  108. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/tests/test_document.py +0 -0
  109. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/utils/__init__.py +0 -0
  110. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/utils/config.py +0 -0
  111. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/utils/dp.py +0 -0
  112. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign/utils/utils.py +0 -0
  113. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign.egg-info/SOURCES.txt +0 -0
  114. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign.egg-info/dependency_links.txt +0 -0
  115. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign.egg-info/entry_points.txt +0 -0
  116. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign.egg-info/requires.txt +0 -0
  117. {batchalign-0.7.15 → batchalign-0.7.16}/batchalign.egg-info/top_level.txt +0 -0
  118. {batchalign-0.7.15 → batchalign-0.7.16}/setup.cfg +0 -0
  119. {batchalign-0.7.15 → batchalign-0.7.16}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.15
3
+ Version: 0.7.16
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -79,7 +79,7 @@ class Wave2VecFAEngine(BatchalignEngine):
79
79
  # if "noone's" in detokenized:
80
80
  # breakpoint()
81
81
  res = self.__wav2vec(audio=f.chunk(grp[0][1][0], grp[-1][1][1]), text=transcript)
82
- except IndexError:
82
+ except (IndexError, RuntimeError) as e:
83
83
  # utterance contains nothing
84
84
  continue
85
85
 
@@ -143,6 +143,7 @@ def handler(word, lang=None):
143
143
  pos,target = verbform(pos,target,word.text)
144
144
  target = target.replace(',', 'cm')
145
145
 
146
+
146
147
  return f"{'' if not unknown else '0'}{pos}|{target}"
147
148
 
148
149
  # POS specific handler
@@ -155,6 +156,9 @@ def handler__PRON(word, lang=None):
155
156
  person = '4'
156
157
 
157
158
  case = feats.get("Case","")
159
+ reflex = str(feats.get("Reflex","")).strip()
160
+ if reflex == "Yes":
161
+ reflex = "reflx"
158
162
  if lang == "fr":
159
163
  from batchalign.pipelines.morphosyntax.fr.case import case as caser
160
164
  case = caser(word.text)
@@ -167,6 +171,7 @@ def handler__PRON(word, lang=None):
167
171
  return (handler(word, lang)+
168
172
  stringify_feats(feats.get("PronType", "Int"),
169
173
  case.replace(",", ""),
174
+ reflex,
170
175
  number_string))
171
176
 
172
177
  def handler__DET(word, lang=None):
@@ -254,6 +259,9 @@ def handler__VERB(word, lang=None):
254
259
  mood = feats.get("Mood", "")
255
260
  person = str(feats.get("Person", ""))
256
261
 
262
+ biyan = str(feats.get("HebBinyan", "")).lower()
263
+ existential = str(feats.get("HebExistential", "")).lower()
264
+
257
265
  if person == "0":
258
266
  person = '4'
259
267
  number = feats.get("Number", "Sing")
@@ -281,7 +289,7 @@ def handler__VERB(word, lang=None):
281
289
  return res
282
290
  else:
283
291
  return res+flag+stringify_feats(aspect, mood,
284
- tense, polarity, polite,
292
+ tense, polarity, polite, biyan, existential,
285
293
  number[:1]+person, irr)
286
294
 
287
295
  def handler__actual_PUNCT(word, lang=None):
@@ -0,0 +1,3 @@
1
+ 0.7.16
2
+ February 27th, 2025
3
+ More Hebrew features
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.15
3
+ Version: 0.7.16
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,3 +0,0 @@
1
- 0.7.15
2
- Feburary 23rd, 2025
3
- Whisper ASR with Cantonese and tokenization!
File without changes
File without changes
File without changes
File without changes
File without changes