batchalign 0.7.22.post4__tar.gz → 0.7.22.post7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of batchalign might be problematic. Click here for more details.

Files changed (131) hide show
  1. {batchalign-0.7.22.post4/batchalign.egg-info → batchalign-0.7.22.post7}/PKG-INFO +1 -1
  2. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/cli/cli.py +1 -1
  3. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/analysis/eval.py +1 -1
  4. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/avqi/engine.py +40 -2
  5. batchalign-0.7.22.post7/batchalign/version +3 -0
  6. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7/batchalign.egg-info}/PKG-INFO +1 -1
  7. batchalign-0.7.22.post4/batchalign/version +0 -3
  8. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/LICENSE +0 -0
  9. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/MANIFEST.in +0 -0
  10. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/README.md +0 -0
  11. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/__init__.py +0 -0
  12. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/__main__.py +0 -0
  13. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/cli/__init__.py +0 -0
  14. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/cli/dispatch.py +0 -0
  15. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/constants.py +0 -0
  16. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/document.py +0 -0
  17. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/errors.py +0 -0
  18. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/formats/__init__.py +0 -0
  19. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/formats/base.py +0 -0
  20. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/formats/chat/__init__.py +0 -0
  21. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/formats/chat/file.py +0 -0
  22. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/formats/chat/generator.py +0 -0
  23. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/formats/chat/lexer.py +0 -0
  24. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/formats/chat/parser.py +0 -0
  25. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/formats/chat/utils.py +0 -0
  26. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/formats/textgrid/__init__.py +0 -0
  27. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/formats/textgrid/file.py +0 -0
  28. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/formats/textgrid/generator.py +0 -0
  29. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/formats/textgrid/parser.py +0 -0
  30. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/models/__init__.py +0 -0
  31. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/models/resolve.py +0 -0
  32. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/models/speaker/__init__.py +0 -0
  33. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/models/speaker/config.yaml +0 -0
  34. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/models/speaker/infer.py +0 -0
  35. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/models/speaker/utils.py +0 -0
  36. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/models/training/__init__.py +0 -0
  37. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/models/training/run.py +0 -0
  38. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/models/training/utils.py +0 -0
  39. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/models/utils.py +0 -0
  40. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/models/utterance/__init__.py +0 -0
  41. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/models/utterance/cantonese_infer.py +0 -0
  42. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/models/utterance/dataset.py +0 -0
  43. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/models/utterance/execute.py +0 -0
  44. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/models/utterance/infer.py +0 -0
  45. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/models/utterance/prep.py +0 -0
  46. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/models/utterance/train.py +0 -0
  47. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/models/wave2vec/__init__.py +0 -0
  48. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/models/wave2vec/infer_fa.py +0 -0
  49. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/models/whisper/__init__.py +0 -0
  50. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/models/whisper/infer_asr.py +0 -0
  51. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/models/whisper/infer_fa.py +0 -0
  52. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/__init__.py +0 -0
  53. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/analysis/__init__.py +0 -0
  54. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/asr/__init__.py +0 -0
  55. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/asr/num2chinese.py +0 -0
  56. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/asr/oai_whisper.py +0 -0
  57. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/asr/rev.py +0 -0
  58. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/asr/utils.py +0 -0
  59. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/asr/whisper.py +0 -0
  60. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/asr/whisperx.py +0 -0
  61. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/avqi/__init__.py +0 -0
  62. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/base.py +0 -0
  63. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/cleanup/__init__.py +0 -0
  64. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  65. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  66. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  67. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/cleanup/retrace.py +0 -0
  68. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  69. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  70. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/cleanup/support/test.test +0 -0
  71. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/diarization/__init__.py +0 -0
  72. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/diarization/pyannote.py +0 -0
  73. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/dispatch.py +0 -0
  74. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/fa/__init__.py +0 -0
  75. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  76. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  77. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  78. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  79. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  80. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  81. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  82. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  83. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  84. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  85. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/opensmile/__init__.py +0 -0
  86. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/opensmile/engine.py +0 -0
  87. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/pipeline.py +0 -0
  88. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/speaker/__init__.py +0 -0
  89. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  90. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/translate/__init__.py +0 -0
  91. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/translate/gtrans.py +0 -0
  92. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/translate/seamless.py +0 -0
  93. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/translate/utils.py +0 -0
  94. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/utr/__init__.py +0 -0
  95. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/utr/rev_utr.py +0 -0
  96. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/utr/utils.py +0 -0
  97. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  98. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/utterance/__init__.py +0 -0
  99. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  100. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/tests/__init__.py +0 -0
  101. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/tests/conftest.py +0 -0
  102. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  103. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  104. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  105. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  106. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  107. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  108. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  109. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  110. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  111. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  112. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  113. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  114. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/tests/pipelines/fixures.py +0 -0
  115. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  116. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  117. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/tests/test_document.py +0 -0
  118. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/utils/__init__.py +0 -0
  119. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/utils/abbrev.py +0 -0
  120. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/utils/compounds.py +0 -0
  121. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/utils/config.py +0 -0
  122. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/utils/dp.py +0 -0
  123. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/utils/names.py +0 -0
  124. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign/utils/utils.py +0 -0
  125. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign.egg-info/SOURCES.txt +0 -0
  126. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign.egg-info/dependency_links.txt +0 -0
  127. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign.egg-info/entry_points.txt +0 -0
  128. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign.egg-info/requires.txt +0 -0
  129. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/batchalign.egg-info/top_level.txt +0 -0
  130. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/setup.cfg +0 -0
  131. {batchalign-0.7.22.post4 → batchalign-0.7.22.post7}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: batchalign
3
- Version: 0.7.22.post4
3
+ Version: 0.7.22.post7
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -179,7 +179,7 @@ def transcribe(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
179
179
 
180
180
  def writer(doc, output):
181
181
  doc.content.insert(0, CustomLine(id="Comment", type=CustomLineType.INDEPENDENT,
182
- content=f"Batchalign {VERSION_NUMBER.strip()}, ASR Engine {asr}. Unchecked output of ASR model; do not use."))
182
+ content=f"Batchalign {VERSION_NUMBER.strip()}, ASR Engine {asr}. Unchecked output of ASR model."))
183
183
  CHATFile(doc=doc).write(output
184
184
  .replace(".wav", ".cha")
185
185
  .replace(".WAV", ".cha")
@@ -25,7 +25,7 @@ def conform(x):
25
25
  result = []
26
26
  for i in x:
27
27
  if i.strip().lower() in joined_compounds:
28
- for k in compounds[joined_compounds.index(i.strip())]:
28
+ for k in compounds[joined_compounds.index(i.strip().lower())]:
29
29
  result.append(k)
30
30
  elif i.strip() in lowered_abbrev:
31
31
  for j in i.strip():
@@ -11,6 +11,7 @@ from typing import Tuple, Dict, Optional
11
11
  import os
12
12
  from pathlib import Path
13
13
  import logging
14
+ import torchaudio
14
15
 
15
16
  from batchalign.pipelines.base import BatchalignEngine
16
17
  from batchalign.document import Task, Document
@@ -246,8 +247,37 @@ class AVQIEngine(BatchalignEngine):
246
247
 
247
248
  L.info(f"Calculating AVQI for CS: {cs_path.name}, SV: {sv_name}")
248
249
 
250
+ # Create mono versions of both files
251
+ cs_mono_path = None
252
+ sv_mono_path = None
253
+
249
254
  try:
250
- avqi_score, features = self.calculate_avqi_features(str(cs_file), str(sv_file))
255
+ # Convert cs_file to mono
256
+ L.info(f"Converting {cs_path.name} to mono")
257
+ cs_waveform, cs_sample_rate = torchaudio.load(str(cs_file))
258
+ if cs_waveform.shape[0] > 1:
259
+ cs_mono = cs_waveform.mean(dim=0, keepdim=True)
260
+ else:
261
+ cs_mono = cs_waveform
262
+
263
+ # Create mono filename: file_name.cs.[extension].mono.wav
264
+ cs_mono_path = cs_path.parent / f"{cs_path.name}.mono.wav"
265
+ torchaudio.save(str(cs_mono_path), cs_mono, cs_sample_rate)
266
+
267
+ # Convert sv_file to mono
268
+ L.info(f"Converting {sv_name} to mono")
269
+ sv_waveform, sv_sample_rate = torchaudio.load(str(sv_file))
270
+ if sv_waveform.shape[0] > 1:
271
+ sv_mono = sv_waveform.mean(dim=0, keepdim=True)
272
+ else:
273
+ sv_mono = sv_waveform
274
+
275
+ # Create mono filename: file_name.sv.[extension].mono.wav
276
+ sv_mono_path = sv_file.parent / f"{sv_file.name}.mono.wav"
277
+ torchaudio.save(str(sv_mono_path), sv_mono, sv_sample_rate)
278
+
279
+ # Calculate AVQI using mono versions
280
+ avqi_score, features = self.calculate_avqi_features(str(cs_mono_path), str(sv_mono_path))
251
281
 
252
282
  results = {
253
283
  'avqi': avqi_score,
@@ -277,4 +307,12 @@ class AVQIEngine(BatchalignEngine):
277
307
  'tilt': 0.0,
278
308
  'error': str(e),
279
309
  'success': False
280
- }
310
+ }
311
+ finally:
312
+ # Clean up temporary mono files
313
+ if cs_mono_path and cs_mono_path.exists():
314
+ L.info(f"Cleaning up temporary file: {cs_mono_path.name}")
315
+ cs_mono_path.unlink()
316
+ if sv_mono_path and sv_mono_path.exists():
317
+ L.info(f"Cleaning up temporary file: {sv_mono_path.name}")
318
+ sv_mono_path.unlink()
@@ -0,0 +1,3 @@
1
+ 0.7.22-post.7
2
+ October 21st, 2025
3
+ Patch two small bugs
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: batchalign
3
- Version: 0.7.22.post4
3
+ Version: 0.7.22.post7
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,3 +0,0 @@
1
- 0.7.22-post.4
2
- October 17th, 2025
3
- Bump certs