batchalign 0.8.0__tar.gz → 0.8.0.post1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of batchalign might be problematic. Click here for more details.

Files changed (144)
  1. {batchalign-0.8.0/batchalign.egg-info → batchalign-0.8.0.post1}/PKG-INFO +1 -1
  2. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/cli/dispatch.py +105 -76
  3. batchalign-0.8.0.post1/batchalign/version +3 -0
  4. {batchalign-0.8.0 → batchalign-0.8.0.post1/batchalign.egg-info}/PKG-INFO +1 -1
  5. batchalign-0.8.0/batchalign/version +0 -3
  6. {batchalign-0.8.0 → batchalign-0.8.0.post1}/LICENSE +0 -0
  7. {batchalign-0.8.0 → batchalign-0.8.0.post1}/MANIFEST.in +0 -0
  8. {batchalign-0.8.0 → batchalign-0.8.0.post1}/README.md +0 -0
  9. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/__init__.py +0 -0
  10. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/__main__.py +0 -0
  11. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/cli/__init__.py +0 -0
  12. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/cli/cli.py +0 -0
  13. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/constants.py +0 -0
  14. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/document.py +0 -0
  15. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/errors.py +0 -0
  16. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/formats/__init__.py +0 -0
  17. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/formats/base.py +0 -0
  18. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/formats/chat/__init__.py +0 -0
  19. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/formats/chat/file.py +0 -0
  20. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/formats/chat/generator.py +0 -0
  21. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/formats/chat/lexer.py +0 -0
  22. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/formats/chat/parser.py +0 -0
  23. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/formats/chat/utils.py +0 -0
  24. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/formats/textgrid/__init__.py +0 -0
  25. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/formats/textgrid/file.py +0 -0
  26. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/formats/textgrid/generator.py +0 -0
  27. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/formats/textgrid/parser.py +0 -0
  28. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/models/__init__.py +0 -0
  29. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/models/resolve.py +0 -0
  30. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/models/speaker/__init__.py +0 -0
  31. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/models/speaker/config.yaml +0 -0
  32. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/models/speaker/infer.py +0 -0
  33. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/models/speaker/utils.py +0 -0
  34. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/models/training/__init__.py +0 -0
  35. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/models/training/run.py +0 -0
  36. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/models/training/utils.py +0 -0
  37. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/models/utils.py +0 -0
  38. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/models/utterance/__init__.py +0 -0
  39. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/models/utterance/cantonese_infer.py +0 -0
  40. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/models/utterance/dataset.py +0 -0
  41. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/models/utterance/execute.py +0 -0
  42. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/models/utterance/infer.py +0 -0
  43. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/models/utterance/prep.py +0 -0
  44. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/models/utterance/train.py +0 -0
  45. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/models/wave2vec/__init__.py +0 -0
  46. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/models/wave2vec/infer_fa.py +0 -0
  47. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/models/whisper/__init__.py +0 -0
  48. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/models/whisper/infer_asr.py +0 -0
  49. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/models/whisper/infer_fa.py +0 -0
  50. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/__init__.py +0 -0
  51. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/analysis/__init__.py +0 -0
  52. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/analysis/eval.py +0 -0
  53. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/__init__.py +0 -0
  54. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2chinese.py +0 -0
  55. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/__init__.py +0 -0
  56. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/deu.py +0 -0
  57. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/ell.py +0 -0
  58. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/eng.py +0 -0
  59. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/eus.py +0 -0
  60. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/fra.py +0 -0
  61. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/hrv.py +0 -0
  62. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/ind.py +0 -0
  63. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/jpn.py +0 -0
  64. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/nld.py +0 -0
  65. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/por.py +0 -0
  66. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/spa.py +0 -0
  67. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/tha.py +0 -0
  68. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/oai_whisper.py +0 -0
  69. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/rev.py +0 -0
  70. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/utils.py +0 -0
  71. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/whisper.py +0 -0
  72. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/whisperx.py +0 -0
  73. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/avqi/__init__.py +0 -0
  74. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/avqi/engine.py +0 -0
  75. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/base.py +0 -0
  76. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/cleanup/__init__.py +0 -0
  77. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  78. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  79. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  80. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/cleanup/retrace.py +0 -0
  81. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  82. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  83. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/cleanup/support/test.test +0 -0
  84. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/diarization/__init__.py +0 -0
  85. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/diarization/pyannote.py +0 -0
  86. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/dispatch.py +0 -0
  87. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/fa/__init__.py +0 -0
  88. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  89. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  90. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  91. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  92. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  93. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  94. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  95. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  96. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  97. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  98. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/opensmile/__init__.py +0 -0
  99. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/opensmile/engine.py +0 -0
  100. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/pipeline.py +0 -0
  101. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/speaker/__init__.py +0 -0
  102. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  103. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/translate/__init__.py +0 -0
  104. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/translate/gtrans.py +0 -0
  105. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/translate/seamless.py +0 -0
  106. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/translate/utils.py +0 -0
  107. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/utr/__init__.py +0 -0
  108. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/utr/rev_utr.py +0 -0
  109. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/utr/utils.py +0 -0
  110. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  111. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/utterance/__init__.py +0 -0
  112. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  113. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/tests/__init__.py +0 -0
  114. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/tests/conftest.py +0 -0
  115. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  116. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  117. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  118. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  119. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  120. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  121. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  122. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  123. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  124. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  125. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  126. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  127. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/tests/pipelines/fixures.py +0 -0
  128. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  129. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  130. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/tests/test_document.py +0 -0
  131. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/utils/__init__.py +0 -0
  132. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/utils/abbrev.py +0 -0
  133. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/utils/compounds.py +0 -0
  134. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/utils/config.py +0 -0
  135. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/utils/dp.py +0 -0
  136. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/utils/names.py +0 -0
  137. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign/utils/utils.py +0 -0
  138. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign.egg-info/SOURCES.txt +0 -0
  139. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign.egg-info/dependency_links.txt +0 -0
  140. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign.egg-info/entry_points.txt +0 -0
  141. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign.egg-info/requires.txt +0 -0
  142. {batchalign-0.8.0 → batchalign-0.8.0.post1}/batchalign.egg-info/top_level.txt +0 -0
  143. {batchalign-0.8.0 → batchalign-0.8.0.post1}/setup.cfg +0 -0
  144. {batchalign-0.8.0 → batchalign-0.8.0.post1}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.8.0
3
+ Version: 0.8.0.post1
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -46,103 +46,131 @@ def _get_worker_pipeline(command, lang, num_speakers, **kwargs):
46
46
  lang=lang, num_speakers=num_speakers, **kwargs)
47
47
  return _worker_pipeline
48
48
 
49
- def _worker_task(file_info, command, lang, num_speakers, loader_info, writer_info, progress_queue=None, **kwargs):
49
+ def _worker_task(file_info, command, lang, num_speakers, loader_info, writer_info, progress_queue=None, verbose=0, **kwargs):
50
50
  """The task executed in each worker process."""
51
51
  import sys
52
52
  import os
53
53
  import tempfile
54
-
54
+ import logging
55
+
55
56
  file, output = file_info
56
57
  pid = os.getpid()
57
-
58
- # Use a temporary file to capture ALL output at the FD level
59
- # This is the most robust way to prevent interleaved output
60
- with tempfile.TemporaryFile(mode='w+') as log_file:
58
+
59
+ # Configure logging in this worker process
60
+ if verbose >= 1:
61
+ # Ensure basicConfig is called so logging works
62
+ logging.basicConfig(format="%(levelname)s - %(name)s - %(message)s", level=logging.ERROR)
63
+
64
+ # Configure batchalign logger level in this worker process
65
+ baL = logging.getLogger('batchalign')
66
+ if verbose == 0:
67
+ baL.setLevel(logging.WARN)
68
+ elif verbose == 1:
69
+ baL.setLevel(logging.INFO)
70
+ else:
71
+ baL.setLevel(logging.DEBUG)
72
+
73
+ # Only capture output if not in verbose mode
74
+ # In verbose mode, let logs stream naturally to the console
75
+ should_capture = verbose == 0
76
+
77
+ if should_capture:
78
+ # Use a temporary file to capture ALL output at the FD level
79
+ # This is the most robust way to prevent interleaved output
80
+ log_file = tempfile.TemporaryFile(mode='w+')
61
81
  old_stdout_fd = os.dup(sys.stdout.fileno())
62
82
  old_stderr_fd = os.dup(sys.stderr.fileno())
63
-
64
- try:
65
- # Redirect FD 1 and 2 to our temp file
66
- os.dup2(log_file.fileno(), sys.stdout.fileno())
67
- os.dup2(log_file.fileno(), sys.stderr.fileno())
68
-
69
- pipeline = _get_worker_pipeline(command, lang, num_speakers, **kwargs)
70
83
 
71
- def progress_callback(completed, total, tasks):
72
- if not progress_queue:
73
- return
74
- try:
75
- progress_queue.put((file, completed, total, tasks))
76
- except Exception:
77
- pass
78
-
79
- # For now, we'll re-import what we need
80
- from batchalign.formats.chat import CHATFile
81
-
82
- # Morphosyntax specific loader/writer logic moved here for picklability
83
- if command == "morphotag":
84
- # Extract morphotag-specific arguments from kwargs
85
- mwt = kwargs.pop("mwt", {})
86
- retokenize = kwargs.pop("retokenize", False)
87
- skipmultilang = kwargs.pop("skipmultilang", False)
88
-
89
- cf = CHATFile(path=os.path.abspath(file), special_mor_=True)
90
- doc = cf.doc
91
- if str(cf).count("%mor") > 0:
92
- doc.ba_special_["special_mor_notation"] = True
93
-
94
- # Prepare arguments for the pipeline
95
- pipeline_kwargs = {
96
- "retokenize": retokenize,
97
- "skipmultilang": skipmultilang,
98
- "mwt": mwt
99
- }
100
- # Add any remaining kwargs
101
- pipeline_kwargs.update(kwargs)
102
-
103
- # Process
104
- doc = pipeline(doc, callback=progress_callback, **pipeline_kwargs)
105
-
106
- # Write
107
- CHATFile(doc=doc, special_mor_=doc.ba_special_.get("special_mor_notation", False)).write(output)
108
-
109
- # Add other commands as needed, or use a more generic registry
110
- elif command == "align":
111
- cf = CHATFile(path=os.path.abspath(file))
112
- doc = cf.doc
113
- kw = {"pauses": kwargs.get("pauses", False)}
114
- doc = pipeline(doc, callback=progress_callback, **kw)
115
- CHATFile(doc=doc).write(output, write_wor=kwargs.get("wor", True))
116
-
117
- else:
118
- loader, writer = loader_info, writer_info
119
- doc = loader(os.path.abspath(file))
120
- kw = {}
121
- if isinstance(doc, tuple) and len(doc) > 1:
122
- doc, kw = doc
123
- doc = pipeline(doc, callback=progress_callback, **kw)
124
- writer(doc, output)
125
-
126
- # Flush everything before reading back
84
+ # Redirect FD 1 and 2 to our temp file
85
+ os.dup2(log_file.fileno(), sys.stdout.fileno())
86
+ os.dup2(log_file.fileno(), sys.stderr.fileno())
87
+
88
+ try:
89
+ pipeline = _get_worker_pipeline(command, lang, num_speakers, **kwargs)
90
+
91
+ def progress_callback(completed, total, tasks):
92
+ if not progress_queue:
93
+ return
94
+ try:
95
+ progress_queue.put((file, completed, total, tasks))
96
+ except Exception:
97
+ pass
98
+
99
+ # For now, we'll re-import what we need
100
+ from batchalign.formats.chat import CHATFile
101
+
102
+ # Morphosyntax specific loader/writer logic moved here for picklability
103
+ if command == "morphotag":
104
+ # Extract morphotag-specific arguments from kwargs
105
+ mwt = kwargs.pop("mwt", {})
106
+ retokenize = kwargs.pop("retokenize", False)
107
+ skipmultilang = kwargs.pop("skipmultilang", False)
108
+
109
+ cf = CHATFile(path=os.path.abspath(file), special_mor_=True)
110
+ doc = cf.doc
111
+ if str(cf).count("%mor") > 0:
112
+ doc.ba_special_["special_mor_notation"] = True
113
+
114
+ # Prepare arguments for the pipeline
115
+ pipeline_kwargs = {
116
+ "retokenize": retokenize,
117
+ "skipmultilang": skipmultilang,
118
+ "mwt": mwt
119
+ }
120
+ # Add any remaining kwargs
121
+ pipeline_kwargs.update(kwargs)
122
+
123
+ # Process
124
+ doc = pipeline(doc, callback=progress_callback, **pipeline_kwargs)
125
+
126
+ # Write
127
+ CHATFile(doc=doc, special_mor_=doc.ba_special_.get("special_mor_notation", False)).write(output)
128
+
129
+ # Add other commands as needed, or use a more generic registry
130
+ elif command == "align":
131
+ cf = CHATFile(path=os.path.abspath(file))
132
+ doc = cf.doc
133
+ kw = {"pauses": kwargs.get("pauses", False)}
134
+ doc = pipeline(doc, callback=progress_callback, **kw)
135
+ CHATFile(doc=doc).write(output, write_wor=kwargs.get("wor", True))
136
+
137
+ else:
138
+ loader, writer = loader_info, writer_info
139
+ doc = loader(os.path.abspath(file))
140
+ kw = {}
141
+ if isinstance(doc, tuple) and len(doc) > 1:
142
+ doc, kw = doc
143
+ doc = pipeline(doc, callback=progress_callback, **kw)
144
+ writer(doc, output)
145
+
146
+ # Flush and read captured output if we were capturing
147
+ if should_capture:
127
148
  sys.stdout.flush()
128
149
  sys.stderr.flush()
129
150
  log_file.seek(0)
130
151
  captured = log_file.read()
131
-
132
- return file, None, None, captured
133
- except Exception as e:
134
- # Flush everything before reading back
152
+ else:
153
+ captured = ""
154
+
155
+ return file, None, None, captured
156
+ except Exception as e:
157
+ # Flush and read captured output if we were capturing
158
+ if should_capture:
135
159
  sys.stdout.flush()
136
160
  sys.stderr.flush()
137
161
  log_file.seek(0)
138
162
  captured = log_file.read()
139
- return file, traceback.format_exc(), e, captured
140
- finally:
141
- # Restore original FDs
163
+ else:
164
+ captured = ""
165
+ return file, traceback.format_exc(), e, captured
166
+ finally:
167
+ # Restore original FDs only if we redirected them
168
+ if should_capture:
142
169
  os.dup2(old_stdout_fd, sys.stdout.fileno())
143
170
  os.dup2(old_stderr_fd, sys.stderr.fileno())
144
171
  os.close(old_stdout_fd)
145
172
  os.close(old_stderr_fd)
173
+ log_file.close()
146
174
 
147
175
  # this dictionary maps what commands are executed
148
176
  # against what BatchalignPipeline tasks are actually ran
@@ -300,6 +328,7 @@ def _dispatch(command, lang, num_speakers,
300
328
  loader_info=None,
301
329
  writer_info=None,
302
330
  progress_queue=progress_queue,
331
+ verbose=ctx.obj["verbose"],
303
332
  **kwargs)
304
333
 
305
334
  future_to_file = {executor.submit(worker_func, (f, o)): f for f, o in zip(files, outputs)}
@@ -0,0 +1,3 @@
1
+ 0.8.0-post.1
2
+ Jan 13th, 2025
3
+ Speed
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.8.0
3
+ Version: 0.8.0.post1
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,3 +0,0 @@
1
- 0.8.0
2
- Jan 13th, 2025
3
- Speed
File without changes
File without changes
File without changes
File without changes
File without changes