batchalign 0.7.22.post20__tar.gz → 0.8.0.post1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of batchalign might be problematic. Click here for more details.
- {batchalign-0.7.22.post20/batchalign.egg-info → batchalign-0.8.0.post1}/PKG-INFO +1 -1
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/cli/cli.py +22 -21
- batchalign-0.8.0.post1/batchalign/cli/dispatch.py +393 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/morphosyntax/ud.py +115 -81
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/utterance/ud_utterance.py +1 -1
- batchalign-0.8.0.post1/batchalign/utils/dp.py +225 -0
- batchalign-0.8.0.post1/batchalign/version +3 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1/batchalign.egg-info}/PKG-INFO +1 -1
- batchalign-0.7.22.post20/batchalign/cli/dispatch.py +0 -196
- batchalign-0.7.22.post20/batchalign/utils/dp.py +0 -225
- batchalign-0.7.22.post20/batchalign/version +0 -3
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/LICENSE +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/MANIFEST.in +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/README.md +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/__main__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/constants.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/document.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/errors.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/formats/chat/generator.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/formats/chat/lexer.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/formats/chat/parser.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/formats/chat/utils.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/models/resolve.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/models/utterance/cantonese_infer.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/models/wave2vec/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/models/wave2vec/infer_fa.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2chinese.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/deu.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/ell.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/eng.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/eus.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/fra.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/hrv.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/ind.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/jpn.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/nld.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/por.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/spa.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/num2lang/tha.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/oai_whisper.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/avqi/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/avqi/engine.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/diarization/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/diarization/pyannote.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/dispatch.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/opensmile/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/opensmile/engine.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/translate/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/translate/gtrans.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/translate/seamless.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/translate/utils.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/utils/abbrev.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/utils/compounds.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/utils/names.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign.egg-info/SOURCES.txt +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign.egg-info/requires.txt +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/setup.cfg +0 -0
- {batchalign-0.7.22.post20 → batchalign-0.8.0.post1}/setup.py +0 -0
|
@@ -3,37 +3,23 @@ cli.py
|
|
|
3
3
|
The Batchalign command-line interface
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
|
-
import multiprocessing
|
|
7
6
|
import rich_click as click
|
|
8
7
|
import functools
|
|
9
8
|
|
|
10
9
|
import os
|
|
11
|
-
from glob import glob
|
|
12
10
|
|
|
13
|
-
from multiprocessing import
|
|
14
|
-
|
|
15
|
-
from batchalign.pipelines import BatchalignPipeline
|
|
11
|
+
from multiprocessing import freeze_support
|
|
16
12
|
|
|
13
|
+
from pathlib import Path
|
|
17
14
|
from rich.traceback import install
|
|
18
15
|
from rich.console import Console
|
|
19
|
-
from rich.panel import Panel
|
|
20
|
-
from pathlib import Path
|
|
21
|
-
from batchalign.document import *
|
|
22
|
-
from batchalign.formats.chat import CHATFile
|
|
23
|
-
from batchalign.utils import config
|
|
24
16
|
from rich.logging import RichHandler
|
|
25
17
|
|
|
26
18
|
from batchalign.cli.dispatch import _dispatch
|
|
27
19
|
from batchalign.models.training.run import cli as train
|
|
28
20
|
|
|
29
|
-
from enum import Enum
|
|
30
|
-
|
|
31
|
-
import traceback
|
|
32
|
-
|
|
33
21
|
import pyfiglet
|
|
34
|
-
|
|
35
|
-
import logging as L
|
|
36
|
-
baL = L.getLogger('batchalign')
|
|
22
|
+
import logging as L
|
|
37
23
|
|
|
38
24
|
C = Console()
|
|
39
25
|
|
|
@@ -62,7 +48,7 @@ def handle_verbosity(verbosity):
|
|
|
62
48
|
L.getLogger('stanza').handlers.clear()
|
|
63
49
|
L.getLogger('transformers').handlers.clear()
|
|
64
50
|
L.getLogger('nemo_logger').handlers.clear()
|
|
65
|
-
L.getLogger("stanza").setLevel(L.
|
|
51
|
+
L.getLogger("stanza").setLevel(L.WARN)
|
|
66
52
|
L.getLogger('nemo_logger').setLevel(L.CRITICAL)
|
|
67
53
|
L.getLogger('batchalign').setLevel(L.WARN)
|
|
68
54
|
L.getLogger('lightning.pytorch.utilities.migration.utils').setLevel(L.ERROR)
|
|
@@ -73,6 +59,7 @@ def handle_verbosity(verbosity):
|
|
|
73
59
|
L.getLogger('batchalign').setLevel(L.INFO)
|
|
74
60
|
if verbosity >= 3:
|
|
75
61
|
L.getLogger('batchalign').setLevel(L.DEBUG)
|
|
62
|
+
L.getLogger("stanza").setLevel(L.INFO)
|
|
76
63
|
if verbosity >= 4:
|
|
77
64
|
L.getLogger('batchalign').setLevel(L.DEBUG)
|
|
78
65
|
L.getLogger('transformers').setLevel(L.INFO)
|
|
@@ -81,7 +68,8 @@ def handle_verbosity(verbosity):
|
|
|
81
68
|
@click.pass_context
|
|
82
69
|
@click.version_option(VERSION_NUMBER)
|
|
83
70
|
@click.option("-v", "--verbose", type=int, count=True, default=0, help="How loquacious Batchalign should be.")
|
|
84
|
-
|
|
71
|
+
@click.option("--workers", type=int, default=os.cpu_count(), help="Number of worker processes to use.")
|
|
72
|
+
def batchalign(ctx, verbose, workers):
|
|
85
73
|
"""process .cha and/or audio files in IN_DIR and dumps them to OUT_DIR using recipe COMMAND"""
|
|
86
74
|
|
|
87
75
|
## setup commands ##
|
|
@@ -93,7 +81,9 @@ def batchalign(ctx, verbose):
|
|
|
93
81
|
handle_verbosity(verbose)
|
|
94
82
|
# add to arguments
|
|
95
83
|
ctx.obj["verbose"] = verbose
|
|
84
|
+
ctx.obj["workers"] = workers
|
|
96
85
|
# setup config
|
|
86
|
+
from batchalign.utils import config
|
|
97
87
|
ctx.obj["config"] = config.config_read(True)
|
|
98
88
|
# make everything look better
|
|
99
89
|
# pretty.install()
|
|
@@ -116,6 +106,7 @@ batchalign.add_command(train, "models")
|
|
|
116
106
|
@click.pass_context
|
|
117
107
|
def align(ctx, in_dir, out_dir, whisper, wav2vec, **kwargs):
|
|
118
108
|
"""Align transcripts against corresponding media files."""
|
|
109
|
+
from batchalign.formats.chat import CHATFile
|
|
119
110
|
def loader(file):
|
|
120
111
|
return (
|
|
121
112
|
CHATFile(path=os.path.abspath(file)).doc,
|
|
@@ -165,6 +156,8 @@ def align(ctx, in_dir, out_dir, whisper, wav2vec, **kwargs):
|
|
|
165
156
|
@click.pass_context
|
|
166
157
|
def transcribe(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
|
|
167
158
|
"""Create a transcript from audio files."""
|
|
159
|
+
from batchalign.document import CustomLine, CustomLineType
|
|
160
|
+
from batchalign.formats.chat import CHATFile
|
|
168
161
|
def loader(file):
|
|
169
162
|
return file
|
|
170
163
|
|
|
@@ -209,6 +202,7 @@ def transcribe(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
|
|
|
209
202
|
@click.pass_context
|
|
210
203
|
def translate(ctx, in_dir, out_dir, **kwargs):
|
|
211
204
|
"""Translate the transcript to English."""
|
|
205
|
+
from batchalign.formats.chat import CHATFile
|
|
212
206
|
|
|
213
207
|
def loader(file):
|
|
214
208
|
cf = CHATFile(path=os.path.abspath(file), special_mor_=True)
|
|
@@ -239,6 +233,7 @@ def translate(ctx, in_dir, out_dir, **kwargs):
|
|
|
239
233
|
@click.pass_context
|
|
240
234
|
def morphotag(ctx, in_dir, out_dir, **kwargs):
|
|
241
235
|
"""Perform morphosyntactic analysis on transcripts."""
|
|
236
|
+
from batchalign.formats.chat import CHATFile
|
|
242
237
|
|
|
243
238
|
def loader(file):
|
|
244
239
|
mwt = {}
|
|
@@ -265,7 +260,7 @@ def morphotag(ctx, in_dir, out_dir, **kwargs):
|
|
|
265
260
|
|
|
266
261
|
_dispatch("morphotag", "eng", 1, ["cha"], ctx,
|
|
267
262
|
in_dir, out_dir,
|
|
268
|
-
loader, writer, C)
|
|
263
|
+
loader, writer, C, **kwargs)
|
|
269
264
|
|
|
270
265
|
|
|
271
266
|
#################### MORPHOTAG ################################
|
|
@@ -275,6 +270,7 @@ def morphotag(ctx, in_dir, out_dir, **kwargs):
|
|
|
275
270
|
@click.pass_context
|
|
276
271
|
def coref(ctx, in_dir, out_dir, **kwargs):
|
|
277
272
|
"""Perform coreference analysis on transcripts."""
|
|
273
|
+
from batchalign.formats.chat import CHATFile
|
|
278
274
|
|
|
279
275
|
def loader(file):
|
|
280
276
|
cf = CHATFile(path=os.path.abspath(file))
|
|
@@ -302,6 +298,7 @@ def coref(ctx, in_dir, out_dir, **kwargs):
|
|
|
302
298
|
@click.pass_context
|
|
303
299
|
def utseg(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
|
|
304
300
|
"""Perform morphosyntactic analysis on transcripts."""
|
|
301
|
+
from batchalign.formats.chat import CHATFile
|
|
305
302
|
|
|
306
303
|
def loader(file):
|
|
307
304
|
return CHATFile(path=os.path.abspath(file)).doc
|
|
@@ -332,6 +329,7 @@ def utseg(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
|
|
|
332
329
|
@click.pass_context
|
|
333
330
|
def benchmark(ctx, in_dir, out_dir, lang, num_speakers, whisper, whisper_oai, **kwargs):
|
|
334
331
|
"""Benchmark ASR utilities for their word accuracy"""
|
|
332
|
+
from batchalign.formats.chat import CHATFile
|
|
335
333
|
def loader(file):
|
|
336
334
|
# try to find a .cha in the same directory
|
|
337
335
|
p = Path(file)
|
|
@@ -374,6 +372,7 @@ def avqi(ctx, input_dir, output_dir, lang, **kwargs):
|
|
|
374
372
|
"""Calculate AVQI from paired .cs and .sv audio files in input directory."""
|
|
375
373
|
|
|
376
374
|
from batchalign.pipelines.avqi import AVQIEngine
|
|
375
|
+
from batchalign.document import Document
|
|
377
376
|
from pathlib import Path
|
|
378
377
|
import os
|
|
379
378
|
|
|
@@ -441,6 +440,7 @@ def avqi(ctx, input_dir, output_dir, lang, **kwargs):
|
|
|
441
440
|
@click.pass_context
|
|
442
441
|
def opensmile(ctx, input_dir, output_dir, feature_set, lang, **kwargs):
|
|
443
442
|
"""Extract openSMILE audio features from speech samples."""
|
|
443
|
+
from batchalign.document import Document
|
|
444
444
|
|
|
445
445
|
def loader(file):
|
|
446
446
|
doc = Document.new(media_path=file, lang=lang)
|
|
@@ -468,6 +468,7 @@ def opensmile(ctx, input_dir, output_dir, feature_set, lang, **kwargs):
|
|
|
468
468
|
def setup(ctx):
|
|
469
469
|
"""Reconfigure Batchalign settings, such as Rev.AI key."""
|
|
470
470
|
|
|
471
|
+
from batchalign.utils import config
|
|
471
472
|
config.interactive_setup()
|
|
472
473
|
|
|
473
474
|
#################### VERSION ################################
|
|
@@ -480,5 +481,5 @@ def version(ctx, **kwargs):
|
|
|
480
481
|
ptr = (pyfiglet.figlet_format("Batchalign2")+"\n" +
|
|
481
482
|
f"Version: [bold]{VERSION_NUMBER.strip()}[/bold], released {RELEASE_DATE.strip()}\n" +
|
|
482
483
|
f"[italic]{RELEASE_NOTES.strip()}[/italic]"+"\n" +
|
|
483
|
-
"\nDeveloped by Brian MacWhinney and Houjun Liu")
|
|
484
|
+
"\nDeveloped by Brian MacWhinney and Houjun Liu\ncontributions from Sebastian Song and Franklin Chen")
|
|
484
485
|
C.print("\n\n"+ptr+"\n\n")
|
|
@@ -0,0 +1,393 @@
|
|
|
1
|
+
"""
|
|
2
|
+
dispatch.py
|
|
3
|
+
CLI runner dispatch. Essentially the translation layer between `command` in CLI
|
|
4
|
+
and actual BatchalignPipeline.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn
|
|
8
|
+
|
|
9
|
+
import warnings
|
|
10
|
+
|
|
11
|
+
import shutil
|
|
12
|
+
import os
|
|
13
|
+
import glob
|
|
14
|
+
import queue
|
|
15
|
+
|
|
16
|
+
from rich.console import Console
|
|
17
|
+
from rich.markup import escape
|
|
18
|
+
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
import concurrent.futures
|
|
22
|
+
import multiprocessing
|
|
23
|
+
from functools import partial
|
|
24
|
+
|
|
25
|
+
# Directory-scoped glob and re-rooting helpers.
def globase(path, statement):
    """Return the paths inside ``path`` that match the glob pattern ``statement``.

    Parameters
    ----------
    path : str
        Directory the pattern is joined onto.
    statement : str
        Glob pattern, e.g. ``"*.cha"``.

    Returns
    -------
    list of str
        Matching paths, as produced by ``glob.glob``.
    """
    # This file does ``import glob`` (the module), so the matcher has to be
    # reached as ``glob.glob``; calling the bare module raises TypeError.
    return glob.glob(os.path.join(path, statement))


def repath_file(file_path, new_dir):
    """Return the file name of ``file_path`` joined onto ``new_dir``."""
    return os.path.join(new_dir, Path(file_path).name)
|
|
28
|
+
|
|
29
|
+
import tempfile
|
|
30
|
+
import time
|
|
31
|
+
|
|
32
|
+
import traceback
|
|
33
|
+
import logging as L
|
|
34
|
+
baL = L.getLogger('batchalign')
|
|
35
|
+
|
|
36
|
+
warnings.filterwarnings('ignore', category=UserWarning, message='TypedStorage is deprecated')
|
|
37
|
+
|
|
38
|
+
# Per-process pipeline cache: stays None until the first task in this
# process builds a pipeline, after which the same object is reused.
_worker_pipeline = None


def _get_worker_pipeline(command, lang, num_speakers, **kwargs):
    """Return this process's cached pipeline, building it on first use.

    The pipeline is created with ``BatchalignPipeline.new`` from the task
    string ``Cmd2Task[command]`` plus ``lang``, ``num_speakers`` and any extra
    keyword arguments. Once a pipeline exists it is returned unchanged; the
    arguments of later calls are not consulted.
    """
    global _worker_pipeline

    if _worker_pipeline is not None:
        return _worker_pipeline

    # Imported only when a pipeline actually has to be built.
    from batchalign.pipelines import BatchalignPipeline

    _worker_pipeline = BatchalignPipeline.new(
        Cmd2Task[command],
        lang=lang,
        num_speakers=num_speakers,
        **kwargs,
    )
    return _worker_pipeline
|
|
48
|
+
|
|
49
|
+
def _read_captured_output(log_file):
    """Flush the Python-level streams and return everything in ``log_file``."""
    import sys

    # Text still sitting in Python's stream buffers has to reach the
    # redirected descriptors before the file is read back.
    sys.stdout.flush()
    sys.stderr.flush()
    log_file.seek(0)
    return log_file.read()


def _worker_task(file_info, command, lang, num_speakers, loader_info, writer_info, progress_queue=None, verbose=0, **kwargs):
    """Process one input file inside a worker process.

    Parameters
    ----------
    file_info : tuple
        ``(file, output)``: the input path and the path to write the result to.
    command : str
        CLI command name. ``"morphotag"`` and ``"align"`` load and write CHAT
        files directly; every other command goes through ``loader_info`` and
        ``writer_info``.
    lang, num_speakers
        Forwarded to ``_get_worker_pipeline``.
    loader_info : callable
        Called with the absolute input path for commands other than
        ``"morphotag"`` / ``"align"``. It may return a document, or a
        ``(document, kwargs)`` tuple whose kwargs are passed to the pipeline.
    writer_info : callable
        Called as ``writer_info(doc, output)`` for those same commands.
    progress_queue : optional
        When truthy, ``(file, completed, total, tasks)`` tuples are ``put``
        on it from the pipeline callback.
    verbose : int
        ``0`` captures stdout/stderr into a temporary file and sets the
        ``batchalign`` logger to WARN; ``1`` sets INFO; anything else sets
        DEBUG. Output is not captured when ``verbose`` is non-zero.
    **kwargs
        Forwarded to ``_get_worker_pipeline``; command-specific entries
        (``mwt``, ``retokenize``, ``skipmultilang``, ``pauses``, ``wor``) are
        also read here.

    Returns
    -------
    tuple
        ``(file, None, None, captured)`` on success, or
        ``(file, traceback_text, exception, captured)`` when processing raised.
        ``captured`` is the captured output, or ``""`` when not capturing.
    """
    import sys
    import os
    import tempfile
    import logging

    file, output = file_info

    # Configure logging in this worker process
    if verbose >= 1:
        # Ensure basicConfig is called so logging works
        logging.basicConfig(format="%(levelname)s - %(name)s - %(message)s", level=logging.ERROR)

    # Configure batchalign logger level in this worker process
    worker_logger = logging.getLogger('batchalign')
    if verbose == 0:
        worker_logger.setLevel(logging.WARN)
    elif verbose == 1:
        worker_logger.setLevel(logging.INFO)
    else:
        worker_logger.setLevel(logging.DEBUG)

    # Only capture output if not in verbose mode
    # In verbose mode, let logs stream naturally to the console
    should_capture = verbose == 0

    if should_capture:
        # Use a temporary file to capture ALL output at the FD level
        # This is the most robust way to prevent interleaved output.
        # errors="replace" keeps the read-back from raising on bytes that are
        # not valid in the file's text encoding; anything written straight to
        # the descriptors bypasses Python's own encoding.
        log_file = tempfile.TemporaryFile(mode='w+', errors="replace")
        old_stdout_fd = os.dup(sys.stdout.fileno())
        old_stderr_fd = os.dup(sys.stderr.fileno())

        # Redirect FD 1 and 2 to our temp file
        os.dup2(log_file.fileno(), sys.stdout.fileno())
        os.dup2(log_file.fileno(), sys.stderr.fileno())

    try:
        pipeline = _get_worker_pipeline(command, lang, num_speakers, **kwargs)

        def progress_callback(completed, total, tasks):
            if not progress_queue:
                return
            try:
                progress_queue.put((file, completed, total, tasks))
            except Exception:
                # Progress reporting is best-effort: a failed put must not
                # abort the processing of the file itself.
                pass

        # For now, we'll re-import what we need
        from batchalign.formats.chat import CHATFile

        # Morphosyntax specific loader/writer logic moved here for picklability
        if command == "morphotag":
            # Extract morphotag-specific arguments from kwargs
            mwt = kwargs.pop("mwt", {})
            retokenize = kwargs.pop("retokenize", False)
            skipmultilang = kwargs.pop("skipmultilang", False)

            cf = CHATFile(path=os.path.abspath(file), special_mor_=True)
            doc = cf.doc
            if str(cf).count("%mor") > 0:
                doc.ba_special_["special_mor_notation"] = True

            # Prepare arguments for the pipeline
            pipeline_kwargs = {
                "retokenize": retokenize,
                "skipmultilang": skipmultilang,
                "mwt": mwt
            }
            # Add any remaining kwargs
            pipeline_kwargs.update(kwargs)

            # Process
            doc = pipeline(doc, callback=progress_callback, **pipeline_kwargs)

            # Write
            CHATFile(doc=doc, special_mor_=doc.ba_special_.get("special_mor_notation", False)).write(output)

        # Add other commands as needed, or use a more generic registry
        elif command == "align":
            cf = CHATFile(path=os.path.abspath(file))
            doc = cf.doc
            kw = {"pauses": kwargs.get("pauses", False)}
            doc = pipeline(doc, callback=progress_callback, **kw)
            CHATFile(doc=doc).write(output, write_wor=kwargs.get("wor", True))

        else:
            loader, writer = loader_info, writer_info
            doc = loader(os.path.abspath(file))
            kw = {}
            # A loader may hand back (document, pipeline kwargs).
            if isinstance(doc, tuple) and len(doc) > 1:
                doc, kw = doc
            doc = pipeline(doc, callback=progress_callback, **kw)
            writer(doc, output)

        # Flush and read captured output if we were capturing
        captured = _read_captured_output(log_file) if should_capture else ""

        return file, None, None, captured
    except Exception as e:
        # Failures are reported to the caller through the return value,
        # together with whatever output was captured up to that point.
        captured = _read_captured_output(log_file) if should_capture else ""
        return file, traceback.format_exc(), e, captured
    finally:
        # Restore original FDs only if we redirected them
        if should_capture:
            os.dup2(old_stdout_fd, sys.stdout.fileno())
            os.dup2(old_stderr_fd, sys.stderr.fileno())
            os.close(old_stdout_fd)
            os.close(old_stderr_fd)
            log_file.close()
|
|
174
|
+
|
|
175
|
+
# Lookup table from a CLI command name to the comma-separated
# BatchalignPipeline task string that the command runs.
Cmd2Task = dict(
    align="fa",
    transcribe="asr",
    transcribe_s="asr,speaker",
    morphotag="morphosyntax",
    benchmark="asr,eval",
    utseg="utterance",
    coref="coref",
    translate="translate",
    opensmile="opensmile",
)
|
|
188
|
+
|
|
189
|
+
# this is the main runner used by all functions
|
|
190
|
+
def _dispatch(command, lang, num_speakers,
              extensions, ctx, in_dir, out_dir,
              loader:callable, writer:callable, console,
              **kwargs):
    """Run ``command`` over every matching file under ``in_dir`` using worker processes.

    The input tree is walked recursively and mirrored into ``out_dir``:

    * files whose extension is in ``FORCED_CONVERSION`` are first converted
      to ``.wav`` (next to the original) through pydub, which needs ffmpeg;
    * ``.cha`` files marked as dummy files are copied verbatim;
    * files whose extension is in ``extensions`` are queued for processing;
    * every other file is copied to the mirrored location unchanged.

    Queued files are submitted, largest first, to a
    ``ProcessPoolExecutor`` running ``_worker_task``; progress messages
    arrive through a manager queue and are rendered with a rich progress
    display. After all workers finish, errors (and, when verbose, captured
    worker output) are printed.

    Parameters
    ----------
    command : str
        Name of the CLI command being run; forwarded to the workers.
    lang, num_speakers
        Forwarded unchanged to ``_worker_task``.
    extensions : container of str
        Lower-case extensions (without the dot) that should be processed.
    ctx
        Object whose ``ctx.obj`` mapping supplies ``"verbose"`` and,
        optionally, ``"workers"``.
    in_dir, out_dir : str
        Input directory to walk and output directory to mirror it into.
    loader, writer : callable
        Accepted for interface compatibility; not used by this function
        (workers are given ``loader_info=None`` / ``writer_info=None``).
    console
        rich console used for output unless ``verbose > 1``, in which case
        output is redirected to a temporary file.
    **kwargs
        Forwarded to ``_worker_task``; ``num_workers`` is also read here.

    Raises
    ------
    ValueError
        If a file needs conversion but ffmpeg is not on the PATH.
    """

    C = console
    from batchalign.constants import FORCED_CONVERSION
    from batchalign.document import TaskFriendlyName

    # get files by walking the directory
    files = []
    outputs = []

    for basedir, _, fs in os.walk(in_dir):
        for f in fs:
            path = Path(os.path.join(basedir, f))
            ext = path.suffix.strip(".").strip().lower()

            # calculate input path, convert if needed
            inp_path = str(path)
            if ext in FORCED_CONVERSION:
                # check for ffmpeg
                if not shutil.which("ffmpeg"):
                    raise ValueError(f"ffmpeg not found in Path! Cannot load input media at {inp_path}.\nHint: Please convert your input audio sample to .wav before proceeding with Batchalign, or install ffmpeg (https://ffmpeg.org/download.html)")
                # convert
                from pydub import AudioSegment
                # Swap only the final suffix. A textual replace of
                # f".{ext}" misses upper-case suffixes (ext is lower-cased),
                # which made the export overwrite the source media, and it
                # also rewrote matching fragments in directory names.
                wav_path = str(path.with_suffix(".wav"))
                seg = AudioSegment.from_file(inp_path, ext)
                seg.export(wav_path, format="wav")
                inp_path = wav_path

            # repath the file to the output
            rel = os.path.relpath(inp_path, in_dir)
            repathed = Path(os.path.join(out_dir, rel))
            # make the repathed dir, if it doesn't exist
            parent = repathed.parent.absolute()
            os.makedirs(parent, exist_ok=True)

            # HACK check for @Options:\tdummy in the file
            # and simply copy it
            if ext == "cha":
                with open(inp_path, 'r', encoding="utf-8") as df:
                    data = df.read()
                if ("@Options:\tdummy" in data
                        or "This is a dummy file to permit playback from the TalkBank browser" in data):
                    shutil.copy2(inp_path, str(repathed))
                    continue

            # if the file needs to get processed, append it to the list
            # to be processed and compute the output
            if ext in extensions:
                files.append(inp_path)
                outputs.append(str(repathed))
            # otherwise just copy the file
            else:
                shutil.copy2(inp_path, str(repathed))

    __tf = None
    # output file: when very verbose, rich output goes to a temporary file
    if ctx.obj["verbose"] > 1:
        __tf = tempfile.NamedTemporaryFile(delete=True, mode='w')
        C = Console(file=__tf)

    # process largest inputs first to avoid late stragglers
    file_pairs = list(zip(files, outputs))
    file_pairs.sort(key=lambda fo: os.path.getsize(fo[0]) if os.path.exists(fo[0]) else 0, reverse=True)
    files, outputs = zip(*file_pairs) if file_pairs else ([], [])

    C.print(f"\nMode: [blue]{command}[/blue]; got [bold cyan]{len(files)}[/bold cyan] transcript{'s' if len(files) != 1 else ''} to process from {in_dir}:\n")

    # Determine number of workers. Fall through falsy values so that a
    # missing/None setting (os.cpu_count() may return None) never reaches
    # min() or the executor as None.
    num_workers = kwargs.get("num_workers") or ctx.obj.get("workers") or os.cpu_count() or 1

    # Pre-download stanza resources if needed to avoid interleaved downloads in workers
    if command in ["morphotag", "utseg", "coref"]:
        try:
            import stanza
            stanza.download_resources_json()
        except Exception:
            # deliberately best-effort: a failure here is ignored and the
            # run continues
            pass

    # For some commands or environments, we might want to limit this
    if command in ["transcribe", "transcribe_s"]:
        num_workers = min(num_workers, 2)  # GPU memory limits

    C.print(f"Using [bold]{num_workers}[/bold] worker processes.\n")

    # the manager (and its queue) is only created when there is work to do
    manager = multiprocessing.Manager() if files else None
    progress_queue = manager.Queue() if manager else None

    def render_stage(stage_tasks):
        """Turn one task or a list/tuple of tasks into a display label."""
        if not stage_tasks:
            return "Processing..."
        if not isinstance(stage_tasks, (list, tuple)):
            stage_tasks = [stage_tasks]
        names = [TaskFriendlyName.get(task, str(task)) for task in stage_tasks]
        return ", ".join(names)

    # create the spinner
    prog = Progress(SpinnerColumn(), *Progress.get_default_columns()[:-1],
                    TimeElapsedColumn(),
                    TextColumn("[cyan]{task.fields[processor]}[/cyan]"), console=C)
    # entries are (file, traceback text, exception or None, captured output)
    errors = []

    try:
        with prog as prog:
            tasks = {}
            task_totals = {}

            for f in files:
                tasks[f] = prog.add_task(Path(f).name, start=False, total=1, processor="Waiting...")
                task_totals[f] = 1

            def drain_progress_queue():
                """Apply every progress message currently queued by the workers."""
                if progress_queue is None:
                    return
                while True:
                    try:
                        file, completed, total, stage_tasks = progress_queue.get_nowait()
                    except queue.Empty:
                        break
                    except Exception:
                        # any other queue failure just stops draining
                        break
                    if file not in tasks:
                        continue
                    # keep the last known total when the message carries none
                    task_total = max(int(total) if total else task_totals.get(file, 1), 1)
                    task_totals[file] = task_total
                    prog.update(tasks[file],
                                total=task_total,
                                completed=min(int(completed), task_total),
                                processor=render_stage(stage_tasks))

            with concurrent.futures.ProcessPoolExecutor(max_workers=num_workers) as executor:
                worker_func = partial(_worker_task,
                                      command=command,
                                      lang=lang,
                                      num_speakers=num_speakers,
                                      loader_info=None,
                                      writer_info=None,
                                      progress_queue=progress_queue,
                                      verbose=ctx.obj["verbose"],
                                      **kwargs)

                future_to_file = {executor.submit(worker_func, (f, o)): f for f, o in zip(files, outputs)}

                for f in files:
                    prog.start_task(tasks[f])
                    prog.update(tasks[f], processor="Processing...")

                pending = set(future_to_file.keys())
                while pending:
                    # short timeout so progress messages are rendered even
                    # while no future has completed
                    done, pending = concurrent.futures.wait(
                        pending,
                        timeout=0.1,
                        return_when=concurrent.futures.FIRST_COMPLETED,
                    )
                    drain_progress_queue()

                    for future in done:
                        file = future_to_file[future]
                        try:
                            res_file, trcbk, e, captured = future.result()
                            final_total = max(task_totals.get(file, 1), 1)
                            if e:
                                prog.update(tasks[file], total=final_total, completed=final_total, processor="[bold red]FAIL[/bold red]")
                                errors.append((res_file, trcbk, e, captured))
                            else:
                                prog.update(tasks[file], total=final_total, completed=final_total, processor="[bold green]DONE[/bold green]")
                                if ctx.obj["verbose"] >= 1 and captured.strip():
                                    errors.append((res_file, "Logs only (Success)", None, captured))
                        except Exception as e:
                            # the future itself raised (rather than the
                            # worker returning an error tuple)
                            final_total = max(task_totals.get(file, 1), 1)
                            prog.update(tasks[file], total=final_total, completed=final_total, processor="[bold red]FAIL[/bold red]")
                            errors.append((file, traceback.format_exc(), e, ""))

                drain_progress_queue()
    finally:
        if manager:
            manager.shutdown()

    if len(errors) > 0:
        C.print()
        for file, trcbk, e, captured in errors:
            # escape the path: file names may contain "[...]" which rich
            # would otherwise interpret as markup
            rel_path = escape(os.path.relpath(str(Path(file).absolute()), in_dir))
            if e:
                C.print(f"[bold red]ERROR[/bold red] on file [italic]{rel_path}[/italic]: {escape(str(e))}\n")
                if captured.strip():
                    C.print(f"[dim]Captured Worker Output:[/dim]\n{escape(captured.strip())}\n")
                if ctx.obj["verbose"] == 1:
                    C.print(escape(str(trcbk)))
                elif ctx.obj["verbose"] > 1:
                    # C writes to the temp file here, so use a fresh console
                    Console().print(escape(str(trcbk)))
            elif captured.strip():
                C.print(f"[bold blue]INFO[/bold blue] on file [italic]{rel_path}[/italic]:\n")
                C.print(f"{escape(captured.strip())}\n")
    else:
        C.print(f"\nAll done. Results saved to {out_dir}!\n")

    if ctx.obj["verbose"] > 1:
        # NOTE(review): no matching begin_capture() is visible in this
        # function — confirm this call is still needed.
        C.end_capture()

    if __tf:
        __tf.close()
|