batchalign 0.8.0.post1__tar.gz → 0.8.0.post2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of batchalign might be problematic. Click here for more details.
- {batchalign-0.8.0.post1/batchalign.egg-info → batchalign-0.8.0.post2}/PKG-INFO +1 -1
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/cli/dispatch.py +186 -14
- batchalign-0.8.0.post2/batchalign/version +3 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2/batchalign.egg-info}/PKG-INFO +1 -1
- batchalign-0.8.0.post1/batchalign/version +0 -3
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/LICENSE +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/MANIFEST.in +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/README.md +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/__main__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/cli/cli.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/constants.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/document.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/errors.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/formats/base.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/formats/chat/generator.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/formats/chat/lexer.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/formats/chat/parser.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/formats/chat/utils.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/models/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/models/resolve.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/models/training/run.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/models/utils.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/models/utterance/cantonese_infer.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/models/wave2vec/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/models/wave2vec/infer_fa.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/asr/num2chinese.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/asr/num2lang/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/asr/num2lang/deu.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/asr/num2lang/ell.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/asr/num2lang/eng.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/asr/num2lang/eus.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/asr/num2lang/fra.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/asr/num2lang/hrv.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/asr/num2lang/ind.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/asr/num2lang/jpn.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/asr/num2lang/nld.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/asr/num2lang/por.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/asr/num2lang/spa.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/asr/num2lang/tha.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/asr/oai_whisper.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/avqi/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/avqi/engine.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/diarization/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/diarization/pyannote.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/dispatch.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/morphosyntax/ud.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/opensmile/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/opensmile/engine.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/translate/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/translate/gtrans.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/translate/seamless.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/translate/utils.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/utils/abbrev.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/utils/compounds.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/utils/config.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/utils/dp.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/utils/names.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/utils/utils.py +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign.egg-info/SOURCES.txt +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign.egg-info/requires.txt +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/setup.cfg +0 -0
- {batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/setup.py +0 -0
|
@@ -32,6 +32,7 @@ import time
|
|
|
32
32
|
import traceback
|
|
33
33
|
import logging as L
|
|
34
34
|
baL = L.getLogger('batchalign')
|
|
35
|
+
import psutil
|
|
35
36
|
|
|
36
37
|
warnings.filterwarnings('ignore', category=UserWarning, message='TypedStorage is deprecated')
|
|
37
38
|
|
|
@@ -55,6 +56,29 @@ def _worker_task(file_info, command, lang, num_speakers, loader_info, writer_inf
|
|
|
55
56
|
|
|
56
57
|
file, output = file_info
|
|
57
58
|
pid = os.getpid()
|
|
59
|
+
rss_start = None
|
|
60
|
+
rss_end = None
|
|
61
|
+
rss_peak = None
|
|
62
|
+
|
|
63
|
+
def _safe_rss():
    """Return the current resident set size of this worker, or None.

    Best-effort probe: psutil is imported lazily and any failure while
    importing it or querying the process identified by ``pid`` is
    swallowed, so memory accounting never interrupts the actual work.
    """
    try:
        import psutil
        process = psutil.Process(pid)
        usage = process.memory_info()
        return usage.rss
    except Exception:
        # Deliberately broad: this is diagnostics only.
        return None
|
|
69
|
+
|
|
70
|
+
def _safe_peak_rss():
    """Return this process's peak resident set size in bytes, or None.

    Reads ``ru_maxrss`` from ``resource.getrusage(RUSAGE_SELF)``. That field
    is reported in kilobytes on Linux but in bytes on macOS, so the unit is
    chosen from ``sys.platform`` rather than guessed from the magnitude of
    the value (a magnitude guess inflates any macOS peak below 1 GiB by a
    factor of 1024).

    Returns:
        int | None: peak RSS in bytes, or None when the ``resource`` module
        is unavailable (e.g. on Windows) or the query fails.
    """
    try:
        import resource
        import sys

        peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        if peak is None:
            return None
        if sys.platform == "darwin":
            # macOS already reports bytes.
            return int(peak)
        # Linux (and the BSDs) report kilobytes; normalize to bytes.
        return int(peak) * 1024
    except Exception:
        # Best-effort diagnostics only; never fail the worker over this.
        return None
|
|
80
|
+
|
|
81
|
+
rss_start = _safe_rss()
|
|
58
82
|
|
|
59
83
|
# Configure logging in this worker process
|
|
60
84
|
if verbose >= 1:
|
|
@@ -70,9 +94,8 @@ def _worker_task(file_info, command, lang, num_speakers, loader_info, writer_inf
|
|
|
70
94
|
else:
|
|
71
95
|
baL.setLevel(logging.DEBUG)
|
|
72
96
|
|
|
73
|
-
#
|
|
74
|
-
|
|
75
|
-
should_capture = verbose == 0
|
|
97
|
+
# Always capture output to avoid interleaving with progress rendering.
|
|
98
|
+
should_capture = True
|
|
76
99
|
|
|
77
100
|
if should_capture:
|
|
78
101
|
# Use a temporary file to capture ALL output at the FD level
|
|
@@ -152,7 +175,15 @@ def _worker_task(file_info, command, lang, num_speakers, loader_info, writer_inf
|
|
|
152
175
|
else:
|
|
153
176
|
captured = ""
|
|
154
177
|
|
|
155
|
-
|
|
178
|
+
rss_end = _safe_rss()
|
|
179
|
+
rss_peak = _safe_peak_rss()
|
|
180
|
+
mem_info = {
|
|
181
|
+
"pid": pid,
|
|
182
|
+
"rss_start": rss_start,
|
|
183
|
+
"rss_end": rss_end,
|
|
184
|
+
"rss_peak": rss_peak,
|
|
185
|
+
}
|
|
186
|
+
return file, None, None, captured, mem_info
|
|
156
187
|
except Exception as e:
|
|
157
188
|
# Flush and read captured output if we were capturing
|
|
158
189
|
if should_capture:
|
|
@@ -162,7 +193,15 @@ def _worker_task(file_info, command, lang, num_speakers, loader_info, writer_inf
|
|
|
162
193
|
captured = log_file.read()
|
|
163
194
|
else:
|
|
164
195
|
captured = ""
|
|
165
|
-
|
|
196
|
+
rss_end = _safe_rss()
|
|
197
|
+
rss_peak = _safe_peak_rss()
|
|
198
|
+
mem_info = {
|
|
199
|
+
"pid": pid,
|
|
200
|
+
"rss_start": rss_start,
|
|
201
|
+
"rss_end": rss_end,
|
|
202
|
+
"rss_peak": rss_peak,
|
|
203
|
+
}
|
|
204
|
+
return file, traceback.format_exc(), e, captured, mem_info
|
|
166
205
|
finally:
|
|
167
206
|
# Restore original FDs only if we redirected them
|
|
168
207
|
if should_capture:
|
|
@@ -255,6 +294,7 @@ def _dispatch(command, lang, num_speakers,
|
|
|
255
294
|
file_pairs = list(zip(files, outputs))
|
|
256
295
|
file_pairs.sort(key=lambda fo: os.path.getsize(fo[0]) if os.path.exists(fo[0]) else 0, reverse=True)
|
|
257
296
|
files, outputs = zip(*file_pairs) if file_pairs else ([], [])
|
|
297
|
+
file_sizes = {f: os.path.getsize(f) if os.path.exists(f) else 0 for f in files}
|
|
258
298
|
|
|
259
299
|
C.print(f"\nMode: [blue]{command}[/blue]; got [bold cyan]{len(files)}[/bold cyan] transcript{'s' if len(files) > 1 else ''} to process from {in_dir}:\n")
|
|
260
300
|
|
|
@@ -289,8 +329,66 @@ def _dispatch(command, lang, num_speakers,
|
|
|
289
329
|
# create the spinner
|
|
290
330
|
prog = Progress(SpinnerColumn(), *Progress.get_default_columns()[:-1],
|
|
291
331
|
TimeElapsedColumn(),
|
|
292
|
-
TextColumn("[
|
|
332
|
+
TextColumn("[magenta]{task.fields[mem]}[/magenta]"),
|
|
333
|
+
TextColumn("[cyan]{task.fields[processor]}[/cyan]"),
|
|
334
|
+
console=C, refresh_per_second=5)
|
|
293
335
|
errors = []
|
|
336
|
+
mem_records = {}
|
|
337
|
+
mem_samples = []
|
|
338
|
+
last_low_mem_warn = 0.0
|
|
339
|
+
|
|
340
|
+
def _format_bytes(count, precision=2):
    """Render a byte count as a short human-readable string.

    ``None`` becomes ``"unknown"``. Values below 1024 are shown as a whole
    number of bytes; larger values are scaled by powers of 1024 up to TB
    and shown with ``precision`` decimal places.
    """
    if count is None:
        return "unknown"

    value = float(count)
    suffix = "B"
    # Step up one unit at a time until the value fits or units run out.
    for bigger in ("KB", "MB", "GB", "TB"):
        if value < 1024:
            break
        value /= 1024
        suffix = bigger

    if suffix == "B":
        return f"{int(value)}{suffix}"
    return f"{value:.{precision}f}{suffix}"
|
|
352
|
+
|
|
353
|
+
def _mem_label(base, available=None, low_mem=False):
    """Build the text shown in the progress bar's memory column.

    Starts with ``base``, appends the available system memory when it is
    known, and appends a ``LOW MEM`` marker when ``low_mem`` is truthy.
    Segments are joined with `` | ``.
    """
    segments = [base]
    if available is not None:
        segments.append("avail " + _format_bytes(available, precision=1))
    if low_mem:
        segments.append("LOW MEM")
    return " | ".join(segments)
|
|
360
|
+
|
|
361
|
+
def _system_memory():
    """Return ``(total, available)`` system memory as reported by psutil.

    Any failure while querying psutil yields ``(None, None)`` so callers
    can treat memory information as optional.
    """
    try:
        snapshot = psutil.virtual_memory()
        total_bytes, free_bytes = snapshot.total, snapshot.available
    except Exception:
        return None, None
    return total_bytes, free_bytes
|
|
367
|
+
|
|
368
|
+
def _memory_reserve(total):
    """Return how much memory to keep free, given the system total.

    The reserve is 10% of ``total`` but never less than 2 GiB. ``None`` in
    gives ``None`` out.
    """
    if total is None:
        return None
    minimum = 2 * 1024 * 1024 * 1024
    tenth = int(total * 0.10)
    return tenth if tenth > minimum else minimum
|
|
372
|
+
|
|
373
|
+
def _estimate_worker_bytes(file_size):
    """Estimate the memory a worker will need for a file of ``file_size``.

    Uses the ``(file size, observed memory)`` pairs collected so far in
    ``mem_samples``: the middle element of the sorted memory-per-byte
    ratios is scaled by ``file_size``. The result is clamped to the range
    512 MiB .. 6 GiB, and 512 MiB is returned when no usable sample exists.
    """
    floor = 512 * 1024 * 1024
    ceiling = 6 * 1024 * 1024 * 1024

    # Pairs with a zero/None size or memory reading carry no information.
    ratios = sorted(mem / size for size, mem in mem_samples if size and mem)
    if not ratios:
        return floor

    middle_ratio = ratios[len(ratios) // 2]
    projected = int(middle_ratio * file_size)
    return max(floor, min(projected, ceiling))
|
|
383
|
+
|
|
384
|
+
def _should_throttle(est_bytes):
    """Decide whether starting a worker needing ``est_bytes`` is unsafe.

    Returns ``(throttle, total, available)``. ``throttle`` is True when the
    available memory minus the estimate would fall below the reserve from
    ``_memory_reserve``; it is False whenever memory figures are unknown.
    """
    total, available = _system_memory()
    throttle = False
    if total is not None and available is not None:
        reserve = _memory_reserve(total)
        if reserve is not None:
            throttle = (available - est_bytes) < reserve
    return throttle, total, available
|
|
294
392
|
|
|
295
393
|
try:
|
|
296
394
|
with prog as prog:
|
|
@@ -298,8 +396,9 @@ def _dispatch(command, lang, num_speakers,
|
|
|
298
396
|
task_totals = {}
|
|
299
397
|
|
|
300
398
|
for f in files:
|
|
301
|
-
tasks[f] = prog.add_task(Path(f).name, start=False, total=1, processor="Waiting...")
|
|
399
|
+
tasks[f] = prog.add_task(Path(f).name, start=False, total=1, processor="Waiting...", mem="queued")
|
|
302
400
|
task_totals[f] = 1
|
|
401
|
+
prog.start_task(tasks[f])
|
|
303
402
|
|
|
304
403
|
def drain_progress_queue():
|
|
305
404
|
if not progress_queue:
|
|
@@ -315,10 +414,16 @@ def _dispatch(command, lang, num_speakers,
|
|
|
315
414
|
continue
|
|
316
415
|
task_total = max(int(total) if total else task_totals.get(file, 1), 1)
|
|
317
416
|
task_totals[file] = task_total
|
|
417
|
+
total_mem, available_mem = _system_memory()
|
|
418
|
+
reserve = _memory_reserve(total_mem)
|
|
419
|
+
low_mem = False
|
|
420
|
+
if reserve is not None and available_mem is not None:
|
|
421
|
+
low_mem = available_mem < reserve
|
|
318
422
|
prog.update(tasks[file],
|
|
319
423
|
total=task_total,
|
|
320
424
|
completed=min(int(completed), task_total),
|
|
321
|
-
processor=render_stage(stage_tasks)
|
|
425
|
+
processor=render_stage(stage_tasks),
|
|
426
|
+
mem=_mem_label("running", available_mem, low_mem))
|
|
322
427
|
|
|
323
428
|
with concurrent.futures.ProcessPoolExecutor(max_workers=num_workers) as executor:
|
|
324
429
|
worker_func = partial(_worker_task,
|
|
@@ -331,11 +436,54 @@ def _dispatch(command, lang, num_speakers,
|
|
|
331
436
|
verbose=ctx.obj["verbose"],
|
|
332
437
|
**kwargs)
|
|
333
438
|
|
|
334
|
-
|
|
439
|
+
file_iter = iter(zip(files, outputs))
|
|
440
|
+
future_to_file = {}
|
|
441
|
+
|
|
442
|
+
def submit_one(file_path, output_path):
    """Hand one (input, output) pair to the executor and mark it running.

    Records the returned future in ``future_to_file`` and refreshes the
    file's progress row with an estimated memory footprint and the current
    system memory state.
    """
    job = executor.submit(worker_func, (file_path, output_path))
    future_to_file[job] = file_path

    size_on_disk = file_sizes.get(file_path, 0)
    est_bytes = _estimate_worker_bytes(size_on_disk)

    total_mem, available_mem = _system_memory()
    reserve = _memory_reserve(total_mem)
    # Only flag low memory when both figures are actually known.
    low_mem = (
        reserve is not None
        and available_mem is not None
        and available_mem < reserve
    )

    label = _mem_label(f"est {_format_bytes(est_bytes)}", available_mem, low_mem)
    prog.update(tasks[file_path], processor="Processing...", mem=label)
|
|
335
456
|
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
457
|
+
def schedule_available():
|
|
458
|
+
nonlocal last_low_mem_warn
|
|
459
|
+
while len(future_to_file) < num_workers:
|
|
460
|
+
try:
|
|
461
|
+
next_file, next_output = next(file_iter)
|
|
462
|
+
except StopIteration:
|
|
463
|
+
break
|
|
464
|
+
est_bytes = _estimate_worker_bytes(file_sizes.get(next_file, 0))
|
|
465
|
+
throttle, total, available = _should_throttle(est_bytes)
|
|
466
|
+
if throttle and future_to_file:
|
|
467
|
+
now = time.time()
|
|
468
|
+
if now - last_low_mem_warn > 10:
|
|
469
|
+
reserve = _memory_reserve(total)
|
|
470
|
+
prog.console.print(
|
|
471
|
+
f"[bold yellow]Low memory[/bold yellow]: "
|
|
472
|
+
f"{_format_bytes(available)} free, "
|
|
473
|
+
f"{_format_bytes(reserve)} reserve. "
|
|
474
|
+
f"Throttling new workers."
|
|
475
|
+
)
|
|
476
|
+
last_low_mem_warn = now
|
|
477
|
+
break
|
|
478
|
+
if throttle and not future_to_file:
|
|
479
|
+
prog.console.print(
|
|
480
|
+
f"[bold yellow]Low memory[/bold yellow]: "
|
|
481
|
+
f"{_format_bytes(available)} free. "
|
|
482
|
+
"Continuing with a single worker."
|
|
483
|
+
)
|
|
484
|
+
submit_one(next_file, next_output)
|
|
485
|
+
|
|
486
|
+
schedule_available()
|
|
339
487
|
|
|
340
488
|
pending = set(future_to_file.keys())
|
|
341
489
|
while pending:
|
|
@@ -348,8 +496,9 @@ def _dispatch(command, lang, num_speakers,
|
|
|
348
496
|
|
|
349
497
|
for future in done:
|
|
350
498
|
file = future_to_file[future]
|
|
499
|
+
future_to_file.pop(future, None)
|
|
351
500
|
try:
|
|
352
|
-
res_file, trcbk, e, captured = future.result()
|
|
501
|
+
res_file, trcbk, e, captured, mem_info = future.result()
|
|
353
502
|
final_total = max(task_totals.get(file, 1), 1)
|
|
354
503
|
if e:
|
|
355
504
|
prog.update(tasks[file], total=final_total, completed=final_total, processor="[bold red]FAIL[/bold red]")
|
|
@@ -357,12 +506,25 @@ def _dispatch(command, lang, num_speakers,
|
|
|
357
506
|
else:
|
|
358
507
|
prog.update(tasks[file], total=final_total, completed=final_total, processor="[bold green]DONE[/bold green]")
|
|
359
508
|
if ctx.obj["verbose"] >= 1 and captured.strip():
|
|
360
|
-
|
|
509
|
+
prog.console.print(f"[bold blue]INFO[/bold blue] on file [italic]{Path(file).name}[/italic]:\n{escape(captured.strip())}\n")
|
|
510
|
+
if mem_info:
|
|
511
|
+
mem_records[file] = mem_info
|
|
512
|
+
peak = mem_info.get("rss_peak") or mem_info.get("rss_end")
|
|
513
|
+
if peak:
|
|
514
|
+
mem_samples.append((file_sizes.get(file, 0), peak))
|
|
515
|
+
total_mem, available_mem = _system_memory()
|
|
516
|
+
reserve = _memory_reserve(total_mem)
|
|
517
|
+
low_mem = False
|
|
518
|
+
if reserve is not None and available_mem is not None:
|
|
519
|
+
low_mem = available_mem < reserve
|
|
520
|
+
prog.update(tasks[file], mem=_mem_label(_format_bytes(peak), available_mem, low_mem))
|
|
361
521
|
except Exception as e:
|
|
362
522
|
final_total = max(task_totals.get(file, 1), 1)
|
|
363
523
|
prog.update(tasks[file], total=final_total, completed=final_total, processor="[bold red]FAIL[/bold red]")
|
|
364
524
|
errors.append((file, traceback.format_exc(), e, ""))
|
|
365
525
|
|
|
526
|
+
schedule_available()
|
|
527
|
+
pending = set(future_to_file.keys())
|
|
366
528
|
drain_progress_queue()
|
|
367
529
|
finally:
|
|
368
530
|
if manager:
|
|
@@ -386,6 +548,16 @@ def _dispatch(command, lang, num_speakers,
|
|
|
386
548
|
else:
|
|
387
549
|
C.print(f"\nAll done. Results saved to {out_dir}!\n")
|
|
388
550
|
|
|
551
|
+
if mem_records and ctx.obj["verbose"] >= 1:
|
|
552
|
+
C.print("\nMemory usage per file (worker RSS peak):")
|
|
553
|
+
for file, info in mem_records.items():
|
|
554
|
+
rel_path = os.path.relpath(str(Path(file).absolute()), in_dir)
|
|
555
|
+
peak = info.get("rss_peak") or info.get("rss_end")
|
|
556
|
+
C.print(f"- {rel_path}: {_format_bytes(peak)}")
|
|
557
|
+
total, available = _system_memory()
|
|
558
|
+
if total is not None and available is not None:
|
|
559
|
+
C.print(f"\nSystem memory available: {_format_bytes(available)} / {_format_bytes(total)}")
|
|
560
|
+
|
|
389
561
|
if ctx.obj["verbose"] > 1:
|
|
390
562
|
C.end_capture()
|
|
391
563
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/models/utterance/cantonese_infer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/asr/num2lang/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/cleanup/disfluencies.py
RENAMED
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/cleanup/parse_support.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/cleanup/support/test.test
RENAMED
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/diarization/__init__.py
RENAMED
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/diarization/pyannote.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/morphosyntax/__init__.py
RENAMED
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/morphosyntax/coref.py
RENAMED
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/morphosyntax/en/irr.py
RENAMED
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/morphosyntax/fr/apm.py
RENAMED
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/morphosyntax/fr/apmn.py
RENAMED
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/morphosyntax/fr/case.py
RENAMED
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/morphosyntax/ja/verbforms.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/opensmile/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/speaker/nemo_speaker.py
RENAMED
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/translate/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/translate/seamless.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/utterance/__init__.py
RENAMED
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/pipelines/utterance/ud_utterance.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/formats/chat/test_chat_file.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/formats/chat/test_chat_lexer.py
RENAMED
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/formats/chat/test_chat_parser.py
RENAMED
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/formats/chat/test_chat_utils.py
RENAMED
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/formats/textgrid/test_textgrid.py
RENAMED
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/pipelines/analysis/test_eval.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/pipelines/asr/test_asr_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/pipelines/fa/test_fa_pipeline.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/pipelines/test_pipeline.py
RENAMED
|
File without changes
|
{batchalign-0.8.0.post1 → batchalign-0.8.0.post2}/batchalign/tests/pipelines/test_pipeline_models.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|