batchalign 0.7.20.post18__tar.gz → 0.7.21.post1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalign-0.7.20.post18/batchalign.egg-info → batchalign-0.7.21.post1}/PKG-INFO +10 -3
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/cli/cli.py +46 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/cli/dispatch.py +2 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/__init__.py +3 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/asr/rev.py +2 -2
- batchalign-0.7.21.post1/batchalign/pipelines/avqi/__init__.py +8 -0
- batchalign-0.7.21.post1/batchalign/pipelines/avqi/engine.py +264 -0
- batchalign-0.7.21.post1/batchalign/pipelines/diarization/__init__.py +1 -0
- batchalign-0.7.21.post1/batchalign/pipelines/diarization/pyannote.py +103 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/dispatch.py +7 -2
- batchalign-0.7.21.post1/batchalign/version +3 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1/batchalign.egg-info}/PKG-INFO +10 -3
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign.egg-info/SOURCES.txt +4 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign.egg-info/requires.txt +7 -1
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/setup.py +7 -3
- batchalign-0.7.20.post18/batchalign/version +0 -3
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/LICENSE +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/MANIFEST.in +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/README.md +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/__init__.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/__main__.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/constants.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/document.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/errors.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/chat/generator.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/chat/lexer.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/chat/parser.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/chat/utils.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/resolve.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/utterance/cantonese_infer.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/wave2vec/__init__.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/wave2vec/infer_fa.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/asr/num2chinese.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/asr/oai_whisper.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/morphosyntax/ud.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/translate/__init__.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/translate/gtrans.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/translate/seamless.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/translate/utils.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/utils/abbrev.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/utils/compounds.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/utils/dp.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/utils/names.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: batchalign
|
|
3
|
-
Version: 0.7.20.post18
|
|
3
|
+
Version: 0.7.21.post1
|
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
|
@@ -17,6 +17,8 @@ Requires-Dist: pydub
|
|
|
17
17
|
Requires-Dist: plotly>=5.3.0
|
|
18
18
|
Requires-Dist: transformers>=4.38.2
|
|
19
19
|
Requires-Dist: tokenizers>=0.14.1
|
|
20
|
+
Requires-Dist: numba>=0.61.0
|
|
21
|
+
Requires-Dist: numpy<=2.2
|
|
20
22
|
Requires-Dist: pycountry>=22.3
|
|
21
23
|
Requires-Dist: stanza[transformers]>=1.10.1
|
|
22
24
|
Requires-Dist: scipy~=1.11
|
|
@@ -35,7 +37,11 @@ Requires-Dist: tiktoken
|
|
|
35
37
|
Requires-Dist: blobfile
|
|
36
38
|
Requires-Dist: sentencepiece
|
|
37
39
|
Requires-Dist: googletrans
|
|
38
|
-
Requires-Dist: openai-whisper
|
|
40
|
+
Requires-Dist: openai-whisper
|
|
41
|
+
Requires-Dist: llvmlite>=0.44.0
|
|
42
|
+
Requires-Dist: praat-parselmouth==0.4.6
|
|
43
|
+
Requires-Dist: pyannote.audio
|
|
44
|
+
Requires-Dist: onnxruntime
|
|
39
45
|
Provides-Extra: dev
|
|
40
46
|
Requires-Dist: pytest; extra == "dev"
|
|
41
47
|
Provides-Extra: train
|
|
@@ -48,6 +54,7 @@ Dynamic: author-email
|
|
|
48
54
|
Dynamic: classifier
|
|
49
55
|
Dynamic: description
|
|
50
56
|
Dynamic: description-content-type
|
|
57
|
+
Dynamic: license-file
|
|
51
58
|
Dynamic: provides-extra
|
|
52
59
|
Dynamic: requires-dist
|
|
53
60
|
Dynamic: summary
|
|
@@ -356,6 +356,51 @@ def benchmark(ctx, in_dir, out_dir, lang, num_speakers, whisper, **kwargs):
|
|
|
356
356
|
in_dir, out_dir,
|
|
357
357
|
loader, writer, C,
|
|
358
358
|
asr="whisper" if whisper else "rev", **kwargs)
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
#################### AVQI ################################
|
|
362
|
+
|
|
363
|
+
# AVQI command: takes two existing audio files (continuous speech and
# sustained vowel), runs the AVQI engine on them and writes a plain-text
# report next to the continuous-speech file (same stem, ".avqi.txt").
# NOTE: the function docstring below is what click shows as the command
# help, so developer-facing notes live in comments instead.
@batchalign.command()
@click.argument("cs_file", type=click.Path(exists=True, file_okay=True))
@click.argument("sv_file", type=click.Path(exists=True, file_okay=True))
@click.option("--lang",
              # "eng"-style three-letter *language* codes are ISO 639-3;
              # ISO 3166-1 alpha-3 is the country-code standard.
              help="sample language in three-letter ISO 639-3 code",
              show_default=True,
              default="eng",
              type=str)
@click.pass_context
def avqi(ctx, cs_file, sv_file, lang, **kwargs):
    """Calculate Acoustic Voice Quality Index (AVQI) from continuous speech and sustained vowel audio files."""

    # Imported lazily so the audio-analysis stack is only loaded when this
    # command is actually invoked.
    from batchalign.pipelines.avqi import AVQIEngine

    # Report goes next to the continuous-speech file: "x.wav" -> "x.avqi.txt"
    output_file = Path(cs_file).with_suffix('.avqi.txt')

    avqi_engine = AVQIEngine()

    try:
        C.print("\n[blue]Calculating AVQI[/blue] for:")
        C.print(f"  Continuous Speech: [cyan]{cs_file}[/cyan]")
        C.print(f"  Sustained Vowel: [cyan]{sv_file}[/cyan]")
        C.print(f"  Language: [cyan]{lang}[/cyan]")
        C.print(f"  Output: [cyan]{output_file}[/cyan]\n")

        results = avqi_engine.analyze(cs_file, sv_file, str(output_file), lang)

        # Treat a result that carries an "error" entry as a failure instead of
        # announcing success; results without that entry are unaffected.
        failure = results.get("error")
        if failure:
            raise RuntimeError(failure)

        C.print("[bold green]✓ AVQI calculation completed![/bold green]")
        C.print(f"[bold]AVQI Score: {results['avqi']:.3f}[/bold]")
        C.print(f"Results saved to: [cyan]{output_file}[/cyan]\n")

    except Exception as e:
        # Top-level CLI boundary: show the message, and the traceback only
        # when the user asked for verbose output.
        C.print(f"[bold red]ERROR[/bold red]: {str(e)}")
        if ctx.obj["verbose"] > 0:
            import traceback
            C.print(traceback.format_exc())
|
|
359
404
|
|
|
360
405
|
|
|
361
406
|
#################### SETUP ################################
|
|
@@ -379,3 +424,4 @@ def version(ctx, **kwargs):
|
|
|
379
424
|
f"[italic]{RELEASE_NOTES.strip()}[/italic]"+"\n" +
|
|
380
425
|
"\nDeveloped by Brian MacWhinney and Houjun Liu")
|
|
381
426
|
C.print("\n\n"+ptr+"\n\n")
|
|
427
|
+
|
|
@@ -163,9 +163,11 @@ def _dispatch(command, lang, num_speakers,
|
|
|
163
163
|
callback=lambda *args:progress_callback(file, *args),
|
|
164
164
|
**kw)
|
|
165
165
|
msgs = [escape(str(i.message)).strip() for i in w]
|
|
166
|
+
msgs = [i for i in msgs if "torchaudio" not in i.lower()]
|
|
166
167
|
# write the format, as needed
|
|
167
168
|
writer(doc, output)
|
|
168
169
|
# print any warnings
|
|
170
|
+
|
|
169
171
|
if len(msgs) > 0:
|
|
170
172
|
if ctx.obj["verbose"] > 1:
|
|
171
173
|
Console().print(f"\n[bold yellow]WARN[/bold yellow] on {file}:\n","\n".join(msgs)+"\n")
|
|
@@ -28,9 +28,9 @@ class RevEngine(BatchalignEngine):
|
|
|
28
28
|
# if there is no utterance segmentation scheme, we only
|
|
29
29
|
# run ASR
|
|
30
30
|
if self.__engine:
|
|
31
|
-
return [ Task.ASR, Task.
|
|
31
|
+
return [ Task.ASR, Task.UTTERANCE_SEGMENTATION ]
|
|
32
32
|
else:
|
|
33
|
-
return [ Task.ASR
|
|
33
|
+
return [ Task.ASR ]
|
|
34
34
|
|
|
35
35
|
def __init__(self, key:str=None, lang="eng", num_speakers=2):
|
|
36
36
|
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AVQI Engine for Batchalign2
|
|
3
|
+
Acoustic Voice Quality Index calculation for voice quality assessment
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import parselmouth
|
|
7
|
+
import numpy as np
|
|
8
|
+
from parselmouth.praat import call
|
|
9
|
+
import re
|
|
10
|
+
from typing import Tuple, Dict, Optional
|
|
11
|
+
import os
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
import logging
|
|
14
|
+
|
|
15
|
+
from batchalign.pipelines.base import BatchalignEngine
|
|
16
|
+
from batchalign.document import Task
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
L = logging.getLogger('batchalign')
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class AVQIEngine(BatchalignEngine):
    """Engine for calculating Acoustic Voice Quality Index (AVQI).

    The score is computed with Praat (through parselmouth) from two
    recordings: a continuous-speech sample and a sustained vowel. The voiced
    portion of the speech sample is concatenated with (at most) the last three
    seconds of the vowel, six acoustic measures are taken from that signal and
    combined linearly into the AVQI value.
    """

    # Offset added to the LTAS slope when the trend-line tilt cannot be
    # computed. Carried over unchanged from the previous implementation; its
    # derivation is not documented in this file.
    _TILT_FALLBACK_OFFSET = 5.5

    def __init__(self):
        super().__init__()
        self._tasks = [Task.FEATURE_EXTRACT]

    @property
    def tasks(self):
        """Tasks this engine performs."""
        return self._tasks

    def extract_voiced_segments(self, sound):
        """Extract voiced segments from audio.

        Silences are removed first; the remaining signal is then scanned in
        consecutive 30 ms windows and every window whose power exceeds 30% of
        the overall power is appended to the result.

        Parameters
        ----------
        sound : parselmouth.Sound
            Signal to scan.

        Returns
        -------
        parselmouth.Sound
            Concatenation of the retained windows, preceded by the 1 ms
            silent seed sound the concatenation starts from.
        """
        original = call(sound, "Copy", "original")
        sampling_rate = call(original, "Get sampling frequency")
        # 1 ms of zeros used as the seed that voiced windows are appended to
        onlyVoice = call("Create Sound", "onlyVoice", 0, 0.001, sampling_rate, "0")
        textgrid = call(
            original,
            "To TextGrid (silences)",
            50,
            0.003,
            -25,
            0.1,
            0.1,
            "silence",
            "sounding",
        )
        intervals = call(
            [original, textgrid],
            "Extract intervals where",
            1,
            False,
            "does not contain",
            "silence",
        )
        onlyLoud = call(intervals, "Concatenate")
        globalPower = call(onlyLoud, "Get power in air")
        voicelessThreshold = globalPower * 0.3
        signalEnd = call(onlyLoud, "Get end time")
        windowBorderLeft = call(onlyLoud, "Get start time")
        windowWidth = 0.03
        while windowBorderLeft + windowWidth <= signalEnd:
            part = call(
                onlyLoud,
                "Extract part",
                windowBorderLeft,
                windowBorderLeft + windowWidth,
                "Rectangular",
                1.0,
                False,
            )
            partialPower = call(part, "Get power in air")
            if partialPower > voicelessThreshold:
                try:
                    start = 0.0025
                    # NOTE(review): recent Praat documents this query as
                    # taking (channel, time) -- confirm the single-argument
                    # form is accepted by the pinned parselmouth; if it is
                    # not, every window ends up in the except branch below.
                    startZero = call(part, "Get nearest zero crossing", start)
                    if startZero is not None and not np.isinf(startZero):
                        onlyVoice = call([onlyVoice, part], "Concatenate")
                except Exception as e:
                    # Best effort: a window Praat cannot handle is skipped,
                    # but leave a trace instead of discarding it silently.
                    # (Not a bare except: Ctrl-C must still interrupt.)
                    L.debug(f"Skipping window at {windowBorderLeft:.3f}s: {e}")
            windowBorderLeft += windowWidth
        return onlyVoice

    def _estimate_tilt(self, ltas, slope):
        """Return the slope of the LTAS trend line, or a fallback on failure.

        Praat's "Compute trend line" does not modify the Ltas it is applied
        to; it produces a new trend-line object. The tilt therefore has to be
        read from the object the call returns, not from the input spectrum
        (whose slope is, by definition, ``slope`` itself).
        """
        try:
            trend = call(ltas, "Compute trend line", 1, 10000)
            return call(trend, "Get slope", 0, 1000, 1000, 10000, "energy")
        except Exception as e:
            L.warning(f"Could not compute LTAS trend line, using fallback tilt: {e}")
            return slope + self._TILT_FALLBACK_OFFSET

    def calculate_avqi_features(self, cs_file, sv_file) -> Tuple[float, Dict[str, float]]:
        """Calculate AVQI score and features from continuous speech and sustained vowel files.

        Parameters
        ----------
        cs_file : str
            Path to continuous speech audio file
        sv_file : str
            Path to sustained vowel audio file

        Returns
        -------
        Tuple[float, Dict[str, float]]
            The AVQI score and the measures it was computed from, keyed
            ``cpps``, ``hnr``, ``shimmer_local``, ``shimmer_local_db``,
            ``slope`` and ``tilt``.
        """
        cs_sound = parselmouth.Sound(cs_file)
        sv_sound = parselmouth.Sound(sv_file)

        # Remove content below 34 Hz from both recordings
        cs_filtered = call(cs_sound, "Filter (stop Hann band)", 0, 34, 0.1)
        sv_filtered = call(sv_sound, "Filter (stop Hann band)", 0, 34, 0.1)

        voiced_cs = self.extract_voiced_segments(cs_filtered)

        # Only the final three seconds of the sustained vowel are analysed
        sv_duration = call(sv_filtered, "Get total duration")
        if sv_duration > 3:
            sv_start = sv_duration - 3
            sv_part = call(
                sv_filtered, "Extract part", sv_start, sv_duration, "rectangular", 1, False
            )
        else:
            sv_part = call(sv_filtered, "Copy", "sv_part")

        concatenated = call([voiced_cs, sv_part], "Concatenate")

        # Smoothed cepstral peak prominence
        powercepstrogram = call(concatenated, "To PowerCepstrogram", 60, 0.002, 5000, 50)
        cpps = call(
            powercepstrogram,
            "Get CPPS",
            False,
            0.01,
            0.001,
            60,
            330,
            0.05,
            "Parabolic",
            0.001,
            0,
            "Straight",
            "Robust",
        )

        # Long-term average spectrum: slope and trend-line tilt
        ltas = call(concatenated, "To Ltas", 1)
        slope = call(ltas, "Get slope", 0, 1000, 1000, 10000, "energy")
        tilt = self._estimate_tilt(ltas, slope)

        # Shimmer (Praat reports the local value as a fraction; AVQI uses %)
        pointprocess = call(concatenated, "To PointProcess (periodic, cc)", 50, 400)
        shim_percent = call(
            [concatenated, pointprocess],
            "Get shimmer (local)",
            0,
            0,
            0.0001,
            0.02,
            1.3,
            1.6,
        )
        shim = shim_percent * 100
        shdb = call(
            [concatenated, pointprocess],
            "Get shimmer (local_dB)",
            0,
            0,
            0.0001,
            0.02,
            1.3,
            1.6,
        )

        # Harmonics-to-noise ratio, parsed out of Praat's textual voice report
        pitch = call(
            concatenated,
            "To Pitch (cc)",
            0,
            75,
            15,
            False,
            0.03,
            0.45,
            0.01,
            0.35,
            0.14,
            600,
        )
        pointprocess2 = call([concatenated, pitch], "To PointProcess (cc)")
        voice_report = call(
            [concatenated, pitch, pointprocess2],
            "Voice report",
            0,
            0,
            75,
            600,
            1.3,
            1.6,
            0.03,
            0.45,
        )
        hnr_match = re.search(
            r"Mean harmonics-to-noise ratio:\s*([-+]?\d*\.?\d+)", voice_report
        )
        # No numeric value in the report -> treated as 0.0
        hnr = float(hnr_match.group(1)) if hnr_match else 0.0

        avqi = (
            4.152
            - (0.177 * cpps)
            - (0.006 * hnr)
            - (0.037 * shim)
            + (0.941 * shdb)
            + (0.01 * slope)
            + (0.093 * tilt)
        ) * 2.8902
        return avqi, {
            "cpps": cpps,
            "hnr": hnr,
            "shimmer_local": shim,
            "shimmer_local_db": shdb,
            "slope": slope,
            "tilt": tilt,
        }

    def analyze(self, cs_file: str, sv_file: str, output_file: str, lang: str = 'eng', **kwargs) -> Dict:
        """
        Analyze audio files and calculate AVQI.

        Parameters
        ----------
        cs_file : str
            Path to continuous speech audio file
        sv_file : str
            Path to sustained vowel audio file
        output_file : str
            Path to output file
        lang : str
            Language code (default: 'eng'); only recorded in the report

        Returns
        -------
        Dict
            Dictionary containing AVQI score and features. If anything fails,
            every measure is 0.0 and an additional ``'error'`` entry holds the
            failure message, so callers can tell a failed run from a real
            score. This method does not raise.
        """
        L.info(f"Calculating AVQI for CS: {cs_file}, SV: {sv_file}")

        try:
            avqi_score, features = self.calculate_avqi_features(cs_file, sv_file)

            # Same keys, same order as the feature dict, with the score first
            results = {'avqi': avqi_score, **features}

            # Format before opening so a formatting problem cannot leave an
            # empty report behind.
            report = [
                f"AVQI: {avqi_score:.3f}\n",
                f"CPPS: {features['cpps']:.3f}\n",
                f"HNR: {features['hnr']:.3f}\n",
                f"Shimmer Local: {features['shimmer_local']:.3f}\n",
                f"Shimmer Local dB: {features['shimmer_local_db']:.3f}\n",
                f"LTAS Slope: {features['slope']:.3f}\n",
                f"LTAS Tilt: {features['tilt']:.3f}\n",
                f"Language: {lang}\n",
            ]
            with open(output_file, 'w', encoding='utf-8') as f:
                f.writelines(report)

            L.info(f"AVQI results written to: {output_file}")
            return results

        except Exception as e:
            L.error(f"Error calculating AVQI: {e}")
            L.debug("AVQI failure details", exc_info=True)
            # Default values on error, plus the reason so the failure is
            # distinguishable from a genuine all-zero result.
            return {
                'avqi': 0.0,
                'cpps': 0.0,
                'hnr': 0.0,
                'shimmer_local': 0.0,
                'shimmer_local_db': 0.0,
                'slope': 0.0,
                'tilt': 0.0,
                'error': str(e),
            }
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .pyannote import PyannoteEngine
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# system utils
|
|
2
|
+
import glob, os, re
|
|
3
|
+
from itertools import groupby
|
|
4
|
+
|
|
5
|
+
# pathing tools
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
# UD tools
|
|
9
|
+
import stanza
|
|
10
|
+
|
|
11
|
+
import copy
|
|
12
|
+
|
|
13
|
+
from stanza.utils.conll import CoNLL
|
|
14
|
+
from stanza import Document, DownloadMethod
|
|
15
|
+
from stanza.models.common.doc import Token
|
|
16
|
+
from stanza.pipeline.core import CONSTITUENCY
|
|
17
|
+
from stanza import DownloadMethod
|
|
18
|
+
from torch import heaviside
|
|
19
|
+
|
|
20
|
+
from stanza.pipeline.processor import ProcessorVariant, register_processor_variant
|
|
21
|
+
from stanza.resources.common import download_resources_json, load_resources_json, get_language_resources
|
|
22
|
+
|
|
23
|
+
# the loading bar
|
|
24
|
+
from tqdm import tqdm
|
|
25
|
+
|
|
26
|
+
from bdb import BdbQuit
|
|
27
|
+
|
|
28
|
+
from nltk import word_tokenize
|
|
29
|
+
from collections import defaultdict
|
|
30
|
+
|
|
31
|
+
import warnings
|
|
32
|
+
|
|
33
|
+
from stanza.utils.conll import CoNLL
|
|
34
|
+
|
|
35
|
+
# Oneliner of directory-based glob and replace
|
|
36
|
+
def globase(path, statement):
    """Return the paths inside directory *path* matching glob pattern *statement*."""
    return glob.glob(os.path.join(path, statement))


def repath_file(file_path, new_dir):
    """Return *file_path* relocated into *new_dir*, keeping only its file name."""
    # Only `Path` is imported in this module (`from pathlib import Path`);
    # referring to `pathlib.Path` here raised NameError when called.
    return os.path.join(new_dir, Path(file_path).name)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
from batchalign.document import *
|
|
41
|
+
from batchalign.constants import *
|
|
42
|
+
from batchalign.pipelines.base import *
|
|
43
|
+
from batchalign.formats.chat.parser import chat_parse_utterance
|
|
44
|
+
|
|
45
|
+
from batchalign.utils.dp import *
|
|
46
|
+
|
|
47
|
+
from pyannote.audio import Pipeline
|
|
48
|
+
|
|
49
|
+
import logging
|
|
50
|
+
L = logging.getLogger("batchalign")
|
|
51
|
+
|
|
52
|
+
import pycountry
|
|
53
|
+
|
|
54
|
+
class PyannoteEngine(BatchalignEngine):
    """Speaker-recognition engine backed by a pyannote diarization pipeline.

    The document's media file is diarized and every aligned utterance is
    assigned a "PAR<n>" participant tier derived from the diarization labels.
    """

    tasks = [ Task.SPEAKER_RECOGNITION ]
    status_hook = None

    def __init__(self, num_speakers=2):
        # Loads the pretrained pipeline once per engine instance
        self.pipe = Pipeline.from_pretrained("talkbank/dia-fork")
        self.num_speakers = num_speakers

    @staticmethod
    def _speaker_index(label):
        """Return the integer after the last underscore of a diarization label."""
        return int(label.split("_")[-1])

    def process(self, doc):
        """Assign speaker tiers to the aligned utterances of *doc*.

        Utterances without an alignment (or with an open-ended one) and
        non-utterance content are left untouched.

        Parameters
        ----------
        doc : Document
            Document with a media path and at least one existing tier (the
            first tier supplies the corpus and language of the new tiers).

        Returns
        -------
        Document
            The same document object, modified in place.

        Raises
        ------
        AssertionError
            If the document has no media or no media URL.
        """
        assert doc.media is not None and doc.media.url is not None, f"We cannot diarize something that doesn't have a media path! Provided media tier='{doc.media}'"
        res = self.pipe(doc.media.url, num_speakers=self.num_speakers)

        # Walk the diarization output once, keeping each segment's end time
        # and parsed speaker number, rather than re-iterating and re-parsing
        # the tracks for every utterance.
        tracks = [(seg_end, self._speaker_index(label))
                  for (_, seg_end), _, label in res.itertracks(yield_label=True)]

        corpus = doc.tiers[0].corpus
        lang = doc.tiers[0].lang
        tiers = {
            speaker_id: Tier(
                lang=lang, corpus=corpus,
                id="PAR"+str(speaker_id), name="Participant",
                birthday="",
            )
            for speaker_id in {speaker_id for _, speaker_id in tracks}
        }

        for utterance in doc.content:
            if not isinstance(utterance, Utterance):
                continue
            if utterance.alignment is None:
                continue
            start, end = utterance.alignment
            if start is None or end is None:
                continue

            # Alignment is divided by 1000 to compare against segment times
            # (presumably milliseconds vs. seconds -- confirm against Document).
            end_seconds = end/1000

            # The utterance takes the speaker of the *last* segment (in track
            # order) that ends at or before the utterance's own end:
            #   [seg_end, ....., ut_end]
            chosen = None
            for seg_end, speaker_id in tracks:
                tier = tiers.get(speaker_id)
                if seg_end <= end_seconds and tier:
                    chosen = tier
            if chosen is not None:
                utterance.tier = chosen

        return doc
|
|
102
|
+
|
|
103
|
+
|
|
@@ -7,7 +7,7 @@ from batchalign import (WhisperEngine, WhisperFAEngine, StanzaEngine, RevEngine,
|
|
|
7
7
|
NgramRetraceEngine, DisfluencyReplacementEngine, WhisperUTREngine,
|
|
8
8
|
RevUTREngine, EvaluationEngine, WhisperXEngine, NemoSpeakerEngine,
|
|
9
9
|
StanzaUtteranceEngine, CorefEngine, Wave2VecFAEngine, SeamlessTranslationModel,
|
|
10
|
-
GoogleTranslateEngine, OAIWhisperEngine)
|
|
10
|
+
GoogleTranslateEngine, OAIWhisperEngine, PyannoteEngine)
|
|
11
11
|
from batchalign import BatchalignPipeline
|
|
12
12
|
from batchalign.models import resolve
|
|
13
13
|
|
|
@@ -22,7 +22,7 @@ DEFAULT_PACKAGES = {
|
|
|
22
22
|
"asr": "whisper_oai",
|
|
23
23
|
"utr": "whisper_utr",
|
|
24
24
|
"fa": "whisper_fa",
|
|
25
|
-
"speaker": "
|
|
25
|
+
"speaker": "pyannote",
|
|
26
26
|
"morphosyntax": "stanza",
|
|
27
27
|
"disfluency": "replacement",
|
|
28
28
|
"retracing": "ngram",
|
|
@@ -72,6 +72,8 @@ def dispatch_pipeline(pkg_str, lang, num_speakers=None, **arg_overrides):
|
|
|
72
72
|
# if asr is in engines but disfluency or retracing is not
|
|
73
73
|
# add them
|
|
74
74
|
if "asr" in packages:
|
|
75
|
+
if "speaker" not in packages:
|
|
76
|
+
packages.append("speaker")
|
|
75
77
|
if "disfluency" not in packages:
|
|
76
78
|
packages.append("disfluency")
|
|
77
79
|
if "retracing" not in packages:
|
|
@@ -137,6 +139,9 @@ def dispatch_pipeline(pkg_str, lang, num_speakers=None, **arg_overrides):
|
|
|
137
139
|
engines.append(GoogleTranslateEngine())
|
|
138
140
|
elif engine == "whisper_oai":
|
|
139
141
|
engines.append(OAIWhisperEngine())
|
|
142
|
+
elif engine == "pyannote":
|
|
143
|
+
engines.append(PyannoteEngine())
|
|
144
|
+
|
|
140
145
|
|
|
141
146
|
L.debug(f"Done initalizing packages.")
|
|
142
147
|
return BatchalignPipeline(*engines)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: batchalign
|
|
3
|
-
Version: 0.7.20.post18
|
|
3
|
+
Version: 0.7.21.post1
|
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
|
@@ -17,6 +17,8 @@ Requires-Dist: pydub
|
|
|
17
17
|
Requires-Dist: plotly>=5.3.0
|
|
18
18
|
Requires-Dist: transformers>=4.38.2
|
|
19
19
|
Requires-Dist: tokenizers>=0.14.1
|
|
20
|
+
Requires-Dist: numba>=0.61.0
|
|
21
|
+
Requires-Dist: numpy<=2.2
|
|
20
22
|
Requires-Dist: pycountry>=22.3
|
|
21
23
|
Requires-Dist: stanza[transformers]>=1.10.1
|
|
22
24
|
Requires-Dist: scipy~=1.11
|
|
@@ -35,7 +37,11 @@ Requires-Dist: tiktoken
|
|
|
35
37
|
Requires-Dist: blobfile
|
|
36
38
|
Requires-Dist: sentencepiece
|
|
37
39
|
Requires-Dist: googletrans
|
|
38
|
-
Requires-Dist: openai-whisper
|
|
40
|
+
Requires-Dist: openai-whisper
|
|
41
|
+
Requires-Dist: llvmlite>=0.44.0
|
|
42
|
+
Requires-Dist: praat-parselmouth==0.4.6
|
|
43
|
+
Requires-Dist: pyannote.audio
|
|
44
|
+
Requires-Dist: onnxruntime
|
|
39
45
|
Provides-Extra: dev
|
|
40
46
|
Requires-Dist: pytest; extra == "dev"
|
|
41
47
|
Provides-Extra: train
|
|
@@ -48,6 +54,7 @@ Dynamic: author-email
|
|
|
48
54
|
Dynamic: classifier
|
|
49
55
|
Dynamic: description
|
|
50
56
|
Dynamic: description-content-type
|
|
57
|
+
Dynamic: license-file
|
|
51
58
|
Dynamic: provides-extra
|
|
52
59
|
Dynamic: requires-dist
|
|
53
60
|
Dynamic: summary
|
|
@@ -64,6 +64,8 @@ batchalign/pipelines/asr/rev.py
|
|
|
64
64
|
batchalign/pipelines/asr/utils.py
|
|
65
65
|
batchalign/pipelines/asr/whisper.py
|
|
66
66
|
batchalign/pipelines/asr/whisperx.py
|
|
67
|
+
batchalign/pipelines/avqi/__init__.py
|
|
68
|
+
batchalign/pipelines/avqi/engine.py
|
|
67
69
|
batchalign/pipelines/cleanup/__init__.py
|
|
68
70
|
batchalign/pipelines/cleanup/cleanup.py
|
|
69
71
|
batchalign/pipelines/cleanup/disfluencies.py
|
|
@@ -72,6 +74,8 @@ batchalign/pipelines/cleanup/retrace.py
|
|
|
72
74
|
batchalign/pipelines/cleanup/support/filled_pauses.eng
|
|
73
75
|
batchalign/pipelines/cleanup/support/replacements.eng
|
|
74
76
|
batchalign/pipelines/cleanup/support/test.test
|
|
77
|
+
batchalign/pipelines/diarization/__init__.py
|
|
78
|
+
batchalign/pipelines/diarization/pyannote.py
|
|
75
79
|
batchalign/pipelines/fa/__init__.py
|
|
76
80
|
batchalign/pipelines/fa/wave2vec_fa.py
|
|
77
81
|
batchalign/pipelines/fa/whisper_fa.py
|
|
@@ -7,6 +7,8 @@ pydub
|
|
|
7
7
|
plotly>=5.3.0
|
|
8
8
|
transformers>=4.38.2
|
|
9
9
|
tokenizers>=0.14.1
|
|
10
|
+
numba>=0.61.0
|
|
11
|
+
numpy<=2.2
|
|
10
12
|
pycountry>=22.3
|
|
11
13
|
stanza[transformers]>=1.10.1
|
|
12
14
|
scipy~=1.11
|
|
@@ -25,7 +27,11 @@ tiktoken
|
|
|
25
27
|
blobfile
|
|
26
28
|
sentencepiece
|
|
27
29
|
googletrans
|
|
28
|
-
openai-whisper
|
|
30
|
+
openai-whisper
|
|
31
|
+
llvmlite>=0.44.0
|
|
32
|
+
praat-parselmouth==0.4.6
|
|
33
|
+
pyannote.audio
|
|
34
|
+
onnxruntime
|
|
29
35
|
|
|
30
36
|
[dev]
|
|
31
37
|
pytest
|
|
@@ -39,6 +39,8 @@ setup(
|
|
|
39
39
|
"plotly>=5.3.0",
|
|
40
40
|
"transformers>=4.38.2",
|
|
41
41
|
"tokenizers>=0.14.1",
|
|
42
|
+
"numba>=0.61.0",
|
|
43
|
+
"numpy<=2.2",
|
|
42
44
|
"pycountry>=22.3",
|
|
43
45
|
"stanza[transformers]>=1.10.1",
|
|
44
46
|
"scipy~=1.11",
|
|
@@ -57,7 +59,11 @@ setup(
|
|
|
57
59
|
"blobfile",
|
|
58
60
|
"sentencepiece",
|
|
59
61
|
"googletrans",
|
|
60
|
-
"openai-whisper"
|
|
62
|
+
"openai-whisper",
|
|
63
|
+
"llvmlite>=0.44.0",
|
|
64
|
+
"praat-parselmouth==0.4.6", # Added parselmouth for AVQI
|
|
65
|
+
"pyannote.audio",
|
|
66
|
+
"onnxruntime"
|
|
61
67
|
],
|
|
62
68
|
extras_require={
|
|
63
69
|
'dev': [
|
|
@@ -113,5 +119,3 @@ setup(
|
|
|
113
119
|
"Topic :: Utilities"
|
|
114
120
|
]
|
|
115
121
|
)
|
|
116
|
-
|
|
117
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/textgrid/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/textgrid/generator.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/utterance/__init__.py
RENAMED
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/utterance/cantonese_infer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/analysis/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/asr/num2chinese.py
RENAMED
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/asr/oai_whisper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/cleanup/__init__.py
RENAMED
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/cleanup/cleanup.py
RENAMED
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/cleanup/disfluencies.py
RENAMED
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/cleanup/parse_support.py
RENAMED
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/cleanup/retrace.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/cleanup/support/test.test
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/morphosyntax/__init__.py
RENAMED
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/morphosyntax/coref.py
RENAMED
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/morphosyntax/en/irr.py
RENAMED
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/morphosyntax/fr/apm.py
RENAMED
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/morphosyntax/fr/apmn.py
RENAMED
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/morphosyntax/fr/case.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/morphosyntax/ud.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/speaker/__init__.py
RENAMED
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/speaker/nemo_speaker.py
RENAMED
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/translate/__init__.py
RENAMED
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/translate/gtrans.py
RENAMED
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/translate/seamless.py
RENAMED
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/translate/utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/utr/whisper_utr.py
RENAMED
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/utterance/__init__.py
RENAMED
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/utterance/ud_utterance.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/formats/chat/test_chat_file.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/pipelines/test_pipeline.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|