batchalign 0.7.20.post18__tar.gz → 0.7.21.post1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. {batchalign-0.7.20.post18/batchalign.egg-info → batchalign-0.7.21.post1}/PKG-INFO +10 -3
  2. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/cli/cli.py +46 -0
  3. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/cli/dispatch.py +2 -0
  4. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/__init__.py +3 -0
  5. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/asr/rev.py +2 -2
  6. batchalign-0.7.21.post1/batchalign/pipelines/avqi/__init__.py +8 -0
  7. batchalign-0.7.21.post1/batchalign/pipelines/avqi/engine.py +264 -0
  8. batchalign-0.7.21.post1/batchalign/pipelines/diarization/__init__.py +1 -0
  9. batchalign-0.7.21.post1/batchalign/pipelines/diarization/pyannote.py +103 -0
  10. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/dispatch.py +7 -2
  11. batchalign-0.7.21.post1/batchalign/version +3 -0
  12. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1/batchalign.egg-info}/PKG-INFO +10 -3
  13. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign.egg-info/SOURCES.txt +4 -0
  14. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign.egg-info/requires.txt +7 -1
  15. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/setup.py +7 -3
  16. batchalign-0.7.20.post18/batchalign/version +0 -3
  17. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/LICENSE +0 -0
  18. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/MANIFEST.in +0 -0
  19. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/README.md +0 -0
  20. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/__init__.py +0 -0
  21. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/__main__.py +0 -0
  22. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/cli/__init__.py +0 -0
  23. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/constants.py +0 -0
  24. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/document.py +0 -0
  25. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/errors.py +0 -0
  26. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/__init__.py +0 -0
  27. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/base.py +0 -0
  28. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/chat/__init__.py +0 -0
  29. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/chat/file.py +0 -0
  30. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/chat/generator.py +0 -0
  31. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/chat/lexer.py +0 -0
  32. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/chat/parser.py +0 -0
  33. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/chat/utils.py +0 -0
  34. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/textgrid/__init__.py +0 -0
  35. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/textgrid/file.py +0 -0
  36. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/textgrid/generator.py +0 -0
  37. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/formats/textgrid/parser.py +0 -0
  38. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/__init__.py +0 -0
  39. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/resolve.py +0 -0
  40. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/speaker/__init__.py +0 -0
  41. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/speaker/config.yaml +0 -0
  42. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/speaker/infer.py +0 -0
  43. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/speaker/utils.py +0 -0
  44. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/training/__init__.py +0 -0
  45. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/training/run.py +0 -0
  46. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/training/utils.py +0 -0
  47. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/utils.py +0 -0
  48. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/utterance/__init__.py +0 -0
  49. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/utterance/cantonese_infer.py +0 -0
  50. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/utterance/dataset.py +0 -0
  51. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/utterance/execute.py +0 -0
  52. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/utterance/infer.py +0 -0
  53. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/utterance/prep.py +0 -0
  54. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/utterance/train.py +0 -0
  55. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/wave2vec/__init__.py +0 -0
  56. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/wave2vec/infer_fa.py +0 -0
  57. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/whisper/__init__.py +0 -0
  58. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/whisper/infer_asr.py +0 -0
  59. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/models/whisper/infer_fa.py +0 -0
  60. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/analysis/__init__.py +0 -0
  61. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/analysis/eval.py +0 -0
  62. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/asr/__init__.py +0 -0
  63. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/asr/num2chinese.py +0 -0
  64. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/asr/oai_whisper.py +0 -0
  65. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/asr/utils.py +0 -0
  66. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/asr/whisper.py +0 -0
  67. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/asr/whisperx.py +0 -0
  68. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/base.py +0 -0
  69. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/cleanup/__init__.py +0 -0
  70. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  71. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  72. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  73. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/cleanup/retrace.py +0 -0
  74. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  75. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  76. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/cleanup/support/test.test +0 -0
  77. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/fa/__init__.py +0 -0
  78. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  79. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  80. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  81. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  82. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  83. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  84. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  85. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  86. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  87. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  88. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/pipeline.py +0 -0
  89. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/speaker/__init__.py +0 -0
  90. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  91. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/translate/__init__.py +0 -0
  92. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/translate/gtrans.py +0 -0
  93. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/translate/seamless.py +0 -0
  94. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/translate/utils.py +0 -0
  95. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/utr/__init__.py +0 -0
  96. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/utr/rev_utr.py +0 -0
  97. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/utr/utils.py +0 -0
  98. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  99. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/utterance/__init__.py +0 -0
  100. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  101. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/__init__.py +0 -0
  102. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/conftest.py +0 -0
  103. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  104. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  105. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  106. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  107. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  108. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  109. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  110. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  111. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  112. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  113. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  114. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  115. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/pipelines/fixures.py +0 -0
  116. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  117. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  118. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/tests/test_document.py +0 -0
  119. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/utils/__init__.py +0 -0
  120. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/utils/abbrev.py +0 -0
  121. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/utils/compounds.py +0 -0
  122. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/utils/config.py +0 -0
  123. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/utils/dp.py +0 -0
  124. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/utils/names.py +0 -0
  125. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign/utils/utils.py +0 -0
  126. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign.egg-info/dependency_links.txt +0 -0
  127. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign.egg-info/entry_points.txt +0 -0
  128. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/batchalign.egg-info/top_level.txt +0 -0
  129. {batchalign-0.7.20.post18 → batchalign-0.7.21.post1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: batchalign
3
- Version: 0.7.20.post18
3
+ Version: 0.7.21.post1
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -17,6 +17,8 @@ Requires-Dist: pydub
17
17
  Requires-Dist: plotly>=5.3.0
18
18
  Requires-Dist: transformers>=4.38.2
19
19
  Requires-Dist: tokenizers>=0.14.1
20
+ Requires-Dist: numba>=0.61.0
21
+ Requires-Dist: numpy<=2.2
20
22
  Requires-Dist: pycountry>=22.3
21
23
  Requires-Dist: stanza[transformers]>=1.10.1
22
24
  Requires-Dist: scipy~=1.11
@@ -35,7 +37,11 @@ Requires-Dist: tiktoken
35
37
  Requires-Dist: blobfile
36
38
  Requires-Dist: sentencepiece
37
39
  Requires-Dist: googletrans
38
- Requires-Dist: openai-whisper>=20240930
40
+ Requires-Dist: openai-whisper
41
+ Requires-Dist: llvmlite>=0.44.0
42
+ Requires-Dist: praat-parselmouth==0.4.6
43
+ Requires-Dist: pyannote.audio
44
+ Requires-Dist: onnxruntime
39
45
  Provides-Extra: dev
40
46
  Requires-Dist: pytest; extra == "dev"
41
47
  Provides-Extra: train
@@ -48,6 +54,7 @@ Dynamic: author-email
48
54
  Dynamic: classifier
49
55
  Dynamic: description
50
56
  Dynamic: description-content-type
57
+ Dynamic: license-file
51
58
  Dynamic: provides-extra
52
59
  Dynamic: requires-dist
53
60
  Dynamic: summary
@@ -356,6 +356,51 @@ def benchmark(ctx, in_dir, out_dir, lang, num_speakers, whisper, **kwargs):
356
356
  in_dir, out_dir,
357
357
  loader, writer, C,
358
358
  asr="whisper" if whisper else "rev", **kwargs)
359
+
360
+
361
+ #################### AVQI ################################
362
+
363
@batchalign.command()
@click.argument("cs_file", type=click.Path(exists=True, file_okay=True, dir_okay=False))
@click.argument("sv_file", type=click.Path(exists=True, file_okay=True, dir_okay=False))
@click.option("--lang",
              help="sample language in three-letter ISO 3166-1 alpha-3 code",
              show_default=True,
              default="eng",
              type=str)
@click.pass_context
def avqi(ctx, cs_file, sv_file, lang, **kwargs):
    """Calculate Acoustic Voice Quality Index (AVQI) from continuous speech and sustained vowel audio files."""
    # CS_FILE and SV_FILE must be existing regular files; directories are
    # rejected by click before this body runs (dir_okay=False).

    # Imported lazily so the Praat bindings are only loaded for this command.
    from batchalign.pipelines.avqi import AVQIEngine

    # The report is written next to the continuous-speech file, replacing its
    # final suffix with ".avqi.txt".
    output_file = Path(cs_file).with_suffix('.avqi.txt')

    avqi_engine = AVQIEngine()

    try:
        C.print("\n[blue]Calculating AVQI[/blue] for:")
        C.print(f"  Continuous Speech: [cyan]{cs_file}[/cyan]")
        C.print(f"  Sustained Vowel:   [cyan]{sv_file}[/cyan]")
        C.print(f"  Language:          [cyan]{lang}[/cyan]")
        C.print(f"  Output:            [cyan]{output_file}[/cyan]\n")

        results = avqi_engine.analyze(cs_file, sv_file, str(output_file), lang)

        # AVQIEngine.analyze reports failure in-band by returning a dict whose
        # values are all 0.0 (and it writes no output file in that case). A
        # real computation cannot produce that: with all six features at zero
        # the AVQI formula yields a non-zero score. Treat it as an error
        # instead of announcing success.
        if not any(results.values()):
            raise RuntimeError(
                "AVQI calculation failed and no results were written; "
                "see the log output for details")

        C.print("[bold green]✓ AVQI calculation completed![/bold green]")
        C.print(f"[bold]AVQI Score: {results['avqi']:.3f}[/bold]")
        C.print(f"Results saved to: [cyan]{output_file}[/cyan]\n")

    except Exception as e:
        C.print(f"[bold red]ERROR[/bold red]: {str(e)}")
        if ctx.obj["verbose"] > 0:
            import traceback
            C.print(traceback.format_exc())
        # Signal the failure to the calling shell/script.
        ctx.exit(1)
359
404
 
360
405
 
361
406
  #################### SETUP ################################
@@ -379,3 +424,4 @@ def version(ctx, **kwargs):
379
424
  f"[italic]{RELEASE_NOTES.strip()}[/italic]"+"\n" +
380
425
  "\nDeveloped by Brian MacWhinney and Houjun Liu")
381
426
  C.print("\n\n"+ptr+"\n\n")
427
+
@@ -163,9 +163,11 @@ def _dispatch(command, lang, num_speakers,
163
163
  callback=lambda *args:progress_callback(file, *args),
164
164
  **kw)
165
165
  msgs = [escape(str(i.message)).strip() for i in w]
166
+ msgs = [i for i in msgs if "torchaudio" not in i.lower()]
166
167
  # write the format, as needed
167
168
  writer(doc, output)
168
169
  # print any warnings
170
+
169
171
  if len(msgs) > 0:
170
172
  if ctx.obj["verbose"] > 1:
171
173
  Console().print(f"\n[bold yellow]WARN[/bold yellow] on {file}:\n","\n".join(msgs)+"\n")
@@ -13,3 +13,6 @@ from .analysis import EvaluationEngine
13
13
  from .utterance import StanzaUtteranceEngine
14
14
 
15
15
  from .translate import SeamlessTranslationModel, GoogleTranslateEngine
16
+ from .avqi import AVQIEngine
17
+
18
+ from .diarization import PyannoteEngine
@@ -28,9 +28,9 @@ class RevEngine(BatchalignEngine):
28
28
  # if there is no utterance segmentation scheme, we only
29
29
  # run ASR
30
30
  if self.__engine:
31
- return [ Task.ASR, Task.SPEAKER_RECOGNITION, Task.UTTERANCE_SEGMENTATION ]
31
+ return [ Task.ASR, Task.UTTERANCE_SEGMENTATION ]
32
32
  else:
33
- return [ Task.ASR, Task.SPEAKER_RECOGNITION ]
33
+ return [ Task.ASR ]
34
34
 
35
35
  def __init__(self, key:str=None, lang="eng", num_speakers=2):
36
36
 
@@ -0,0 +1,8 @@
1
+ """
2
+ AVQI Pipeline Module
3
+ Acoustic Voice Quality Index calculation
4
+ """
5
+
6
+ from .engine import AVQIEngine
7
+
8
+ __all__ = ['AVQIEngine']
@@ -0,0 +1,264 @@
1
+ """
2
+ AVQI Engine for Batchalign2
3
+ Acoustic Voice Quality Index calculation for voice quality assessment
4
+ """
5
+
6
+ import parselmouth
7
+ import numpy as np
8
+ from parselmouth.praat import call
9
+ import re
10
+ from typing import Tuple, Dict, Optional
11
+ import os
12
+ from pathlib import Path
13
+ import logging
14
+
15
+ from batchalign.pipelines.base import BatchalignEngine
16
+ from batchalign.document import Task
17
+
18
+
19
+ L = logging.getLogger('batchalign')
20
+
21
+
22
class AVQIEngine(BatchalignEngine):
    """Engine for calculating the Acoustic Voice Quality Index (AVQI).

    The score combines six acoustic measures (CPPS, HNR, shimmer local,
    shimmer local dB, LTAS slope and LTAS tilt). They are computed with Praat
    via ``parselmouth`` on the voiced portion of a continuous-speech recording
    concatenated with, at most, the final three seconds of a sustained-vowel
    recording.
    """

    def __init__(self):
        super().__init__()
        self._tasks = [Task.FEATURE_EXTRACT]

    @property
    def tasks(self):
        """List of tasks this engine advertises."""
        return self._tasks

    def extract_voiced_segments(self, sound):
        """Extract voiced segments from audio.

        Silences are removed with Praat's "To TextGrid (silences)"; the
        remaining audio is scanned in consecutive 30 ms windows, and every
        window whose power exceeds 30% of the overall power is kept.

        Parameters
        ----------
        sound : parselmouth.Sound
            Audio to scan.

        Returns
        -------
        parselmouth.Sound
            A 1 ms silent seed sound followed by all retained windows. When no
            window qualifies, only the seed sound is returned.
        """
        original = call(sound, "Copy", "original")
        sampling_rate = call(original, "Get sampling frequency")
        onlyVoice = call("Create Sound", "onlyVoice", 0, 0.001, sampling_rate, "0")
        textgrid = call(
            original,
            "To TextGrid (silences)",
            50,
            0.003,
            -25,
            0.1,
            0.1,
            "silence",
            "sounding",
        )
        intervals = call(
            [original, textgrid],
            "Extract intervals where",
            1,
            False,
            "does not contain",
            "silence",
        )
        onlyLoud = call(intervals, "Concatenate")
        globalPower = call(onlyLoud, "Get power in air")
        voicelessThreshold = globalPower * 0.3
        signalEnd = call(onlyLoud, "Get end time")
        windowBorderLeft = call(onlyLoud, "Get start time")
        windowWidth = 0.03

        # Collect the retained windows and concatenate them once at the end;
        # concatenating pairwise inside the loop re-copies the growing sound
        # on every iteration.
        voiced_parts = []
        while windowBorderLeft + windowWidth <= signalEnd:
            part = call(
                onlyLoud,
                "Extract part",
                windowBorderLeft,
                windowBorderLeft + windowWidth,
                "Rectangular",
                1.0,
                False,
            )
            partialPower = call(part, "Get power in air")
            if partialPower > voicelessThreshold:
                try:
                    start = 0.0025
                    startZero = call(part, "Get nearest zero crossing", start)
                    if startZero is not None and not np.isinf(startZero):
                        voiced_parts.append(part)
                except Exception:
                    # Best effort: a window Praat cannot analyse is skipped.
                    # NOTE(review): if this fires for every window, check the
                    # argument list of "Get nearest zero crossing" against the
                    # bundled Praat version (some versions may expect a
                    # channel argument) — TODO confirm.
                    L.debug("Skipping window at %.3fs while extracting voiced segments",
                            windowBorderLeft, exc_info=True)
            windowBorderLeft += windowWidth

        if voiced_parts:
            onlyVoice = call([onlyVoice, *voiced_parts], "Concatenate")
        return onlyVoice

    def _ltas_tilt(self, ltas, slope):
        """Return the slope of the LTAS trend line, or ``slope + 5.5`` as fallback.

        The trend line is first computed over 1-10000 Hz and, if its slope is
        within 0.01 of ``slope``, again over 100-8000 Hz. If both attempts stay
        within 0.01 of ``slope``, or Praat raises, ``slope + 5.5`` is returned.
        """
        tilt = slope + 5.5
        try:
            for copy_name, fmin, fmax in (("ltas_for_tilt", 1, 10000),
                                          ("ltas_for_tilt2", 100, 8000)):
                ltas_copy = call(ltas, "Copy", copy_name)
                # "Compute trend line" hands back the trend as its result;
                # measuring the untouched copy instead just reproduces
                # ``slope``. Fall back to the copy in case the command worked
                # in place and returned nothing.
                trend = call(ltas_copy, "Compute trend line", fmin, fmax)
                if trend is None:
                    trend = ltas_copy
                tilt = call(trend, "Get slope", 0, 1000, 1000, 10000, "energy")
                if not abs(tilt - slope) < 0.01:
                    return tilt
            tilt = slope + 5.5
        except Exception:
            L.debug("LTAS trend line failed; using slope + 5.5 as tilt", exc_info=True)
            tilt = slope + 5.5
        return tilt

    def calculate_avqi_features(self, cs_file, sv_file):
        """Calculate AVQI score and features from continuous speech and sustained vowel files.

        Parameters
        ----------
        cs_file : str
            Path to the continuous speech audio file.
        sv_file : str
            Path to the sustained vowel audio file.

        Returns
        -------
        tuple
            ``(avqi, features)`` where ``features`` is a dict with the keys
            ``cpps``, ``hnr``, ``shimmer_local``, ``shimmer_local_db``,
            ``slope`` and ``tilt``.
        """
        cs_sound = parselmouth.Sound(cs_file)
        sv_sound = parselmouth.Sound(sv_file)
        # Remove everything below 34 Hz from both recordings.
        cs_filtered = call(cs_sound, "Filter (stop Hann band)", 0, 34, 0.1)
        sv_filtered = call(sv_sound, "Filter (stop Hann band)", 0, 34, 0.1)
        voiced_cs = self.extract_voiced_segments(cs_filtered)

        # Only the final three seconds of the sustained vowel are used.
        sv_duration = call(sv_filtered, "Get total duration")
        if sv_duration > 3:
            sv_start = sv_duration - 3
            sv_part = call(
                sv_filtered, "Extract part", sv_start, sv_duration, "rectangular", 1, False
            )
        else:
            sv_part = call(sv_filtered, "Copy", "sv_part")
        concatenated = call([voiced_cs, sv_part], "Concatenate")

        # Smoothed cepstral peak prominence.
        powercepstrogram = call(concatenated, "To PowerCepstrogram", 60, 0.002, 5000, 50)
        cpps = call(
            powercepstrogram,
            "Get CPPS",
            False,
            0.01,
            0.001,
            60,
            330,
            0.05,
            "Parabolic",
            0.001,
            0,
            "Straight",
            "Robust",
        )

        # Long-term average spectrum: slope and trend-line tilt.
        ltas = call(concatenated, "To Ltas", 1)
        slope = call(ltas, "Get slope", 0, 1000, 1000, 10000, "energy")
        tilt = self._ltas_tilt(ltas, slope)

        # Shimmer measures.
        pointprocess = call(concatenated, "To PointProcess (periodic, cc)", 50, 400)
        shim_fraction = call(
            [concatenated, pointprocess],
            "Get shimmer (local)",
            0,
            0,
            0.0001,
            0.02,
            1.3,
            1.6,
        )
        shim = shim_fraction * 100
        shdb = call(
            [concatenated, pointprocess],
            "Get shimmer (local_dB)",
            0,
            0,
            0.0001,
            0.02,
            1.3,
            1.6,
        )

        # Harmonics-to-noise ratio, parsed out of Praat's textual voice report.
        pitch = call(
            concatenated,
            "To Pitch (cc)",
            0,
            75,
            15,
            False,
            0.03,
            0.45,
            0.01,
            0.35,
            0.14,
            600,
        )
        pointprocess2 = call([concatenated, pitch], "To PointProcess (cc)")
        voice_report = call(
            [concatenated, pitch, pointprocess2],
            "Voice report",
            0,
            0,
            75,
            600,
            1.3,
            1.6,
            0.03,
            0.45,
        )
        hnr_match = re.search(
            r"Mean harmonics-to-noise ratio:\s*([-+]?\d*\.?\d+)", voice_report
        )
        if hnr_match:
            hnr = float(hnr_match.group(1))
        else:
            # Keep the existing 0.0 fallback, but make it visible.
            L.warning("Could not find the mean HNR in the Praat voice report; using 0.0")
            hnr = 0.0

        avqi = (
            4.152
            - (0.177 * cpps)
            - (0.006 * hnr)
            - (0.037 * shim)
            + (0.941 * shdb)
            + (0.01 * slope)
            + (0.093 * tilt)
        ) * 2.8902
        return avqi, {
            "cpps": cpps,
            "hnr": hnr,
            "shimmer_local": shim,
            "shimmer_local_db": shdb,
            "slope": slope,
            "tilt": tilt,
        }

    def analyze(self, cs_file: str, sv_file: str, output_file: str, lang: str = 'eng', **kwargs) -> Dict:
        """
        Analyze audio files and calculate AVQI.

        Parameters
        ----------
        cs_file : str
            Path to continuous speech audio file
        sv_file : str
            Path to sustained vowel audio file
        output_file : str
            Path to output file
        lang : str
            Language code (default: 'eng'); only echoed into the output file
        **kwargs
            Accepted and ignored.

        Returns
        -------
        Dict
            Dictionary containing AVQI score and features. If anything fails,
            the error is logged, nothing is raised, and every value in the
            returned dictionary is 0.0.
        """
        L.info(f"Calculating AVQI for CS: {cs_file}, SV: {sv_file}")

        try:
            # Calculate AVQI using the proper algorithm
            avqi_score, features = self.calculate_avqi_features(cs_file, sv_file)

            # Prepare results
            results = {'avqi': avqi_score, **features}

            # Write results to file
            report = (
                f"AVQI: {avqi_score:.3f}\n"
                f"CPPS: {features['cpps']:.3f}\n"
                f"HNR: {features['hnr']:.3f}\n"
                f"Shimmer Local: {features['shimmer_local']:.3f}\n"
                f"Shimmer Local dB: {features['shimmer_local_db']:.3f}\n"
                f"LTAS Slope: {features['slope']:.3f}\n"
                f"LTAS Tilt: {features['tilt']:.3f}\n"
                f"Language: {lang}\n"
            )
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(report)

            L.info(f"AVQI results written to: {output_file}")
            return results

        except Exception as e:
            L.error(f"Error calculating AVQI: {e}")
            L.debug("AVQI failure traceback", exc_info=True)
            # Return default values on error.
            # NOTE(review): an all-zero dict is the only failure signal that
            # callers get; consider raising instead once callers are audited.
            return {
                'avqi': 0.0,
                'cpps': 0.0,
                'hnr': 0.0,
                'shimmer_local': 0.0,
                'shimmer_local_db': 0.0,
                'slope': 0.0,
                'tilt': 0.0
            }
@@ -0,0 +1 @@
1
+ from .pyannote import PyannoteEngine
@@ -0,0 +1,103 @@
1
+ # system utils
2
+ import glob, os, re
3
+ from itertools import groupby
4
+
5
+ # pathing tools
6
+ from pathlib import Path
7
+
8
+ # UD tools
9
+ import stanza
10
+
11
+ import copy
12
+
13
+ from stanza.utils.conll import CoNLL
14
+ from stanza import Document, DownloadMethod
15
+ from stanza.models.common.doc import Token
16
+ from stanza.pipeline.core import CONSTITUENCY
17
+ from stanza import DownloadMethod
18
+ from torch import heaviside
19
+
20
+ from stanza.pipeline.processor import ProcessorVariant, register_processor_variant
21
+ from stanza.resources.common import download_resources_json, load_resources_json, get_language_resources
22
+
23
+ # the loading bar
24
+ from tqdm import tqdm
25
+
26
+ from bdb import BdbQuit
27
+
28
+ from nltk import word_tokenize
29
+ from collections import defaultdict
30
+
31
+ import warnings
32
+
33
+ from stanza.utils.conll import CoNLL
34
+
35
+ # Oneliner of directory-based glob and replace
36
def globase(path, statement):
    """Return the paths inside directory ``path`` that match glob pattern ``statement``."""
    return glob.glob(os.path.join(path, statement))


def repath_file(file_path, new_dir):
    """Return ``file_path``'s file name joined onto ``new_dir``."""
    # Only ``Path`` is imported in this module (not ``pathlib``), so the name
    # must be used directly.
    return os.path.join(new_dir, Path(file_path).name)
38
+
39
+
40
+ from batchalign.document import *
41
+ from batchalign.constants import *
42
+ from batchalign.pipelines.base import *
43
+ from batchalign.formats.chat.parser import chat_parse_utterance
44
+
45
+ from batchalign.utils.dp import *
46
+
47
+ from pyannote.audio import Pipeline
48
+
49
+ import logging
50
+ L = logging.getLogger("batchalign")
51
+
52
+ import pycountry
53
+
54
class PyannoteEngine(BatchalignEngine):
    """Speaker-recognition engine that relabels utterances using a pyannote pipeline.

    The pretrained pipeline "talkbank/dia-fork" is loaded at construction time.
    ``process`` runs it on the document's media file and assigns each
    time-aligned utterance to a tier derived from the diarization labels.
    """

    tasks = [ Task.SPEAKER_RECOGNITION ]
    status_hook = None

    def __init__(self, num_speakers=2):
        """Load the diarization pipeline; ``num_speakers`` is passed to it on every run."""
        self.pipe = Pipeline.from_pretrained("talkbank/dia-fork")
        self.num_speakers = num_speakers

    def process(self, doc):
        """Diarize ``doc.media.url`` and set ``tier`` on the aligned utterances.

        Utterances without a complete alignment are left untouched, as are
        utterances for which no diarization segment ends at or before the
        utterance's end time. The document is modified in place and returned.
        """
        assert doc.media is not None and doc.media.url is not None, f"We cannot diarize something that doesn't have a media path! Provided media tier='{doc.media}'"
        res = self.pipe(doc.media.url, num_speakers=self.num_speakers)

        # Read the diarization output once: (segment end, numeric speaker id)
        # in the order the pipeline yields its tracks. The id is the integer
        # after the last "_" of the label.
        tracks = [(seg_end, int(label.split("_")[-1]))
                  for (_, seg_end), _, label in res.itertracks(yield_label=True)]

        # One tier per detected speaker, inheriting language and corpus from
        # the document's first tier.
        corpus = doc.tiers[0].corpus
        lang = doc.tiers[0].lang
        tiers = {
            speaker_id:
            Tier(
                lang=lang, corpus=corpus,
                id="PAR"+str(speaker_id), name="Participant",
                birthday="",
            )
            for speaker_id in sorted({speaker_id for _, speaker_id in tracks})
        }
        segment_tiers = [(seg_end, tiers.get(speaker_id)) for seg_end, speaker_id in tracks]

        for utterance in doc.content:
            if not isinstance(utterance, Utterance):
                continue
            if utterance.alignment is None:
                continue
            start, end = utterance.alignment
            if start is None or end is None:
                continue

            # The alignment end is divided by 1000 before it is compared with
            # segment ends (presumably milliseconds vs. seconds — TODO confirm).
            utterance_end = end/1000

            # we set the tier of the utterance from the
            # *LAST* segment it ends before
            # i.e. [seg_end, ....., ut_end]
            # Scanning backwards and stopping at the first hit selects that
            # same segment.
            for seg_end, tier in reversed(segment_tiers):
                if seg_end <= utterance_end and tier:
                    utterance.tier = tier
                    break

        # NOTE(review): the document's tier list is deliberately left as-is;
        # the original kept the following assignment disabled.
        # doc.tiers = list(tiers.values())

        return doc
102
+
103
+
@@ -7,7 +7,7 @@ from batchalign import (WhisperEngine, WhisperFAEngine, StanzaEngine, RevEngine,
7
7
  NgramRetraceEngine, DisfluencyReplacementEngine, WhisperUTREngine,
8
8
  RevUTREngine, EvaluationEngine, WhisperXEngine, NemoSpeakerEngine,
9
9
  StanzaUtteranceEngine, CorefEngine, Wave2VecFAEngine, SeamlessTranslationModel,
10
- GoogleTranslateEngine, OAIWhisperEngine)
10
+ GoogleTranslateEngine, OAIWhisperEngine, PyannoteEngine)
11
11
  from batchalign import BatchalignPipeline
12
12
  from batchalign.models import resolve
13
13
 
@@ -22,7 +22,7 @@ DEFAULT_PACKAGES = {
22
22
  "asr": "whisper_oai",
23
23
  "utr": "whisper_utr",
24
24
  "fa": "whisper_fa",
25
- "speaker": "nemo_speaker",
25
+ "speaker": "pyannote",
26
26
  "morphosyntax": "stanza",
27
27
  "disfluency": "replacement",
28
28
  "retracing": "ngram",
@@ -72,6 +72,8 @@ def dispatch_pipeline(pkg_str, lang, num_speakers=None, **arg_overrides):
72
72
  # if asr is in engines but disfluency or retracing is not
73
73
  # add them
74
74
  if "asr" in packages:
75
+ if "speaker" not in packages:
76
+ packages.append("speaker")
75
77
  if "disfluency" not in packages:
76
78
  packages.append("disfluency")
77
79
  if "retracing" not in packages:
@@ -137,6 +139,9 @@ def dispatch_pipeline(pkg_str, lang, num_speakers=None, **arg_overrides):
137
139
  engines.append(GoogleTranslateEngine())
138
140
  elif engine == "whisper_oai":
139
141
  engines.append(OAIWhisperEngine())
142
+ elif engine == "pyannote":
143
+ engines.append(PyannoteEngine())
144
+
140
145
 
141
146
  L.debug(f"Done initalizing packages.")
142
147
  return BatchalignPipeline(*engines)
@@ -0,0 +1,3 @@
1
+ 0.7.21-post.1
2
+ August 28, 2025
3
+ Pyannote support
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: batchalign
3
- Version: 0.7.20.post18
3
+ Version: 0.7.21.post1
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -17,6 +17,8 @@ Requires-Dist: pydub
17
17
  Requires-Dist: plotly>=5.3.0
18
18
  Requires-Dist: transformers>=4.38.2
19
19
  Requires-Dist: tokenizers>=0.14.1
20
+ Requires-Dist: numba>=0.61.0
21
+ Requires-Dist: numpy<=2.2
20
22
  Requires-Dist: pycountry>=22.3
21
23
  Requires-Dist: stanza[transformers]>=1.10.1
22
24
  Requires-Dist: scipy~=1.11
@@ -35,7 +37,11 @@ Requires-Dist: tiktoken
35
37
  Requires-Dist: blobfile
36
38
  Requires-Dist: sentencepiece
37
39
  Requires-Dist: googletrans
38
- Requires-Dist: openai-whisper>=20240930
40
+ Requires-Dist: openai-whisper
41
+ Requires-Dist: llvmlite>=0.44.0
42
+ Requires-Dist: praat-parselmouth==0.4.6
43
+ Requires-Dist: pyannote.audio
44
+ Requires-Dist: onnxruntime
39
45
  Provides-Extra: dev
40
46
  Requires-Dist: pytest; extra == "dev"
41
47
  Provides-Extra: train
@@ -48,6 +54,7 @@ Dynamic: author-email
48
54
  Dynamic: classifier
49
55
  Dynamic: description
50
56
  Dynamic: description-content-type
57
+ Dynamic: license-file
51
58
  Dynamic: provides-extra
52
59
  Dynamic: requires-dist
53
60
  Dynamic: summary
@@ -64,6 +64,8 @@ batchalign/pipelines/asr/rev.py
64
64
  batchalign/pipelines/asr/utils.py
65
65
  batchalign/pipelines/asr/whisper.py
66
66
  batchalign/pipelines/asr/whisperx.py
67
+ batchalign/pipelines/avqi/__init__.py
68
+ batchalign/pipelines/avqi/engine.py
67
69
  batchalign/pipelines/cleanup/__init__.py
68
70
  batchalign/pipelines/cleanup/cleanup.py
69
71
  batchalign/pipelines/cleanup/disfluencies.py
@@ -72,6 +74,8 @@ batchalign/pipelines/cleanup/retrace.py
72
74
  batchalign/pipelines/cleanup/support/filled_pauses.eng
73
75
  batchalign/pipelines/cleanup/support/replacements.eng
74
76
  batchalign/pipelines/cleanup/support/test.test
77
+ batchalign/pipelines/diarization/__init__.py
78
+ batchalign/pipelines/diarization/pyannote.py
75
79
  batchalign/pipelines/fa/__init__.py
76
80
  batchalign/pipelines/fa/wave2vec_fa.py
77
81
  batchalign/pipelines/fa/whisper_fa.py
@@ -7,6 +7,8 @@ pydub
7
7
  plotly>=5.3.0
8
8
  transformers>=4.38.2
9
9
  tokenizers>=0.14.1
10
+ numba>=0.61.0
11
+ numpy<=2.2
10
12
  pycountry>=22.3
11
13
  stanza[transformers]>=1.10.1
12
14
  scipy~=1.11
@@ -25,7 +27,11 @@ tiktoken
25
27
  blobfile
26
28
  sentencepiece
27
29
  googletrans
28
- openai-whisper>=20240930
30
+ openai-whisper
31
+ llvmlite>=0.44.0
32
+ praat-parselmouth==0.4.6
33
+ pyannote.audio
34
+ onnxruntime
29
35
 
30
36
  [dev]
31
37
  pytest
@@ -39,6 +39,8 @@ setup(
39
39
  "plotly>=5.3.0",
40
40
  "transformers>=4.38.2",
41
41
  "tokenizers>=0.14.1",
42
+ "numba>=0.61.0",
43
+ "numpy<=2.2",
42
44
  "pycountry>=22.3",
43
45
  "stanza[transformers]>=1.10.1",
44
46
  "scipy~=1.11",
@@ -57,7 +59,11 @@ setup(
57
59
  "blobfile",
58
60
  "sentencepiece",
59
61
  "googletrans",
60
- "openai-whisper>=20240930"
62
+ "openai-whisper",
63
+ "llvmlite>=0.44.0",
64
+ "praat-parselmouth==0.4.6", # Added parselmouth for AVQI
65
+ "pyannote.audio",
66
+ "onnxruntime"
61
67
  ],
62
68
  extras_require={
63
69
  'dev': [
@@ -113,5 +119,3 @@ setup(
113
119
  "Topic :: Utilities"
114
120
  ]
115
121
  )
116
-
117
-
@@ -1,3 +0,0 @@
1
- 0.7.20-post.18
2
- August 19, 2025
3
- Patch small bug