batchalign 0.7.21.post11__tar.gz → 0.7.22.post1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of batchalign might be problematic. Click here for more details.

Files changed (131)
  1. {batchalign-0.7.21.post11/batchalign.egg-info → batchalign-0.7.22.post1}/PKG-INFO +3 -2
  2. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/cli/cli.py +34 -1
  3. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/cli/dispatch.py +1 -0
  4. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/__init__.py +1 -0
  5. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/dispatch.py +13 -1
  6. batchalign-0.7.22.post1/batchalign/pipelines/opensmile/__init__.py +7 -0
  7. batchalign-0.7.22.post1/batchalign/pipelines/opensmile/engine.py +191 -0
  8. batchalign-0.7.22.post1/batchalign/version +3 -0
  9. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1/batchalign.egg-info}/PKG-INFO +3 -2
  10. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign.egg-info/SOURCES.txt +2 -0
  11. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign.egg-info/requires.txt +2 -1
  12. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/setup.py +3 -2
  13. batchalign-0.7.21.post11/batchalign/version +0 -3
  14. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/LICENSE +0 -0
  15. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/MANIFEST.in +0 -0
  16. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/README.md +0 -0
  17. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/__init__.py +0 -0
  18. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/__main__.py +0 -0
  19. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/cli/__init__.py +0 -0
  20. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/constants.py +0 -0
  21. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/document.py +0 -0
  22. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/errors.py +0 -0
  23. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/formats/__init__.py +0 -0
  24. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/formats/base.py +0 -0
  25. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/formats/chat/__init__.py +0 -0
  26. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/formats/chat/file.py +0 -0
  27. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/formats/chat/generator.py +0 -0
  28. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/formats/chat/lexer.py +0 -0
  29. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/formats/chat/parser.py +0 -0
  30. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/formats/chat/utils.py +0 -0
  31. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/formats/textgrid/__init__.py +0 -0
  32. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/formats/textgrid/file.py +0 -0
  33. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/formats/textgrid/generator.py +0 -0
  34. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/formats/textgrid/parser.py +0 -0
  35. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/models/__init__.py +0 -0
  36. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/models/resolve.py +0 -0
  37. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/models/speaker/__init__.py +0 -0
  38. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/models/speaker/config.yaml +0 -0
  39. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/models/speaker/infer.py +0 -0
  40. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/models/speaker/utils.py +0 -0
  41. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/models/training/__init__.py +0 -0
  42. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/models/training/run.py +0 -0
  43. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/models/training/utils.py +0 -0
  44. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/models/utils.py +0 -0
  45. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/models/utterance/__init__.py +0 -0
  46. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/models/utterance/cantonese_infer.py +0 -0
  47. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/models/utterance/dataset.py +0 -0
  48. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/models/utterance/execute.py +0 -0
  49. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/models/utterance/infer.py +0 -0
  50. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/models/utterance/prep.py +0 -0
  51. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/models/utterance/train.py +0 -0
  52. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/models/wave2vec/__init__.py +0 -0
  53. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/models/wave2vec/infer_fa.py +0 -0
  54. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/models/whisper/__init__.py +0 -0
  55. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/models/whisper/infer_asr.py +0 -0
  56. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/models/whisper/infer_fa.py +0 -0
  57. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/analysis/__init__.py +0 -0
  58. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/analysis/eval.py +0 -0
  59. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/asr/__init__.py +0 -0
  60. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/asr/num2chinese.py +0 -0
  61. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/asr/oai_whisper.py +0 -0
  62. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/asr/rev.py +0 -0
  63. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/asr/utils.py +0 -0
  64. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/asr/whisper.py +0 -0
  65. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/asr/whisperx.py +0 -0
  66. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/avqi/__init__.py +0 -0
  67. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/avqi/engine.py +0 -0
  68. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/base.py +0 -0
  69. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/cleanup/__init__.py +0 -0
  70. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  71. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  72. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  73. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/cleanup/retrace.py +0 -0
  74. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  75. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  76. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/cleanup/support/test.test +0 -0
  77. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/diarization/__init__.py +0 -0
  78. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/diarization/pyannote.py +0 -0
  79. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/fa/__init__.py +0 -0
  80. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  81. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  82. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  83. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  84. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  85. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  86. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  87. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  88. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  89. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  90. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/pipeline.py +0 -0
  91. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/speaker/__init__.py +0 -0
  92. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  93. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/translate/__init__.py +0 -0
  94. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/translate/gtrans.py +0 -0
  95. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/translate/seamless.py +0 -0
  96. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/translate/utils.py +0 -0
  97. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/utr/__init__.py +0 -0
  98. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/utr/rev_utr.py +0 -0
  99. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/utr/utils.py +0 -0
  100. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  101. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/utterance/__init__.py +0 -0
  102. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  103. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/tests/__init__.py +0 -0
  104. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/tests/conftest.py +0 -0
  105. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  106. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  107. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  108. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  109. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  110. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  111. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  112. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  113. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  114. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  115. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  116. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  117. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/tests/pipelines/fixures.py +0 -0
  118. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  119. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  120. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/tests/test_document.py +0 -0
  121. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/utils/__init__.py +0 -0
  122. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/utils/abbrev.py +0 -0
  123. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/utils/compounds.py +0 -0
  124. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/utils/config.py +0 -0
  125. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/utils/dp.py +0 -0
  126. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/utils/names.py +0 -0
  127. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign/utils/utils.py +0 -0
  128. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign.egg-info/dependency_links.txt +0 -0
  129. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign.egg-info/entry_points.txt +0 -0
  130. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/batchalign.egg-info/top_level.txt +0 -0
  131. {batchalign-0.7.21.post11 → batchalign-0.7.22.post1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: batchalign
3
- Version: 0.7.21.post11
3
+ Version: 0.7.22.post1
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -12,7 +12,7 @@ Requires-Dist: pydantic>=2.4
12
12
  Requires-Dist: nltk>=3.8
13
13
  Requires-Dist: praatio<6.1.0,>=6.0.0
14
14
  Requires-Dist: torch>=2.6.0
15
- Requires-Dist: torchaudio
15
+ Requires-Dist: torchaudio<2.9
16
16
  Requires-Dist: pydub
17
17
  Requires-Dist: plotly>=5.3.0
18
18
  Requires-Dist: transformers>=4.38.2
@@ -40,6 +40,7 @@ Requires-Dist: googletrans
40
40
  Requires-Dist: openai-whisper
41
41
  Requires-Dist: llvmlite>=0.44.0
42
42
  Requires-Dist: praat-parselmouth==0.4.6
43
+ Requires-Dist: opensmile>=2.3.0
43
44
  Requires-Dist: pyannote.audio
44
45
  Requires-Dist: onnxruntime
45
46
  Provides-Extra: dev
@@ -402,7 +402,41 @@ def avqi(ctx, cs_file, sv_file, lang, **kwargs):
402
402
  import traceback
403
403
  C.print(traceback.format_exc())
404
404
 
405
+ #################### OPENSMILE ################################
405
406
 
407
+ @batchalign.command()
408
+ @click.argument("input_dir", type=click.Path(exists=True, file_okay=False))
409
+ @click.argument("output_dir", type=click.Path(exists=True, file_okay=False))
410
+ @click.option("--feature-set",
411
+ type=click.Choice(['eGeMAPSv02', 'eGeMAPSv01b', 'GeMAPSv01b', 'ComParE_2016']),
412
+ default='eGeMAPSv02',
413
+ help="Feature set to extract")
414
+ @click.option("--lang",
415
+ help="sample language in three-letter ISO 3166-1 alpha-3 code",
416
+ show_default=True, default="eng", type=str)
417
+ @click.pass_context
418
+ def opensmile(ctx, input_dir, output_dir, feature_set, lang, **kwargs):
419
+ """Extract openSMILE audio features from speech samples."""
420
+
421
+ def loader(file):
422
+ doc = Document.new(media_path=file, lang=lang)
423
+ return doc, {"feature_set": feature_set}
424
+
425
+ def writer(results, output):
426
+ if results.get('success', False):
427
+ output_csv = Path(output).with_suffix('.opensmile.csv')
428
+ features_df = results.get('features_df')
429
+ if features_df is not None:
430
+ features_df.to_csv(output_csv, header=['value'], index_label='feature')
431
+ else:
432
+ error_file = Path(output).with_suffix('.error.txt')
433
+ with open(error_file, 'w') as f:
434
+ f.write(f"OpenSMILE extraction failed: {results.get('error', 'Unknown error')}\n")
435
+
436
+ _dispatch("opensmile", lang, 1, ["mp3", "mp4", "wav"], ctx,
437
+ input_dir, output_dir,
438
+ loader, writer, C, **kwargs)
439
+
406
440
  #################### SETUP ################################
407
441
 
408
442
  @batchalign.command()
@@ -424,4 +458,3 @@ def version(ctx, **kwargs):
424
458
  f"[italic]{RELEASE_NOTES.strip()}[/italic]"+"\n" +
425
459
  "\nDeveloped by Brian MacWhinney and Houjun Liu")
426
460
  C.print("\n\n"+ptr+"\n\n")
427
-
@@ -49,6 +49,7 @@ Cmd2Task = {
49
49
  "utseg": "utterance",
50
50
  "coref": "coref",
51
51
  "translate": "translate",
52
+ "opensmile": "opensmile",
52
53
  }
53
54
 
54
55
  # this is the main runner used by all functions
@@ -16,3 +16,4 @@ from .translate import SeamlessTranslationModel, GoogleTranslateEngine
16
16
  from .avqi import AVQIEngine
17
17
 
18
18
  from .diarization import PyannoteEngine
19
+ from .opensmile import OpenSMILEEngine
@@ -30,6 +30,7 @@ DEFAULT_PACKAGES = {
30
30
  "utterance": "stanza_utt",
31
31
  "coref": "stanza_coref",
32
32
  "translate": "gtrans",
33
+ "opensmile": "opensmile_egemaps",
33
34
  }
34
35
 
35
36
  LANGUAGE_OVERRIDE_PACKAGES = {
@@ -139,8 +140,19 @@ def dispatch_pipeline(pkg_str, lang, num_speakers=None, **arg_overrides):
139
140
  engines.append(OAIWhisperEngine())
140
141
  elif engine == "pyannote":
141
142
  engines.append(PyannoteEngine())
143
+ elif engine == "opensmile_egemaps":
144
+ from batchalign.pipelines.opensmile import OpenSMILEEngine
145
+ engines.append(OpenSMILEEngine(feature_set='eGeMAPSv02'))
146
+ elif engine == "opensmile_gemaps":
147
+ from batchalign.pipelines.opensmile import OpenSMILEEngine
148
+ engines.append(OpenSMILEEngine(feature_set='GeMAPSv01b'))
149
+ elif engine == "opensmile_compare":
150
+ from batchalign.pipelines.opensmile import OpenSMILEEngine
151
+ engines.append(OpenSMILEEngine(feature_set='ComParE_2016'))
152
+ elif engine == "opensmile_eGeMAPSv01b":
153
+ from batchalign.pipelines.opensmile import OpenSMILEEngine
154
+ engines.append(OpenSMILEEngine(feature_set='eGeMAPSv01b'))
142
155
 
143
156
 
144
157
  L.debug(f"Done initalizing packages.")
145
158
  return BatchalignPipeline(*engines)
146
-
@@ -0,0 +1,7 @@
1
+ """
2
+ OpenSMILE Pipeline Module
3
+ """
4
+
5
+ from .engine import OpenSMILEEngine
6
+
7
+ __all__ = ['OpenSMILEEngine']
@@ -0,0 +1,191 @@
1
+ """
2
+ OpenSMILE Engine for Batchalign2 - M1 Mac Compatible Version
3
+ Audio feature extraction using the openSMILE toolkit
4
+ """
5
+
6
+ import opensmile
7
+ import pandas as pd
8
+ from pathlib import Path
9
+ import logging
10
+ from typing import Dict, Optional
11
+ import platform
12
+
13
+ from batchalign.pipelines.base import BatchalignEngine
14
+ from batchalign.document import Task, TaskType, Document
15
+
16
+ L = logging.getLogger('batchalign')
17
+
18
+ class OpenSMILEEngine(BatchalignEngine):
19
+ """Engine for extracting openSMILE audio features."""
20
+
21
+ def __init__(self, feature_set: str = 'eGeMAPSv02',
22
+ feature_level: str = 'functionals'):
23
+ super().__init__()
24
+ self._tasks = [Task.FEATURE_EXTRACT]
25
+
26
+ self.feature_set = feature_set
27
+ self.feature_level = feature_level
28
+
29
+ self.is_m1_mac = (platform.system() == 'Darwin' and
30
+ platform.processor() == 'arm')
31
+
32
+ try:
33
+ if self.is_m1_mac:
34
+ L.info("M1 Mac detected - using default openSMILE configuration")
35
+ self.smile = opensmile.Smile()
36
+ self._requested_feature_set = feature_set
37
+ else:
38
+ self.smile = opensmile.Smile(
39
+ feature_set=feature_set,
40
+ feature_level=feature_level,
41
+ )
42
+ L.debug(f"OpenSMILE initialized (M1 compatibility mode: {self.is_m1_mac})")
43
+ except Exception as e:
44
+ L.error(f"Failed to initialize openSMILE: {e}")
45
+ raise
46
+
47
+ @property
48
+ def tasks(self):
49
+ return self._tasks
50
+
51
+ def analyze(self, doc: Document, feature_set: str = None, **kwargs) -> Dict:
52
+ """
53
+ Extract openSMILE features from Document.
54
+
55
+ Args:
56
+ doc: Document with media attached
57
+ feature_set: Feature set to use (ignored on M1 Mac)
58
+ **kwargs: Additional arguments
59
+
60
+ Returns:
61
+ Dictionary with extraction results and metadata
62
+ """
63
+
64
+ if not doc.media or not doc.media.url:
65
+ return {
66
+ 'error': 'Document has no media attached',
67
+ 'success': False
68
+ }
69
+
70
+ actual_audio_path = doc.media.url
71
+
72
+ if feature_set and feature_set != self.feature_set:
73
+ if self.is_m1_mac:
74
+ L.warning(f"Feature set switching not supported on M1 Mac - using default features instead of {feature_set}")
75
+ else:
76
+ L.info(f"Switching feature set from {self.feature_set} to {feature_set}")
77
+ try:
78
+ self.feature_set = feature_set
79
+ self.smile = opensmile.Smile(
80
+ feature_set=feature_set,
81
+ feature_level=self.feature_level,
82
+ )
83
+ except Exception as e:
84
+ L.error(f"Failed to switch to feature set {feature_set}: {e}")
85
+ return {
86
+ 'feature_set': self.feature_set,
87
+ 'num_features': 0,
88
+ 'error': f"Feature set switch failed: {str(e)}",
89
+ 'success': False
90
+ }
91
+
92
+ try:
93
+ L.info(f"Extracting features from: {Path(actual_audio_path).name}")
94
+ if self.is_m1_mac:
95
+ L.info("Using M1-compatible default feature set (eGeMAPSv02 equivalent)")
96
+ else:
97
+ L.info(f"Using {self.feature_set} feature set")
98
+
99
+ features_df = self.smile.process_file(actual_audio_path)
100
+
101
+ if features_df is None or features_df.empty:
102
+ raise ValueError("Feature extraction returned empty results")
103
+
104
+ results_df = features_df.T
105
+
106
+ num_features = len(features_df.columns)
107
+ duration_segments = len(features_df)
108
+
109
+ first_row_features = {}
110
+ if duration_segments > 0:
111
+ first_row_features = features_df.iloc[0].to_dict()
112
+
113
+ actual_feature_set = self.feature_set
114
+ if self.is_m1_mac:
115
+ actual_feature_set = "M1-default (eGeMAPSv02-like)"
116
+
117
+ results = {
118
+ 'feature_set': actual_feature_set,
119
+ 'feature_level': self.feature_level,
120
+ 'num_features': num_features,
121
+ 'duration_segments': duration_segments,
122
+ 'audio_file': str(actual_audio_path),
123
+ 'features_sample': first_row_features,
124
+ 'success': True,
125
+ 'm1_compatibility_mode': self.is_m1_mac,
126
+ 'features_df': results_df,
127
+ }
128
+
129
+ if self.is_m1_mac and hasattr(self, '_requested_feature_set'):
130
+ results['requested_feature_set'] = self._requested_feature_set
131
+ results['warning'] = f"M1 Mac compatibility: used default features instead of {self._requested_feature_set}"
132
+
133
+ L.info(f"Successfully extracted {num_features} features from {duration_segments} segments")
134
+ return results
135
+
136
+ except Exception as e:
137
+ L.error(f"Error extracting openSMILE features from {actual_audio_path}: {e}")
138
+ return {
139
+ 'feature_set': self.feature_set,
140
+ 'feature_level': self.feature_level,
141
+ 'num_features': 0,
142
+ 'duration_segments': 0,
143
+ 'audio_file': str(actual_audio_path),
144
+ 'error': str(e),
145
+ 'success': False,
146
+ 'm1_compatibility_mode': self.is_m1_mac
147
+ }
148
+
149
+ def get_available_feature_sets(self) -> list:
150
+ """Return list of available feature sets (limited on M1 Mac)."""
151
+ if self.is_m1_mac:
152
+ return ['M1-default (eGeMAPSv02-like)']
153
+ return [
154
+ 'eGeMAPSv02',
155
+ 'eGeMAPSv01b',
156
+ 'GeMAPSv01b',
157
+ 'ComParE_2016'
158
+ ]
159
+
160
+ def get_feature_set_info(self, feature_set: str) -> dict:
161
+ """Get information about a specific feature set."""
162
+ if self.is_m1_mac:
163
+ return {
164
+ 'description': 'M1 Mac compatible default feature set (similar to eGeMAPSv02)',
165
+ 'num_features': 'Variable',
166
+ 'recommended_for': 'General audio analysis on Apple Silicon'
167
+ }
168
+
169
+ info = {
170
+ 'eGeMAPSv02': {
171
+ 'description': 'Extended Geneva Minimalistic Acoustic Parameter Set v02',
172
+ 'num_features': 88,
173
+ 'recommended_for': 'General emotion and paralinguistic analysis'
174
+ },
175
+ 'eGeMAPSv01b': {
176
+ 'description': 'Extended Geneva Minimalistic Acoustic Parameter Set v01b',
177
+ 'num_features': 88,
178
+ 'recommended_for': 'Emotion recognition, clinical assessment'
179
+ },
180
+ 'GeMAPSv01b': {
181
+ 'description': 'Geneva Minimalistic Acoustic Parameter Set v01b',
182
+ 'num_features': 62,
183
+ 'recommended_for': 'Basic paralinguistic analysis'
184
+ },
185
+ 'ComParE_2016': {
186
+ 'description': 'Computational Paralinguistics Challenge 2016 feature set',
187
+ 'num_features': 6373,
188
+ 'recommended_for': 'Comprehensive analysis (large feature space)'
189
+ }
190
+ }
191
+ return info.get(feature_set, {'description': 'Unknown feature set', 'num_features': 'Unknown'})
@@ -0,0 +1,3 @@
1
+ 0.7.22-post.1
2
+ October 15th, 2025
3
+ Bump down torch audio
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: batchalign
3
- Version: 0.7.21.post11
3
+ Version: 0.7.22.post1
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -12,7 +12,7 @@ Requires-Dist: pydantic>=2.4
12
12
  Requires-Dist: nltk>=3.8
13
13
  Requires-Dist: praatio<6.1.0,>=6.0.0
14
14
  Requires-Dist: torch>=2.6.0
15
- Requires-Dist: torchaudio
15
+ Requires-Dist: torchaudio<2.9
16
16
  Requires-Dist: pydub
17
17
  Requires-Dist: plotly>=5.3.0
18
18
  Requires-Dist: transformers>=4.38.2
@@ -40,6 +40,7 @@ Requires-Dist: googletrans
40
40
  Requires-Dist: openai-whisper
41
41
  Requires-Dist: llvmlite>=0.44.0
42
42
  Requires-Dist: praat-parselmouth==0.4.6
43
+ Requires-Dist: opensmile>=2.3.0
43
44
  Requires-Dist: pyannote.audio
44
45
  Requires-Dist: onnxruntime
45
46
  Provides-Extra: dev
@@ -87,6 +87,8 @@ batchalign/pipelines/morphosyntax/fr/apm.py
87
87
  batchalign/pipelines/morphosyntax/fr/apmn.py
88
88
  batchalign/pipelines/morphosyntax/fr/case.py
89
89
  batchalign/pipelines/morphosyntax/ja/verbforms.py
90
+ batchalign/pipelines/opensmile/__init__.py
91
+ batchalign/pipelines/opensmile/engine.py
90
92
  batchalign/pipelines/speaker/__init__.py
91
93
  batchalign/pipelines/speaker/nemo_speaker.py
92
94
  batchalign/pipelines/translate/__init__.py
@@ -2,7 +2,7 @@ pydantic>=2.4
2
2
  nltk>=3.8
3
3
  praatio<6.1.0,>=6.0.0
4
4
  torch>=2.6.0
5
- torchaudio
5
+ torchaudio<2.9
6
6
  pydub
7
7
  plotly>=5.3.0
8
8
  transformers>=4.38.2
@@ -30,6 +30,7 @@ googletrans
30
30
  openai-whisper
31
31
  llvmlite>=0.44.0
32
32
  praat-parselmouth==0.4.6
33
+ opensmile>=2.3.0
33
34
  pyannote.audio
34
35
  onnxruntime
35
36
 
@@ -30,7 +30,7 @@ setup(
30
30
  "nltk>=3.8",
31
31
  "praatio>=6.0.0,<6.1.0",
32
32
  "torch>=2.6.0",
33
- "torchaudio",
33
+ "torchaudio<2.9",
34
34
  # "pyAudioAnalysis",
35
35
  # "hmmlearn==0.3.0",
36
36
  # "eyed3",
@@ -61,7 +61,8 @@ setup(
61
61
  "googletrans",
62
62
  "openai-whisper",
63
63
  "llvmlite>=0.44.0",
64
- "praat-parselmouth==0.4.6", # Added parselmouth for AVQI
64
+ "praat-parselmouth==0.4.6",
65
+ "opensmile>=2.3.0",
65
66
  "pyannote.audio",
66
67
  "onnxruntime"
67
68
  ],
@@ -1,3 +0,0 @@
1
- 0.7.21-post.11
2
- October 2st, 2025
3
- Fix arabic punctuation