batchalign 0.7.21.post10__tar.gz → 0.7.22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of batchalign might be problematic. Click here for more details.

Files changed (131)
  1. {batchalign-0.7.21.post10/batchalign.egg-info → batchalign-0.7.22}/PKG-INFO +2 -1
  2. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/cli/cli.py +34 -1
  3. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/cli/dispatch.py +1 -0
  4. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/__init__.py +1 -0
  5. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/asr/utils.py +16 -15
  6. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/dispatch.py +13 -1
  7. batchalign-0.7.22/batchalign/pipelines/opensmile/__init__.py +7 -0
  8. batchalign-0.7.22/batchalign/pipelines/opensmile/engine.py +191 -0
  9. batchalign-0.7.22/batchalign/version +3 -0
  10. {batchalign-0.7.21.post10 → batchalign-0.7.22/batchalign.egg-info}/PKG-INFO +2 -1
  11. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign.egg-info/SOURCES.txt +2 -0
  12. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign.egg-info/requires.txt +1 -0
  13. {batchalign-0.7.21.post10 → batchalign-0.7.22}/setup.py +2 -1
  14. batchalign-0.7.21.post10/batchalign/version +0 -3
  15. {batchalign-0.7.21.post10 → batchalign-0.7.22}/LICENSE +0 -0
  16. {batchalign-0.7.21.post10 → batchalign-0.7.22}/MANIFEST.in +0 -0
  17. {batchalign-0.7.21.post10 → batchalign-0.7.22}/README.md +0 -0
  18. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/__init__.py +0 -0
  19. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/__main__.py +0 -0
  20. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/cli/__init__.py +0 -0
  21. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/constants.py +0 -0
  22. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/document.py +0 -0
  23. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/errors.py +0 -0
  24. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/formats/__init__.py +0 -0
  25. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/formats/base.py +0 -0
  26. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/formats/chat/__init__.py +0 -0
  27. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/formats/chat/file.py +0 -0
  28. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/formats/chat/generator.py +0 -0
  29. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/formats/chat/lexer.py +0 -0
  30. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/formats/chat/parser.py +0 -0
  31. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/formats/chat/utils.py +0 -0
  32. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/formats/textgrid/__init__.py +0 -0
  33. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/formats/textgrid/file.py +0 -0
  34. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/formats/textgrid/generator.py +0 -0
  35. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/formats/textgrid/parser.py +0 -0
  36. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/models/__init__.py +0 -0
  37. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/models/resolve.py +0 -0
  38. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/models/speaker/__init__.py +0 -0
  39. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/models/speaker/config.yaml +0 -0
  40. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/models/speaker/infer.py +0 -0
  41. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/models/speaker/utils.py +0 -0
  42. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/models/training/__init__.py +0 -0
  43. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/models/training/run.py +0 -0
  44. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/models/training/utils.py +0 -0
  45. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/models/utils.py +0 -0
  46. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/models/utterance/__init__.py +0 -0
  47. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/models/utterance/cantonese_infer.py +0 -0
  48. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/models/utterance/dataset.py +0 -0
  49. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/models/utterance/execute.py +0 -0
  50. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/models/utterance/infer.py +0 -0
  51. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/models/utterance/prep.py +0 -0
  52. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/models/utterance/train.py +0 -0
  53. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/models/wave2vec/__init__.py +0 -0
  54. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/models/wave2vec/infer_fa.py +0 -0
  55. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/models/whisper/__init__.py +0 -0
  56. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/models/whisper/infer_asr.py +0 -0
  57. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/models/whisper/infer_fa.py +0 -0
  58. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/analysis/__init__.py +0 -0
  59. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/analysis/eval.py +0 -0
  60. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/asr/__init__.py +0 -0
  61. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/asr/num2chinese.py +0 -0
  62. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/asr/oai_whisper.py +0 -0
  63. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/asr/rev.py +0 -0
  64. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/asr/whisper.py +0 -0
  65. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/asr/whisperx.py +0 -0
  66. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/avqi/__init__.py +0 -0
  67. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/avqi/engine.py +0 -0
  68. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/base.py +0 -0
  69. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/cleanup/__init__.py +0 -0
  70. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  71. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  72. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  73. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/cleanup/retrace.py +0 -0
  74. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  75. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  76. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/cleanup/support/test.test +0 -0
  77. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/diarization/__init__.py +0 -0
  78. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/diarization/pyannote.py +0 -0
  79. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/fa/__init__.py +0 -0
  80. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  81. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  82. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  83. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  84. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  85. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  86. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  87. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  88. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  89. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  90. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/pipeline.py +0 -0
  91. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/speaker/__init__.py +0 -0
  92. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  93. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/translate/__init__.py +0 -0
  94. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/translate/gtrans.py +0 -0
  95. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/translate/seamless.py +0 -0
  96. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/translate/utils.py +0 -0
  97. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/utr/__init__.py +0 -0
  98. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/utr/rev_utr.py +0 -0
  99. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/utr/utils.py +0 -0
  100. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  101. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/utterance/__init__.py +0 -0
  102. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  103. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/tests/__init__.py +0 -0
  104. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/tests/conftest.py +0 -0
  105. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  106. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  107. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  108. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  109. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  110. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  111. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  112. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  113. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  114. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  115. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  116. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  117. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/tests/pipelines/fixures.py +0 -0
  118. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  119. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  120. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/tests/test_document.py +0 -0
  121. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/utils/__init__.py +0 -0
  122. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/utils/abbrev.py +0 -0
  123. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/utils/compounds.py +0 -0
  124. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/utils/config.py +0 -0
  125. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/utils/dp.py +0 -0
  126. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/utils/names.py +0 -0
  127. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign/utils/utils.py +0 -0
  128. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign.egg-info/dependency_links.txt +0 -0
  129. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign.egg-info/entry_points.txt +0 -0
  130. {batchalign-0.7.21.post10 → batchalign-0.7.22}/batchalign.egg-info/top_level.txt +0 -0
  131. {batchalign-0.7.21.post10 → batchalign-0.7.22}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: batchalign
3
- Version: 0.7.21.post10
3
+ Version: 0.7.22
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -40,6 +40,7 @@ Requires-Dist: googletrans
40
40
  Requires-Dist: openai-whisper
41
41
  Requires-Dist: llvmlite>=0.44.0
42
42
  Requires-Dist: praat-parselmouth==0.4.6
43
+ Requires-Dist: opensmile>=2.3.0
43
44
  Requires-Dist: pyannote.audio
44
45
  Requires-Dist: onnxruntime
45
46
  Provides-Extra: dev
@@ -402,7 +402,41 @@ def avqi(ctx, cs_file, sv_file, lang, **kwargs):
402
402
  import traceback
403
403
  C.print(traceback.format_exc())
404
404
 
405
+ #################### OPENSMILE ################################
405
406
 
407
+ @batchalign.command()
408
+ @click.argument("input_dir", type=click.Path(exists=True, file_okay=False))
409
+ @click.argument("output_dir", type=click.Path(exists=True, file_okay=False))
410
+ @click.option("--feature-set",
411
+ type=click.Choice(['eGeMAPSv02', 'eGeMAPSv01b', 'GeMAPSv01b', 'ComParE_2016']),
412
+ default='eGeMAPSv02',
413
+ help="Feature set to extract")
414
+ @click.option("--lang",
415
+ help="sample language in three-letter ISO 3166-1 alpha-3 code",
416
+ show_default=True, default="eng", type=str)
417
+ @click.pass_context
418
+ def opensmile(ctx, input_dir, output_dir, feature_set, lang, **kwargs):
419
+ """Extract openSMILE audio features from speech samples."""
420
+
421
+ def loader(file):
422
+ doc = Document.new(media_path=file, lang=lang)
423
+ return doc, {"feature_set": feature_set}
424
+
425
+ def writer(results, output):
426
+ if results.get('success', False):
427
+ output_csv = Path(output).with_suffix('.opensmile.csv')
428
+ features_df = results.get('features_df')
429
+ if features_df is not None:
430
+ features_df.to_csv(output_csv, header=['value'], index_label='feature')
431
+ else:
432
+ error_file = Path(output).with_suffix('.error.txt')
433
+ with open(error_file, 'w') as f:
434
+ f.write(f"OpenSMILE extraction failed: {results.get('error', 'Unknown error')}\n")
435
+
436
+ _dispatch("opensmile", lang, 1, ["mp3", "mp4", "wav"], ctx,
437
+ input_dir, output_dir,
438
+ loader, writer, C, **kwargs)
439
+
406
440
  #################### SETUP ################################
407
441
 
408
442
  @batchalign.command()
@@ -424,4 +458,3 @@ def version(ctx, **kwargs):
424
458
  f"[italic]{RELEASE_NOTES.strip()}[/italic]"+"\n" +
425
459
  "\nDeveloped by Brian MacWhinney and Houjun Liu")
426
460
  C.print("\n\n"+ptr+"\n\n")
427
-
@@ -49,6 +49,7 @@ Cmd2Task = {
49
49
  "utseg": "utterance",
50
50
  "coref": "coref",
51
51
  "translate": "translate",
52
+ "opensmile": "opensmile",
52
53
  }
53
54
 
54
55
  # this is the main runner used by all functions
@@ -16,3 +16,4 @@ from .translate import SeamlessTranslationModel, GoogleTranslateEngine
16
16
  from .avqi import AVQIEngine
17
17
 
18
18
  from .diarization import PyannoteEngine
19
+ from .opensmile import OpenSMILEEngine
@@ -60,10 +60,11 @@ def retokenize(intermediate_output):
60
60
  word = word.replace("。", ".")
61
61
  word = word.replace("¿", " ").replace("¡", " ")
62
62
  tmp.append((word, bullet))
63
- if len(word) > 0 and (word in ENDING_PUNCT or word[-1] in ENDING_PUNCT):
64
- if word in ENDING_PUNCT:
63
+ if len(word) > 0 and (word in ENDING_PUNCT+["؟", "۔", "،", "؛"]
64
+ or word[-1] in ENDING_PUNCT+["؟", "۔", "،", "؛"]):
65
+ if word in ENDING_PUNCT+["؟", "۔", "،", "؛"]:
65
66
  final_outputs.append((speaker, tmp))
66
- elif word[-1] in ENDING_PUNCT:
67
+ elif word[-1] in ENDING_PUNCT+["؟", "۔", "،", "؛"]:
67
68
  # we want to separate the ending punct out
68
69
  final, time = tmp.pop(-1)
69
70
  tmp.append((final[:-1], time))
@@ -102,7 +103,7 @@ def retokenize_with_engine(intermediate_output, engine):
102
103
  # because we are using an utterance engine, we need
103
104
  # to get rid of all the preexisting punctuation
104
105
  for i in utterance:
105
- for j in MOR_PUNCT+ENDING_PUNCT:
106
+ for j in MOR_PUNCT+ENDING_PUNCT+["؟", "۔", "،", "؛"]:
106
107
  i[0] = i[0].strip(j).lower()
107
108
 
108
109
  # remove everything that's now blank
@@ -118,7 +119,7 @@ def retokenize_with_engine(intermediate_output, engine):
118
119
  # align the utterance against original splits and generate final outputs
119
120
  for i in split:
120
121
  # Check if the split has ending punctuation
121
- if i[-1] in ENDING_PUNCT:
122
+ if i[-1] in ENDING_PUNCT+["؟", "۔", "،", "؛"]:
122
123
  new_ut, delim = (i[:-1].split(" "), i[-1])
123
124
  else:
124
125
  new_ut, delim = (i.split(" "), ".")
@@ -264,16 +265,8 @@ def process_generation(output, lang="eng", utterance_engine=None):
264
265
  seen_word = False
265
266
  if word.strip() == "":
266
267
  continue
267
- if word not in ENDING_PUNCT+MOR_PUNCT:
268
+ if word not in ENDING_PUNCT+MOR_PUNCT+["؟", "۔", "،", "؛"]:
268
269
  word_replaced = word
269
- if word_replaced.strip() == "؟":
270
- word_replaced = "?"
271
- elif word_replaced.strip() == "۔":
272
- word_replaced = "."
273
- elif word_replaced.strip() == "،":
274
- word_replaced = ","
275
- elif word_replaced.strip() == "؛":
276
- word_replaced = ";"
277
270
 
278
271
  if start == None or end == None:
279
272
  words.append(Form(text=word_replaced, time=None))
@@ -281,7 +274,15 @@ def process_generation(output, lang="eng", utterance_engine=None):
281
274
  seen_word = True
282
275
  words.append(Form(text=word_replaced, time=(int(start), int(end))))
283
276
  else:
284
- words.append(Form(text=word, time=None))
277
+ if word.strip() == "؟":
278
+ word = "?"
279
+ elif word.strip() == "۔":
280
+ word = "."
281
+ elif word.strip() == "،":
282
+ word = ","
283
+ elif word.strip() == "؛":
284
+ word = ";"
285
+ words.append(Form(text=word, time=None))
285
286
 
286
287
  final_utterances.append(Utterance(
287
288
  tier=participant,
@@ -30,6 +30,7 @@ DEFAULT_PACKAGES = {
30
30
  "utterance": "stanza_utt",
31
31
  "coref": "stanza_coref",
32
32
  "translate": "gtrans",
33
+ "opensmile": "opensmile_egemaps",
33
34
  }
34
35
 
35
36
  LANGUAGE_OVERRIDE_PACKAGES = {
@@ -139,8 +140,19 @@ def dispatch_pipeline(pkg_str, lang, num_speakers=None, **arg_overrides):
139
140
  engines.append(OAIWhisperEngine())
140
141
  elif engine == "pyannote":
141
142
  engines.append(PyannoteEngine())
143
+ elif engine == "opensmile_egemaps":
144
+ from batchalign.pipelines.opensmile import OpenSMILEEngine
145
+ engines.append(OpenSMILEEngine(feature_set='eGeMAPSv02'))
146
+ elif engine == "opensmile_gemaps":
147
+ from batchalign.pipelines.opensmile import OpenSMILEEngine
148
+ engines.append(OpenSMILEEngine(feature_set='GeMAPSv01b'))
149
+ elif engine == "opensmile_compare":
150
+ from batchalign.pipelines.opensmile import OpenSMILEEngine
151
+ engines.append(OpenSMILEEngine(feature_set='ComParE_2016'))
152
+ elif engine == "opensmile_eGeMAPSv01b":
153
+ from batchalign.pipelines.opensmile import OpenSMILEEngine
154
+ engines.append(OpenSMILEEngine(feature_set='eGeMAPSv01b'))
142
155
 
143
156
 
144
157
  L.debug(f"Done initalizing packages.")
145
158
  return BatchalignPipeline(*engines)
146
-
@@ -0,0 +1,7 @@
1
+ """
2
+ OpenSMILE Pipeline Module
3
+ """
4
+
5
+ from .engine import OpenSMILEEngine
6
+
7
+ __all__ = ['OpenSMILEEngine']
@@ -0,0 +1,191 @@
1
+ """
2
+ OpenSMILE Engine for Batchalign2 - M1 Mac Compatible Version
3
+ Audio feature extraction using the openSMILE toolkit
4
+ """
5
+
6
+ import opensmile
7
+ import pandas as pd
8
+ from pathlib import Path
9
+ import logging
10
+ from typing import Dict, Optional
11
+ import platform
12
+
13
+ from batchalign.pipelines.base import BatchalignEngine
14
+ from batchalign.document import Task, TaskType, Document
15
+
16
+ L = logging.getLogger('batchalign')
17
+
18
+ class OpenSMILEEngine(BatchalignEngine):
19
+ """Engine for extracting openSMILE audio features."""
20
+
21
+ def __init__(self, feature_set: str = 'eGeMAPSv02',
22
+ feature_level: str = 'functionals'):
23
+ super().__init__()
24
+ self._tasks = [Task.FEATURE_EXTRACT]
25
+
26
+ self.feature_set = feature_set
27
+ self.feature_level = feature_level
28
+
29
+ self.is_m1_mac = (platform.system() == 'Darwin' and
30
+ platform.processor() == 'arm')
31
+
32
+ try:
33
+ if self.is_m1_mac:
34
+ L.info("M1 Mac detected - using default openSMILE configuration")
35
+ self.smile = opensmile.Smile()
36
+ self._requested_feature_set = feature_set
37
+ else:
38
+ self.smile = opensmile.Smile(
39
+ feature_set=feature_set,
40
+ feature_level=feature_level,
41
+ )
42
+ L.debug(f"OpenSMILE initialized (M1 compatibility mode: {self.is_m1_mac})")
43
+ except Exception as e:
44
+ L.error(f"Failed to initialize openSMILE: {e}")
45
+ raise
46
+
47
+ @property
48
+ def tasks(self):
49
+ return self._tasks
50
+
51
+ def analyze(self, doc: Document, feature_set: str = None, **kwargs) -> Dict:
52
+ """
53
+ Extract openSMILE features from Document.
54
+
55
+ Args:
56
+ doc: Document with media attached
57
+ feature_set: Feature set to use (ignored on M1 Mac)
58
+ **kwargs: Additional arguments
59
+
60
+ Returns:
61
+ Dictionary with extraction results and metadata
62
+ """
63
+
64
+ if not doc.media or not doc.media.url:
65
+ return {
66
+ 'error': 'Document has no media attached',
67
+ 'success': False
68
+ }
69
+
70
+ actual_audio_path = doc.media.url
71
+
72
+ if feature_set and feature_set != self.feature_set:
73
+ if self.is_m1_mac:
74
+ L.warning(f"Feature set switching not supported on M1 Mac - using default features instead of {feature_set}")
75
+ else:
76
+ L.info(f"Switching feature set from {self.feature_set} to {feature_set}")
77
+ try:
78
+ self.feature_set = feature_set
79
+ self.smile = opensmile.Smile(
80
+ feature_set=feature_set,
81
+ feature_level=self.feature_level,
82
+ )
83
+ except Exception as e:
84
+ L.error(f"Failed to switch to feature set {feature_set}: {e}")
85
+ return {
86
+ 'feature_set': self.feature_set,
87
+ 'num_features': 0,
88
+ 'error': f"Feature set switch failed: {str(e)}",
89
+ 'success': False
90
+ }
91
+
92
+ try:
93
+ L.info(f"Extracting features from: {Path(actual_audio_path).name}")
94
+ if self.is_m1_mac:
95
+ L.info("Using M1-compatible default feature set (eGeMAPSv02 equivalent)")
96
+ else:
97
+ L.info(f"Using {self.feature_set} feature set")
98
+
99
+ features_df = self.smile.process_file(actual_audio_path)
100
+
101
+ if features_df is None or features_df.empty:
102
+ raise ValueError("Feature extraction returned empty results")
103
+
104
+ results_df = features_df.T
105
+
106
+ num_features = len(features_df.columns)
107
+ duration_segments = len(features_df)
108
+
109
+ first_row_features = {}
110
+ if duration_segments > 0:
111
+ first_row_features = features_df.iloc[0].to_dict()
112
+
113
+ actual_feature_set = self.feature_set
114
+ if self.is_m1_mac:
115
+ actual_feature_set = "M1-default (eGeMAPSv02-like)"
116
+
117
+ results = {
118
+ 'feature_set': actual_feature_set,
119
+ 'feature_level': self.feature_level,
120
+ 'num_features': num_features,
121
+ 'duration_segments': duration_segments,
122
+ 'audio_file': str(actual_audio_path),
123
+ 'features_sample': first_row_features,
124
+ 'success': True,
125
+ 'm1_compatibility_mode': self.is_m1_mac,
126
+ 'features_df': results_df,
127
+ }
128
+
129
+ if self.is_m1_mac and hasattr(self, '_requested_feature_set'):
130
+ results['requested_feature_set'] = self._requested_feature_set
131
+ results['warning'] = f"M1 Mac compatibility: used default features instead of {self._requested_feature_set}"
132
+
133
+ L.info(f"Successfully extracted {num_features} features from {duration_segments} segments")
134
+ return results
135
+
136
+ except Exception as e:
137
+ L.error(f"Error extracting openSMILE features from {actual_audio_path}: {e}")
138
+ return {
139
+ 'feature_set': self.feature_set,
140
+ 'feature_level': self.feature_level,
141
+ 'num_features': 0,
142
+ 'duration_segments': 0,
143
+ 'audio_file': str(actual_audio_path),
144
+ 'error': str(e),
145
+ 'success': False,
146
+ 'm1_compatibility_mode': self.is_m1_mac
147
+ }
148
+
149
+ def get_available_feature_sets(self) -> list:
150
+ """Return list of available feature sets (limited on M1 Mac)."""
151
+ if self.is_m1_mac:
152
+ return ['M1-default (eGeMAPSv02-like)']
153
+ return [
154
+ 'eGeMAPSv02',
155
+ 'eGeMAPSv01b',
156
+ 'GeMAPSv01b',
157
+ 'ComParE_2016'
158
+ ]
159
+
160
+ def get_feature_set_info(self, feature_set: str) -> dict:
161
+ """Get information about a specific feature set."""
162
+ if self.is_m1_mac:
163
+ return {
164
+ 'description': 'M1 Mac compatible default feature set (similar to eGeMAPSv02)',
165
+ 'num_features': 'Variable',
166
+ 'recommended_for': 'General audio analysis on Apple Silicon'
167
+ }
168
+
169
+ info = {
170
+ 'eGeMAPSv02': {
171
+ 'description': 'Extended Geneva Minimalistic Acoustic Parameter Set v02',
172
+ 'num_features': 88,
173
+ 'recommended_for': 'General emotion and paralinguistic analysis'
174
+ },
175
+ 'eGeMAPSv01b': {
176
+ 'description': 'Extended Geneva Minimalistic Acoustic Parameter Set v01b',
177
+ 'num_features': 88,
178
+ 'recommended_for': 'Emotion recognition, clinical assessment'
179
+ },
180
+ 'GeMAPSv01b': {
181
+ 'description': 'Geneva Minimalistic Acoustic Parameter Set v01b',
182
+ 'num_features': 62,
183
+ 'recommended_for': 'Basic paralinguistic analysis'
184
+ },
185
+ 'ComParE_2016': {
186
+ 'description': 'Computational Paralinguistics Challenge 2016 feature set',
187
+ 'num_features': 6373,
188
+ 'recommended_for': 'Comprehensive analysis (large feature space)'
189
+ }
190
+ }
191
+ return info.get(feature_set, {'description': 'Unknown feature set', 'num_features': 'Unknown'})
@@ -0,0 +1,3 @@
1
+ 0.7.22
2
+ October 10th, 2025
3
+ OpenSMILE Analysis
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: batchalign
3
- Version: 0.7.21.post10
3
+ Version: 0.7.22
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -40,6 +40,7 @@ Requires-Dist: googletrans
40
40
  Requires-Dist: openai-whisper
41
41
  Requires-Dist: llvmlite>=0.44.0
42
42
  Requires-Dist: praat-parselmouth==0.4.6
43
+ Requires-Dist: opensmile>=2.3.0
43
44
  Requires-Dist: pyannote.audio
44
45
  Requires-Dist: onnxruntime
45
46
  Provides-Extra: dev
@@ -87,6 +87,8 @@ batchalign/pipelines/morphosyntax/fr/apm.py
87
87
  batchalign/pipelines/morphosyntax/fr/apmn.py
88
88
  batchalign/pipelines/morphosyntax/fr/case.py
89
89
  batchalign/pipelines/morphosyntax/ja/verbforms.py
90
+ batchalign/pipelines/opensmile/__init__.py
91
+ batchalign/pipelines/opensmile/engine.py
90
92
  batchalign/pipelines/speaker/__init__.py
91
93
  batchalign/pipelines/speaker/nemo_speaker.py
92
94
  batchalign/pipelines/translate/__init__.py
@@ -30,6 +30,7 @@ googletrans
30
30
  openai-whisper
31
31
  llvmlite>=0.44.0
32
32
  praat-parselmouth==0.4.6
33
+ opensmile>=2.3.0
33
34
  pyannote.audio
34
35
  onnxruntime
35
36
 
@@ -61,7 +61,8 @@ setup(
61
61
  "googletrans",
62
62
  "openai-whisper",
63
63
  "llvmlite>=0.44.0",
64
- "praat-parselmouth==0.4.6", # Added parselmouth for AVQI
64
+ "praat-parselmouth==0.4.6",
65
+ "opensmile>=2.3.0",
65
66
  "pyannote.audio",
66
67
  "onnxruntime"
67
68
  ],
@@ -1,3 +0,0 @@
1
- 0.7.21-post.10
2
- October 1st, 2025
3
- Count initalisms only when its caps
File without changes