BatchalignHK 0.7.22.post10__tar.gz → 0.7.22.post12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/BatchalignHK.egg-info/PKG-INFO +2 -1
  2. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/BatchalignHK.egg-info/SOURCES.txt +2 -0
  3. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/BatchalignHK.egg-info/requires.txt +1 -0
  4. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/PKG-INFO +2 -1
  5. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/cli/cli.py +34 -1
  6. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/cli/dispatch.py +1 -0
  7. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/__init__.py +1 -0
  8. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/asr/utils.py +16 -15
  9. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/dispatch.py +13 -1
  10. batchalignhk-0.7.22.post12/batchalign/pipelines/opensmile/__init__.py +7 -0
  11. batchalignhk-0.7.22.post12/batchalign/pipelines/opensmile/engine.py +191 -0
  12. batchalignhk-0.7.22.post12/batchalign/version +3 -0
  13. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/setup.py +2 -1
  14. batchalignhk-0.7.22.post10/batchalign/version +0 -3
  15. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/BatchalignHK.egg-info/dependency_links.txt +0 -0
  16. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/BatchalignHK.egg-info/entry_points.txt +0 -0
  17. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/BatchalignHK.egg-info/top_level.txt +0 -0
  18. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/LICENSE +0 -0
  19. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/MANIFEST.in +0 -0
  20. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/README.md +0 -0
  21. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/__init__.py +0 -0
  22. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/__main__.py +0 -0
  23. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/cli/__init__.py +0 -0
  24. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/constants.py +0 -0
  25. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/document.py +0 -0
  26. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/errors.py +0 -0
  27. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/__init__.py +0 -0
  28. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/core.py +0 -0
  29. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/exception.py +0 -0
  30. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/logging.py +0 -0
  31. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/realtime_meeting.py +0 -0
  32. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/speech_recognizer.py +0 -0
  33. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/speech_synthesizer.py +0 -0
  34. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/speech_transcriber.py +0 -0
  35. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/stream_input_tts.py +0 -0
  36. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/token.py +0 -0
  37. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/util.py +0 -0
  38. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/version.py +0 -0
  39. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/websocket/__init__.py +0 -0
  40. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/websocket/_abnf.py +0 -0
  41. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/websocket/_app.py +0 -0
  42. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/websocket/_cookiejar.py +0 -0
  43. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/websocket/_core.py +0 -0
  44. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/websocket/_exceptions.py +0 -0
  45. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/websocket/_handshake.py +0 -0
  46. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/websocket/_http.py +0 -0
  47. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/websocket/_logging.py +0 -0
  48. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/websocket/_socket.py +0 -0
  49. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/websocket/_ssl_compat.py +0 -0
  50. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/websocket/_url.py +0 -0
  51. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/websocket/_utils.py +0 -0
  52. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/websocket/tests/__init__.py +0 -0
  53. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/websocket/tests/echo-server.py +0 -0
  54. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/websocket/tests/test_abnf.py +0 -0
  55. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/websocket/tests/test_app.py +0 -0
  56. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/websocket/tests/test_cookiejar.py +0 -0
  57. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/websocket/tests/test_http.py +0 -0
  58. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/websocket/tests/test_url.py +0 -0
  59. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/extern/nls/websocket/tests/test_websocket.py +0 -0
  60. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/formats/__init__.py +0 -0
  61. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/formats/base.py +0 -0
  62. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/formats/chat/__init__.py +0 -0
  63. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/formats/chat/file.py +0 -0
  64. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/formats/chat/generator.py +0 -0
  65. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/formats/chat/lexer.py +0 -0
  66. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/formats/chat/parser.py +0 -0
  67. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/formats/chat/utils.py +0 -0
  68. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/formats/textgrid/__init__.py +0 -0
  69. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/formats/textgrid/file.py +0 -0
  70. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/formats/textgrid/generator.py +0 -0
  71. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/formats/textgrid/parser.py +0 -0
  72. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/models/__init__.py +0 -0
  73. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/models/resolve.py +0 -0
  74. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/models/speaker/__init__.py +0 -0
  75. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/models/speaker/config.yaml +0 -0
  76. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/models/speaker/infer.py +0 -0
  77. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/models/speaker/utils.py +0 -0
  78. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/models/training/__init__.py +0 -0
  79. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/models/training/run.py +0 -0
  80. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/models/training/utils.py +0 -0
  81. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/models/utils.py +0 -0
  82. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/models/utterance/__init__.py +0 -0
  83. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/models/utterance/cantonese_infer.py +0 -0
  84. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/models/utterance/dataset.py +0 -0
  85. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/models/utterance/execute.py +0 -0
  86. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/models/utterance/infer.py +0 -0
  87. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/models/utterance/prep.py +0 -0
  88. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/models/utterance/train.py +0 -0
  89. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/models/wave2vec/__init__.py +0 -0
  90. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/models/wave2vec/infer_fa.py +0 -0
  91. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/models/whisper/__init__.py +0 -0
  92. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/models/whisper/infer_asr.py +0 -0
  93. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/models/whisper/infer_fa.py +0 -0
  94. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/analysis/__init__.py +0 -0
  95. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/analysis/eval.py +0 -0
  96. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/asr/__init__.py +0 -0
  97. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/asr/aliyun.py +0 -0
  98. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/asr/funaudio.py +0 -0
  99. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/asr/num2chinese.py +0 -0
  100. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/asr/oai_whisper.py +0 -0
  101. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/asr/rev.py +0 -0
  102. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/asr/tencent.py +0 -0
  103. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/asr/whisper.py +0 -0
  104. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/asr/whisperx.py +0 -0
  105. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/avqi/__init__.py +0 -0
  106. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/avqi/engine.py +0 -0
  107. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/base.py +0 -0
  108. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/cleanup/__init__.py +0 -0
  109. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  110. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  111. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  112. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/cleanup/retrace.py +0 -0
  113. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  114. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  115. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/cleanup/support/test.test +0 -0
  116. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/diarization/__init__.py +0 -0
  117. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/diarization/pyannote.py +0 -0
  118. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/fa/__init__.py +0 -0
  119. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  120. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  121. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  122. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  123. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  124. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  125. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  126. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  127. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  128. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  129. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/pipeline.py +0 -0
  130. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/speaker/__init__.py +0 -0
  131. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  132. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/translate/__init__.py +0 -0
  133. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/translate/gtrans.py +0 -0
  134. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/translate/seamless.py +0 -0
  135. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/translate/utils.py +0 -0
  136. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/utr/__init__.py +0 -0
  137. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/utr/funaudio_utr.py +0 -0
  138. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/utr/rev_utr.py +0 -0
  139. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/utr/tencent_utr.py +0 -0
  140. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/utr/utils.py +0 -0
  141. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  142. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/utterance/__init__.py +0 -0
  143. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  144. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/tests/__init__.py +0 -0
  145. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/tests/conftest.py +0 -0
  146. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  147. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  148. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  149. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  150. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  151. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  152. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  153. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  154. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  155. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  156. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  157. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  158. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/tests/pipelines/fixures.py +0 -0
  159. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  160. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  161. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/tests/test_document.py +0 -0
  162. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/utils/__init__.py +0 -0
  163. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/utils/abbrev.py +0 -0
  164. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/utils/compounds.py +0 -0
  165. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/utils/config.py +0 -0
  166. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/utils/dp.py +0 -0
  167. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/utils/names.py +0 -0
  168. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/batchalign/utils/utils.py +0 -0
  169. {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post12}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: BatchalignHK
3
- Version: 0.7.22.post10
3
+ Version: 0.7.22.post12
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -48,6 +48,7 @@ Requires-Dist: cos-python-sdk-v5
48
48
  Requires-Dist: openai-whisper
49
49
  Requires-Dist: llvmlite>=0.44.0
50
50
  Requires-Dist: praat-parselmouth==0.4.6
51
+ Requires-Dist: opensmile>=2.3.0
51
52
  Requires-Dist: pyannote.audio
52
53
  Requires-Dist: onnxruntime
53
54
  Provides-Extra: dev
@@ -123,6 +123,8 @@ batchalign/pipelines/morphosyntax/fr/apm.py
123
123
  batchalign/pipelines/morphosyntax/fr/apmn.py
124
124
  batchalign/pipelines/morphosyntax/fr/case.py
125
125
  batchalign/pipelines/morphosyntax/ja/verbforms.py
126
+ batchalign/pipelines/opensmile/__init__.py
127
+ batchalign/pipelines/opensmile/engine.py
126
128
  batchalign/pipelines/speaker/__init__.py
127
129
  batchalign/pipelines/speaker/nemo_speaker.py
128
130
  batchalign/pipelines/translate/__init__.py
@@ -38,6 +38,7 @@ cos-python-sdk-v5
38
38
  openai-whisper
39
39
  llvmlite>=0.44.0
40
40
  praat-parselmouth==0.4.6
41
+ opensmile>=2.3.0
41
42
  pyannote.audio
42
43
  onnxruntime
43
44
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: BatchalignHK
3
- Version: 0.7.22.post10
3
+ Version: 0.7.22.post12
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -48,6 +48,7 @@ Requires-Dist: cos-python-sdk-v5
48
48
  Requires-Dist: openai-whisper
49
49
  Requires-Dist: llvmlite>=0.44.0
50
50
  Requires-Dist: praat-parselmouth==0.4.6
51
+ Requires-Dist: opensmile>=2.3.0
51
52
  Requires-Dist: pyannote.audio
52
53
  Requires-Dist: onnxruntime
53
54
  Provides-Extra: dev
@@ -425,7 +425,41 @@ def avqi(ctx, cs_file, sv_file, lang, **kwargs):
425
425
  import traceback
426
426
  C.print(traceback.format_exc())
427
427
 
428
+ #################### OPENSMILE ################################
428
429
 
430
+ @batchalign.command()
431
+ @click.argument("input_dir", type=click.Path(exists=True, file_okay=False))
432
+ @click.argument("output_dir", type=click.Path(exists=True, file_okay=False))
433
+ @click.option("--feature-set",
434
+ type=click.Choice(['eGeMAPSv02', 'eGeMAPSv01b', 'GeMAPSv01b', 'ComParE_2016']),
435
+ default='eGeMAPSv02',
436
+ help="Feature set to extract")
437
+ @click.option("--lang",
438
+ help="sample language in three-letter ISO 3166-1 alpha-3 code",
439
+ show_default=True, default="eng", type=str)
440
+ @click.pass_context
441
+ def opensmile(ctx, input_dir, output_dir, feature_set, lang, **kwargs):
442
+ """Extract openSMILE audio features from speech samples."""
443
+
444
+ def loader(file):
445
+ doc = Document.new(media_path=file, lang=lang)
446
+ return doc, {"feature_set": feature_set}
447
+
448
+ def writer(results, output):
449
+ if results.get('success', False):
450
+ output_csv = Path(output).with_suffix('.opensmile.csv')
451
+ features_df = results.get('features_df')
452
+ if features_df is not None:
453
+ features_df.to_csv(output_csv, header=['value'], index_label='feature')
454
+ else:
455
+ error_file = Path(output).with_suffix('.error.txt')
456
+ with open(error_file, 'w') as f:
457
+ f.write(f"OpenSMILE extraction failed: {results.get('error', 'Unknown error')}\n")
458
+
459
+ _dispatch("opensmile", lang, 1, ["mp3", "mp4", "wav"], ctx,
460
+ input_dir, output_dir,
461
+ loader, writer, C, **kwargs)
462
+
429
463
  #################### SETUP ################################
430
464
 
431
465
  @batchalign.command()
@@ -447,4 +481,3 @@ def version(ctx, **kwargs):
447
481
  f"[italic]{RELEASE_NOTES.strip()}[/italic]"+"\n" +
448
482
  "\nDeveloped by Brian MacWhinney and Houjun Liu")
449
483
  C.print("\n\n"+ptr+"\n\n")
450
-
@@ -50,6 +50,7 @@ Cmd2Task = {
50
50
  "utseg": "utterance",
51
51
  "coref": "coref",
52
52
  "translate": "translate",
53
+ "opensmile": "opensmile",
53
54
  }
54
55
 
55
56
  # this is the main runner used by all functions
@@ -17,3 +17,4 @@ from .translate import SeamlessTranslationModel, GoogleTranslateEngine
17
17
  from .avqi import AVQIEngine
18
18
 
19
19
  from .diarization import PyannoteEngine
20
+ from .opensmile import OpenSMILEEngine
@@ -60,10 +60,11 @@ def retokenize(intermediate_output):
60
60
  word = word.replace("。", ".")
61
61
  word = word.replace("¿", " ").replace("¡", " ")
62
62
  tmp.append((word, bullet))
63
- if len(word) > 0 and (word in ENDING_PUNCT or word[-1] in ENDING_PUNCT):
64
- if word in ENDING_PUNCT:
63
+ if len(word) > 0 and (word in ENDING_PUNCT+["؟", "۔", "،", "؛"]
64
+ or word[-1] in ENDING_PUNCT+["؟", "۔", "،", "؛"]):
65
+ if word in ENDING_PUNCT+["؟", "۔", "،", "؛"]:
65
66
  final_outputs.append((speaker, tmp))
66
- elif word[-1] in ENDING_PUNCT:
67
+ elif word[-1] in ENDING_PUNCT+["؟", "۔", "،", "؛"]:
67
68
  # we want to seperate the ending punct out
68
69
  final, time = tmp.pop(-1)
69
70
  tmp.append((final[:-1], time))
@@ -102,7 +103,7 @@ def retokenize_with_engine(intermediate_output, engine):
102
103
  # because we are using an utterance engine, we need
103
104
  # to get rid of all the preexisting punctuation
104
105
  for i in utterance:
105
- for j in MOR_PUNCT+ENDING_PUNCT:
106
+ for j in MOR_PUNCT+ENDING_PUNCT+["؟", "۔", "،", "؛"]:
106
107
  i[0] = i[0].strip(j).lower()
107
108
 
108
109
  # remove everything that's now blank
@@ -118,7 +119,7 @@ def retokenize_with_engine(intermediate_output, engine):
118
119
  # align the utterance against original splits and generate final outputs
119
120
  for i in split:
120
121
  # Check if the split has ending punctuation
121
- if i[-1] in ENDING_PUNCT:
122
+ if i[-1] in ENDING_PUNCT+["؟", "۔", "،", "؛"]:
122
123
  new_ut, delim = (i[:-1].split(" "), i[-1])
123
124
  else:
124
125
  new_ut, delim = (i.split(" "), ".")
@@ -273,16 +274,8 @@ def process_generation(output, lang="eng", utterance_engine=None):
273
274
  seen_word = False
274
275
  if word.strip() == "":
275
276
  continue
276
- if word not in ENDING_PUNCT+MOR_PUNCT:
277
+ if word not in ENDING_PUNCT+MOR_PUNCT+["؟", "۔", "،", "؛"]:
277
278
  word_replaced = word
278
- if word_replaced.strip() == "؟":
279
- word_replaced = "?"
280
- elif word_replaced.strip() == "۔":
281
- word_replaced = "."
282
- elif word_replaced.strip() == "،":
283
- word_replaced = ","
284
- elif word_replaced.strip() == "؛":
285
- word_replaced = ";"
286
279
 
287
280
  if start == None or end == None:
288
281
  words.append(Form(text=word_replaced, time=None))
@@ -290,7 +283,15 @@ def process_generation(output, lang="eng", utterance_engine=None):
290
283
  seen_word = True
291
284
  words.append(Form(text=word_replaced, time=(int(start), int(end))))
292
285
  else:
293
- words.append(Form(text=word, time=None))
286
+ if word.strip() == "؟":
287
+ word = "?"
288
+ elif word.strip() == "۔":
289
+ word = "."
290
+ elif word.strip() == "،":
291
+ word = ","
292
+ elif word.strip() == "؛":
293
+ word = ";"
294
+ words.append(Form(text=word, time=None))
294
295
 
295
296
  final_utterances.append(Utterance(
296
297
  tier=participant,
@@ -33,6 +33,7 @@ DEFAULT_PACKAGES = {
33
33
  "utterance": "stanza_utt",
34
34
  "coref": "stanza_coref",
35
35
  "translate": "gtrans",
36
+ "opensmile": "opensmile_egemaps",
36
37
  }
37
38
 
38
39
  LANGUAGE_OVERRIDE_PACKAGES = {
@@ -152,8 +153,19 @@ def dispatch_pipeline(pkg_str, lang, num_speakers=None, **arg_overrides):
152
153
  engines.append(FunAudioUTREngine(lang=lang))
153
154
  elif engine == "pyannote":
154
155
  engines.append(PyannoteEngine())
156
+ elif engine == "opensmile_egemaps":
157
+ from batchalign.pipelines.opensmile import OpenSMILEEngine
158
+ engines.append(OpenSMILEEngine(feature_set='eGeMAPSv02'))
159
+ elif engine == "opensmile_gemaps":
160
+ from batchalign.pipelines.opensmile import OpenSMILEEngine
161
+ engines.append(OpenSMILEEngine(feature_set='GeMAPSv01b'))
162
+ elif engine == "opensmile_compare":
163
+ from batchalign.pipelines.opensmile import OpenSMILEEngine
164
+ engines.append(OpenSMILEEngine(feature_set='ComParE_2016'))
165
+ elif engine == "opensmile_eGeMAPSv01b":
166
+ from batchalign.pipelines.opensmile import OpenSMILEEngine
167
+ engines.append(OpenSMILEEngine(feature_set='eGeMAPSv01b'))
155
168
 
156
169
 
157
170
  L.debug(f"Done initalizing packages.")
158
171
  return BatchalignPipeline(*engines)
159
-
@@ -0,0 +1,7 @@
1
+ """
2
+ OpenSMILE Pipeline Module
3
+ """
4
+
5
+ from .engine import OpenSMILEEngine
6
+
7
+ __all__ = ['OpenSMILEEngine']
@@ -0,0 +1,191 @@
1
+ """
2
+ OpenSMILE Engine for Batchalign2 - M1 Mac Compatible Version
3
+ Audio feature extraction using the openSMILE toolkit
4
+ """
5
+
6
+ import opensmile
7
+ import pandas as pd
8
+ from pathlib import Path
9
+ import logging
10
+ from typing import Dict, Optional
11
+ import platform
12
+
13
+ from batchalign.pipelines.base import BatchalignEngine
14
+ from batchalign.document import Task, TaskType, Document
15
+
16
+ L = logging.getLogger('batchalign')
17
+
18
+ class OpenSMILEEngine(BatchalignEngine):
19
+ """Engine for extracting openSMILE audio features."""
20
+
21
+ def __init__(self, feature_set: str = 'eGeMAPSv02',
22
+ feature_level: str = 'functionals'):
23
+ super().__init__()
24
+ self._tasks = [Task.FEATURE_EXTRACT]
25
+
26
+ self.feature_set = feature_set
27
+ self.feature_level = feature_level
28
+
29
+ self.is_m1_mac = (platform.system() == 'Darwin' and
30
+ platform.processor() == 'arm')
31
+
32
+ try:
33
+ if self.is_m1_mac:
34
+ L.info("M1 Mac detected - using default openSMILE configuration")
35
+ self.smile = opensmile.Smile()
36
+ self._requested_feature_set = feature_set
37
+ else:
38
+ self.smile = opensmile.Smile(
39
+ feature_set=feature_set,
40
+ feature_level=feature_level,
41
+ )
42
+ L.debug(f"OpenSMILE initialized (M1 compatibility mode: {self.is_m1_mac})")
43
+ except Exception as e:
44
+ L.error(f"Failed to initialize openSMILE: {e}")
45
+ raise
46
+
47
+ @property
48
+ def tasks(self):
49
+ return self._tasks
50
+
51
+ def analyze(self, doc: Document, feature_set: str = None, **kwargs) -> Dict:
52
+ """
53
+ Extract openSMILE features from Document.
54
+
55
+ Args:
56
+ doc: Document with media attached
57
+ feature_set: Feature set to use (ignored on M1 Mac)
58
+ **kwargs: Additional arguments
59
+
60
+ Returns:
61
+ Dictionary with extraction results and metadata
62
+ """
63
+
64
+ if not doc.media or not doc.media.url:
65
+ return {
66
+ 'error': 'Document has no media attached',
67
+ 'success': False
68
+ }
69
+
70
+ actual_audio_path = doc.media.url
71
+
72
+ if feature_set and feature_set != self.feature_set:
73
+ if self.is_m1_mac:
74
+ L.warning(f"Feature set switching not supported on M1 Mac - using default features instead of {feature_set}")
75
+ else:
76
+ L.info(f"Switching feature set from {self.feature_set} to {feature_set}")
77
+ try:
78
+ self.feature_set = feature_set
79
+ self.smile = opensmile.Smile(
80
+ feature_set=feature_set,
81
+ feature_level=self.feature_level,
82
+ )
83
+ except Exception as e:
84
+ L.error(f"Failed to switch to feature set {feature_set}: {e}")
85
+ return {
86
+ 'feature_set': self.feature_set,
87
+ 'num_features': 0,
88
+ 'error': f"Feature set switch failed: {str(e)}",
89
+ 'success': False
90
+ }
91
+
92
+ try:
93
+ L.info(f"Extracting features from: {Path(actual_audio_path).name}")
94
+ if self.is_m1_mac:
95
+ L.info("Using M1-compatible default feature set (eGeMAPSv02 equivalent)")
96
+ else:
97
+ L.info(f"Using {self.feature_set} feature set")
98
+
99
+ features_df = self.smile.process_file(actual_audio_path)
100
+
101
+ if features_df is None or features_df.empty:
102
+ raise ValueError("Feature extraction returned empty results")
103
+
104
+ results_df = features_df.T
105
+
106
+ num_features = len(features_df.columns)
107
+ duration_segments = len(features_df)
108
+
109
+ first_row_features = {}
110
+ if duration_segments > 0:
111
+ first_row_features = features_df.iloc[0].to_dict()
112
+
113
+ actual_feature_set = self.feature_set
114
+ if self.is_m1_mac:
115
+ actual_feature_set = "M1-default (eGeMAPSv02-like)"
116
+
117
+ results = {
118
+ 'feature_set': actual_feature_set,
119
+ 'feature_level': self.feature_level,
120
+ 'num_features': num_features,
121
+ 'duration_segments': duration_segments,
122
+ 'audio_file': str(actual_audio_path),
123
+ 'features_sample': first_row_features,
124
+ 'success': True,
125
+ 'm1_compatibility_mode': self.is_m1_mac,
126
+ 'features_df': results_df,
127
+ }
128
+
129
+ if self.is_m1_mac and hasattr(self, '_requested_feature_set'):
130
+ results['requested_feature_set'] = self._requested_feature_set
131
+ results['warning'] = f"M1 Mac compatibility: used default features instead of {self._requested_feature_set}"
132
+
133
+ L.info(f"Successfully extracted {num_features} features from {duration_segments} segments")
134
+ return results
135
+
136
+ except Exception as e:
137
+ L.error(f"Error extracting openSMILE features from {actual_audio_path}: {e}")
138
+ return {
139
+ 'feature_set': self.feature_set,
140
+ 'feature_level': self.feature_level,
141
+ 'num_features': 0,
142
+ 'duration_segments': 0,
143
+ 'audio_file': str(actual_audio_path),
144
+ 'error': str(e),
145
+ 'success': False,
146
+ 'm1_compatibility_mode': self.is_m1_mac
147
+ }
148
+
149
+ def get_available_feature_sets(self) -> list:
150
+ """Return list of available feature sets (limited on M1 Mac)."""
151
+ if self.is_m1_mac:
152
+ return ['M1-default (eGeMAPSv02-like)']
153
+ return [
154
+ 'eGeMAPSv02',
155
+ 'eGeMAPSv01b',
156
+ 'GeMAPSv01b',
157
+ 'ComParE_2016'
158
+ ]
159
+
160
+ def get_feature_set_info(self, feature_set: str) -> dict:
161
+ """Get information about a specific feature set."""
162
+ if self.is_m1_mac:
163
+ return {
164
+ 'description': 'M1 Mac compatible default feature set (similar to eGeMAPSv02)',
165
+ 'num_features': 'Variable',
166
+ 'recommended_for': 'General audio analysis on Apple Silicon'
167
+ }
168
+
169
+ info = {
170
+ 'eGeMAPSv02': {
171
+ 'description': 'Extended Geneva Minimalistic Acoustic Parameter Set v02',
172
+ 'num_features': 88,
173
+ 'recommended_for': 'General emotion and paralinguistic analysis'
174
+ },
175
+ 'eGeMAPSv01b': {
176
+ 'description': 'Extended Geneva Minimalistic Acoustic Parameter Set v01b',
177
+ 'num_features': 88,
178
+ 'recommended_for': 'Emotion recognition, clinical assessment'
179
+ },
180
+ 'GeMAPSv01b': {
181
+ 'description': 'Geneva Minimalistic Acoustic Parameter Set v01b',
182
+ 'num_features': 62,
183
+ 'recommended_for': 'Basic paralinguistic analysis'
184
+ },
185
+ 'ComParE_2016': {
186
+ 'description': 'Computational Paralinguistics Challenge 2016 feature set',
187
+ 'num_features': 6373,
188
+ 'recommended_for': 'Comprehensive analysis (large feature space)'
189
+ }
190
+ }
191
+ return info.get(feature_set, {'description': 'Unknown feature set', 'num_features': 'Unknown'})
@@ -0,0 +1,3 @@
1
+ 0.7.22-post.12
2
+ October 10th, 2025
3
+ OpenSMILE Analysis
@@ -69,7 +69,8 @@ setup(
69
69
  "cos-python-sdk-v5",
70
70
  "openai-whisper",
71
71
  "llvmlite>=0.44.0",
72
- "praat-parselmouth==0.4.6", # Added parselmouth for AVQI
72
+ "praat-parselmouth==0.4.6",
73
+ "opensmile>=2.3.0",
73
74
  "pyannote.audio",
74
75
  "onnxruntime"
75
76
  ],
@@ -1,3 +0,0 @@
1
- 0.7.22-post.10
2
- October 1st, 2025
3
- Count initalisms only when its caps