BatchalignHK 0.8.0.post1__tar.gz → 0.8.0.post3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/BatchalignHK.egg-info/PKG-INFO +1 -1
  2. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/PKG-INFO +1 -1
  3. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/cli/dispatch.py +267 -16
  4. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/asr/utils.py +6 -2
  5. batchalignhk-0.8.0.post3/batchalign/version +3 -0
  6. batchalignhk-0.8.0.post1/batchalign/version +0 -3
  7. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/BatchalignHK.egg-info/SOURCES.txt +0 -0
  8. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/BatchalignHK.egg-info/dependency_links.txt +0 -0
  9. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/BatchalignHK.egg-info/entry_points.txt +0 -0
  10. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/BatchalignHK.egg-info/requires.txt +0 -0
  11. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/BatchalignHK.egg-info/top_level.txt +0 -0
  12. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/LICENSE +0 -0
  13. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/MANIFEST.in +0 -0
  14. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/README.md +0 -0
  15. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/__init__.py +0 -0
  16. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/__main__.py +0 -0
  17. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/cli/__init__.py +0 -0
  18. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/cli/cli.py +0 -0
  19. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/constants.py +0 -0
  20. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/document.py +0 -0
  21. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/errors.py +0 -0
  22. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/__init__.py +0 -0
  23. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/core.py +0 -0
  24. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/exception.py +0 -0
  25. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/logging.py +0 -0
  26. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/realtime_meeting.py +0 -0
  27. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/speech_recognizer.py +0 -0
  28. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/speech_synthesizer.py +0 -0
  29. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/speech_transcriber.py +0 -0
  30. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/stream_input_tts.py +0 -0
  31. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/token.py +0 -0
  32. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/util.py +0 -0
  33. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/version.py +0 -0
  34. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/websocket/__init__.py +0 -0
  35. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/websocket/_abnf.py +0 -0
  36. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/websocket/_app.py +0 -0
  37. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/websocket/_cookiejar.py +0 -0
  38. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/websocket/_core.py +0 -0
  39. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/websocket/_exceptions.py +0 -0
  40. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/websocket/_handshake.py +0 -0
  41. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/websocket/_http.py +0 -0
  42. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/websocket/_logging.py +0 -0
  43. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/websocket/_socket.py +0 -0
  44. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/websocket/_ssl_compat.py +0 -0
  45. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/websocket/_url.py +0 -0
  46. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/websocket/_utils.py +0 -0
  47. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/websocket/tests/__init__.py +0 -0
  48. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/websocket/tests/echo-server.py +0 -0
  49. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/websocket/tests/test_abnf.py +0 -0
  50. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/websocket/tests/test_app.py +0 -0
  51. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/websocket/tests/test_cookiejar.py +0 -0
  52. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/websocket/tests/test_http.py +0 -0
  53. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/websocket/tests/test_url.py +0 -0
  54. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/extern/nls/websocket/tests/test_websocket.py +0 -0
  55. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/formats/__init__.py +0 -0
  56. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/formats/base.py +0 -0
  57. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/formats/chat/__init__.py +0 -0
  58. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/formats/chat/file.py +0 -0
  59. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/formats/chat/generator.py +0 -0
  60. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/formats/chat/lexer.py +0 -0
  61. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/formats/chat/parser.py +0 -0
  62. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/formats/chat/utils.py +0 -0
  63. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/formats/textgrid/__init__.py +0 -0
  64. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/formats/textgrid/file.py +0 -0
  65. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/formats/textgrid/generator.py +0 -0
  66. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/formats/textgrid/parser.py +0 -0
  67. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/models/__init__.py +0 -0
  68. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/models/resolve.py +0 -0
  69. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/models/speaker/__init__.py +0 -0
  70. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/models/speaker/config.yaml +0 -0
  71. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/models/speaker/infer.py +0 -0
  72. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/models/speaker/utils.py +0 -0
  73. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/models/training/__init__.py +0 -0
  74. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/models/training/run.py +0 -0
  75. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/models/training/utils.py +0 -0
  76. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/models/utils.py +0 -0
  77. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/models/utterance/__init__.py +0 -0
  78. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/models/utterance/cantonese_infer.py +0 -0
  79. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/models/utterance/dataset.py +0 -0
  80. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/models/utterance/execute.py +0 -0
  81. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/models/utterance/infer.py +0 -0
  82. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/models/utterance/prep.py +0 -0
  83. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/models/utterance/train.py +0 -0
  84. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/models/wave2vec/__init__.py +0 -0
  85. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/models/wave2vec/infer_fa.py +0 -0
  86. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/models/whisper/__init__.py +0 -0
  87. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/models/whisper/infer_asr.py +0 -0
  88. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/models/whisper/infer_fa.py +0 -0
  89. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/__init__.py +0 -0
  90. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/analysis/__init__.py +0 -0
  91. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/analysis/eval.py +0 -0
  92. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/asr/__init__.py +0 -0
  93. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/asr/aliyun.py +0 -0
  94. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/asr/funaudio.py +0 -0
  95. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/asr/num2chinese.py +0 -0
  96. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/asr/num2lang/__init__.py +0 -0
  97. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/asr/num2lang/deu.py +0 -0
  98. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/asr/num2lang/ell.py +0 -0
  99. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/asr/num2lang/eng.py +0 -0
  100. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/asr/num2lang/eus.py +0 -0
  101. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/asr/num2lang/fra.py +0 -0
  102. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/asr/num2lang/hrv.py +0 -0
  103. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/asr/num2lang/ind.py +0 -0
  104. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/asr/num2lang/jpn.py +0 -0
  105. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/asr/num2lang/nld.py +0 -0
  106. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/asr/num2lang/por.py +0 -0
  107. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/asr/num2lang/spa.py +0 -0
  108. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/asr/num2lang/tha.py +0 -0
  109. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/asr/oai_whisper.py +0 -0
  110. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/asr/rev.py +0 -0
  111. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/asr/tencent.py +0 -0
  112. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/asr/whisper.py +0 -0
  113. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/asr/whisperx.py +0 -0
  114. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/avqi/__init__.py +0 -0
  115. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/avqi/engine.py +0 -0
  116. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/base.py +0 -0
  117. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/cleanup/__init__.py +0 -0
  118. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  119. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  120. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  121. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/cleanup/retrace.py +0 -0
  122. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  123. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  124. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/cleanup/support/test.test +0 -0
  125. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/diarization/__init__.py +0 -0
  126. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/diarization/pyannote.py +0 -0
  127. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/dispatch.py +0 -0
  128. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/fa/__init__.py +0 -0
  129. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/fa/iic_fa.py +0 -0
  130. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  131. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/fa/wave2vec_fa_canto.py +0 -0
  132. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  133. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  134. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  135. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  136. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  137. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  138. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  139. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  140. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  141. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/opensmile/__init__.py +0 -0
  142. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/opensmile/engine.py +0 -0
  143. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/pipeline.py +0 -0
  144. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/speaker/__init__.py +0 -0
  145. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  146. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/translate/__init__.py +0 -0
  147. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/translate/gtrans.py +0 -0
  148. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/translate/seamless.py +0 -0
  149. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/translate/utils.py +0 -0
  150. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/utr/__init__.py +0 -0
  151. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/utr/funaudio_utr.py +0 -0
  152. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/utr/rev_utr.py +0 -0
  153. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/utr/tencent_utr.py +0 -0
  154. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/utr/utils.py +0 -0
  155. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  156. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/utterance/__init__.py +0 -0
  157. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  158. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/tests/__init__.py +0 -0
  159. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/tests/conftest.py +0 -0
  160. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  161. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  162. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  163. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  164. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  165. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  166. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  167. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  168. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  169. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  170. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  171. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  172. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/tests/pipelines/fixures.py +0 -0
  173. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  174. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  175. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/tests/test_document.py +0 -0
  176. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/utils/__init__.py +0 -0
  177. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/utils/abbrev.py +0 -0
  178. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/utils/compounds.py +0 -0
  179. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/utils/config.py +0 -0
  180. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/utils/dp.py +0 -0
  181. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/utils/names.py +0 -0
  182. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/batchalign/utils/utils.py +0 -0
  183. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/setup.cfg +0 -0
  184. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post3}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: BatchalignHK
3
- Version: 0.8.0.post1
3
+ Version: 0.8.0.post3
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: BatchalignHK
3
- Version: 0.8.0.post1
3
+ Version: 0.8.0.post3
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -33,6 +33,7 @@ import time
33
33
  import traceback
34
34
  import logging as L
35
35
  baL = L.getLogger('batchalign')
36
+ import psutil
36
37
 
37
38
  warnings.filterwarnings('ignore', category=UserWarning, message='TypedStorage is deprecated')
38
39
 
@@ -56,6 +57,29 @@ def _worker_task(file_info, command, lang, num_speakers, loader_info, writer_inf
56
57
 
57
58
  file, output = file_info
58
59
  pid = os.getpid()
60
+ rss_start = None
61
+ rss_end = None
62
+ rss_peak = None
63
+
64
def _safe_rss():
    """Return the current resident set size of this worker, or None.

    Queries psutil for the process identified by the enclosing ``pid`` and
    reads the ``rss`` field of its memory info (bytes, per the psutil
    documentation). This is a best-effort probe: any failure, including
    psutil being unavailable, yields None instead of raising.
    """
    try:
        import psutil
        process = psutil.Process(pid)
        resident = process.memory_info().rss
    except Exception:
        # Memory reporting is diagnostic only; never let it fail the task.
        return None
    return resident
70
+
71
+ def _safe_peak_rss():
72
+ try:
73
+ import resource
74
+ peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
75
+ if peak is None:
76
+ return None
77
+ # ru_maxrss is KB on Linux, bytes on macOS; normalize to bytes.
78
+ return int(peak * 1024) if peak < 1024 * 1024 * 1024 else int(peak)
79
+ except Exception:
80
+ return None
81
+
82
+ rss_start = _safe_rss()
59
83
 
60
84
  # Configure logging in this worker process
61
85
  if verbose >= 1:
@@ -71,9 +95,8 @@ def _worker_task(file_info, command, lang, num_speakers, loader_info, writer_inf
71
95
  else:
72
96
  baL.setLevel(logging.DEBUG)
73
97
 
74
- # Only capture output if not in verbose mode
75
- # In verbose mode, let logs stream naturally to the console
76
- should_capture = verbose == 0
98
+ # Always capture output to avoid interleaving with progress rendering.
99
+ should_capture = True
77
100
 
78
101
  if should_capture:
79
102
  # Use a temporary file to capture ALL output at the FD level
@@ -135,8 +158,87 @@ def _worker_task(file_info, command, lang, num_speakers, loader_info, writer_inf
135
158
  doc = pipeline(doc, callback=progress_callback, **kw)
136
159
  CHATFile(doc=doc).write(output, write_wor=kwargs.get("wor", True))
137
160
 
161
+ elif command in ["transcribe", "transcribe_s"]:
162
+ from batchalign.document import CustomLine, CustomLineType
163
+ # For transcribe, the "loader" just passes the file path
164
+ doc = file
165
+
166
+ # Process through pipeline
167
+ doc = pipeline(doc, callback=progress_callback)
168
+
169
+ # Write output with ASR comment
170
+ asr = kwargs.get("asr", "rev")
171
+ with open(Path(__file__).parent.parent / "version", 'r') as df:
172
+ VERSION_NUMBER = df.readline().strip()
173
+ doc.content.insert(0, CustomLine(id="Comment", type=CustomLineType.INDEPENDENT,
174
+ content=f"Batchalign {VERSION_NUMBER}, ASR Engine {asr}. Unchecked output of ASR model."))
175
+ CHATFile(doc=doc).write(output
176
+ .replace(".wav", ".cha")
177
+ .replace(".WAV", ".cha")
178
+ .replace(".mp4", ".cha")
179
+ .replace(".MP4", ".cha")
180
+ .replace(".mp3", ".cha")
181
+ .replace(".MP3", ".cha"),
182
+ write_wor=kwargs.get("wor", False))
183
+
184
+ elif command == "translate":
185
+ cf = CHATFile(path=os.path.abspath(file), special_mor_=True)
186
+ doc = cf.doc
187
+ doc = pipeline(doc, callback=progress_callback)
188
+ CHATFile(doc=doc).write(output)
189
+
190
+ elif command == "utseg":
191
+ doc = CHATFile(path=os.path.abspath(file)).doc
192
+ doc = pipeline(doc, callback=progress_callback)
193
+ CHATFile(doc=doc).write(output)
194
+
195
+ elif command == "coref":
196
+ cf = CHATFile(path=os.path.abspath(file))
197
+ doc = cf.doc
198
+ doc = pipeline(doc, callback=progress_callback)
199
+ CHATFile(doc=doc).write(output)
200
+
201
+ elif command == "benchmark":
202
+ # Find gold transcript
203
+ from pathlib import Path as P
204
+ p = P(file)
205
+ cha = p.with_suffix(".cha")
206
+ if not cha.exists():
207
+ raise FileNotFoundError(f"No gold .cha transcript found for benchmarking. audio: {p.name}, desired cha: {cha.name}, looked in: {str(cha)}")
208
+
209
+ gold_doc = CHATFile(path=str(cha), special_mor_=True).doc
210
+ doc = pipeline(file, callback=progress_callback, gold=gold_doc)
211
+
212
+ # Write benchmark results
213
+ import os
214
+ os.remove(P(output).with_suffix(".cha"))
215
+ with open(P(output).with_suffix(".wer.txt"), 'w') as df:
216
+ df.write(str(doc["wer"]))
217
+ with open(P(output).with_suffix(".diff"), 'w') as df:
218
+ df.write(str(doc["diff"]))
219
+ CHATFile(doc=doc["doc"]).write(str(P(output).with_suffix(".asr.cha")),
220
+ write_wor=kwargs.get("wor", False))
221
+
222
+ elif command == "opensmile":
223
+ from batchalign.document import Document
224
+ doc = Document.new(media_path=file, lang=lang)
225
+ results = pipeline(doc, callback=progress_callback, feature_set=kwargs.get("feature_set", "eGeMAPSv02"))
226
+
227
+ # Write opensmile results
228
+ if results.get('success', False):
229
+ output_csv = Path(output).with_suffix('.opensmile.csv')
230
+ features_df = results.get('features_df')
231
+ if features_df is not None:
232
+ features_df.to_csv(output_csv, header=['value'], index_label='feature')
233
+ else:
234
+ error_file = Path(output).with_suffix('.error.txt')
235
+ with open(error_file, 'w') as f:
236
+ f.write(f"OpenSMILE extraction failed: {results.get('error', 'Unknown error')}\n")
237
+
138
238
  else:
139
239
  loader, writer = loader_info, writer_info
240
+ if loader is None or writer is None:
241
+ raise ValueError(f"Command '{command}' requires loader and writer functions, but they are None. This may indicate an unimplemented command or configuration issue.")
140
242
  doc = loader(os.path.abspath(file))
141
243
  kw = {}
142
244
  if isinstance(doc, tuple) and len(doc) > 1:
@@ -153,7 +255,15 @@ def _worker_task(file_info, command, lang, num_speakers, loader_info, writer_inf
153
255
  else:
154
256
  captured = ""
155
257
 
156
- return file, None, None, captured
258
+ rss_end = _safe_rss()
259
+ rss_peak = _safe_peak_rss()
260
+ mem_info = {
261
+ "pid": pid,
262
+ "rss_start": rss_start,
263
+ "rss_end": rss_end,
264
+ "rss_peak": rss_peak,
265
+ }
266
+ return file, None, None, captured, mem_info
157
267
  except Exception as e:
158
268
  # Flush and read captured output if we were capturing
159
269
  if should_capture:
@@ -163,7 +273,15 @@ def _worker_task(file_info, command, lang, num_speakers, loader_info, writer_inf
163
273
  captured = log_file.read()
164
274
  else:
165
275
  captured = ""
166
- return file, traceback.format_exc(), e, captured
276
+ rss_end = _safe_rss()
277
+ rss_peak = _safe_peak_rss()
278
+ mem_info = {
279
+ "pid": pid,
280
+ "rss_start": rss_start,
281
+ "rss_end": rss_end,
282
+ "rss_peak": rss_peak,
283
+ }
284
+ return file, traceback.format_exc(), e, captured, mem_info
167
285
  finally:
168
286
  # Restore original FDs only if we redirected them
169
287
  if should_capture:
@@ -281,6 +399,7 @@ def _dispatch(command, lang, num_speakers,
281
399
  file_pairs = list(zip(files, outputs))
282
400
  file_pairs.sort(key=lambda fo: os.path.getsize(fo[0]) if os.path.exists(fo[0]) else 0, reverse=True)
283
401
  files, outputs = zip(*file_pairs) if file_pairs else ([], [])
402
+ file_sizes = {f: os.path.getsize(f) if os.path.exists(f) else 0 for f in files}
284
403
 
285
404
  C.print(f"\nMode: [blue]{command}[/blue]; got [bold cyan]{len(files)}[/bold cyan] transcript{'s' if len(files) > 1 else ''} to process from {in_dir}:\n")
286
405
 
@@ -315,8 +434,66 @@ def _dispatch(command, lang, num_speakers,
315
434
  # create the spinner
316
435
  prog = Progress(SpinnerColumn(), *Progress.get_default_columns()[:-1],
317
436
  TimeElapsedColumn(),
318
- TextColumn("[cyan]{task.fields[processor]}[/cyan]"), console=C)
437
+ TextColumn("[magenta]{task.fields[mem]}[/magenta]"),
438
+ TextColumn("[cyan]{task.fields[processor]}[/cyan]"),
439
+ console=C, refresh_per_second=5)
319
440
  errors = []
441
+ mem_records = {}
442
+ mem_samples = []
443
+ last_low_mem_warn = 0.0
444
+
445
+ def _format_bytes(count, precision=2):
446
+ if count is None:
447
+ return "unknown"
448
+ units = ["B", "KB", "MB", "GB", "TB"]
449
+ idx = 0
450
+ size = float(count)
451
+ while size >= 1024 and idx < len(units) - 1:
452
+ size /= 1024
453
+ idx += 1
454
+ if idx == 0:
455
+ return f"{int(size)}{units[idx]}"
456
+ return f"{size:.{precision}f}{units[idx]}"
457
+
458
+ def _mem_label(base, available=None, low_mem=False):
459
+ parts = [base]
460
+ if available is not None:
461
+ parts.append(f"avail {_format_bytes(available, precision=1)}")
462
+ if low_mem:
463
+ parts.append("LOW MEM")
464
+ return " | ".join(parts)
465
+
466
+ def _system_memory():
467
+ try:
468
+ vm = psutil.virtual_memory()
469
+ return vm.total, vm.available
470
+ except Exception:
471
+ return None, None
472
+
473
+ def _memory_reserve(total):
474
+ if total is None:
475
+ return None
476
+ return max(int(total * 0.10), 2 * 1024 * 1024 * 1024)
477
+
478
def _estimate_worker_bytes(file_size):
    """Estimate the memory a worker will need for a file of ``file_size`` bytes.

    Uses the ``(file size, memory)`` pairs collected in the enclosing
    ``mem_samples`` list: the memory-per-input-byte ratio at the middle of
    the sorted ratios is scaled by ``file_size``. The result is clamped to
    the range 512 MiB to 6 GiB. With no usable samples the 512 MiB floor is
    returned.
    """
    floor = 512 * 1024 * 1024
    ceiling = 6 * 1024 * 1024 * 1024

    # Pairs with a zero/empty size or memory figure carry no ratio.
    observed = sorted(mem / size for size, mem in mem_samples if size and mem)
    if not observed:
        return floor

    typical = observed[len(observed) // 2]
    projected = int(typical * file_size)
    if projected > ceiling:
        projected = ceiling
    if projected < floor:
        projected = floor
    return projected
488
+
489
def _should_throttle(est_bytes):
    """Decide whether starting a worker needing ``est_bytes`` should wait.

    Returns:
        A ``(throttle, total, available)`` tuple. ``throttle`` is True when
        available memory minus ``est_bytes`` would fall below the reserve
        from ``_memory_reserve``. It is False whenever the memory figures
        or the reserve are unknown. ``total`` and ``available`` are the
        readings used for the decision.
    """
    total, available = _system_memory()

    reserve = None
    if total is not None and available is not None:
        reserve = _memory_reserve(total)

    over_budget = reserve is not None and (available - est_bytes) < reserve
    return over_budget, total, available
320
497
 
321
498
  try:
322
499
  with prog as prog:
@@ -324,8 +501,9 @@ def _dispatch(command, lang, num_speakers,
324
501
  task_totals = {}
325
502
 
326
503
  for f in files:
327
- tasks[f] = prog.add_task(Path(f).name, start=False, total=1, processor="Waiting...")
504
+ tasks[f] = prog.add_task(Path(f).name, start=False, total=1, processor="Waiting...", mem="queued")
328
505
  task_totals[f] = 1
506
+ prog.start_task(tasks[f])
329
507
 
330
508
  def drain_progress_queue():
331
509
  if not progress_queue:
@@ -341,27 +519,76 @@ def _dispatch(command, lang, num_speakers,
341
519
  continue
342
520
  task_total = max(int(total) if total else task_totals.get(file, 1), 1)
343
521
  task_totals[file] = task_total
522
+ total_mem, available_mem = _system_memory()
523
+ reserve = _memory_reserve(total_mem)
524
+ low_mem = False
525
+ if reserve is not None and available_mem is not None:
526
+ low_mem = available_mem < reserve
344
527
  prog.update(tasks[file],
345
528
  total=task_total,
346
529
  completed=min(int(completed), task_total),
347
- processor=render_stage(stage_tasks))
530
+ processor=render_stage(stage_tasks),
531
+ mem=_mem_label("running", available_mem, low_mem))
348
532
 
349
533
  with concurrent.futures.ProcessPoolExecutor(max_workers=num_workers) as executor:
350
534
  worker_func = partial(_worker_task,
351
535
  command=command,
352
536
  lang=lang,
353
537
  num_speakers=num_speakers,
354
- loader_info=None,
355
- writer_info=None,
538
+ loader_info=loader,
539
+ writer_info=writer,
356
540
  progress_queue=progress_queue,
357
541
  verbose=ctx.obj["verbose"],
358
542
  **kwargs)
359
543
 
360
- future_to_file = {executor.submit(worker_func, (f, o)): f for f, o in zip(files, outputs)}
544
+ file_iter = iter(zip(files, outputs))
545
+ future_to_file = {}
546
+
547
def submit_one(file_path, output_path):
    """Submit one file to the executor and mark its progress row as running.

    The new future is recorded in ``future_to_file``. The progress row for
    ``file_path`` is then updated with a "Processing..." status and a memory
    label holding the estimated worker footprint and current availability.
    """
    job = executor.submit(worker_func, (file_path, output_path))
    future_to_file[job] = file_path

    projected = _estimate_worker_bytes(file_sizes.get(file_path, 0))
    total_mem, available_mem = _system_memory()
    headroom = _memory_reserve(total_mem)
    # Low-memory status is only flagged when both figures are known.
    memory_is_low = (
        headroom is not None
        and available_mem is not None
        and available_mem < headroom
    )

    status = _mem_label(f"est {_format_bytes(projected)}", available_mem, memory_is_low)
    prog.update(tasks[file_path], processor="Processing...", mem=status)
361
561
 
362
- for f in files:
363
- prog.start_task(tasks[f])
364
- prog.update(tasks[f], processor="Processing...")
562
+ def schedule_available():
563
+ nonlocal last_low_mem_warn
564
+ while len(future_to_file) < num_workers:
565
+ try:
566
+ next_file, next_output = next(file_iter)
567
+ except StopIteration:
568
+ break
569
+ est_bytes = _estimate_worker_bytes(file_sizes.get(next_file, 0))
570
+ throttle, total, available = _should_throttle(est_bytes)
571
+ if throttle and future_to_file:
572
+ now = time.time()
573
+ if now - last_low_mem_warn > 10:
574
+ reserve = _memory_reserve(total)
575
+ prog.console.print(
576
+ f"[bold yellow]Low memory[/bold yellow]: "
577
+ f"{_format_bytes(available)} free, "
578
+ f"{_format_bytes(reserve)} reserve. "
579
+ f"Throttling new workers."
580
+ )
581
+ last_low_mem_warn = now
582
+ break
583
+ if throttle and not future_to_file:
584
+ prog.console.print(
585
+ f"[bold yellow]Low memory[/bold yellow]: "
586
+ f"{_format_bytes(available)} free. "
587
+ "Continuing with a single worker."
588
+ )
589
+ submit_one(next_file, next_output)
590
+
591
+ schedule_available()
365
592
 
366
593
  pending = set(future_to_file.keys())
367
594
  while pending:
@@ -374,8 +601,9 @@ def _dispatch(command, lang, num_speakers,
374
601
 
375
602
  for future in done:
376
603
  file = future_to_file[future]
604
+ future_to_file.pop(future, None)
377
605
  try:
378
- res_file, trcbk, e, captured = future.result()
606
+ res_file, trcbk, e, captured, mem_info = future.result()
379
607
  final_total = max(task_totals.get(file, 1), 1)
380
608
  if e:
381
609
  prog.update(tasks[file], total=final_total, completed=final_total, processor="[bold red]FAIL[/bold red]")
@@ -383,12 +611,25 @@ def _dispatch(command, lang, num_speakers,
383
611
  else:
384
612
  prog.update(tasks[file], total=final_total, completed=final_total, processor="[bold green]DONE[/bold green]")
385
613
  if ctx.obj["verbose"] >= 1 and captured.strip():
386
- errors.append((res_file, "Logs only (Success)", None, captured))
614
+ prog.console.print(f"[bold blue]INFO[/bold blue] on file [italic]{Path(file).name}[/italic]:\n{escape(captured.strip())}\n")
615
+ if mem_info:
616
+ mem_records[file] = mem_info
617
+ peak = mem_info.get("rss_peak") or mem_info.get("rss_end")
618
+ if peak:
619
+ mem_samples.append((file_sizes.get(file, 0), peak))
620
+ total_mem, available_mem = _system_memory()
621
+ reserve = _memory_reserve(total_mem)
622
+ low_mem = False
623
+ if reserve is not None and available_mem is not None:
624
+ low_mem = available_mem < reserve
625
+ prog.update(tasks[file], mem=_mem_label(_format_bytes(peak), available_mem, low_mem))
387
626
  except Exception as e:
388
627
  final_total = max(task_totals.get(file, 1), 1)
389
628
  prog.update(tasks[file], total=final_total, completed=final_total, processor="[bold red]FAIL[/bold red]")
390
629
  errors.append((file, traceback.format_exc(), e, ""))
391
630
 
631
+ schedule_available()
632
+ pending = set(future_to_file.keys())
392
633
  drain_progress_queue()
393
634
  finally:
394
635
  if manager:
@@ -412,6 +653,16 @@ def _dispatch(command, lang, num_speakers,
412
653
  else:
413
654
  C.print(f"\nAll done. Results saved to {out_dir}!\n")
414
655
 
656
+ if mem_records and ctx.obj["verbose"] >= 1:
657
+ C.print("\nMemory usage per file (worker RSS peak):")
658
+ for file, info in mem_records.items():
659
+ rel_path = os.path.relpath(str(Path(file).absolute()), in_dir)
660
+ peak = info.get("rss_peak") or info.get("rss_end")
661
+ C.print(f"- {rel_path}: {_format_bytes(peak)}")
662
+ total, available = _system_memory()
663
+ if total is not None and available is not None:
664
+ C.print(f"\nSystem memory available: {_format_bytes(available)} / {_format_bytes(total)}")
665
+
415
666
  if ctx.obj["verbose"] > 1:
416
667
  C.end_capture()
417
668
 
@@ -93,11 +93,15 @@ def retokenize_with_engine(intermediate_output, engine):
93
93
  ----------
94
94
  intermediate_output : List
95
95
  Rev.AI style output.
96
-
96
+
97
97
  engine : UtteranceEngine
98
98
  The utterance Engine to use.
99
99
  """
100
-
100
+
101
+ # Safety check: if engine is None or not callable, fall back to regular retokenize
102
+ if engine is None or not callable(engine):
103
+ return retokenize(intermediate_output)
104
+
101
105
  final_outputs = []
102
106
 
103
107
  for speaker, utterance in intermediate_output:
@@ -0,0 +1,3 @@
1
+ 0.8.0-post.3
2
+ Jan 16th, 2025
3
+ Patch regression?
@@ -1,3 +0,0 @@
1
- 0.8.0-post.1
2
- Jan 13th, 2025
3
- Speed