BatchalignHK 0.8.0.post1__tar.gz → 0.8.0.post2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/BatchalignHK.egg-info/PKG-INFO +1 -1
  2. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/PKG-INFO +1 -1
  3. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/cli/dispatch.py +186 -14
  4. batchalignhk-0.8.0.post2/batchalign/version +3 -0
  5. batchalignhk-0.8.0.post1/batchalign/version +0 -3
  6. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/BatchalignHK.egg-info/SOURCES.txt +0 -0
  7. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/BatchalignHK.egg-info/dependency_links.txt +0 -0
  8. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/BatchalignHK.egg-info/entry_points.txt +0 -0
  9. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/BatchalignHK.egg-info/requires.txt +0 -0
  10. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/BatchalignHK.egg-info/top_level.txt +0 -0
  11. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/LICENSE +0 -0
  12. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/MANIFEST.in +0 -0
  13. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/README.md +0 -0
  14. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/__init__.py +0 -0
  15. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/__main__.py +0 -0
  16. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/cli/__init__.py +0 -0
  17. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/cli/cli.py +0 -0
  18. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/constants.py +0 -0
  19. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/document.py +0 -0
  20. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/errors.py +0 -0
  21. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/__init__.py +0 -0
  22. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/core.py +0 -0
  23. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/exception.py +0 -0
  24. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/logging.py +0 -0
  25. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/realtime_meeting.py +0 -0
  26. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/speech_recognizer.py +0 -0
  27. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/speech_synthesizer.py +0 -0
  28. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/speech_transcriber.py +0 -0
  29. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/stream_input_tts.py +0 -0
  30. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/token.py +0 -0
  31. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/util.py +0 -0
  32. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/version.py +0 -0
  33. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/__init__.py +0 -0
  34. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/_abnf.py +0 -0
  35. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/_app.py +0 -0
  36. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/_cookiejar.py +0 -0
  37. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/_core.py +0 -0
  38. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/_exceptions.py +0 -0
  39. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/_handshake.py +0 -0
  40. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/_http.py +0 -0
  41. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/_logging.py +0 -0
  42. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/_socket.py +0 -0
  43. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/_ssl_compat.py +0 -0
  44. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/_url.py +0 -0
  45. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/_utils.py +0 -0
  46. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/tests/__init__.py +0 -0
  47. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/tests/echo-server.py +0 -0
  48. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/tests/test_abnf.py +0 -0
  49. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/tests/test_app.py +0 -0
  50. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/tests/test_cookiejar.py +0 -0
  51. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/tests/test_http.py +0 -0
  52. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/tests/test_url.py +0 -0
  53. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/tests/test_websocket.py +0 -0
  54. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/formats/__init__.py +0 -0
  55. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/formats/base.py +0 -0
  56. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/formats/chat/__init__.py +0 -0
  57. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/formats/chat/file.py +0 -0
  58. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/formats/chat/generator.py +0 -0
  59. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/formats/chat/lexer.py +0 -0
  60. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/formats/chat/parser.py +0 -0
  61. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/formats/chat/utils.py +0 -0
  62. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/formats/textgrid/__init__.py +0 -0
  63. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/formats/textgrid/file.py +0 -0
  64. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/formats/textgrid/generator.py +0 -0
  65. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/formats/textgrid/parser.py +0 -0
  66. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/models/__init__.py +0 -0
  67. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/models/resolve.py +0 -0
  68. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/models/speaker/__init__.py +0 -0
  69. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/models/speaker/config.yaml +0 -0
  70. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/models/speaker/infer.py +0 -0
  71. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/models/speaker/utils.py +0 -0
  72. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/models/training/__init__.py +0 -0
  73. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/models/training/run.py +0 -0
  74. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/models/training/utils.py +0 -0
  75. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/models/utils.py +0 -0
  76. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/models/utterance/__init__.py +0 -0
  77. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/models/utterance/cantonese_infer.py +0 -0
  78. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/models/utterance/dataset.py +0 -0
  79. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/models/utterance/execute.py +0 -0
  80. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/models/utterance/infer.py +0 -0
  81. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/models/utterance/prep.py +0 -0
  82. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/models/utterance/train.py +0 -0
  83. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/models/wave2vec/__init__.py +0 -0
  84. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/models/wave2vec/infer_fa.py +0 -0
  85. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/models/whisper/__init__.py +0 -0
  86. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/models/whisper/infer_asr.py +0 -0
  87. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/models/whisper/infer_fa.py +0 -0
  88. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/__init__.py +0 -0
  89. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/analysis/__init__.py +0 -0
  90. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/analysis/eval.py +0 -0
  91. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/__init__.py +0 -0
  92. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/aliyun.py +0 -0
  93. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/funaudio.py +0 -0
  94. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2chinese.py +0 -0
  95. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/__init__.py +0 -0
  96. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/deu.py +0 -0
  97. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/ell.py +0 -0
  98. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/eng.py +0 -0
  99. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/eus.py +0 -0
  100. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/fra.py +0 -0
  101. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/hrv.py +0 -0
  102. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/ind.py +0 -0
  103. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/jpn.py +0 -0
  104. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/nld.py +0 -0
  105. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/por.py +0 -0
  106. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/spa.py +0 -0
  107. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/tha.py +0 -0
  108. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/oai_whisper.py +0 -0
  109. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/rev.py +0 -0
  110. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/tencent.py +0 -0
  111. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/utils.py +0 -0
  112. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/whisper.py +0 -0
  113. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/whisperx.py +0 -0
  114. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/avqi/__init__.py +0 -0
  115. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/avqi/engine.py +0 -0
  116. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/base.py +0 -0
  117. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/cleanup/__init__.py +0 -0
  118. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  119. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  120. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  121. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/cleanup/retrace.py +0 -0
  122. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  123. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  124. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/cleanup/support/test.test +0 -0
  125. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/diarization/__init__.py +0 -0
  126. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/diarization/pyannote.py +0 -0
  127. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/dispatch.py +0 -0
  128. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/fa/__init__.py +0 -0
  129. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/fa/iic_fa.py +0 -0
  130. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  131. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/fa/wave2vec_fa_canto.py +0 -0
  132. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  133. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  134. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  135. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  136. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  137. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  138. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  139. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  140. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  141. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/opensmile/__init__.py +0 -0
  142. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/opensmile/engine.py +0 -0
  143. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/pipeline.py +0 -0
  144. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/speaker/__init__.py +0 -0
  145. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  146. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/translate/__init__.py +0 -0
  147. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/translate/gtrans.py +0 -0
  148. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/translate/seamless.py +0 -0
  149. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/translate/utils.py +0 -0
  150. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/utr/__init__.py +0 -0
  151. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/utr/funaudio_utr.py +0 -0
  152. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/utr/rev_utr.py +0 -0
  153. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/utr/tencent_utr.py +0 -0
  154. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/utr/utils.py +0 -0
  155. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  156. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/utterance/__init__.py +0 -0
  157. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  158. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/tests/__init__.py +0 -0
  159. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/tests/conftest.py +0 -0
  160. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  161. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  162. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  163. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  164. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  165. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  166. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  167. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  168. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  169. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  170. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  171. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  172. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/tests/pipelines/fixures.py +0 -0
  173. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  174. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  175. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/tests/test_document.py +0 -0
  176. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/utils/__init__.py +0 -0
  177. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/utils/abbrev.py +0 -0
  178. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/utils/compounds.py +0 -0
  179. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/utils/config.py +0 -0
  180. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/utils/dp.py +0 -0
  181. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/utils/names.py +0 -0
  182. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/batchalign/utils/utils.py +0 -0
  183. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/setup.cfg +0 -0
  184. {batchalignhk-0.8.0.post1 → batchalignhk-0.8.0.post2}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: BatchalignHK
3
- Version: 0.8.0.post1
3
+ Version: 0.8.0.post2
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: BatchalignHK
3
- Version: 0.8.0.post1
3
+ Version: 0.8.0.post2
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -33,6 +33,7 @@ import time
33
33
  import traceback
34
34
  import logging as L
35
35
  baL = L.getLogger('batchalign')
36
+ import psutil
36
37
 
37
38
  warnings.filterwarnings('ignore', category=UserWarning, message='TypedStorage is deprecated')
38
39
 
@@ -56,6 +57,29 @@ def _worker_task(file_info, command, lang, num_speakers, loader_info, writer_inf
56
57
 
57
58
  file, output = file_info
58
59
  pid = os.getpid()
60
+ rss_start = None
61
+ rss_end = None
62
+ rss_peak = None
63
+
64
+ def _safe_rss():
65
+ try:
66
+ import psutil
67
+ return psutil.Process(pid).memory_info().rss
68
+ except Exception:
69
+ return None
70
+
71
+ def _safe_peak_rss():
72
+ try:
73
+ import resource
74
+ peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
75
+ if peak is None:
76
+ return None
77
+ # ru_maxrss is KB on Linux, bytes on macOS; normalize to bytes.
78
+ return int(peak * 1024) if peak < 1024 * 1024 * 1024 else int(peak)
79
+ except Exception:
80
+ return None
81
+
82
+ rss_start = _safe_rss()
59
83
 
60
84
  # Configure logging in this worker process
61
85
  if verbose >= 1:
@@ -71,9 +95,8 @@ def _worker_task(file_info, command, lang, num_speakers, loader_info, writer_inf
71
95
  else:
72
96
  baL.setLevel(logging.DEBUG)
73
97
 
74
- # Only capture output if not in verbose mode
75
- # In verbose mode, let logs stream naturally to the console
76
- should_capture = verbose == 0
98
+ # Always capture output to avoid interleaving with progress rendering.
99
+ should_capture = True
77
100
 
78
101
  if should_capture:
79
102
  # Use a temporary file to capture ALL output at the FD level
@@ -153,7 +176,15 @@ def _worker_task(file_info, command, lang, num_speakers, loader_info, writer_inf
153
176
  else:
154
177
  captured = ""
155
178
 
156
- return file, None, None, captured
179
+ rss_end = _safe_rss()
180
+ rss_peak = _safe_peak_rss()
181
+ mem_info = {
182
+ "pid": pid,
183
+ "rss_start": rss_start,
184
+ "rss_end": rss_end,
185
+ "rss_peak": rss_peak,
186
+ }
187
+ return file, None, None, captured, mem_info
157
188
  except Exception as e:
158
189
  # Flush and read captured output if we were capturing
159
190
  if should_capture:
@@ -163,7 +194,15 @@ def _worker_task(file_info, command, lang, num_speakers, loader_info, writer_inf
163
194
  captured = log_file.read()
164
195
  else:
165
196
  captured = ""
166
- return file, traceback.format_exc(), e, captured
197
+ rss_end = _safe_rss()
198
+ rss_peak = _safe_peak_rss()
199
+ mem_info = {
200
+ "pid": pid,
201
+ "rss_start": rss_start,
202
+ "rss_end": rss_end,
203
+ "rss_peak": rss_peak,
204
+ }
205
+ return file, traceback.format_exc(), e, captured, mem_info
167
206
  finally:
168
207
  # Restore original FDs only if we redirected them
169
208
  if should_capture:
@@ -281,6 +320,7 @@ def _dispatch(command, lang, num_speakers,
281
320
  file_pairs = list(zip(files, outputs))
282
321
  file_pairs.sort(key=lambda fo: os.path.getsize(fo[0]) if os.path.exists(fo[0]) else 0, reverse=True)
283
322
  files, outputs = zip(*file_pairs) if file_pairs else ([], [])
323
+ file_sizes = {f: os.path.getsize(f) if os.path.exists(f) else 0 for f in files}
284
324
 
285
325
  C.print(f"\nMode: [blue]{command}[/blue]; got [bold cyan]{len(files)}[/bold cyan] transcript{'s' if len(files) > 1 else ''} to process from {in_dir}:\n")
286
326
 
@@ -315,8 +355,66 @@ def _dispatch(command, lang, num_speakers,
315
355
  # create the spinner
316
356
  prog = Progress(SpinnerColumn(), *Progress.get_default_columns()[:-1],
317
357
  TimeElapsedColumn(),
318
- TextColumn("[cyan]{task.fields[processor]}[/cyan]"), console=C)
358
+ TextColumn("[magenta]{task.fields[mem]}[/magenta]"),
359
+ TextColumn("[cyan]{task.fields[processor]}[/cyan]"),
360
+ console=C, refresh_per_second=5)
319
361
  errors = []
362
+ mem_records = {}
363
+ mem_samples = []
364
+ last_low_mem_warn = 0.0
365
+
366
+ def _format_bytes(count, precision=2):
367
+ if count is None:
368
+ return "unknown"
369
+ units = ["B", "KB", "MB", "GB", "TB"]
370
+ idx = 0
371
+ size = float(count)
372
+ while size >= 1024 and idx < len(units) - 1:
373
+ size /= 1024
374
+ idx += 1
375
+ if idx == 0:
376
+ return f"{int(size)}{units[idx]}"
377
+ return f"{size:.{precision}f}{units[idx]}"
378
+
379
+ def _mem_label(base, available=None, low_mem=False):
380
+ parts = [base]
381
+ if available is not None:
382
+ parts.append(f"avail {_format_bytes(available, precision=1)}")
383
+ if low_mem:
384
+ parts.append("LOW MEM")
385
+ return " | ".join(parts)
386
+
387
+ def _system_memory():
388
+ try:
389
+ vm = psutil.virtual_memory()
390
+ return vm.total, vm.available
391
+ except Exception:
392
+ return None, None
393
+
394
+ def _memory_reserve(total):
395
+ if total is None:
396
+ return None
397
+ return max(int(total * 0.10), 2 * 1024 * 1024 * 1024)
398
+
399
+ def _estimate_worker_bytes(file_size):
400
+ if not mem_samples:
401
+ return 512 * 1024 * 1024
402
+ ratios = [mem / size for size, mem in mem_samples if size and mem]
403
+ if not ratios:
404
+ return 512 * 1024 * 1024
405
+ ratios.sort()
406
+ median_ratio = ratios[len(ratios) // 2]
407
+ est = int(median_ratio * file_size)
408
+ return max(512 * 1024 * 1024, min(est, 6 * 1024 * 1024 * 1024))
409
+
410
+ def _should_throttle(est_bytes):
411
+ total, available = _system_memory()
412
+ if total is None or available is None:
413
+ return False, total, available
414
+ reserve = _memory_reserve(total)
415
+ if reserve is None:
416
+ return False, total, available
417
+ return (available - est_bytes) < reserve, total, available
320
418
 
321
419
  try:
322
420
  with prog as prog:
@@ -324,8 +422,9 @@ def _dispatch(command, lang, num_speakers,
324
422
  task_totals = {}
325
423
 
326
424
  for f in files:
327
- tasks[f] = prog.add_task(Path(f).name, start=False, total=1, processor="Waiting...")
425
+ tasks[f] = prog.add_task(Path(f).name, start=False, total=1, processor="Waiting...", mem="queued")
328
426
  task_totals[f] = 1
427
+ prog.start_task(tasks[f])
329
428
 
330
429
  def drain_progress_queue():
331
430
  if not progress_queue:
@@ -341,10 +440,16 @@ def _dispatch(command, lang, num_speakers,
341
440
  continue
342
441
  task_total = max(int(total) if total else task_totals.get(file, 1), 1)
343
442
  task_totals[file] = task_total
443
+ total_mem, available_mem = _system_memory()
444
+ reserve = _memory_reserve(total_mem)
445
+ low_mem = False
446
+ if reserve is not None and available_mem is not None:
447
+ low_mem = available_mem < reserve
344
448
  prog.update(tasks[file],
345
449
  total=task_total,
346
450
  completed=min(int(completed), task_total),
347
- processor=render_stage(stage_tasks))
451
+ processor=render_stage(stage_tasks),
452
+ mem=_mem_label("running", available_mem, low_mem))
348
453
 
349
454
  with concurrent.futures.ProcessPoolExecutor(max_workers=num_workers) as executor:
350
455
  worker_func = partial(_worker_task,
@@ -357,11 +462,54 @@ def _dispatch(command, lang, num_speakers,
357
462
  verbose=ctx.obj["verbose"],
358
463
  **kwargs)
359
464
 
360
- future_to_file = {executor.submit(worker_func, (f, o)): f for f, o in zip(files, outputs)}
465
+ file_iter = iter(zip(files, outputs))
466
+ future_to_file = {}
467
+
468
+ def submit_one(file_path, output_path):
469
+ future = executor.submit(worker_func, (file_path, output_path))
470
+ future_to_file[future] = file_path
471
+ est_bytes = _estimate_worker_bytes(file_sizes.get(file_path, 0))
472
+ total_mem, available_mem = _system_memory()
473
+ reserve = _memory_reserve(total_mem)
474
+ low_mem = False
475
+ if reserve is not None and available_mem is not None:
476
+ low_mem = available_mem < reserve
477
+ prog.update(
478
+ tasks[file_path],
479
+ processor="Processing...",
480
+ mem=_mem_label(f"est {_format_bytes(est_bytes)}", available_mem, low_mem),
481
+ )
361
482
 
362
- for f in files:
363
- prog.start_task(tasks[f])
364
- prog.update(tasks[f], processor="Processing...")
483
+ def schedule_available():
484
+ nonlocal last_low_mem_warn
485
+ while len(future_to_file) < num_workers:
486
+ try:
487
+ next_file, next_output = next(file_iter)
488
+ except StopIteration:
489
+ break
490
+ est_bytes = _estimate_worker_bytes(file_sizes.get(next_file, 0))
491
+ throttle, total, available = _should_throttle(est_bytes)
492
+ if throttle and future_to_file:
493
+ now = time.time()
494
+ if now - last_low_mem_warn > 10:
495
+ reserve = _memory_reserve(total)
496
+ prog.console.print(
497
+ f"[bold yellow]Low memory[/bold yellow]: "
498
+ f"{_format_bytes(available)} free, "
499
+ f"{_format_bytes(reserve)} reserve. "
500
+ f"Throttling new workers."
501
+ )
502
+ last_low_mem_warn = now
503
+ break
504
+ if throttle and not future_to_file:
505
+ prog.console.print(
506
+ f"[bold yellow]Low memory[/bold yellow]: "
507
+ f"{_format_bytes(available)} free. "
508
+ "Continuing with a single worker."
509
+ )
510
+ submit_one(next_file, next_output)
511
+
512
+ schedule_available()
365
513
 
366
514
  pending = set(future_to_file.keys())
367
515
  while pending:
@@ -374,8 +522,9 @@ def _dispatch(command, lang, num_speakers,
374
522
 
375
523
  for future in done:
376
524
  file = future_to_file[future]
525
+ future_to_file.pop(future, None)
377
526
  try:
378
- res_file, trcbk, e, captured = future.result()
527
+ res_file, trcbk, e, captured, mem_info = future.result()
379
528
  final_total = max(task_totals.get(file, 1), 1)
380
529
  if e:
381
530
  prog.update(tasks[file], total=final_total, completed=final_total, processor="[bold red]FAIL[/bold red]")
@@ -383,12 +532,25 @@ def _dispatch(command, lang, num_speakers,
383
532
  else:
384
533
  prog.update(tasks[file], total=final_total, completed=final_total, processor="[bold green]DONE[/bold green]")
385
534
  if ctx.obj["verbose"] >= 1 and captured.strip():
386
- errors.append((res_file, "Logs only (Success)", None, captured))
535
+ prog.console.print(f"[bold blue]INFO[/bold blue] on file [italic]{Path(file).name}[/italic]:\n{escape(captured.strip())}\n")
536
+ if mem_info:
537
+ mem_records[file] = mem_info
538
+ peak = mem_info.get("rss_peak") or mem_info.get("rss_end")
539
+ if peak:
540
+ mem_samples.append((file_sizes.get(file, 0), peak))
541
+ total_mem, available_mem = _system_memory()
542
+ reserve = _memory_reserve(total_mem)
543
+ low_mem = False
544
+ if reserve is not None and available_mem is not None:
545
+ low_mem = available_mem < reserve
546
+ prog.update(tasks[file], mem=_mem_label(_format_bytes(peak), available_mem, low_mem))
387
547
  except Exception as e:
388
548
  final_total = max(task_totals.get(file, 1), 1)
389
549
  prog.update(tasks[file], total=final_total, completed=final_total, processor="[bold red]FAIL[/bold red]")
390
550
  errors.append((file, traceback.format_exc(), e, ""))
391
551
 
552
+ schedule_available()
553
+ pending = set(future_to_file.keys())
392
554
  drain_progress_queue()
393
555
  finally:
394
556
  if manager:
@@ -412,6 +574,16 @@ def _dispatch(command, lang, num_speakers,
412
574
  else:
413
575
  C.print(f"\nAll done. Results saved to {out_dir}!\n")
414
576
 
577
+ if mem_records and ctx.obj["verbose"] >= 1:
578
+ C.print("\nMemory usage per file (worker RSS peak):")
579
+ for file, info in mem_records.items():
580
+ rel_path = os.path.relpath(str(Path(file).absolute()), in_dir)
581
+ peak = info.get("rss_peak") or info.get("rss_end")
582
+ C.print(f"- {rel_path}: {_format_bytes(peak)}")
583
+ total, available = _system_memory()
584
+ if total is not None and available is not None:
585
+ C.print(f"\nSystem memory available: {_format_bytes(available)} / {_format_bytes(total)}")
586
+
415
587
  if ctx.obj["verbose"] > 1:
416
588
  C.end_capture()
417
589
 
@@ -0,0 +1,3 @@
1
+ 0.8.0-post.2
2
+ Jan 15th, 2025
3
+ Memory Safeguards
@@ -1,3 +0,0 @@
1
- 0.8.0-post.1
2
- Jan 13th, 2025
3
- Speed