BatchalignHK 0.8.0__tar.gz → 0.8.0.post2__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (184)
  1. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/BatchalignHK.egg-info/PKG-INFO +1 -1
  2. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/PKG-INFO +1 -1
  3. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/cli/dispatch.py +286 -85
  4. batchalignhk-0.8.0.post2/batchalign/version +3 -0
  5. batchalignhk-0.8.0/batchalign/version +0 -3
  6. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/BatchalignHK.egg-info/SOURCES.txt +0 -0
  7. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/BatchalignHK.egg-info/dependency_links.txt +0 -0
  8. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/BatchalignHK.egg-info/entry_points.txt +0 -0
  9. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/BatchalignHK.egg-info/requires.txt +0 -0
  10. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/BatchalignHK.egg-info/top_level.txt +0 -0
  11. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/LICENSE +0 -0
  12. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/MANIFEST.in +0 -0
  13. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/README.md +0 -0
  14. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/__init__.py +0 -0
  15. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/__main__.py +0 -0
  16. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/cli/__init__.py +0 -0
  17. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/cli/cli.py +0 -0
  18. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/constants.py +0 -0
  19. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/document.py +0 -0
  20. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/errors.py +0 -0
  21. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/__init__.py +0 -0
  22. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/core.py +0 -0
  23. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/exception.py +0 -0
  24. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/logging.py +0 -0
  25. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/realtime_meeting.py +0 -0
  26. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/speech_recognizer.py +0 -0
  27. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/speech_synthesizer.py +0 -0
  28. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/speech_transcriber.py +0 -0
  29. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/stream_input_tts.py +0 -0
  30. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/token.py +0 -0
  31. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/util.py +0 -0
  32. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/version.py +0 -0
  33. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/__init__.py +0 -0
  34. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/_abnf.py +0 -0
  35. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/_app.py +0 -0
  36. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/_cookiejar.py +0 -0
  37. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/_core.py +0 -0
  38. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/_exceptions.py +0 -0
  39. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/_handshake.py +0 -0
  40. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/_http.py +0 -0
  41. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/_logging.py +0 -0
  42. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/_socket.py +0 -0
  43. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/_ssl_compat.py +0 -0
  44. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/_url.py +0 -0
  45. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/_utils.py +0 -0
  46. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/tests/__init__.py +0 -0
  47. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/tests/echo-server.py +0 -0
  48. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/tests/test_abnf.py +0 -0
  49. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/tests/test_app.py +0 -0
  50. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/tests/test_cookiejar.py +0 -0
  51. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/tests/test_http.py +0 -0
  52. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/tests/test_url.py +0 -0
  53. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/extern/nls/websocket/tests/test_websocket.py +0 -0
  54. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/formats/__init__.py +0 -0
  55. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/formats/base.py +0 -0
  56. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/formats/chat/__init__.py +0 -0
  57. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/formats/chat/file.py +0 -0
  58. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/formats/chat/generator.py +0 -0
  59. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/formats/chat/lexer.py +0 -0
  60. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/formats/chat/parser.py +0 -0
  61. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/formats/chat/utils.py +0 -0
  62. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/formats/textgrid/__init__.py +0 -0
  63. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/formats/textgrid/file.py +0 -0
  64. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/formats/textgrid/generator.py +0 -0
  65. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/formats/textgrid/parser.py +0 -0
  66. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/models/__init__.py +0 -0
  67. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/models/resolve.py +0 -0
  68. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/models/speaker/__init__.py +0 -0
  69. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/models/speaker/config.yaml +0 -0
  70. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/models/speaker/infer.py +0 -0
  71. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/models/speaker/utils.py +0 -0
  72. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/models/training/__init__.py +0 -0
  73. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/models/training/run.py +0 -0
  74. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/models/training/utils.py +0 -0
  75. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/models/utils.py +0 -0
  76. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/models/utterance/__init__.py +0 -0
  77. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/models/utterance/cantonese_infer.py +0 -0
  78. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/models/utterance/dataset.py +0 -0
  79. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/models/utterance/execute.py +0 -0
  80. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/models/utterance/infer.py +0 -0
  81. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/models/utterance/prep.py +0 -0
  82. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/models/utterance/train.py +0 -0
  83. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/models/wave2vec/__init__.py +0 -0
  84. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/models/wave2vec/infer_fa.py +0 -0
  85. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/models/whisper/__init__.py +0 -0
  86. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/models/whisper/infer_asr.py +0 -0
  87. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/models/whisper/infer_fa.py +0 -0
  88. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/__init__.py +0 -0
  89. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/analysis/__init__.py +0 -0
  90. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/analysis/eval.py +0 -0
  91. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/__init__.py +0 -0
  92. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/aliyun.py +0 -0
  93. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/funaudio.py +0 -0
  94. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2chinese.py +0 -0
  95. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/__init__.py +0 -0
  96. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/deu.py +0 -0
  97. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/ell.py +0 -0
  98. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/eng.py +0 -0
  99. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/eus.py +0 -0
  100. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/fra.py +0 -0
  101. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/hrv.py +0 -0
  102. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/ind.py +0 -0
  103. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/jpn.py +0 -0
  104. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/nld.py +0 -0
  105. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/por.py +0 -0
  106. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/spa.py +0 -0
  107. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/num2lang/tha.py +0 -0
  108. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/oai_whisper.py +0 -0
  109. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/rev.py +0 -0
  110. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/tencent.py +0 -0
  111. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/utils.py +0 -0
  112. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/whisper.py +0 -0
  113. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/asr/whisperx.py +0 -0
  114. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/avqi/__init__.py +0 -0
  115. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/avqi/engine.py +0 -0
  116. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/base.py +0 -0
  117. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/cleanup/__init__.py +0 -0
  118. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  119. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  120. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  121. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/cleanup/retrace.py +0 -0
  122. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  123. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  124. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/cleanup/support/test.test +0 -0
  125. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/diarization/__init__.py +0 -0
  126. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/diarization/pyannote.py +0 -0
  127. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/dispatch.py +0 -0
  128. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/fa/__init__.py +0 -0
  129. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/fa/iic_fa.py +0 -0
  130. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  131. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/fa/wave2vec_fa_canto.py +0 -0
  132. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  133. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  134. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  135. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  136. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  137. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  138. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  139. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  140. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  141. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/opensmile/__init__.py +0 -0
  142. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/opensmile/engine.py +0 -0
  143. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/pipeline.py +0 -0
  144. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/speaker/__init__.py +0 -0
  145. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  146. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/translate/__init__.py +0 -0
  147. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/translate/gtrans.py +0 -0
  148. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/translate/seamless.py +0 -0
  149. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/translate/utils.py +0 -0
  150. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/utr/__init__.py +0 -0
  151. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/utr/funaudio_utr.py +0 -0
  152. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/utr/rev_utr.py +0 -0
  153. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/utr/tencent_utr.py +0 -0
  154. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/utr/utils.py +0 -0
  155. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  156. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/utterance/__init__.py +0 -0
  157. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  158. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/tests/__init__.py +0 -0
  159. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/tests/conftest.py +0 -0
  160. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  161. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  162. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  163. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  164. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  165. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  166. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  167. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  168. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  169. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  170. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  171. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  172. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/tests/pipelines/fixures.py +0 -0
  173. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  174. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  175. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/tests/test_document.py +0 -0
  176. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/utils/__init__.py +0 -0
  177. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/utils/abbrev.py +0 -0
  178. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/utils/compounds.py +0 -0
  179. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/utils/config.py +0 -0
  180. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/utils/dp.py +0 -0
  181. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/utils/names.py +0 -0
  182. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/batchalign/utils/utils.py +0 -0
  183. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/setup.cfg +0 -0
  184. {batchalignhk-0.8.0 → batchalignhk-0.8.0.post2}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: BatchalignHK
3
- Version: 0.8.0
3
+ Version: 0.8.0.post2
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: BatchalignHK
3
- Version: 0.8.0
3
+ Version: 0.8.0.post2
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -33,6 +33,7 @@ import time
33
33
  import traceback
34
34
  import logging as L
35
35
  baL = L.getLogger('batchalign')
36
+ import psutil
36
37
 
37
38
  warnings.filterwarnings('ignore', category=UserWarning, message='TypedStorage is deprecated')
38
39
 
@@ -47,103 +48,169 @@ def _get_worker_pipeline(command, lang, num_speakers, **kwargs):
47
48
  lang=lang, num_speakers=num_speakers, **kwargs)
48
49
  return _worker_pipeline
49
50
 
50
- def _worker_task(file_info, command, lang, num_speakers, loader_info, writer_info, progress_queue=None, **kwargs):
51
+ def _worker_task(file_info, command, lang, num_speakers, loader_info, writer_info, progress_queue=None, verbose=0, **kwargs):
51
52
  """The task executed in each worker process."""
52
53
  import sys
53
54
  import os
54
55
  import tempfile
55
-
56
+ import logging
57
+
56
58
  file, output = file_info
57
59
  pid = os.getpid()
58
-
59
- # Use a temporary file to capture ALL output at the FD level
60
- # This is the most robust way to prevent interleaved output
61
- with tempfile.TemporaryFile(mode='w+') as log_file:
60
+ rss_start = None
61
+ rss_end = None
62
+ rss_peak = None
63
+
64
+ def _safe_rss():
65
+ try:
66
+ import psutil
67
+ return psutil.Process(pid).memory_info().rss
68
+ except Exception:
69
+ return None
70
+
71
+ def _safe_peak_rss():
72
+ try:
73
+ import resource
74
+ peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
75
+ if peak is None:
76
+ return None
77
+ # ru_maxrss is KB on Linux, bytes on macOS; normalize to bytes.
78
+ return int(peak * 1024) if peak < 1024 * 1024 * 1024 else int(peak)
79
+ except Exception:
80
+ return None
81
+
82
+ rss_start = _safe_rss()
83
+
84
+ # Configure logging in this worker process
85
+ if verbose >= 1:
86
+ # Ensure basicConfig is called so logging works
87
+ logging.basicConfig(format="%(levelname)s - %(name)s - %(message)s", level=logging.ERROR)
88
+
89
+ # Configure batchalign logger level in this worker process
90
+ baL = logging.getLogger('batchalign')
91
+ if verbose == 0:
92
+ baL.setLevel(logging.WARN)
93
+ elif verbose == 1:
94
+ baL.setLevel(logging.INFO)
95
+ else:
96
+ baL.setLevel(logging.DEBUG)
97
+
98
+ # Always capture output to avoid interleaving with progress rendering.
99
+ should_capture = True
100
+
101
+ if should_capture:
102
+ # Use a temporary file to capture ALL output at the FD level
103
+ # This is the most robust way to prevent interleaved output
104
+ log_file = tempfile.TemporaryFile(mode='w+')
62
105
  old_stdout_fd = os.dup(sys.stdout.fileno())
63
106
  old_stderr_fd = os.dup(sys.stderr.fileno())
64
-
65
- try:
66
- # Redirect FD 1 and 2 to our temp file
67
- os.dup2(log_file.fileno(), sys.stdout.fileno())
68
- os.dup2(log_file.fileno(), sys.stderr.fileno())
69
-
70
- pipeline = _get_worker_pipeline(command, lang, num_speakers, **kwargs)
71
107
 
72
- def progress_callback(completed, total, tasks):
73
- if not progress_queue:
74
- return
75
- try:
76
- progress_queue.put((file, completed, total, tasks))
77
- except Exception:
78
- pass
79
-
80
- # For now, we'll re-import what we need
81
- from batchalign.formats.chat import CHATFile
82
-
83
- # Morphosyntax specific loader/writer logic moved here for picklability
84
- if command == "morphotag":
85
- # Extract morphotag-specific arguments from kwargs
86
- mwt = kwargs.pop("mwt", {})
87
- retokenize = kwargs.pop("retokenize", False)
88
- skipmultilang = kwargs.pop("skipmultilang", False)
89
-
90
- cf = CHATFile(path=os.path.abspath(file), special_mor_=True)
91
- doc = cf.doc
92
- if str(cf).count("%mor") > 0:
93
- doc.ba_special_["special_mor_notation"] = True
94
-
95
- # Prepare arguments for the pipeline
96
- pipeline_kwargs = {
97
- "retokenize": retokenize,
98
- "skipmultilang": skipmultilang,
99
- "mwt": mwt
100
- }
101
- # Add any remaining kwargs
102
- pipeline_kwargs.update(kwargs)
103
-
104
- # Process
105
- doc = pipeline(doc, callback=progress_callback, **pipeline_kwargs)
106
-
107
- # Write
108
- CHATFile(doc=doc, special_mor_=doc.ba_special_.get("special_mor_notation", False)).write(output)
109
-
110
- # Add other commands as needed, or use a more generic registry
111
- elif command == "align":
112
- cf = CHATFile(path=os.path.abspath(file))
113
- doc = cf.doc
114
- kw = {"pauses": kwargs.get("pauses", False)}
115
- doc = pipeline(doc, callback=progress_callback, **kw)
116
- CHATFile(doc=doc).write(output, write_wor=kwargs.get("wor", True))
117
-
118
- else:
119
- loader, writer = loader_info, writer_info
120
- doc = loader(os.path.abspath(file))
121
- kw = {}
122
- if isinstance(doc, tuple) and len(doc) > 1:
123
- doc, kw = doc
124
- doc = pipeline(doc, callback=progress_callback, **kw)
125
- writer(doc, output)
126
-
127
- # Flush everything before reading back
108
+ # Redirect FD 1 and 2 to our temp file
109
+ os.dup2(log_file.fileno(), sys.stdout.fileno())
110
+ os.dup2(log_file.fileno(), sys.stderr.fileno())
111
+
112
+ try:
113
+ pipeline = _get_worker_pipeline(command, lang, num_speakers, **kwargs)
114
+
115
+ def progress_callback(completed, total, tasks):
116
+ if not progress_queue:
117
+ return
118
+ try:
119
+ progress_queue.put((file, completed, total, tasks))
120
+ except Exception:
121
+ pass
122
+
123
+ # For now, we'll re-import what we need
124
+ from batchalign.formats.chat import CHATFile
125
+
126
+ # Morphosyntax specific loader/writer logic moved here for picklability
127
+ if command == "morphotag":
128
+ # Extract morphotag-specific arguments from kwargs
129
+ mwt = kwargs.pop("mwt", {})
130
+ retokenize = kwargs.pop("retokenize", False)
131
+ skipmultilang = kwargs.pop("skipmultilang", False)
132
+
133
+ cf = CHATFile(path=os.path.abspath(file), special_mor_=True)
134
+ doc = cf.doc
135
+ if str(cf).count("%mor") > 0:
136
+ doc.ba_special_["special_mor_notation"] = True
137
+
138
+ # Prepare arguments for the pipeline
139
+ pipeline_kwargs = {
140
+ "retokenize": retokenize,
141
+ "skipmultilang": skipmultilang,
142
+ "mwt": mwt
143
+ }
144
+ # Add any remaining kwargs
145
+ pipeline_kwargs.update(kwargs)
146
+
147
+ # Process
148
+ doc = pipeline(doc, callback=progress_callback, **pipeline_kwargs)
149
+
150
+ # Write
151
+ CHATFile(doc=doc, special_mor_=doc.ba_special_.get("special_mor_notation", False)).write(output)
152
+
153
+ # Add other commands as needed, or use a more generic registry
154
+ elif command == "align":
155
+ cf = CHATFile(path=os.path.abspath(file))
156
+ doc = cf.doc
157
+ kw = {"pauses": kwargs.get("pauses", False)}
158
+ doc = pipeline(doc, callback=progress_callback, **kw)
159
+ CHATFile(doc=doc).write(output, write_wor=kwargs.get("wor", True))
160
+
161
+ else:
162
+ loader, writer = loader_info, writer_info
163
+ doc = loader(os.path.abspath(file))
164
+ kw = {}
165
+ if isinstance(doc, tuple) and len(doc) > 1:
166
+ doc, kw = doc
167
+ doc = pipeline(doc, callback=progress_callback, **kw)
168
+ writer(doc, output)
169
+
170
+ # Flush and read captured output if we were capturing
171
+ if should_capture:
128
172
  sys.stdout.flush()
129
173
  sys.stderr.flush()
130
174
  log_file.seek(0)
131
175
  captured = log_file.read()
132
-
133
- return file, None, None, captured
134
- except Exception as e:
135
- # Flush everything before reading back
176
+ else:
177
+ captured = ""
178
+
179
+ rss_end = _safe_rss()
180
+ rss_peak = _safe_peak_rss()
181
+ mem_info = {
182
+ "pid": pid,
183
+ "rss_start": rss_start,
184
+ "rss_end": rss_end,
185
+ "rss_peak": rss_peak,
186
+ }
187
+ return file, None, None, captured, mem_info
188
+ except Exception as e:
189
+ # Flush and read captured output if we were capturing
190
+ if should_capture:
136
191
  sys.stdout.flush()
137
192
  sys.stderr.flush()
138
193
  log_file.seek(0)
139
194
  captured = log_file.read()
140
- return file, traceback.format_exc(), e, captured
141
- finally:
142
- # Restore original FDs
195
+ else:
196
+ captured = ""
197
+ rss_end = _safe_rss()
198
+ rss_peak = _safe_peak_rss()
199
+ mem_info = {
200
+ "pid": pid,
201
+ "rss_start": rss_start,
202
+ "rss_end": rss_end,
203
+ "rss_peak": rss_peak,
204
+ }
205
+ return file, traceback.format_exc(), e, captured, mem_info
206
+ finally:
207
+ # Restore original FDs only if we redirected them
208
+ if should_capture:
143
209
  os.dup2(old_stdout_fd, sys.stdout.fileno())
144
210
  os.dup2(old_stderr_fd, sys.stderr.fileno())
145
211
  os.close(old_stdout_fd)
146
212
  os.close(old_stderr_fd)
213
+ log_file.close()
147
214
 
148
215
  # this dictionary maps what commands are executed
149
216
  # against what BatchalignPipeline tasks are actually ran
@@ -253,6 +320,7 @@ def _dispatch(command, lang, num_speakers,
253
320
  file_pairs = list(zip(files, outputs))
254
321
  file_pairs.sort(key=lambda fo: os.path.getsize(fo[0]) if os.path.exists(fo[0]) else 0, reverse=True)
255
322
  files, outputs = zip(*file_pairs) if file_pairs else ([], [])
323
+ file_sizes = {f: os.path.getsize(f) if os.path.exists(f) else 0 for f in files}
256
324
 
257
325
  C.print(f"\nMode: [blue]{command}[/blue]; got [bold cyan]{len(files)}[/bold cyan] transcript{'s' if len(files) > 1 else ''} to process from {in_dir}:\n")
258
326
 
@@ -287,8 +355,66 @@ def _dispatch(command, lang, num_speakers,
287
355
  # create the spinner
288
356
  prog = Progress(SpinnerColumn(), *Progress.get_default_columns()[:-1],
289
357
  TimeElapsedColumn(),
290
- TextColumn("[cyan]{task.fields[processor]}[/cyan]"), console=C)
358
+ TextColumn("[magenta]{task.fields[mem]}[/magenta]"),
359
+ TextColumn("[cyan]{task.fields[processor]}[/cyan]"),
360
+ console=C, refresh_per_second=5)
291
361
  errors = []
362
+ mem_records = {}
363
+ mem_samples = []
364
+ last_low_mem_warn = 0.0
365
+
366
+ def _format_bytes(count, precision=2):
367
+ if count is None:
368
+ return "unknown"
369
+ units = ["B", "KB", "MB", "GB", "TB"]
370
+ idx = 0
371
+ size = float(count)
372
+ while size >= 1024 and idx < len(units) - 1:
373
+ size /= 1024
374
+ idx += 1
375
+ if idx == 0:
376
+ return f"{int(size)}{units[idx]}"
377
+ return f"{size:.{precision}f}{units[idx]}"
378
+
379
+ def _mem_label(base, available=None, low_mem=False):
380
+ parts = [base]
381
+ if available is not None:
382
+ parts.append(f"avail {_format_bytes(available, precision=1)}")
383
+ if low_mem:
384
+ parts.append("LOW MEM")
385
+ return " | ".join(parts)
386
+
387
+ def _system_memory():
388
+ try:
389
+ vm = psutil.virtual_memory()
390
+ return vm.total, vm.available
391
+ except Exception:
392
+ return None, None
393
+
394
+ def _memory_reserve(total):
395
+ if total is None:
396
+ return None
397
+ return max(int(total * 0.10), 2 * 1024 * 1024 * 1024)
398
+
399
+ def _estimate_worker_bytes(file_size):
400
+ if not mem_samples:
401
+ return 512 * 1024 * 1024
402
+ ratios = [mem / size for size, mem in mem_samples if size and mem]
403
+ if not ratios:
404
+ return 512 * 1024 * 1024
405
+ ratios.sort()
406
+ median_ratio = ratios[len(ratios) // 2]
407
+ est = int(median_ratio * file_size)
408
+ return max(512 * 1024 * 1024, min(est, 6 * 1024 * 1024 * 1024))
409
+
410
+ def _should_throttle(est_bytes):
411
+ total, available = _system_memory()
412
+ if total is None or available is None:
413
+ return False, total, available
414
+ reserve = _memory_reserve(total)
415
+ if reserve is None:
416
+ return False, total, available
417
+ return (available - est_bytes) < reserve, total, available
292
418
 
293
419
  try:
294
420
  with prog as prog:
@@ -296,8 +422,9 @@ def _dispatch(command, lang, num_speakers,
296
422
  task_totals = {}
297
423
 
298
424
  for f in files:
299
- tasks[f] = prog.add_task(Path(f).name, start=False, total=1, processor="Waiting...")
425
+ tasks[f] = prog.add_task(Path(f).name, start=False, total=1, processor="Waiting...", mem="queued")
300
426
  task_totals[f] = 1
427
+ prog.start_task(tasks[f])
301
428
 
302
429
  def drain_progress_queue():
303
430
  if not progress_queue:
@@ -313,10 +440,16 @@ def _dispatch(command, lang, num_speakers,
313
440
  continue
314
441
  task_total = max(int(total) if total else task_totals.get(file, 1), 1)
315
442
  task_totals[file] = task_total
443
+ total_mem, available_mem = _system_memory()
444
+ reserve = _memory_reserve(total_mem)
445
+ low_mem = False
446
+ if reserve is not None and available_mem is not None:
447
+ low_mem = available_mem < reserve
316
448
  prog.update(tasks[file],
317
449
  total=task_total,
318
450
  completed=min(int(completed), task_total),
319
- processor=render_stage(stage_tasks))
451
+ processor=render_stage(stage_tasks),
452
+ mem=_mem_label("running", available_mem, low_mem))
320
453
 
321
454
  with concurrent.futures.ProcessPoolExecutor(max_workers=num_workers) as executor:
322
455
  worker_func = partial(_worker_task,
@@ -326,13 +459,57 @@ def _dispatch(command, lang, num_speakers,
326
459
  loader_info=None,
327
460
  writer_info=None,
328
461
  progress_queue=progress_queue,
462
+ verbose=ctx.obj["verbose"],
329
463
  **kwargs)
330
464
 
331
- future_to_file = {executor.submit(worker_func, (f, o)): f for f, o in zip(files, outputs)}
465
+ file_iter = iter(zip(files, outputs))
466
+ future_to_file = {}
467
+
468
+ def submit_one(file_path, output_path):
469
+ future = executor.submit(worker_func, (file_path, output_path))
470
+ future_to_file[future] = file_path
471
+ est_bytes = _estimate_worker_bytes(file_sizes.get(file_path, 0))
472
+ total_mem, available_mem = _system_memory()
473
+ reserve = _memory_reserve(total_mem)
474
+ low_mem = False
475
+ if reserve is not None and available_mem is not None:
476
+ low_mem = available_mem < reserve
477
+ prog.update(
478
+ tasks[file_path],
479
+ processor="Processing...",
480
+ mem=_mem_label(f"est {_format_bytes(est_bytes)}", available_mem, low_mem),
481
+ )
332
482
 
333
- for f in files:
334
- prog.start_task(tasks[f])
335
- prog.update(tasks[f], processor="Processing...")
483
+ def schedule_available():
484
+ nonlocal last_low_mem_warn
485
+ while len(future_to_file) < num_workers:
486
+ try:
487
+ next_file, next_output = next(file_iter)
488
+ except StopIteration:
489
+ break
490
+ est_bytes = _estimate_worker_bytes(file_sizes.get(next_file, 0))
491
+ throttle, total, available = _should_throttle(est_bytes)
492
+ if throttle and future_to_file:
493
+ now = time.time()
494
+ if now - last_low_mem_warn > 10:
495
+ reserve = _memory_reserve(total)
496
+ prog.console.print(
497
+ f"[bold yellow]Low memory[/bold yellow]: "
498
+ f"{_format_bytes(available)} free, "
499
+ f"{_format_bytes(reserve)} reserve. "
500
+ f"Throttling new workers."
501
+ )
502
+ last_low_mem_warn = now
503
+ break
504
+ if throttle and not future_to_file:
505
+ prog.console.print(
506
+ f"[bold yellow]Low memory[/bold yellow]: "
507
+ f"{_format_bytes(available)} free. "
508
+ "Continuing with a single worker."
509
+ )
510
+ submit_one(next_file, next_output)
511
+
512
+ schedule_available()
336
513
 
337
514
  pending = set(future_to_file.keys())
338
515
  while pending:
@@ -345,8 +522,9 @@ def _dispatch(command, lang, num_speakers,
345
522
 
346
523
  for future in done:
347
524
  file = future_to_file[future]
525
+ future_to_file.pop(future, None)
348
526
  try:
349
- res_file, trcbk, e, captured = future.result()
527
+ res_file, trcbk, e, captured, mem_info = future.result()
350
528
  final_total = max(task_totals.get(file, 1), 1)
351
529
  if e:
352
530
  prog.update(tasks[file], total=final_total, completed=final_total, processor="[bold red]FAIL[/bold red]")
@@ -354,12 +532,25 @@ def _dispatch(command, lang, num_speakers,
354
532
  else:
355
533
  prog.update(tasks[file], total=final_total, completed=final_total, processor="[bold green]DONE[/bold green]")
356
534
  if ctx.obj["verbose"] >= 1 and captured.strip():
357
- errors.append((res_file, "Logs only (Success)", None, captured))
535
+ prog.console.print(f"[bold blue]INFO[/bold blue] on file [italic]{Path(file).name}[/italic]:\n{escape(captured.strip())}\n")
536
+ if mem_info:
537
+ mem_records[file] = mem_info
538
+ peak = mem_info.get("rss_peak") or mem_info.get("rss_end")
539
+ if peak:
540
+ mem_samples.append((file_sizes.get(file, 0), peak))
541
+ total_mem, available_mem = _system_memory()
542
+ reserve = _memory_reserve(total_mem)
543
+ low_mem = False
544
+ if reserve is not None and available_mem is not None:
545
+ low_mem = available_mem < reserve
546
+ prog.update(tasks[file], mem=_mem_label(_format_bytes(peak), available_mem, low_mem))
358
547
  except Exception as e:
359
548
  final_total = max(task_totals.get(file, 1), 1)
360
549
  prog.update(tasks[file], total=final_total, completed=final_total, processor="[bold red]FAIL[/bold red]")
361
550
  errors.append((file, traceback.format_exc(), e, ""))
362
551
 
552
+ schedule_available()
553
+ pending = set(future_to_file.keys())
363
554
  drain_progress_queue()
364
555
  finally:
365
556
  if manager:
@@ -383,6 +574,16 @@ def _dispatch(command, lang, num_speakers,
383
574
  else:
384
575
  C.print(f"\nAll done. Results saved to {out_dir}!\n")
385
576
 
577
+ if mem_records and ctx.obj["verbose"] >= 1:
578
+ C.print("\nMemory usage per file (worker RSS peak):")
579
+ for file, info in mem_records.items():
580
+ rel_path = os.path.relpath(str(Path(file).absolute()), in_dir)
581
+ peak = info.get("rss_peak") or info.get("rss_end")
582
+ C.print(f"- {rel_path}: {_format_bytes(peak)}")
583
+ total, available = _system_memory()
584
+ if total is not None and available is not None:
585
+ C.print(f"\nSystem memory available: {_format_bytes(available)} / {_format_bytes(total)}")
586
+
386
587
  if ctx.obj["verbose"] > 1:
387
588
  C.end_capture()
388
589
 
@@ -0,0 +1,3 @@
1
+ 0.8.0-post.2
2
+ Jan 15th, 2025
3
+ Memory Safeguards
@@ -1,3 +0,0 @@
1
- 0.8.0
2
- Jan 13th, 2025
3
- Speed
File without changes