BatchalignHK 0.7.22.post5__tar.gz → 0.7.22.post7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167) hide show
  1. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/BatchalignHK.egg-info/PKG-INFO +1 -1
  2. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/PKG-INFO +1 -1
  3. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/formats/chat/file.py +4 -2
  4. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/formats/chat/generator.py +4 -1
  5. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/asr/tencent.py +4 -28
  6. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/morphosyntax/ud.py +3 -1
  7. batchalignhk-0.7.22.post7/batchalign/version +3 -0
  8. batchalignhk-0.7.22.post5/batchalign/version +0 -3
  9. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/BatchalignHK.egg-info/SOURCES.txt +0 -0
  10. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/BatchalignHK.egg-info/dependency_links.txt +0 -0
  11. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/BatchalignHK.egg-info/entry_points.txt +0 -0
  12. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/BatchalignHK.egg-info/requires.txt +0 -0
  13. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/BatchalignHK.egg-info/top_level.txt +0 -0
  14. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/LICENSE +0 -0
  15. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/MANIFEST.in +0 -0
  16. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/README.md +0 -0
  17. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/__init__.py +0 -0
  18. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/__main__.py +0 -0
  19. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/cli/__init__.py +0 -0
  20. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/cli/cli.py +0 -0
  21. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/cli/dispatch.py +0 -0
  22. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/constants.py +0 -0
  23. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/document.py +0 -0
  24. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/errors.py +0 -0
  25. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/__init__.py +0 -0
  26. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/core.py +0 -0
  27. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/exception.py +0 -0
  28. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/logging.py +0 -0
  29. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/realtime_meeting.py +0 -0
  30. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/speech_recognizer.py +0 -0
  31. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/speech_synthesizer.py +0 -0
  32. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/speech_transcriber.py +0 -0
  33. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/stream_input_tts.py +0 -0
  34. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/token.py +0 -0
  35. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/util.py +0 -0
  36. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/version.py +0 -0
  37. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/__init__.py +0 -0
  38. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_abnf.py +0 -0
  39. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_app.py +0 -0
  40. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_cookiejar.py +0 -0
  41. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_core.py +0 -0
  42. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_exceptions.py +0 -0
  43. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_handshake.py +0 -0
  44. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_http.py +0 -0
  45. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_logging.py +0 -0
  46. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_socket.py +0 -0
  47. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_ssl_compat.py +0 -0
  48. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_url.py +0 -0
  49. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_utils.py +0 -0
  50. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/tests/__init__.py +0 -0
  51. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/tests/echo-server.py +0 -0
  52. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/tests/test_abnf.py +0 -0
  53. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/tests/test_app.py +0 -0
  54. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/tests/test_cookiejar.py +0 -0
  55. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/tests/test_http.py +0 -0
  56. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/tests/test_url.py +0 -0
  57. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/tests/test_websocket.py +0 -0
  58. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/formats/__init__.py +0 -0
  59. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/formats/base.py +0 -0
  60. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/formats/chat/__init__.py +0 -0
  61. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/formats/chat/lexer.py +0 -0
  62. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/formats/chat/parser.py +0 -0
  63. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/formats/chat/utils.py +0 -0
  64. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/formats/textgrid/__init__.py +0 -0
  65. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/formats/textgrid/file.py +0 -0
  66. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/formats/textgrid/generator.py +0 -0
  67. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/formats/textgrid/parser.py +0 -0
  68. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/models/__init__.py +0 -0
  69. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/models/resolve.py +0 -0
  70. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/models/speaker/__init__.py +0 -0
  71. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/models/speaker/config.yaml +0 -0
  72. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/models/speaker/infer.py +0 -0
  73. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/models/speaker/utils.py +0 -0
  74. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/models/training/__init__.py +0 -0
  75. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/models/training/run.py +0 -0
  76. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/models/training/utils.py +0 -0
  77. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/models/utils.py +0 -0
  78. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/models/utterance/__init__.py +0 -0
  79. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/models/utterance/cantonese_infer.py +0 -0
  80. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/models/utterance/dataset.py +0 -0
  81. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/models/utterance/execute.py +0 -0
  82. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/models/utterance/infer.py +0 -0
  83. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/models/utterance/prep.py +0 -0
  84. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/models/utterance/train.py +0 -0
  85. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/models/wave2vec/__init__.py +0 -0
  86. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/models/wave2vec/infer_fa.py +0 -0
  87. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/models/whisper/__init__.py +0 -0
  88. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/models/whisper/infer_asr.py +0 -0
  89. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/models/whisper/infer_fa.py +0 -0
  90. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/__init__.py +0 -0
  91. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/analysis/__init__.py +0 -0
  92. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/analysis/eval.py +0 -0
  93. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/asr/__init__.py +0 -0
  94. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/asr/aliyun.py +0 -0
  95. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/asr/funaudio.py +0 -0
  96. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/asr/num2chinese.py +0 -0
  97. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/asr/oai_whisper.py +0 -0
  98. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/asr/rev.py +0 -0
  99. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/asr/utils.py +0 -0
  100. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/asr/whisper.py +0 -0
  101. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/asr/whisperx.py +0 -0
  102. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/avqi/__init__.py +0 -0
  103. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/avqi/engine.py +0 -0
  104. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/base.py +0 -0
  105. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/cleanup/__init__.py +0 -0
  106. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  107. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  108. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  109. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/cleanup/retrace.py +0 -0
  110. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  111. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  112. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/cleanup/support/test.test +0 -0
  113. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/diarization/__init__.py +0 -0
  114. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/diarization/pyannote.py +0 -0
  115. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/dispatch.py +0 -0
  116. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/fa/__init__.py +0 -0
  117. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  118. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  119. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  120. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  121. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  122. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  123. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  124. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  125. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  126. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/pipeline.py +0 -0
  127. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/speaker/__init__.py +0 -0
  128. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  129. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/translate/__init__.py +0 -0
  130. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/translate/gtrans.py +0 -0
  131. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/translate/seamless.py +0 -0
  132. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/translate/utils.py +0 -0
  133. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/utr/__init__.py +0 -0
  134. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/utr/funaudio_utr.py +0 -0
  135. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/utr/rev_utr.py +0 -0
  136. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/utr/tencent_utr.py +0 -0
  137. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/utr/utils.py +0 -0
  138. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  139. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/utterance/__init__.py +0 -0
  140. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  141. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/tests/__init__.py +0 -0
  142. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/tests/conftest.py +0 -0
  143. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  144. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  145. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  146. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  147. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  148. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  149. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  150. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  151. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  152. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  153. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  154. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  155. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/tests/pipelines/fixures.py +0 -0
  156. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  157. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  158. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/tests/test_document.py +0 -0
  159. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/utils/__init__.py +0 -0
  160. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/utils/abbrev.py +0 -0
  161. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/utils/compounds.py +0 -0
  162. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/utils/config.py +0 -0
  163. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/utils/dp.py +0 -0
  164. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/utils/names.py +0 -0
  165. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/batchalign/utils/utils.py +0 -0
  166. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/setup.cfg +0 -0
  167. {batchalignhk-0.7.22.post5 → batchalignhk-0.7.22.post7}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: BatchalignHK
3
- Version: 0.7.22.post5
3
+ Version: 0.7.22.post7
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: BatchalignHK
3
- Version: 0.7.22.post5
3
+ Version: 0.7.22.post7
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -127,8 +127,10 @@ class CHATFile(BaseFormat):
127
127
  elif str(i).strip() in ENDING_PUNCT:
128
128
  continue
129
129
  else:
130
- main.append(generate_chat_utterance(i, special and doc.langs[0] == "eng",
131
- write_wor=write_wor))
130
+ main.append(generate_chat_utterance(i,
131
+ special and doc.langs[0] == "eng",
132
+ write_wor=write_wor,
133
+ merge_letters="yue" in doc.langs))
132
134
  main.append("@End\n")
133
135
 
134
136
  raw = "\n".join(main)
@@ -11,7 +11,7 @@ import warnings
11
11
  # document[3].text = None
12
12
  # document[3].model_dump()
13
13
 
14
- def generate_chat_utterance(utterance: Utterance, special_mor=False, write_wor=True):
14
+ def generate_chat_utterance(utterance: Utterance, special_mor=False, write_wor=True, merge_letters=False):
15
15
  """Converts an Utterance to a CHAT string.
16
16
 
17
17
  Parameters
@@ -30,6 +30,9 @@ def generate_chat_utterance(utterance: Utterance, special_mor=False, write_wor=T
30
30
  main_line = str(utterance)
31
31
  # last minute corrections
32
32
  # main_line = re.sub(r"<([\w ]+) \[\/", r"<\1> [/", main_line)
33
+ if merge_letters:
34
+ main_line = re.sub(r"([a-z]) ", r"\1", main_line)
35
+ main_line = re.sub(r"([a-z])([^a-z])", r"\1 \2", main_line)
33
36
  main_line = re.sub(r"«", "“", main_line)
34
37
  main_line = re.sub(r"»", "”", main_line)
35
38
  main_line = re.sub(r"—", "-", main_line)
@@ -189,40 +189,17 @@ class TencentEngine(BatchalignEngine):
189
189
  roman_cache = ""
190
190
  roman_cache_start = i.StartMs
191
191
  roman_cache_end = i.StartMs
192
+
192
193
  for j in i.Words:
193
194
  word = j.Word
194
195
  if self.__lang == "yue":
195
196
  word = cc.convert(word)
196
-
197
197
  word = self.replace_cantonese_words(word)
198
-
199
- if self.is_roman(word):
200
- if roman_cache == "":
201
- roman_cache_start = (j.OffsetStartMs + start)
202
- roman_cache = roman_cache + word
203
- roman_cache_end = (j.OffsetEndMs + start)
204
- else:
205
- if roman_cache != "":
206
- turn.append({
207
- "type": "text",
208
- "ts": roman_cache_start / 1000,
209
- "end_ts": roman_cache_end / 1000,
210
- "value": roman_cache
211
- })
212
- roman_cache = ""
213
- turn.append({
214
- "type": "text",
215
- "ts": (j.OffsetStartMs + start) / 1000,
216
- "end_ts": (j.OffsetEndMs + start) / 1000,
217
- "value": word
218
- })
219
-
220
- if roman_cache != "":
221
198
  turn.append({
222
199
  "type": "text",
223
- "ts": roman_cache_start / 1000,
224
- "end_ts": roman_cache_end / 1000,
225
- "value": roman_cache
200
+ "ts": (j.OffsetStartMs + start) / 1000,
201
+ "end_ts": (j.OffsetEndMs + start) / 1000,
202
+ "value": word
226
203
  })
227
204
 
228
205
  turns.append({
@@ -232,7 +209,6 @@ class TencentEngine(BatchalignEngine):
232
209
  L.debug(f"Tencent done.")
233
210
 
234
211
  # Extract the text from the small volume parts for translation
235
-
236
212
  doc = process_generation({"monologues": turns},
237
213
  self.__lang_code,
238
214
  utterance_engine=self.__engine)
@@ -79,6 +79,8 @@ def handler(word, lang=None):
79
79
  target = word.text
80
80
  if not target:
81
81
  target = word.text
82
+ target = target.replace("」", "")
83
+ target = target.replace("「", "")
82
84
 
83
85
  # unknown flag
84
86
  unknown = False
@@ -145,7 +147,7 @@ def handler(word, lang=None):
145
147
  pos,target = verbform(pos,target,word.text)
146
148
  target = target.replace(',', 'cm')
147
149
 
148
- target = re.sub(r'@\w$', '', target)
150
+ target = re.sub(r'@\w$', '', target).strip()
149
151
  return f"{'' if not unknown else '0'}{pos}|{target}"
150
152
 
151
153
  # POS specific handler
@@ -0,0 +1,3 @@
1
+ 0.7.22-post.7
2
+ September 28th, 2025
3
+ Remove spaces between letters for Cantonese
@@ -1,3 +0,0 @@
1
- 0.7.22-post.5
2
- September 3rd, 2025
3
- Japanese parsing fixes