BatchalignHK 0.7.19.post18__tar.gz → 0.7.19.post20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/BatchalignHK.egg-info/PKG-INFO +4 -2
  2. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/BatchalignHK.egg-info/SOURCES.txt +34 -0
  3. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/BatchalignHK.egg-info/requires.txt +2 -0
  4. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/PKG-INFO +4 -2
  5. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/README.md +1 -1
  6. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/cli/cli.py +4 -0
  7. batchalignhk-0.7.19.post20/batchalign/extern/nls/__init__.py +10 -0
  8. batchalignhk-0.7.19.post20/batchalign/extern/nls/core.py +183 -0
  9. batchalignhk-0.7.19.post20/batchalign/extern/nls/exception.py +31 -0
  10. batchalignhk-0.7.19.post20/batchalign/extern/nls/logging.py +65 -0
  11. batchalignhk-0.7.19.post20/batchalign/extern/nls/realtime_meeting.py +321 -0
  12. batchalignhk-0.7.19.post20/batchalign/extern/nls/speech_recognizer.py +315 -0
  13. batchalignhk-0.7.19.post20/batchalign/extern/nls/speech_synthesizer.py +288 -0
  14. batchalignhk-0.7.19.post20/batchalign/extern/nls/speech_transcriber.py +375 -0
  15. batchalignhk-0.7.19.post20/batchalign/extern/nls/stream_input_tts.py +439 -0
  16. batchalignhk-0.7.19.post20/batchalign/extern/nls/token.py +49 -0
  17. batchalignhk-0.7.19.post20/batchalign/extern/nls/util.py +44 -0
  18. batchalignhk-0.7.19.post20/batchalign/extern/nls/version.py +2 -0
  19. batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/__init__.py +26 -0
  20. batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/_abnf.py +423 -0
  21. batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/_app.py +426 -0
  22. batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/_cookiejar.py +67 -0
  23. batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/_core.py +607 -0
  24. batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/_exceptions.py +84 -0
  25. batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/_handshake.py +200 -0
  26. batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/_http.py +335 -0
  27. batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/_logging.py +90 -0
  28. batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/_socket.py +182 -0
  29. batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/_ssl_compat.py +44 -0
  30. batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/_url.py +176 -0
  31. batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/_utils.py +104 -0
  32. batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/tests/echo-server.py +21 -0
  33. batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/tests/test_abnf.py +89 -0
  34. batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/tests/test_app.py +179 -0
  35. batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/tests/test_cookiejar.py +119 -0
  36. batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/tests/test_http.py +177 -0
  37. batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/tests/test_url.py +301 -0
  38. batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/tests/test_websocket.py +458 -0
  39. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/__init__.py +2 -1
  40. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/asr/__init__.py +1 -0
  41. batchalignhk-0.7.19.post20/batchalign/pipelines/asr/aliyun.py +254 -0
  42. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/asr/utils.py +1 -1
  43. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/dispatch.py +4 -1
  44. batchalignhk-0.7.19.post20/batchalign/tests/__init__.py +0 -0
  45. batchalignhk-0.7.19.post20/batchalign/version +3 -0
  46. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/setup.py +2 -0
  47. batchalignhk-0.7.19.post18/batchalign/version +0 -3
  48. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/BatchalignHK.egg-info/dependency_links.txt +0 -0
  49. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/BatchalignHK.egg-info/entry_points.txt +0 -0
  50. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/BatchalignHK.egg-info/top_level.txt +0 -0
  51. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/LICENSE +0 -0
  52. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/MANIFEST.in +0 -0
  53. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/__init__.py +0 -0
  54. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/__main__.py +0 -0
  55. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/cli/__init__.py +0 -0
  56. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/cli/dispatch.py +0 -0
  57. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/constants.py +0 -0
  58. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/document.py +0 -0
  59. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/errors.py +0 -0
  60. {batchalignhk-0.7.19.post18/batchalign → batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket}/tests/__init__.py +0 -0
  61. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/formats/__init__.py +0 -0
  62. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/formats/base.py +0 -0
  63. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/formats/chat/__init__.py +0 -0
  64. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/formats/chat/file.py +0 -0
  65. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/formats/chat/generator.py +0 -0
  66. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/formats/chat/lexer.py +0 -0
  67. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/formats/chat/parser.py +0 -0
  68. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/formats/chat/utils.py +0 -0
  69. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/formats/textgrid/__init__.py +0 -0
  70. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/formats/textgrid/file.py +0 -0
  71. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/formats/textgrid/generator.py +0 -0
  72. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/formats/textgrid/parser.py +0 -0
  73. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/__init__.py +0 -0
  74. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/resolve.py +0 -0
  75. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/speaker/__init__.py +0 -0
  76. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/speaker/config.yaml +0 -0
  77. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/speaker/infer.py +0 -0
  78. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/speaker/utils.py +0 -0
  79. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/training/__init__.py +0 -0
  80. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/training/run.py +0 -0
  81. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/training/utils.py +0 -0
  82. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/utils.py +0 -0
  83. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/utterance/__init__.py +0 -0
  84. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/utterance/cantonese_infer.py +0 -0
  85. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/utterance/dataset.py +0 -0
  86. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/utterance/execute.py +0 -0
  87. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/utterance/infer.py +0 -0
  88. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/utterance/prep.py +0 -0
  89. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/utterance/train.py +0 -0
  90. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/wave2vec/__init__.py +0 -0
  91. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/wave2vec/infer_fa.py +0 -0
  92. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/whisper/__init__.py +0 -0
  93. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/whisper/infer_asr.py +0 -0
  94. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/whisper/infer_fa.py +0 -0
  95. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/analysis/__init__.py +0 -0
  96. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/analysis/eval.py +0 -0
  97. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/asr/num2chinese.py +0 -0
  98. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/asr/oai_whisper.py +0 -0
  99. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/asr/rev.py +0 -0
  100. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/asr/tencent.py +0 -0
  101. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/asr/whisper.py +0 -0
  102. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/asr/whisperx.py +0 -0
  103. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/base.py +0 -0
  104. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/cleanup/__init__.py +0 -0
  105. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  106. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  107. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  108. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/cleanup/retrace.py +0 -0
  109. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  110. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  111. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/cleanup/support/test.test +0 -0
  112. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/fa/__init__.py +0 -0
  113. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  114. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  115. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  116. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  117. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  118. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  119. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  120. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  121. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  122. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  123. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/pipeline.py +0 -0
  124. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/speaker/__init__.py +0 -0
  125. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  126. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/translate/__init__.py +0 -0
  127. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/translate/gtrans.py +0 -0
  128. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/translate/seamless.py +0 -0
  129. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/translate/utils.py +0 -0
  130. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/utr/__init__.py +0 -0
  131. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/utr/rev_utr.py +0 -0
  132. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/utr/tencent_utr.py +0 -0
  133. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/utr/utils.py +0 -0
  134. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  135. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/utterance/__init__.py +0 -0
  136. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  137. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/conftest.py +0 -0
  138. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  139. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  140. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  141. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  142. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  143. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  144. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  145. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  146. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  147. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  148. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  149. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  150. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/pipelines/fixures.py +0 -0
  151. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  152. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  153. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/test_document.py +0 -0
  154. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/utils/__init__.py +0 -0
  155. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/utils/abbrev.py +0 -0
  156. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/utils/config.py +0 -0
  157. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/utils/dp.py +0 -0
  158. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/utils/names.py +0 -0
  159. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/utils/utils.py +0 -0
  160. {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: BatchalignHK
3
- Version: 0.7.19.post18
3
+ Version: 0.7.19.post20
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -37,6 +37,8 @@ Requires-Dist: sentencepiece
37
37
  Requires-Dist: tencentcloud-sdk-python-common
38
38
  Requires-Dist: tencentcloud-sdk-python-asr
39
39
  Requires-Dist: googletrans
40
+ Requires-Dist: aliyun-python-sdk-core>=2.13.3
41
+ Requires-Dist: oss2
40
42
  Requires-Dist: openai-whisper>=20240930
41
43
  Provides-Extra: dev
42
44
  Requires-Dist: pytest; extra == "dev"
@@ -64,7 +66,7 @@ The TalkBank Project, of which Batchalign is a part, is supported by NIH grant H
64
66
 
65
67
  ## Quick Start
66
68
 
67
- The following instructions provide a quick start to installing Batchalign. For most users aiming to process CHAT and audio with Batchalign, we recommend more detailed usage instructions: for [usage](https://talkbank.org/info/BA2-usage.pdf) and [human transcript cleanup](https://talkbank.org/info/BA2-cleanup.pdf). The following provides a quick start guide for the program.
69
+ The following instructions provide a quick start to installing Batchalign. For most users aiming to process CHAT and audio with Batchalign, we recommend more detailed usage instructions: for [usage](https://talkbank.org/0info/BA2-usage.pdf) and [human transcript cleanup](https://talkbank.org/0info/BA2-cleanup.pdf). The following provides a quick start guide for the program.
68
70
 
69
71
  ### Install and Update the Package
70
72
  Batchalign is on PyPi (as `batchalign`). We recommend the use of UV to install Batchalign:
@@ -17,6 +17,39 @@ batchalign/version
17
17
  batchalign/cli/__init__.py
18
18
  batchalign/cli/cli.py
19
19
  batchalign/cli/dispatch.py
20
+ batchalign/extern/nls/__init__.py
21
+ batchalign/extern/nls/core.py
22
+ batchalign/extern/nls/exception.py
23
+ batchalign/extern/nls/logging.py
24
+ batchalign/extern/nls/realtime_meeting.py
25
+ batchalign/extern/nls/speech_recognizer.py
26
+ batchalign/extern/nls/speech_synthesizer.py
27
+ batchalign/extern/nls/speech_transcriber.py
28
+ batchalign/extern/nls/stream_input_tts.py
29
+ batchalign/extern/nls/token.py
30
+ batchalign/extern/nls/util.py
31
+ batchalign/extern/nls/version.py
32
+ batchalign/extern/nls/websocket/__init__.py
33
+ batchalign/extern/nls/websocket/_abnf.py
34
+ batchalign/extern/nls/websocket/_app.py
35
+ batchalign/extern/nls/websocket/_cookiejar.py
36
+ batchalign/extern/nls/websocket/_core.py
37
+ batchalign/extern/nls/websocket/_exceptions.py
38
+ batchalign/extern/nls/websocket/_handshake.py
39
+ batchalign/extern/nls/websocket/_http.py
40
+ batchalign/extern/nls/websocket/_logging.py
41
+ batchalign/extern/nls/websocket/_socket.py
42
+ batchalign/extern/nls/websocket/_ssl_compat.py
43
+ batchalign/extern/nls/websocket/_url.py
44
+ batchalign/extern/nls/websocket/_utils.py
45
+ batchalign/extern/nls/websocket/tests/__init__.py
46
+ batchalign/extern/nls/websocket/tests/echo-server.py
47
+ batchalign/extern/nls/websocket/tests/test_abnf.py
48
+ batchalign/extern/nls/websocket/tests/test_app.py
49
+ batchalign/extern/nls/websocket/tests/test_cookiejar.py
50
+ batchalign/extern/nls/websocket/tests/test_http.py
51
+ batchalign/extern/nls/websocket/tests/test_url.py
52
+ batchalign/extern/nls/websocket/tests/test_websocket.py
20
53
  batchalign/formats/__init__.py
21
54
  batchalign/formats/base.py
22
55
  batchalign/formats/chat/__init__.py
@@ -58,6 +91,7 @@ batchalign/pipelines/pipeline.py
58
91
  batchalign/pipelines/analysis/__init__.py
59
92
  batchalign/pipelines/analysis/eval.py
60
93
  batchalign/pipelines/asr/__init__.py
94
+ batchalign/pipelines/asr/aliyun.py
61
95
  batchalign/pipelines/asr/num2chinese.py
62
96
  batchalign/pipelines/asr/oai_whisper.py
63
97
  batchalign/pipelines/asr/rev.py
@@ -27,6 +27,8 @@ sentencepiece
27
27
  tencentcloud-sdk-python-common
28
28
  tencentcloud-sdk-python-asr
29
29
  googletrans
30
+ aliyun-python-sdk-core>=2.13.3
31
+ oss2
30
32
  openai-whisper>=20240930
31
33
 
32
34
  [dev]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: BatchalignHK
3
- Version: 0.7.19.post18
3
+ Version: 0.7.19.post20
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -37,6 +37,8 @@ Requires-Dist: sentencepiece
37
37
  Requires-Dist: tencentcloud-sdk-python-common
38
38
  Requires-Dist: tencentcloud-sdk-python-asr
39
39
  Requires-Dist: googletrans
40
+ Requires-Dist: aliyun-python-sdk-core>=2.13.3
41
+ Requires-Dist: oss2
40
42
  Requires-Dist: openai-whisper>=20240930
41
43
  Provides-Extra: dev
42
44
  Requires-Dist: pytest; extra == "dev"
@@ -64,7 +66,7 @@ The TalkBank Project, of which Batchalign is a part, is supported by NIH grant H
64
66
 
65
67
  ## Quick Start
66
68
 
67
- The following instructions provide a quick start to installing Batchalign. For most users aiming to process CHAT and audio with Batchalign, we recommend more detailed usage instructions: for [usage](https://talkbank.org/info/BA2-usage.pdf) and [human transcript cleanup](https://talkbank.org/info/BA2-cleanup.pdf). The following provides a quick start guide for the program.
69
+ The following instructions provide a quick start to installing Batchalign. For most users aiming to process CHAT and audio with Batchalign, we recommend more detailed usage instructions: for [usage](https://talkbank.org/0info/BA2-usage.pdf) and [human transcript cleanup](https://talkbank.org/0info/BA2-cleanup.pdf). The following provides a quick start guide for the program.
68
70
 
69
71
  ### Install and Update the Package
70
72
  Batchalign is on PyPi (as `batchalign`). We recommend the use of UV to install Batchalign:
@@ -8,7 +8,7 @@ The TalkBank Project, of which Batchalign is a part, is supported by NIH grant H
8
8
 
9
9
  ## Quick Start
10
10
 
11
- The following instructions provide a quick start to installing Batchalign. For most users aiming to process CHAT and audio with Batchalign, we recommend more detailed usage instructions: for [usage](https://talkbank.org/info/BA2-usage.pdf) and [human transcript cleanup](https://talkbank.org/info/BA2-cleanup.pdf). The following provides a quick start guide for the program.
11
+ The following instructions provide a quick start to installing Batchalign. For most users aiming to process CHAT and audio with Batchalign, we recommend more detailed usage instructions: for [usage](https://talkbank.org/0info/BA2-usage.pdf) and [human transcript cleanup](https://talkbank.org/0info/BA2-cleanup.pdf). The following provides a quick start guide for the program.
12
12
 
13
13
  ### Install and Update the Package
14
14
  Batchalign is on PyPi (as `batchalign`). We recommend the use of UV to install Batchalign:
@@ -160,6 +160,8 @@ def align(ctx, in_dir, out_dir, whisper, wav2vec, tencent, **kwargs):
160
160
  default=False, help="Use Tencent instead of Rev.AI (default).")
161
161
  @click.option("--whisperx/--rev",
162
162
  default=False, help="Use WhisperX instead of Rev.AI (default). Superceeds --whisper.")
163
+ @click.option("--alibaba/--rev",
164
+ default=False, help="Use Alibaba instead of Rev.AI (default). Superceeds --whisper.")
163
165
  @click.option("--diarize/--nodiarize",
164
166
  default=False, help="Perform speaker diarization (this flag is ignored with Rev.AI)")
165
167
  @click.option("--wor/--nowor",
@@ -188,6 +190,8 @@ def transcribe(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
188
190
  asr = "tencent"
189
191
  if kwargs["whisper_oai"]:
190
192
  asr = "whisper_oai"
193
+ if kwargs["alibaba"]:
194
+ asr = "aliyun"
191
195
 
192
196
  def writer(doc, output):
193
197
  doc.content.insert(0, CustomLine(id="Comment", type=CustomLineType.INDEPENDENT,
@@ -0,0 +1,10 @@
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+
3
+ from .logging import *
4
+ from .speech_recognizer import *
5
+ from .speech_transcriber import *
6
+ from .speech_synthesizer import *
7
+ from .stream_input_tts import *
8
+ from .realtime_meeting import *
9
+ from .util import *
10
+ from .version import __version__
@@ -0,0 +1,183 @@
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+
3
+ import logging
4
+ import threading
5
+
6
+ from enum import Enum, unique
7
+ from queue import Queue
8
+
9
+ from . import logging, token, websocket
10
+ from .exception import InvalidParameter, ConnectionTimeout, ConnectionUnavailable
11
+
12
# Default websocket endpoint; used as the default ``url`` of NlsCore.
__URL__ = 'wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1'
# Base handshake headers. NlsCore appends an 'X-NLS-Token: <token>' entry
# to a copy of this list for every connection.
# NOTE(review): the Sec-WebSocket-Key below is a fixed value rather than a
# per-connection nonce -- confirm this is what the gateway expects.
__HEADER__ = [
    'Sec-WebSocket-Key: x3JJHMbDL1EzLkh9GBhXDw==',
    'Sec-WebSocket-Version: 13',
]

# Log line layout. Not referenced by the code visible in this module.
__FORMAT__ = '%(asctime)s - %(levelname)s - %(message)s'
#__all__ = ['NlsCore']
20
+
21
def core_on_msg(ws, message, args):
    """
    Websocket message hook: forward ``message`` to the owning NlsCore.

    Parameters
    ----------
    ws: object
        the websocket that produced the event (unused here)
    message: object
        the received message, passed on as the single callback argument
    args: list
        callback args registered on the websocket; ``args[0]`` is the
        object whose 'on_message' callback is issued
    """
    logging.debug('core_on_msg:{}'.format(message))
    if args:
        owner = args[0]
        owner._NlsCore__issue_callback('on_message', [message])
    else:
        # Without an owner there is nobody to deliver the message to.
        logging.error('callback core_on_msg with null args')
28
+
29
def core_on_error(ws, message, args):
    """
    Websocket error hook: forward the error to the owning NlsCore.

    Parameters
    ----------
    ws: object
        the websocket that produced the event (unused here)
    message: object
        the error value, passed on as the single callback argument
    args: list
        callback args registered on the websocket; ``args[0]`` is the
        object whose 'on_error' callback is issued
    """
    logging.debug('core_on_error:{}'.format(message))
    if args:
        owner = args[0]
        owner._NlsCore__issue_callback('on_error', [message])
    else:
        # Without an owner there is nobody to report the error to.
        logging.error('callback core_on_error with null args')
36
+
37
def core_on_close(ws, close_status_code, close_msg, args):
    """
    Websocket close hook: tell the owning NlsCore the connection closed.

    Parameters
    ----------
    ws: object
        the websocket that produced the event (unused here)
    close_status_code: object
        close status reported by the websocket (not forwarded)
    close_msg: object
        close reason reported by the websocket (not forwarded)
    args: list
        callback args registered on the websocket; ``args[0]`` is the
        object whose 'on_close' callback is issued
    """
    logging.debug('core_on_close')
    if args:
        owner = args[0]
        # The status code and reason are intentionally not passed along.
        owner._NlsCore__issue_callback('on_close')
    else:
        logging.error('callback core_on_close with null args')
44
+
45
def core_on_open(ws, args):
    """
    Websocket open hook: mark the owner connected and send the start message.

    Parameters
    ----------
    ws: object
        the websocket that was just opened; closed again when ``args``
        is unusable
    args: list
        expected to hold exactly two items: the owning NlsCore and the
        message to send once the connection is open
    """
    logging.debug('core_on_open:{}'.format(args))
    if not args:
        logging.debug('callback with null args')
        ws.close()
        # Nothing to notify; previously execution fell through to args[0]
        # and raised after the socket had already been closed.
        return
    if len(args) != 2:
        logging.debug('callback args not 2')
        ws.close()
        # Without both the owner and the start message we cannot proceed.
        return
    nls, start_msg = args
    # Flip the owner to Connected first so that start() sends the message
    # over the open socket instead of trying to connect again.
    nls._NlsCore__notify_on_open()
    nls.start(start_msg, nls._NlsCore__ping_interval, nls._NlsCore__ping_timeout)
    nls._NlsCore__issue_callback('on_open')
57
+
58
def core_on_data(ws, data, opcode, flag, args):
    """
    Websocket data hook: forward a received frame to the owning NlsCore.

    Parameters
    ----------
    ws: object
        the websocket that produced the event (unused here)
    data: object
        frame payload
    opcode: object
        frame opcode as reported by the websocket
    flag: object
        third value reported by the websocket for the frame, forwarded
        unchanged
    args: list
        callback args registered on the websocket; ``args[0]`` is the
        object whose 'on_data' callback is issued
    """
    logging.debug('core_on_data opcode={}'.format(opcode))
    if args:
        owner = args[0]
        owner._NlsCore__issue_callback('on_data', [data, opcode, flag])
    else:
        # Without an owner there is nobody to deliver the frame to.
        logging.error('callback core_on_data with null args')
65
+
66
@unique
class NlsConnectionStatus(Enum):
    """
    Connection state tracked by NlsCore.
    """
    # No open websocket; NlsCore.start() connects first in this state.
    Disconnected = 0
    # The websocket reported open; messages are sent directly.
    Connected = 1
70
+
71
+
72
class NlsCore:
    """
    Owns one websocket connection and relays its events to user callbacks.

    The module-level ``core_on_*`` functions are registered on the
    underlying ``websocket.WebSocketApp`` and call back into this object
    through its name-mangled private members (``_NlsCore__...``), so the
    private method and attribute names used here must not be renamed.
    """

    def __init__(self,
                 url=__URL__,
                 token=None,
                 on_open=None, on_message=None, on_close=None,
                 on_error=None, on_data=None, asynch=False, callback_args=None):
        """
        Parameters
        ----------
        url: str
            websocket endpoint, defaults to ``__URL__``
        token: str
            access token, sent as the 'X-NLS-Token' header; required
        on_open, on_message, on_close, on_error, on_data: callable
            user callbacks; each is called with the event's own arguments
            followed by the items of ``callback_args``
        asynch: bool
            when True, start() does not block waiting for the connection
        callback_args: list
            extra positional arguments appended to every callback call;
            defaults to an empty list

        Raises
        ------
        InvalidParameter
            if ``token`` is empty, or none of on_open / on_message /
            on_close / on_error is given
        """
        self.__url = url
        self.__async = asynch
        if not token:
            raise InvalidParameter('Must provide a valid token!')
        self.__token = token

        supplied = {
            'on_open': on_open,
            'on_message': on_message,
            'on_close': on_close,
            'on_error': on_error,
            'on_data': on_data,
        }
        self.__callbacks = {name: cb for name, cb in supplied.items() if cb}
        # NOTE(review): on_data alone does not satisfy this check (same as
        # before) -- confirm that is intended.
        if not any((on_open, on_message, on_close, on_error)):
            raise InvalidParameter('Must provide at least one callback')

        # A fresh list per instance; a shared mutable default would be
        # visible to every NlsCore created without callback_args.
        if callback_args is None:
            callback_args = []
        logging.debug('callback args:{}'.format(callback_args))
        self.__callback_args = callback_args
        self.__header = __HEADER__ + ['X-NLS-Token: {}'.format(self.__token)]
        # NOTE(review): tracing is switched on for every instance.
        websocket.enableTrace(True)
        self.__ws = websocket.WebSocketApp(self.__url,
                                           self.__header,
                                           on_message=core_on_msg,
                                           on_data=core_on_data,
                                           on_error=core_on_error,
                                           on_close=core_on_close,
                                           callback_args=[self])
        self.__ws.on_open = core_on_open
        self.__lock = threading.Lock()
        self.__cond = threading.Condition()
        self.__connection_status = NlsConnectionStatus.Disconnected

    def start(self, msg, ping_interval, ping_timeout):
        """
        Send ``msg``, connecting first when there is no open connection.

        When disconnected, ``msg`` is stored on the websocket as a callback
        argument and the connection thread is started; core_on_open then
        calls start() again to actually send it.

        Parameters
        ----------
        msg: object
            message to send
        ping_interval, ping_timeout: object
            passed to the websocket's run_forever()

        Raises
        ------
        ConnectionTimeout
            in synchronous mode, if the connection does not open in time
        """
        # Decide under the lock, act after releasing it; the context
        # manager releases the lock even if update_args() raises.
        with self.__lock:
            self.__ping_interval = ping_interval
            self.__ping_timeout = ping_timeout
            must_connect = (self.__connection_status
                            == NlsConnectionStatus.Disconnected)
            if must_connect:
                self.__ws.update_args(self, msg)
        if must_connect:
            self.__connect_before_start(ping_interval, ping_timeout)
        else:
            self.__ws.send(msg)

    def __notify_on_open(self):
        """Mark the connection as open and wake a waiting start()."""
        logging.debug('notify on open')
        with self.__cond:
            self.__connection_status = NlsConnectionStatus.Connected
            self.__cond.notify()

    def __issue_callback(self, which, exargs=None):
        """
        Invoke the user callback registered under ``which``.

        Parameters
        ----------
        which: str
            callback name, e.g. 'on_message'
        exargs: list
            event arguments placed before the instance's callback_args
        """
        if which == 'on_close':
            # Record the close even when no on_close callback was
            # registered; previously the early return below skipped this.
            with self.__cond:
                self.__connection_status = NlsConnectionStatus.Disconnected
                self.__cond.notify()
        if which not in self.__callbacks:
            logging.error('no such callback:{}'.format(which))
            return
        args = list(exargs or ()) + list(self.__callback_args)
        self.__callbacks[which](*args)

    def send(self, msg, binary):
        """
        Send ``msg`` over the open connection.

        Parameters
        ----------
        msg: object
            payload to send
        binary: bool
            send as a binary frame when true, otherwise as a default frame

        Raises
        ------
        ConnectionUnavailable
            if the connection is not open
        """
        with self.__lock:
            disconnected = (self.__connection_status
                            == NlsConnectionStatus.Disconnected)
        if disconnected:
            logging.error('start before send')
            raise ConnectionUnavailable('Must call start before send!')
        if binary:
            self.__ws.send(msg, opcode=websocket.ABNF.OPCODE_BINARY)
        else:
            logging.debug('send {}'.format(msg))
            self.__ws.send(msg)

    def shutdown(self):
        """Close the underlying websocket."""
        self.__ws.close()

    def __run(self, ping_interval, ping_timeout):
        """Thread body: run the websocket loop, then mark disconnected."""
        logging.debug('ws run...')
        self.__ws.run_forever(ping_interval=ping_interval,
                              ping_timeout=ping_timeout)
        with self.__lock:
            self.__connection_status = NlsConnectionStatus.Disconnected
        logging.debug('ws exit...')

    def __connect_before_start(self, ping_interval, ping_timeout):
        """
        Start the websocket thread and, in synchronous mode, wait for open.

        Returns
        -------
        bool or None
            whether the connection is open after a wakeup in synchronous
            mode; None in asynchronous mode or when already connected

        Raises
        ------
        ConnectionTimeout
            if no wakeup arrives within 10 seconds in synchronous mode
        """
        with self.__cond:
            # Started while holding the condition so the open notification
            # cannot fire before we begin waiting below.
            self.__th = threading.Thread(target=self.__run,
                                         args=[ping_interval, ping_timeout])
            self.__th.start()
            if self.__connection_status == NlsConnectionStatus.Disconnected:
                logging.debug('wait cond wakeup')
                if not self.__async:
                    if self.__cond.wait(timeout=10):
                        logging.debug('wakeup without timeout')
                        return self.__connection_status == NlsConnectionStatus.Connected
                    else:
                        logging.debug('wakeup with timeout')
                        raise ConnectionTimeout('Wait response timeout! Please check local network!')
@@ -0,0 +1,31 @@
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+
3
+
4
class InvalidParameter(Exception):
    """A required argument is missing or invalid."""


# Token
class GetTokenFailed(Exception):
    """Token retrieval failed (presumably; raise site not in this module)."""


# Connection
class ConnectionTimeout(Exception):
    """Waiting for the connection to respond timed out."""


class ConnectionUnavailable(Exception):
    """An operation needed an open connection but none was available."""


class StartTimeoutException(Exception):
    """Presumably a start request timed out; raise site not in this module."""


class StopTimeoutException(Exception):
    """Presumably a stop request timed out; raise site not in this module."""


class NotStartException(Exception):
    """Presumably an operation ran before start; raise site not in this module."""


class CompleteTimeoutException(Exception):
    """Presumably completion timed out; raise site not in this module."""


class WrongStateException(Exception):
    """Presumably an operation ran in the wrong state; raise site not in this module."""
@@ -0,0 +1,65 @@
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+
3
+ import logging
4
+
5
# Logger shared by every helper in this module.
_logger = logging.getLogger('nls')

# Fall back to a local no-op handler if this logging module does not
# provide NullHandler.
try:
    from logging import NullHandler
except ImportError:
    class NullHandler(logging.Handler):
        def emit(self, record):
            pass

# Attach a no-op handler until enableTrace() installs a real one.
_logger.addHandler(NullHandler())
# Set by enableTrace(); gates dump() and trace().
_traceEnabled = False
# Formatter layout applied to the handler passed to enableTrace().
__LOG_FORMAT__ = '%(asctime)s - %(levelname)s - %(message)s'

# Names exported by ``from .logging import *``.
__all__=['enableTrace', 'dump', 'error', 'warning', 'debug', 'trace',
         'isEnabledForError', 'isEnabledForDebug', 'isEnabledForTrace']
20
+
21
def enableTrace(traceable, handler=logging.StreamHandler()):
    """
    Switch trace logging on or off.

    Parameters
    ----------
    traceable: bool
        new value of the trace flag; when true the handler is attached
        and the logger level is set to logging.DEBUG
    handler: Handler object
        where log records go; defaults to a single StreamHandler that is
        created once, when this function is defined
    """
    global _traceEnabled
    _traceEnabled = traceable
    if not traceable:
        # Turning tracing off only clears the flag; an already attached
        # handler and the logger level are left untouched.
        return
    _logger.addHandler(handler)
    _logger.setLevel(logging.DEBUG)
    handler.setFormatter(logging.Formatter(__LOG_FORMAT__))
38
+
39
def dump(title, message):
    """
    Log ``message`` framed by a title banner, only while tracing is on.

    Parameters
    ----------
    title: str
        text placed inside the '### ... ###' banner line
    message: object
        value logged between the banner and the closing rule
    """
    if not _traceEnabled:
        return
    banner = '### ' + title + ' ###'
    for entry in (banner, message, '########################################'):
        _logger.debug(entry)
44
+
45
def error(msg):
    """Log ``msg`` on the 'nls' logger at ERROR level."""
    _logger.error(msg)
47
+
48
def warning(msg):
    """Log ``msg`` on the 'nls' logger at WARNING level."""
    _logger.warning(msg)
50
+
51
def debug(msg):
    """Log ``msg`` on the 'nls' logger at DEBUG level."""
    _logger.debug(msg)
53
+
54
def trace(msg):
    """Log ``msg`` at DEBUG level, but only while tracing is enabled."""
    if not _traceEnabled:
        return
    _logger.debug(msg)
57
+
58
def isEnabledForError():
    """Return whether the 'nls' logger would emit ERROR records."""
    return _logger.isEnabledFor(logging.ERROR)
60
+
61
def isEnabledForDebug():
    """Return whether the 'nls' logger would emit DEBUG records."""
    # The level constant is logging.DEBUG; 'logging.Debug' does not exist
    # and made every call raise AttributeError.
    return _logger.isEnabledFor(logging.DEBUG)
63
+
64
def isEnabledForTrace():
    """Return the trace flag last set through enableTrace()."""
    return _traceEnabled