batchalign 0.7.6a13__tar.gz → 0.7.6a14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. {batchalign-0.7.6a13/batchalign.egg-info → batchalign-0.7.6a14}/PKG-INFO +1 -1
  2. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/morphosyntax/ud.py +12 -2
  3. batchalign-0.7.6a14/batchalign/version +3 -0
  4. {batchalign-0.7.6a13 → batchalign-0.7.6a14/batchalign.egg-info}/PKG-INFO +1 -1
  5. batchalign-0.7.6a13/batchalign/version +0 -3
  6. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/LICENSE +0 -0
  7. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/MANIFEST.in +0 -0
  8. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/README.md +0 -0
  9. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/__init__.py +0 -0
  10. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/__main__.py +0 -0
  11. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/cli/__init__.py +0 -0
  12. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/cli/cli.py +0 -0
  13. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/cli/dispatch.py +0 -0
  14. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/constants.py +0 -0
  15. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/document.py +0 -0
  16. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/errors.py +0 -0
  17. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/formats/__init__.py +0 -0
  18. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/formats/base.py +0 -0
  19. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/formats/chat/__init__.py +0 -0
  20. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/formats/chat/file.py +0 -0
  21. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/formats/chat/generator.py +0 -0
  22. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/formats/chat/lexer.py +0 -0
  23. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/formats/chat/parser.py +0 -0
  24. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/formats/chat/utils.py +0 -0
  25. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/formats/textgrid/__init__.py +0 -0
  26. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/formats/textgrid/file.py +0 -0
  27. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/formats/textgrid/generator.py +0 -0
  28. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/formats/textgrid/parser.py +0 -0
  29. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/__init__.py +0 -0
  30. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/resolve.py +0 -0
  31. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/speaker/__init__.py +0 -0
  32. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/speaker/config.yaml +0 -0
  33. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/speaker/infer.py +0 -0
  34. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/speaker/utils.py +0 -0
  35. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/training/__init__.py +0 -0
  36. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/training/run.py +0 -0
  37. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/training/utils.py +0 -0
  38. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/utils.py +0 -0
  39. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/utterance/__init__.py +0 -0
  40. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/utterance/dataset.py +0 -0
  41. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/utterance/execute.py +0 -0
  42. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/utterance/infer.py +0 -0
  43. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/utterance/prep.py +0 -0
  44. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/utterance/train.py +0 -0
  45. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/whisper/__init__.py +0 -0
  46. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/whisper/infer_asr.py +0 -0
  47. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/whisper/infer_fa.py +0 -0
  48. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/__init__.py +0 -0
  49. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/analysis/__init__.py +0 -0
  50. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/analysis/eval.py +0 -0
  51. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/asr/__init__.py +0 -0
  52. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/asr/rev.py +0 -0
  53. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/asr/utils.py +0 -0
  54. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/asr/whisper.py +0 -0
  55. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/asr/whisperx.py +0 -0
  56. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/base.py +0 -0
  57. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/cleanup/__init__.py +0 -0
  58. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  59. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  60. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  61. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/cleanup/retrace.py +0 -0
  62. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  63. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  64. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/cleanup/support/test.test +0 -0
  65. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/dispatch.py +0 -0
  66. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/fa/__init__.py +0 -0
  67. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  68. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  69. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  70. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  71. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  72. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/pipeline.py +0 -0
  73. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/speaker/__init__.py +0 -0
  74. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  75. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/utr/__init__.py +0 -0
  76. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/utr/rev_utr.py +0 -0
  77. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/utr/utils.py +0 -0
  78. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  79. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/utterance/__init__.py +0 -0
  80. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  81. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/__init__.py +0 -0
  82. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/conftest.py +0 -0
  83. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  84. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  85. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  86. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  87. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  88. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  89. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  90. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  91. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  92. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  93. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  94. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  95. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/pipelines/fixures.py +0 -0
  96. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  97. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  98. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/test_document.py +0 -0
  99. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/utils/__init__.py +0 -0
  100. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/utils/config.py +0 -0
  101. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/utils/dp.py +0 -0
  102. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/utils/utils.py +0 -0
  103. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign.egg-info/SOURCES.txt +0 -0
  104. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign.egg-info/dependency_links.txt +0 -0
  105. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign.egg-info/entry_points.txt +0 -0
  106. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign.egg-info/requires.txt +0 -0
  107. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign.egg-info/top_level.txt +0 -0
  108. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/setup.cfg +0 -0
  109. {batchalign-0.7.6a13 → batchalign-0.7.6a14}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.6a13
3
+ Version: 0.7.6a14
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -872,6 +872,8 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
872
872
  for i,j in enumerate(ut):
873
873
  for k in j.text:
874
874
  ud_chars.append(ReferenceTarget(k, payload=i))
875
+ creaky = False
876
+ collected = ""
875
877
  # brrr
876
878
  aligned = align(chunks_chars, ud_chars, tqdm=False)
877
879
  for i in aligned:
@@ -879,8 +881,14 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
879
881
  if i.reference_payload not in chunks_backplate[i.payload]:
880
882
  chunks_backplate[i.payload].append(i.reference_payload)
881
883
  elif isinstance(i, Extra) and i.extra_type == ExtraType.PAYLOAD:
882
- # just put it back
883
- chunks_backplate[i.payload].append(i.key)
884
+ if i.key == "*":
885
+ creaky = not creaky
886
+ chunks_backplate[i.payload].append("*"+collected+"*")
887
+ collected = ""
888
+ elif creaky:
889
+ collected += i.key
890
+ elif not creaky:
891
+ chunks_backplate[i.payload].append(i.key)
884
892
  # we want to replace the morphology of forms that are not actually
885
893
  # supposed to be analyzed
886
894
  elif isinstance(i, Extra) and i.extra_type == ExtraType.REFERENCE:
@@ -915,6 +923,8 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
915
923
  retokenized_ut = retokenized_ut.replace(" ↑", "↑")
916
924
  retokenized_ut = re.sub(r"@ ?w ?p", "@wp", retokenized_ut)
917
925
  retokenized_ut = retokenized_ut.replace(" @", "@")
926
+ retokenized_ut = re.sub(r"\*[* ]*", "*", retokenized_ut)
927
+ retokenized_ut = re.sub(r"\*(.*?)\*", r"*\1* ", retokenized_ut)
918
928
  # pray to everyone that it works---this will simply crash and ignore
919
929
  # the utterance if it didn't work, so we are doing this as a sanity
920
930
  # check rather than needing the parsed result
@@ -0,0 +1,3 @@
1
+ 0.7.6-alpha.14
2
+ October 14, 2024
3
+ creaky
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.6a13
3
+ Version: 0.7.6a14
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,3 +0,0 @@
1
- 0.7.6-alpha.13
2
- October 13, 2024
3
- patch bug regarding comma structure
File without changes
File without changes
File without changes
File without changes
File without changes