batchalign 0.7.6a13__tar.gz → 0.7.6a15__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (109)
  1. {batchalign-0.7.6a13/batchalign.egg-info → batchalign-0.7.6a15}/PKG-INFO +1 -1
  2. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/morphosyntax/ud.py +17 -3
  3. batchalign-0.7.6a15/batchalign/version +3 -0
  4. {batchalign-0.7.6a13 → batchalign-0.7.6a15/batchalign.egg-info}/PKG-INFO +1 -1
  5. batchalign-0.7.6a13/batchalign/version +0 -3
  6. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/LICENSE +0 -0
  7. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/MANIFEST.in +0 -0
  8. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/README.md +0 -0
  9. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/__init__.py +0 -0
  10. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/__main__.py +0 -0
  11. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/cli/__init__.py +0 -0
  12. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/cli/cli.py +0 -0
  13. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/cli/dispatch.py +0 -0
  14. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/constants.py +0 -0
  15. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/document.py +0 -0
  16. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/errors.py +0 -0
  17. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/formats/__init__.py +0 -0
  18. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/formats/base.py +0 -0
  19. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/formats/chat/__init__.py +0 -0
  20. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/formats/chat/file.py +0 -0
  21. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/formats/chat/generator.py +0 -0
  22. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/formats/chat/lexer.py +0 -0
  23. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/formats/chat/parser.py +0 -0
  24. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/formats/chat/utils.py +0 -0
  25. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/formats/textgrid/__init__.py +0 -0
  26. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/formats/textgrid/file.py +0 -0
  27. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/formats/textgrid/generator.py +0 -0
  28. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/formats/textgrid/parser.py +0 -0
  29. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/__init__.py +0 -0
  30. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/resolve.py +0 -0
  31. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/speaker/__init__.py +0 -0
  32. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/speaker/config.yaml +0 -0
  33. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/speaker/infer.py +0 -0
  34. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/speaker/utils.py +0 -0
  35. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/training/__init__.py +0 -0
  36. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/training/run.py +0 -0
  37. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/training/utils.py +0 -0
  38. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/utils.py +0 -0
  39. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/utterance/__init__.py +0 -0
  40. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/utterance/dataset.py +0 -0
  41. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/utterance/execute.py +0 -0
  42. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/utterance/infer.py +0 -0
  43. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/utterance/prep.py +0 -0
  44. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/utterance/train.py +0 -0
  45. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/whisper/__init__.py +0 -0
  46. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/whisper/infer_asr.py +0 -0
  47. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/whisper/infer_fa.py +0 -0
  48. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/__init__.py +0 -0
  49. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/analysis/__init__.py +0 -0
  50. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/analysis/eval.py +0 -0
  51. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/asr/__init__.py +0 -0
  52. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/asr/rev.py +0 -0
  53. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/asr/utils.py +0 -0
  54. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/asr/whisper.py +0 -0
  55. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/asr/whisperx.py +0 -0
  56. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/base.py +0 -0
  57. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/cleanup/__init__.py +0 -0
  58. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  59. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  60. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  61. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/cleanup/retrace.py +0 -0
  62. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  63. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  64. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/cleanup/support/test.test +0 -0
  65. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/dispatch.py +0 -0
  66. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/fa/__init__.py +0 -0
  67. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  68. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  69. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  70. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  71. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  72. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/pipeline.py +0 -0
  73. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/speaker/__init__.py +0 -0
  74. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  75. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/utr/__init__.py +0 -0
  76. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/utr/rev_utr.py +0 -0
  77. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/utr/utils.py +0 -0
  78. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  79. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/utterance/__init__.py +0 -0
  80. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  81. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/__init__.py +0 -0
  82. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/conftest.py +0 -0
  83. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  84. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  85. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  86. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  87. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  88. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  89. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  90. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  91. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  92. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  93. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  94. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  95. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/pipelines/fixures.py +0 -0
  96. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  97. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  98. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/test_document.py +0 -0
  99. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/utils/__init__.py +0 -0
  100. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/utils/config.py +0 -0
  101. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/utils/dp.py +0 -0
  102. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/utils/utils.py +0 -0
  103. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign.egg-info/SOURCES.txt +0 -0
  104. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign.egg-info/dependency_links.txt +0 -0
  105. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign.egg-info/entry_points.txt +0 -0
  106. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign.egg-info/requires.txt +0 -0
  107. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign.egg-info/top_level.txt +0 -0
  108. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/setup.cfg +0 -0
  109. {batchalign-0.7.6a13 → batchalign-0.7.6a15}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.6a13
3
+ Version: 0.7.6a15
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -213,11 +213,15 @@ def handler__NOUN(word, lang=None):
213
213
  if word.deprel == "obj" and case.strip() == "":
214
214
  case = "Acc"
215
215
 
216
+ ger = ""
217
+ if word.text.endswith("ing") and lang == "en":
218
+ ger += "-Ger"
219
+
216
220
  # clear defaults
217
221
  if gender_str == "-Com,Neut" or gender_str == "-Com" or gender_str == "-ComNeut": gender_str=""
218
222
  if number_str == "-Sing": number_str=""
219
223
 
220
- return handler(word, lang)+gender_str+number_str+stringify_feats(case, type)
224
+ return handler(word, lang)+gender_str+number_str+stringify_feats(case, type)+ger
221
225
 
222
226
  def handler__PROPN(word, lang=None):
223
227
  # code as noun
@@ -872,6 +876,8 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
872
876
  for i,j in enumerate(ut):
873
877
  for k in j.text:
874
878
  ud_chars.append(ReferenceTarget(k, payload=i))
879
+ creaky = False
880
+ collected = ""
875
881
  # brrr
876
882
  aligned = align(chunks_chars, ud_chars, tqdm=False)
877
883
  for i in aligned:
@@ -879,8 +885,14 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
879
885
  if i.reference_payload not in chunks_backplate[i.payload]:
880
886
  chunks_backplate[i.payload].append(i.reference_payload)
881
887
  elif isinstance(i, Extra) and i.extra_type == ExtraType.PAYLOAD:
882
- # just put it back
883
- chunks_backplate[i.payload].append(i.key)
888
+ if i.key == "*":
889
+ creaky = not creaky
890
+ chunks_backplate[i.payload].append("*"+collected+"*")
891
+ collected = ""
892
+ elif creaky:
893
+ collected += i.key
894
+ elif not creaky:
895
+ chunks_backplate[i.payload].append(i.key)
884
896
  # we want to replace the morphology of forms that are not actually
885
897
  # supposed to be analyzed
886
898
  elif isinstance(i, Extra) and i.extra_type == ExtraType.REFERENCE:
@@ -915,6 +927,8 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
915
927
  retokenized_ut = retokenized_ut.replace(" ↑", "↑")
916
928
  retokenized_ut = re.sub(r"@ ?w ?p", "@wp", retokenized_ut)
917
929
  retokenized_ut = retokenized_ut.replace(" @", "@")
930
+ retokenized_ut = re.sub(r"\*[* ]*", "*", retokenized_ut)
931
+ retokenized_ut = re.sub(r"\*(.*?)\*", r"*\1* ", retokenized_ut)
918
932
  # pray to everyone that it works---this will simply crash and ignore
919
933
  # the utterance if it didn't work, so we are doing this as a sanity
920
934
  # check rather than needing the parsed result
@@ -0,0 +1,3 @@
1
+ 0.7.6-alpha.15
2
+ October 16, 2024
3
+ gerund support
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.6a13
3
+ Version: 0.7.6a15
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,3 +0,0 @@
1
- 0.7.6-alpha.13
2
- October 13, 2024
3
- patch bug regarding comma structure
File without changes
File without changes
File without changes
File without changes
File without changes