batchalign 0.7.6a11__tar.gz → 0.7.6a13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. {batchalign-0.7.6a11/batchalign.egg-info → batchalign-0.7.6a13}/PKG-INFO +1 -1
  2. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/document.py +1 -2
  3. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/formats/chat/utils.py +1 -1
  4. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/morphosyntax/ud.py +14 -5
  5. batchalign-0.7.6a13/batchalign/version +3 -0
  6. {batchalign-0.7.6a11 → batchalign-0.7.6a13/batchalign.egg-info}/PKG-INFO +1 -1
  7. batchalign-0.7.6a11/batchalign/version +0 -3
  8. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/LICENSE +0 -0
  9. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/MANIFEST.in +0 -0
  10. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/README.md +0 -0
  11. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/__init__.py +0 -0
  12. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/__main__.py +0 -0
  13. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/cli/__init__.py +0 -0
  14. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/cli/cli.py +0 -0
  15. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/cli/dispatch.py +0 -0
  16. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/constants.py +0 -0
  17. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/errors.py +0 -0
  18. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/formats/__init__.py +0 -0
  19. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/formats/base.py +0 -0
  20. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/formats/chat/__init__.py +0 -0
  21. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/formats/chat/file.py +0 -0
  22. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/formats/chat/generator.py +0 -0
  23. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/formats/chat/lexer.py +0 -0
  24. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/formats/chat/parser.py +0 -0
  25. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/formats/textgrid/__init__.py +0 -0
  26. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/formats/textgrid/file.py +0 -0
  27. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/formats/textgrid/generator.py +0 -0
  28. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/formats/textgrid/parser.py +0 -0
  29. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/__init__.py +0 -0
  30. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/resolve.py +0 -0
  31. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/speaker/__init__.py +0 -0
  32. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/speaker/config.yaml +0 -0
  33. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/speaker/infer.py +0 -0
  34. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/speaker/utils.py +0 -0
  35. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/training/__init__.py +0 -0
  36. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/training/run.py +0 -0
  37. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/training/utils.py +0 -0
  38. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/utils.py +0 -0
  39. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/utterance/__init__.py +0 -0
  40. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/utterance/dataset.py +0 -0
  41. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/utterance/execute.py +0 -0
  42. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/utterance/infer.py +0 -0
  43. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/utterance/prep.py +0 -0
  44. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/utterance/train.py +0 -0
  45. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/whisper/__init__.py +0 -0
  46. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/whisper/infer_asr.py +0 -0
  47. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/whisper/infer_fa.py +0 -0
  48. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/__init__.py +0 -0
  49. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/analysis/__init__.py +0 -0
  50. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/analysis/eval.py +0 -0
  51. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/asr/__init__.py +0 -0
  52. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/asr/rev.py +0 -0
  53. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/asr/utils.py +0 -0
  54. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/asr/whisper.py +0 -0
  55. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/asr/whisperx.py +0 -0
  56. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/base.py +0 -0
  57. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/__init__.py +0 -0
  58. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  59. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  60. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  61. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/retrace.py +0 -0
  62. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  63. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  64. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/support/test.test +0 -0
  65. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/dispatch.py +0 -0
  66. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/fa/__init__.py +0 -0
  67. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  68. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  69. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  70. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  71. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  72. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/pipeline.py +0 -0
  73. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/speaker/__init__.py +0 -0
  74. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  75. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/utr/__init__.py +0 -0
  76. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/utr/rev_utr.py +0 -0
  77. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/utr/utils.py +0 -0
  78. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  79. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/utterance/__init__.py +0 -0
  80. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  81. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/__init__.py +0 -0
  82. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/conftest.py +0 -0
  83. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  84. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  85. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  86. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  87. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  88. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  89. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  90. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  91. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  92. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  93. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  94. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  95. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/pipelines/fixures.py +0 -0
  96. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  97. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  98. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/test_document.py +0 -0
  99. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/utils/__init__.py +0 -0
  100. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/utils/config.py +0 -0
  101. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/utils/dp.py +0 -0
  102. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/utils/utils.py +0 -0
  103. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign.egg-info/SOURCES.txt +0 -0
  104. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign.egg-info/dependency_links.txt +0 -0
  105. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign.egg-info/entry_points.txt +0 -0
  106. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign.egg-info/requires.txt +0 -0
  107. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign.egg-info/top_level.txt +0 -0
  108. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/setup.cfg +0 -0
  109. {batchalign-0.7.6a11 → batchalign-0.7.6a13}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.6a11
3
+ Version: 0.7.6a13
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -324,8 +324,7 @@ class Utterance(BaseModel):
324
324
  to_include.append(TokenType.RETRACE)
325
325
  if include_fp:
326
326
  to_include.append(TokenType.FP)
327
- filtered = filter(lambda x:x.type in to_include,
328
- self.content)
327
+ filtered = filter(lambda x:x.type in to_include, self.content)
329
328
  # chain them together
330
329
  if join_with_spaces:
331
330
  return " ".join([i.text for i in filtered])
@@ -146,7 +146,7 @@ def annotation_clean(content, special=False):
146
146
  cleaned_word = cleaned_word.replace("~","").replace("&~","")
147
147
  cleaned_word = cleaned_word.replace(">","").replace("<","")
148
148
  cleaned_word = cleaned_word.replace("〕","").replace("//","").replace(";","")
149
- cleaned_word = re.sub(r"@[^abcefpoqs]", '', cleaned_word)
149
+ cleaned_word = re.sub(r"@[^abcefpoqsw]", '', cleaned_word)
150
150
  cleaned_word = re.sub(r"&.", '', cleaned_word)
151
151
 
152
152
  return cleaned_word
@@ -280,6 +280,8 @@ def handler__PUNCT(word, lang=None):
280
280
  # instead of the lemma, which maybe entirely weird
281
281
  if word.text == "もん":
282
282
  return f"part|{word.text}"
283
+ if word.text == ",":
284
+ return f"cm|cm"
283
285
  else:
284
286
  return f"x|{word.text}"
285
287
 
@@ -835,11 +837,17 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
835
837
  L.debug(f"Encountered an utterance that's likely devoid of morphological information; skipping... utterance='{doc.content[indx]}'")
836
838
  continue
837
839
 
840
+
838
841
  if retokenize:
839
842
  # rewrite the sentence with our desired tokenizations
840
843
  ut, end = chat_parse_utterance(" ".join([i.text for i in sents[0].tokens])+" "+ending,
841
844
  mor, gra,
842
845
  None, None)
846
+ # fix xbxxx
847
+ for i in ut:
848
+ if i.text == "xbxxx" and len(i.morphology) > 0:
849
+ i.text = i.morphology[0].lemma
850
+
843
851
  # split the text up into previous chunks
844
852
  chunks = list(enumerate(doc.content[indx].text.split(" ")))
845
853
  # filter out everything that could not possibly align
@@ -876,11 +884,12 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
876
884
  # we want to replace the morphology of forms that are not actually
877
885
  # supposed to be analyzed
878
886
  elif isinstance(i, Extra) and i.extra_type == ExtraType.REFERENCE:
879
- ut[i.payload].morphology = [Morphology(
880
- lemma = sents[0].tokens[i.payload].text,
881
- pos = "x",
882
- feats = ""
883
- )]
887
+ if ut[i.payload].text != ",":
888
+ ut[i.payload].morphology = [Morphology(
889
+ lemma = sents[0].tokens[i.payload].text if len(sents) > 0 and len(sents[0].tokens) > i.payload and sents[0].tokens[i.payload].text != "xbxxx" else ut[i.payload].text,
890
+ pos = "x",
891
+ feats = ""
892
+ )]
884
893
 
885
894
  poses = [i.morphology[0].pos.upper() for i in ut
886
895
  if i.morphology
@@ -0,0 +1,3 @@
1
+ 0.7.6-alpha.13
2
+ October 13, 2024
3
+ patch bug regarding comma structure
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.6a11
3
+ Version: 0.7.6a13
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,3 +0,0 @@
1
- 0.7.6-alpha.11
2
- October 10, 2024
3
- patch urgent UD bug
File without changes
File without changes
File without changes
File without changes
File without changes