batchalign 0.7.6a12__tar.gz → 0.7.6a13__tar.gz

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (108)
  1. {batchalign-0.7.6a12/batchalign.egg-info → batchalign-0.7.6a13}/PKG-INFO +1 -1
  2. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/document.py +1 -2
  3. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/formats/chat/utils.py +1 -1
  4. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/morphosyntax/ud.py +12 -5
  5. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/version +2 -2
  6. {batchalign-0.7.6a12 → batchalign-0.7.6a13/batchalign.egg-info}/PKG-INFO +1 -1
  7. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/LICENSE +0 -0
  8. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/MANIFEST.in +0 -0
  9. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/README.md +0 -0
  10. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/__init__.py +0 -0
  11. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/__main__.py +0 -0
  12. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/cli/__init__.py +0 -0
  13. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/cli/cli.py +0 -0
  14. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/cli/dispatch.py +0 -0
  15. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/constants.py +0 -0
  16. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/errors.py +0 -0
  17. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/formats/__init__.py +0 -0
  18. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/formats/base.py +0 -0
  19. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/formats/chat/__init__.py +0 -0
  20. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/formats/chat/file.py +0 -0
  21. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/formats/chat/generator.py +0 -0
  22. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/formats/chat/lexer.py +0 -0
  23. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/formats/chat/parser.py +0 -0
  24. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/formats/textgrid/__init__.py +0 -0
  25. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/formats/textgrid/file.py +0 -0
  26. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/formats/textgrid/generator.py +0 -0
  27. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/formats/textgrid/parser.py +0 -0
  28. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/__init__.py +0 -0
  29. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/resolve.py +0 -0
  30. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/speaker/__init__.py +0 -0
  31. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/speaker/config.yaml +0 -0
  32. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/speaker/infer.py +0 -0
  33. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/speaker/utils.py +0 -0
  34. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/training/__init__.py +0 -0
  35. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/training/run.py +0 -0
  36. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/training/utils.py +0 -0
  37. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/utils.py +0 -0
  38. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/utterance/__init__.py +0 -0
  39. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/utterance/dataset.py +0 -0
  40. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/utterance/execute.py +0 -0
  41. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/utterance/infer.py +0 -0
  42. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/utterance/prep.py +0 -0
  43. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/utterance/train.py +0 -0
  44. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/whisper/__init__.py +0 -0
  45. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/whisper/infer_asr.py +0 -0
  46. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/whisper/infer_fa.py +0 -0
  47. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/__init__.py +0 -0
  48. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/analysis/__init__.py +0 -0
  49. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/analysis/eval.py +0 -0
  50. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/asr/__init__.py +0 -0
  51. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/asr/rev.py +0 -0
  52. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/asr/utils.py +0 -0
  53. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/asr/whisper.py +0 -0
  54. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/asr/whisperx.py +0 -0
  55. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/base.py +0 -0
  56. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/__init__.py +0 -0
  57. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  58. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  59. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  60. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/retrace.py +0 -0
  61. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  62. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  63. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/support/test.test +0 -0
  64. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/dispatch.py +0 -0
  65. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/fa/__init__.py +0 -0
  66. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  67. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  68. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  69. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  70. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  71. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/pipeline.py +0 -0
  72. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/speaker/__init__.py +0 -0
  73. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  74. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/utr/__init__.py +0 -0
  75. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/utr/rev_utr.py +0 -0
  76. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/utr/utils.py +0 -0
  77. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  78. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/utterance/__init__.py +0 -0
  79. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  80. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/__init__.py +0 -0
  81. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/conftest.py +0 -0
  82. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  83. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  84. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  85. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  86. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  87. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  88. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  89. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  90. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  91. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  92. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  93. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  94. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/pipelines/fixures.py +0 -0
  95. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  96. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  97. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/test_document.py +0 -0
  98. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/utils/__init__.py +0 -0
  99. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/utils/config.py +0 -0
  100. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/utils/dp.py +0 -0
  101. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/utils/utils.py +0 -0
  102. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign.egg-info/SOURCES.txt +0 -0
  103. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign.egg-info/dependency_links.txt +0 -0
  104. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign.egg-info/entry_points.txt +0 -0
  105. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign.egg-info/requires.txt +0 -0
  106. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign.egg-info/top_level.txt +0 -0
  107. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/setup.cfg +0 -0
  108. {batchalign-0.7.6a12 → batchalign-0.7.6a13}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.6a12
3
+ Version: 0.7.6a13
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -324,8 +324,7 @@ class Utterance(BaseModel):
324
324
  to_include.append(TokenType.RETRACE)
325
325
  if include_fp:
326
326
  to_include.append(TokenType.FP)
327
- filtered = filter(lambda x:x.type in to_include,
328
- self.content)
327
+ filtered = filter(lambda x:x.type in to_include, self.content)
329
328
  # chain them together
330
329
  if join_with_spaces:
331
330
  return " ".join([i.text for i in filtered])
@@ -146,7 +146,7 @@ def annotation_clean(content, special=False):
146
146
  cleaned_word = cleaned_word.replace("~","").replace("&~","")
147
147
  cleaned_word = cleaned_word.replace(">","").replace("<","")
148
148
  cleaned_word = cleaned_word.replace("〕","").replace("//","").replace(";","")
149
- cleaned_word = re.sub(r"@[^abcefpoqs]", '', cleaned_word)
149
+ cleaned_word = re.sub(r"@[^abcefpoqsw]", '', cleaned_word)
150
150
  cleaned_word = re.sub(r"&.", '', cleaned_word)
151
151
 
152
152
  return cleaned_word
@@ -837,11 +837,17 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
837
837
  L.debug(f"Encountered an utterance that's likely devoid of morphological information; skipping... utterance='{doc.content[indx]}'")
838
838
  continue
839
839
 
840
+
840
841
  if retokenize:
841
842
  # rewrite the sentence with our desired tokenizations
842
843
  ut, end = chat_parse_utterance(" ".join([i.text for i in sents[0].tokens])+" "+ending,
843
844
  mor, gra,
844
845
  None, None)
846
+ # fix xbxxx
847
+ for i in ut:
848
+ if i.text == "xbxxx" and len(i.morphology) > 0:
849
+ i.text = i.morphology[0].lemma
850
+
845
851
  # split the text up into previous chunks
846
852
  chunks = list(enumerate(doc.content[indx].text.split(" ")))
847
853
  # filter out everything that could not possibly align
@@ -878,11 +884,12 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
878
884
  # we want to replace the morphology of forms that are not actually
879
885
  # supposed to be analyzed
880
886
  elif isinstance(i, Extra) and i.extra_type == ExtraType.REFERENCE:
881
- ut[i.payload].morphology = [Morphology(
882
- lemma = sents[0].tokens[i.payload].text,
883
- pos = "x",
884
- feats = ""
885
- )]
887
+ if ut[i.payload].text != ",":
888
+ ut[i.payload].morphology = [Morphology(
889
+ lemma = sents[0].tokens[i.payload].text if len(sents) > 0 and len(sents[0].tokens) > i.payload and sents[0].tokens[i.payload].text != "xbxxx" else ut[i.payload].text,
890
+ pos = "x",
891
+ feats = ""
892
+ )]
886
893
 
887
894
  poses = [i.morphology[0].pos.upper() for i in ut
888
895
  if i.morphology
@@ -1,3 +1,3 @@
1
- 0.7.6-alpha.12
2
- October 10, 2024
1
+ 0.7.6-alpha.13
2
+ October 13, 2024
3
3
  patch bug regarding comma structure
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.6a12
3
+ Version: 0.7.6a13
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
File without changes
File without changes
File without changes
File without changes
File without changes