batchalign 0.7.1b12__tar.gz → 0.7.1b14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. {batchalign-0.7.1b12/batchalign.egg-info → batchalign-0.7.1b14}/PKG-INFO +1 -1
  2. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/morphosyntax/ud.py +16 -4
  3. batchalign-0.7.1b14/batchalign/version +3 -0
  4. {batchalign-0.7.1b12 → batchalign-0.7.1b14/batchalign.egg-info}/PKG-INFO +1 -1
  5. batchalign-0.7.1b12/batchalign/version +0 -3
  6. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/LICENSE +0 -0
  7. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/MANIFEST.in +0 -0
  8. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/README.md +0 -0
  9. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/__init__.py +0 -0
  10. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/__main__.py +0 -0
  11. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/cli/__init__.py +0 -0
  12. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/cli/cli.py +0 -0
  13. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/cli/dispatch.py +0 -0
  14. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/constants.py +0 -0
  15. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/document.py +0 -0
  16. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/errors.py +0 -0
  17. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/formats/__init__.py +0 -0
  18. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/formats/base.py +0 -0
  19. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/formats/chat/__init__.py +0 -0
  20. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/formats/chat/file.py +0 -0
  21. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/formats/chat/generator.py +0 -0
  22. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/formats/chat/lexer.py +0 -0
  23. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/formats/chat/parser.py +0 -0
  24. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/formats/chat/utils.py +0 -0
  25. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/formats/textgrid/__init__.py +0 -0
  26. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/formats/textgrid/file.py +0 -0
  27. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/formats/textgrid/generator.py +0 -0
  28. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/formats/textgrid/parser.py +0 -0
  29. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/models/__init__.py +0 -0
  30. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/models/resolve.py +0 -0
  31. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/models/speaker/__init__.py +0 -0
  32. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/models/speaker/config.yaml +0 -0
  33. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/models/speaker/infer.py +0 -0
  34. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/models/speaker/utils.py +0 -0
  35. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/models/training/__init__.py +0 -0
  36. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/models/training/run.py +0 -0
  37. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/models/training/utils.py +0 -0
  38. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/models/utils.py +0 -0
  39. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/models/utterance/__init__.py +0 -0
  40. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/models/utterance/dataset.py +0 -0
  41. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/models/utterance/execute.py +0 -0
  42. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/models/utterance/infer.py +0 -0
  43. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/models/utterance/prep.py +0 -0
  44. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/models/utterance/train.py +0 -0
  45. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/models/whisper/__init__.py +0 -0
  46. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/models/whisper/infer_asr.py +0 -0
  47. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/models/whisper/infer_fa.py +0 -0
  48. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/__init__.py +0 -0
  49. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/analysis/__init__.py +0 -0
  50. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/analysis/eval.py +0 -0
  51. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/asr/__init__.py +0 -0
  52. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/asr/rev.py +0 -0
  53. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/asr/utils.py +0 -0
  54. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/asr/whisper.py +0 -0
  55. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/asr/whisperx.py +0 -0
  56. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/base.py +0 -0
  57. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/cleanup/__init__.py +0 -0
  58. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  59. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  60. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  61. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/cleanup/retrace.py +0 -0
  62. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  63. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  64. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/cleanup/support/test.test +0 -0
  65. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/dispatch.py +0 -0
  66. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/fa/__init__.py +0 -0
  67. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  68. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  69. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  70. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/pipeline.py +0 -0
  71. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/speaker/__init__.py +0 -0
  72. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  73. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/utr/__init__.py +0 -0
  74. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/utr/rev_utr.py +0 -0
  75. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/utr/utils.py +0 -0
  76. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  77. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/utterance/__init__.py +0 -0
  78. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  79. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/tests/__init__.py +0 -0
  80. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/tests/conftest.py +0 -0
  81. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  82. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  83. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  84. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  85. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  86. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  87. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  88. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  89. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  90. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  91. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  92. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  93. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/tests/pipelines/fixures.py +0 -0
  94. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  95. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  96. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/tests/test_document.py +0 -0
  97. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/utils/__init__.py +0 -0
  98. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/utils/config.py +0 -0
  99. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/utils/dp.py +0 -0
  100. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign/utils/utils.py +0 -0
  101. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign.egg-info/SOURCES.txt +0 -0
  102. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign.egg-info/dependency_links.txt +0 -0
  103. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign.egg-info/entry_points.txt +0 -0
  104. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign.egg-info/requires.txt +0 -0
  105. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/batchalign.egg-info/top_level.txt +0 -0
  106. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/setup.cfg +0 -0
  107. {batchalign-0.7.1b12 → batchalign-0.7.1b14}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.1b12
3
+ Version: 0.7.1b14
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -131,6 +131,9 @@ def handler(word, lang=None):
131
131
  # fix dash
132
132
  target = target.replace("-", "–")
133
133
 
134
+ if "“" in target:
135
+ target = word.text
136
+
134
137
  return f"{'' if not unknown else '0'}{word.upos.lower()}|{target}"
135
138
 
136
139
  # POS specific handler
@@ -813,14 +816,14 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
813
816
  chunks = list(enumerate(doc.content[indx].text.split(" ")))
814
817
  # filter out everything that could not possibly align
815
818
  chunks_align = [(i,j) for i,j in chunks
816
- if len(j) != 0 and (j[0] not in ["<", "[", "&", "\x15"]) and (j[-1] not in ["]"])
819
+ if len(j) != 0 and (j[0] not in ["<", "[", "&", "\x15", "(", ")"]) and (j[-1] not in ["]"])
817
820
  and ("@" not in j)
818
- and j.strip() not in ENDING_PUNCT + MOR_PUNCT + CHAT_IGNORE + ["++"]]
821
+ and j.strip() not in MOR_PUNCT + CHAT_IGNORE + ["++"]]
819
822
  # hollow out anything we are trying to align, and leave everything else
820
823
  chunks_backplate = [[j]
821
- if not (len(j) != 0 and (j[0] not in ["<", "[", "&", "\x15"]) and (j[-1] not in ["]"])
824
+ if not (len(j) != 0 and (j[0] not in ["<", "[", "&", "\x15", "(", ")"]) and (j[-1] not in ["]"])
822
825
  and ("@" not in j)
823
- and j.strip() not in ENDING_PUNCT + MOR_PUNCT + CHAT_IGNORE + ["++"])
826
+ and j.strip() not in MOR_PUNCT + CHAT_IGNORE + ["++"])
824
827
  else
825
828
  []
826
829
  for i,j in chunks]
@@ -842,6 +845,14 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
842
845
  elif isinstance(i, Extra) and i.extra_type == ExtraType.PAYLOAD:
843
846
  # just put it back
844
847
  chunks_backplate[i.payload].append(i.key)
848
+ # we want to replace the morphology of forms that are not actually
849
+ # supposed to be analyzed
850
+ elif isinstance(i, Extra) and i.extra_type == ExtraType.REFERENCE:
851
+ ut[i.payload].morphology = [Morphology(
852
+ lemma = sents[0].tokens[i.payload].text,
853
+ pos = "x",
854
+ feats = ""
855
+ )]
845
856
  # resolve all the numbers and flatten
846
857
  chunks_backplate = [j if isinstance(j, str) else ut[j].text
847
858
  for i in chunks_backplate
@@ -855,6 +866,7 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
855
866
  retokenized_ut = retokenized_ut.replace("< ", "<")
856
867
  retokenized_ut = retokenized_ut.replace(" :", ":")
857
868
  retokenized_ut = retokenized_ut.replace(": <", ": <")
869
+ retokenized_ut = retokenized_ut.replace(" ↑", "↑")
858
870
  retokenized_ut = re.sub(r"@ ?w ?p", "@wp", retokenized_ut)
859
871
  retokenized_ut = retokenized_ut.replace(" @", "@")
860
872
  # pray to everyone that it works---this will simply crash and ignore
@@ -0,0 +1,3 @@
1
+ 0.7.1-beta.14
2
+ June 02nd, 2024
3
+ more UD bugs, supersedes v0.7.1-beta.13
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.1b12
3
+ Version: 0.7.1b14
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,3 +0,0 @@
1
- 0.7.1-beta.12
2
- May 27th, 2024
3
- patch french bugs
File without changes
File without changes
File without changes
File without changes
File without changes