batchalign 0.7.1b12__tar.gz → 0.7.1b13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. {batchalign-0.7.1b12/batchalign.egg-info → batchalign-0.7.1b13}/PKG-INFO +1 -1
  2. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/document.py +1 -0
  3. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/formats/chat/lexer.py +2 -0
  4. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/formats/chat/parser.py +2 -1
  5. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/morphosyntax/ud.py +6 -2
  6. batchalign-0.7.1b13/batchalign/version +3 -0
  7. {batchalign-0.7.1b12 → batchalign-0.7.1b13/batchalign.egg-info}/PKG-INFO +1 -1
  8. batchalign-0.7.1b12/batchalign/version +0 -3
  9. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/LICENSE +0 -0
  10. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/MANIFEST.in +0 -0
  11. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/README.md +0 -0
  12. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/__init__.py +0 -0
  13. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/__main__.py +0 -0
  14. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/cli/__init__.py +0 -0
  15. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/cli/cli.py +0 -0
  16. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/cli/dispatch.py +0 -0
  17. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/constants.py +0 -0
  18. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/errors.py +0 -0
  19. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/formats/__init__.py +0 -0
  20. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/formats/base.py +0 -0
  21. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/formats/chat/__init__.py +0 -0
  22. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/formats/chat/file.py +0 -0
  23. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/formats/chat/generator.py +0 -0
  24. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/formats/chat/utils.py +0 -0
  25. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/formats/textgrid/__init__.py +0 -0
  26. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/formats/textgrid/file.py +0 -0
  27. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/formats/textgrid/generator.py +0 -0
  28. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/formats/textgrid/parser.py +0 -0
  29. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/models/__init__.py +0 -0
  30. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/models/resolve.py +0 -0
  31. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/models/speaker/__init__.py +0 -0
  32. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/models/speaker/config.yaml +0 -0
  33. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/models/speaker/infer.py +0 -0
  34. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/models/speaker/utils.py +0 -0
  35. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/models/training/__init__.py +0 -0
  36. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/models/training/run.py +0 -0
  37. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/models/training/utils.py +0 -0
  38. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/models/utils.py +0 -0
  39. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/models/utterance/__init__.py +0 -0
  40. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/models/utterance/dataset.py +0 -0
  41. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/models/utterance/execute.py +0 -0
  42. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/models/utterance/infer.py +0 -0
  43. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/models/utterance/prep.py +0 -0
  44. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/models/utterance/train.py +0 -0
  45. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/models/whisper/__init__.py +0 -0
  46. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/models/whisper/infer_asr.py +0 -0
  47. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/models/whisper/infer_fa.py +0 -0
  48. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/__init__.py +0 -0
  49. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/analysis/__init__.py +0 -0
  50. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/analysis/eval.py +0 -0
  51. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/asr/__init__.py +0 -0
  52. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/asr/rev.py +0 -0
  53. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/asr/utils.py +0 -0
  54. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/asr/whisper.py +0 -0
  55. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/asr/whisperx.py +0 -0
  56. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/base.py +0 -0
  57. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/cleanup/__init__.py +0 -0
  58. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  59. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  60. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  61. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/cleanup/retrace.py +0 -0
  62. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  63. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  64. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/cleanup/support/test.test +0 -0
  65. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/dispatch.py +0 -0
  66. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/fa/__init__.py +0 -0
  67. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  68. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  69. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  70. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/pipeline.py +0 -0
  71. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/speaker/__init__.py +0 -0
  72. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  73. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/utr/__init__.py +0 -0
  74. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/utr/rev_utr.py +0 -0
  75. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/utr/utils.py +0 -0
  76. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  77. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/utterance/__init__.py +0 -0
  78. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  79. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/tests/__init__.py +0 -0
  80. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/tests/conftest.py +0 -0
  81. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  82. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  83. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  84. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  85. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  86. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  87. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  88. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  89. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  90. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  91. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  92. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  93. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/tests/pipelines/fixures.py +0 -0
  94. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  95. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  96. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/tests/test_document.py +0 -0
  97. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/utils/__init__.py +0 -0
  98. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/utils/config.py +0 -0
  99. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/utils/dp.py +0 -0
  100. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign/utils/utils.py +0 -0
  101. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign.egg-info/SOURCES.txt +0 -0
  102. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign.egg-info/dependency_links.txt +0 -0
  103. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign.egg-info/entry_points.txt +0 -0
  104. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign.egg-info/requires.txt +0 -0
  105. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/batchalign.egg-info/top_level.txt +0 -0
  106. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/setup.cfg +0 -0
  107. {batchalign-0.7.1b12 → batchalign-0.7.1b13}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.1b12
3
+ Version: 0.7.1b13
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -83,6 +83,7 @@ class TokenType(IntEnum):
83
83
  ANNOT = 4 # &~ject &~head
84
84
  PUNCT = 5 # ‡„,
85
85
  CORRECTION = 6 # test [= test]
86
+ VOCAL = 7 # test@o
86
87
 
87
88
  class CustomLineType(IntEnum):
88
89
  DEPENDENT = 0 # %com
@@ -93,6 +93,8 @@ class UtteranceLexer:
93
93
  self.__clauses.append((form, TokenType.FEAT))
94
94
  elif annotation_clean(form).strip() in CHAT_IGNORE:
95
95
  self.__clauses.append((annotation_clean(form).strip(), TokenType.ANNOT))
96
+ elif "@" in form:
97
+ self.__clauses.append((annotation_clean(form).strip(), TokenType.VOCAL))
96
98
  else:
97
99
  self.__clauses.append((annotation_clean(form).strip(), TokenType.REGULAR))
98
100
 
@@ -90,9 +90,10 @@ def chat_parse_utterance(text, mor, gra, wor, additional):
90
90
  # seperate out main words by whether it should have phonation/morphology and add ending punct
91
91
  words = list(enumerate(tokens))
92
92
  lexed_words = [tok for tok in words if tok[1][1] in [TokenType.REGULAR,
93
- TokenType.PUNCT]]
93
+ TokenType.PUNCT]]
94
94
  phonated_words = [tok for tok in words if tok[1][1] in [TokenType.REGULAR,
95
95
  TokenType.RETRACE,
96
+ TokenType.VOCAL,
96
97
  TokenType.PUNCT,
97
98
  TokenType.FP]]
98
99
  # create base forms
@@ -130,6 +130,8 @@ def handler(word, lang=None):
130
130
 
131
131
  # fix dash
132
132
  target = target.replace("-", "–")
133
+ if target == "“":
134
+ target = word.text
133
135
 
134
136
  return f"{'' if not unknown else '0'}{word.upos.lower()}|{target}"
135
137
 
@@ -739,6 +741,7 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
739
741
  line_cut = i.strip(join_with_spaces=True)
740
742
  ending = '.'
741
743
 
744
+
742
745
  # clean the sentence
743
746
  line_cut = clean_sentence(line_cut)
744
747
 
@@ -813,12 +816,12 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
813
816
  chunks = list(enumerate(doc.content[indx].text.split(" ")))
814
817
  # filter out everything that could not possibly align
815
818
  chunks_align = [(i,j) for i,j in chunks
816
- if len(j) != 0 and (j[0] not in ["<", "[", "&", "\x15"]) and (j[-1] not in ["]"])
819
+ if len(j) != 0 and (j[0] not in ["<", "[", "&", "\x15", "(", ")"]) and (j[-1] not in ["]"])
817
820
  and ("@" not in j)
818
821
  and j.strip() not in ENDING_PUNCT + MOR_PUNCT + CHAT_IGNORE + ["++"]]
819
822
  # hollow out anything we are trying to align, and leave everything else
820
823
  chunks_backplate = [[j]
821
- if not (len(j) != 0 and (j[0] not in ["<", "[", "&", "\x15"]) and (j[-1] not in ["]"])
824
+ if not (len(j) != 0 and (j[0] not in ["<", "[", "&", "\x15", "(", ")"]) and (j[-1] not in ["]"])
822
825
  and ("@" not in j)
823
826
  and j.strip() not in ENDING_PUNCT + MOR_PUNCT + CHAT_IGNORE + ["++"])
824
827
  else
@@ -857,6 +860,7 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
857
860
  retokenized_ut = retokenized_ut.replace(": <", ": <")
858
861
  retokenized_ut = re.sub(r"@ ?w ?p", "@wp", retokenized_ut)
859
862
  retokenized_ut = retokenized_ut.replace(" @", "@")
863
+ retokenized_ut = re.sub(r" ↑", "↑", retokenized_ut)
860
864
  # pray to everyone that it works---this will simply crash and ignore
861
865
  # the utterance if it didn't work, so we are doing this as a sanity
862
866
  # check rather than needing the parsed result
@@ -0,0 +1,3 @@
1
+ 0.7.1-beta.13
2
+ June 2nd, 2024
3
+ more ud fixes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.1b12
3
+ Version: 0.7.1b13
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,3 +0,0 @@
1
- 0.7.1-beta.12
2
- May 27th, 2024
3
- patch french bugs
File without changes
File without changes
File without changes
File without changes
File without changes