batchalign 0.7.1b13__tar.gz → 0.7.1b14__tar.gz
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {batchalign-0.7.1b13/batchalign.egg-info → batchalign-0.7.1b14}/PKG-INFO +1 -1
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/document.py +0 -1
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/formats/chat/lexer.py +0 -2
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/formats/chat/parser.py +1 -2
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/morphosyntax/ud.py +13 -5
- batchalign-0.7.1b14/batchalign/version +3 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14/batchalign.egg-info}/PKG-INFO +1 -1
- batchalign-0.7.1b13/batchalign/version +0 -3
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/LICENSE +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/MANIFEST.in +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/README.md +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/__init__.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/__main__.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/cli/cli.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/cli/dispatch.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/constants.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/errors.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/formats/chat/generator.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/formats/chat/utils.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/models/resolve.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/__init__.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/dispatch.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/utils/dp.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign.egg-info/SOURCES.txt +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign.egg-info/requires.txt +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/setup.cfg +0 -0
- {batchalign-0.7.1b13 → batchalign-0.7.1b14}/setup.py +0 -0
@@ -93,8 +93,6 @@ class UtteranceLexer:
|
|
93
93
|
self.__clauses.append((form, TokenType.FEAT))
|
94
94
|
elif annotation_clean(form).strip() in CHAT_IGNORE:
|
95
95
|
self.__clauses.append((annotation_clean(form).strip(), TokenType.ANNOT))
|
96
|
-
elif "@" in form:
|
97
|
-
self.__clauses.append((annotation_clean(form).strip(), TokenType.VOCAL))
|
98
96
|
else:
|
99
97
|
self.__clauses.append((annotation_clean(form).strip(), TokenType.REGULAR))
|
100
98
|
|
@@ -90,10 +90,9 @@ def chat_parse_utterance(text, mor, gra, wor, additional):
|
|
90
90
|
# separate out main words by whether it should have phonation/morphology and add ending punct
|
91
91
|
words = list(enumerate(tokens))
|
92
92
|
lexed_words = [tok for tok in words if tok[1][1] in [TokenType.REGULAR,
|
93
|
-
|
93
|
+
TokenType.PUNCT]]
|
94
94
|
phonated_words = [tok for tok in words if tok[1][1] in [TokenType.REGULAR,
|
95
95
|
TokenType.RETRACE,
|
96
|
-
TokenType.VOCAL,
|
97
96
|
TokenType.PUNCT,
|
98
97
|
TokenType.FP]]
|
99
98
|
# create base forms
|
@@ -130,7 +130,8 @@ def handler(word, lang=None):
|
|
130
130
|
|
131
131
|
# fix dash
|
132
132
|
target = target.replace("-", "–")
|
133
|
-
|
133
|
+
|
134
|
+
if "“" in target:
|
134
135
|
target = word.text
|
135
136
|
|
136
137
|
return f"{'' if not unknown else '0'}{word.upos.lower()}|{target}"
|
@@ -741,7 +742,6 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
|
|
741
742
|
line_cut = i.strip(join_with_spaces=True)
|
742
743
|
ending = '.'
|
743
744
|
|
744
|
-
|
745
745
|
# clean the sentence
|
746
746
|
line_cut = clean_sentence(line_cut)
|
747
747
|
|
@@ -818,12 +818,12 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
|
|
818
818
|
chunks_align = [(i,j) for i,j in chunks
|
819
819
|
if len(j) != 0 and (j[0] not in ["<", "[", "&", "\x15", "(", ")"]) and (j[-1] not in ["]"])
|
820
820
|
and ("@" not in j)
|
821
|
-
and j.strip() not in
|
821
|
+
and j.strip() not in MOR_PUNCT + CHAT_IGNORE + ["++"]]
|
822
822
|
# hollow out anything we are trying to align, and leave everything else
|
823
823
|
chunks_backplate = [[j]
|
824
824
|
if not (len(j) != 0 and (j[0] not in ["<", "[", "&", "\x15", "(", ")"]) and (j[-1] not in ["]"])
|
825
825
|
and ("@" not in j)
|
826
|
-
and j.strip() not in
|
826
|
+
and j.strip() not in MOR_PUNCT + CHAT_IGNORE + ["++"])
|
827
827
|
else
|
828
828
|
[]
|
829
829
|
for i,j in chunks]
|
@@ -845,6 +845,14 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
|
|
845
845
|
elif isinstance(i, Extra) and i.extra_type == ExtraType.PAYLOAD:
|
846
846
|
# just put it back
|
847
847
|
chunks_backplate[i.payload].append(i.key)
|
848
|
+
# we want to replace the morphology of forms that are not actually
|
849
|
+
# supposed to be analyzed
|
850
|
+
elif isinstance(i, Extra) and i.extra_type == ExtraType.REFERENCE:
|
851
|
+
ut[i.payload].morphology = [Morphology(
|
852
|
+
lemma = sents[0].tokens[i.payload].text,
|
853
|
+
pos = "x",
|
854
|
+
feats = ""
|
855
|
+
)]
|
848
856
|
# resolve all the numbers and flatten
|
849
857
|
chunks_backplate = [j if isinstance(j, str) else ut[j].text
|
850
858
|
for i in chunks_backplate
|
@@ -858,9 +866,9 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
|
|
858
866
|
retokenized_ut = retokenized_ut.replace("< ", "<")
|
859
867
|
retokenized_ut = retokenized_ut.replace(" :", ":")
|
860
868
|
retokenized_ut = retokenized_ut.replace(": <", ": <")
|
869
|
+
retokenized_ut = retokenized_ut.replace(" ↑", "↑")
|
861
870
|
retokenized_ut = re.sub(r"@ ?w ?p", "@wp", retokenized_ut)
|
862
871
|
retokenized_ut = retokenized_ut.replace(" @", "@")
|
863
|
-
retokenized_ut = re.sub(r" ↑", "↑", retokenized_ut)
|
864
872
|
# pray to everyone that it works---this will simply crash and ignore
|
865
873
|
# the utterance if it didn't work, so we are doing this as a sanity
|
866
874
|
# check rather than needing the parsed result
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/cleanup/support/filled_pauses.eng
RENAMED
File without changes
|
{batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/pipelines/cleanup/support/replacements.eng
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/formats/chat/test_chat_generator.py
RENAMED
File without changes
|
{batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/formats/chat/test_chat_lexer.py
RENAMED
File without changes
|
{batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/formats/chat/test_chat_parser.py
RENAMED
File without changes
|
{batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/formats/chat/test_chat_utils.py
RENAMED
File without changes
|
{batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/formats/textgrid/test_textgrid.py
RENAMED
File without changes
|
{batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/pipelines/analysis/test_eval.py
RENAMED
File without changes
|
{batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/pipelines/asr/test_asr_pipeline.py
RENAMED
File without changes
|
{batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/pipelines/asr/test_asr_utils.py
RENAMED
File without changes
|
{batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/pipelines/cleanup/test_disfluency.py
RENAMED
File without changes
|
{batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/pipelines/cleanup/test_parse_support.py
RENAMED
File without changes
|
{batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/pipelines/fa/test_fa_pipeline.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.1b13 → batchalign-0.7.1b14}/batchalign/tests/pipelines/test_pipeline_models.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|