batchalign 0.7.1b11__tar.gz → 0.7.1b13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalign-0.7.1b11/batchalign.egg-info → batchalign-0.7.1b13}/PKG-INFO +1 -1
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/document.py +1 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/formats/chat/lexer.py +2 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/formats/chat/parser.py +2 -1
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/dispatch.py +1 -1
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/morphosyntax/ud.py +6 -2
- batchalign-0.7.1b13/batchalign/version +3 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13/batchalign.egg-info}/PKG-INFO +1 -1
- batchalign-0.7.1b11/batchalign/version +0 -3
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/LICENSE +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/MANIFEST.in +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/README.md +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/__init__.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/__main__.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/cli/cli.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/cli/dispatch.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/constants.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/errors.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/formats/chat/generator.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/formats/chat/utils.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/models/resolve.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/__init__.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/utils/dp.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign.egg-info/SOURCES.txt +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign.egg-info/requires.txt +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/setup.cfg +0 -0
- {batchalign-0.7.1b11 → batchalign-0.7.1b13}/setup.py +0 -0
@@ -93,6 +93,8 @@ class UtteranceLexer:
|
|
93
93
|
self.__clauses.append((form, TokenType.FEAT))
|
94
94
|
elif annotation_clean(form).strip() in CHAT_IGNORE:
|
95
95
|
self.__clauses.append((annotation_clean(form).strip(), TokenType.ANNOT))
|
96
|
+
elif "@" in form:
|
97
|
+
self.__clauses.append((annotation_clean(form).strip(), TokenType.VOCAL))
|
96
98
|
else:
|
97
99
|
self.__clauses.append((annotation_clean(form).strip(), TokenType.REGULAR))
|
98
100
|
|
@@ -90,9 +90,10 @@ def chat_parse_utterance(text, mor, gra, wor, additional):
|
|
90
90
|
# separate out main words by whether it should have phonation/morphology and add ending punct
|
91
91
|
words = list(enumerate(tokens))
|
92
92
|
lexed_words = [tok for tok in words if tok[1][1] in [TokenType.REGULAR,
|
93
|
-
|
93
|
+
TokenType.PUNCT]]
|
94
94
|
phonated_words = [tok for tok in words if tok[1][1] in [TokenType.REGULAR,
|
95
95
|
TokenType.RETRACE,
|
96
|
+
TokenType.VOCAL,
|
96
97
|
TokenType.PUNCT,
|
97
98
|
TokenType.FP]]
|
98
99
|
# create base forms
|
@@ -73,7 +73,7 @@ def dispatch_pipeline(pkg_str, lang, num_speakers=None, **arg_overrides):
|
|
73
73
|
packages.append("disfluency")
|
74
74
|
if "retracing" not in packages:
|
75
75
|
packages.append("retracing")
|
76
|
-
if "utterance" not in packages and resolve("utterance", lang) == None and lang not in ["heb"]:
|
76
|
+
if "utterance" not in packages and resolve("utterance", lang) == None and lang not in ["heb", "fra"]:
|
77
77
|
packages.append("utterance")
|
78
78
|
if "fa" in packages:
|
79
79
|
if "utr" not in packages:
|
@@ -130,6 +130,8 @@ def handler(word, lang=None):
|
|
130
130
|
|
131
131
|
# fix dash
|
132
132
|
target = target.replace("-", "–")
|
133
|
+
if target == "“":
|
134
|
+
target = word.text
|
133
135
|
|
134
136
|
return f"{'' if not unknown else '0'}{word.upos.lower()}|{target}"
|
135
137
|
|
@@ -739,6 +741,7 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
|
|
739
741
|
line_cut = i.strip(join_with_spaces=True)
|
740
742
|
ending = '.'
|
741
743
|
|
744
|
+
|
742
745
|
# clean the sentence
|
743
746
|
line_cut = clean_sentence(line_cut)
|
744
747
|
|
@@ -813,12 +816,12 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
|
|
813
816
|
chunks = list(enumerate(doc.content[indx].text.split(" ")))
|
814
817
|
# filter out everything that could not possibly align
|
815
818
|
chunks_align = [(i,j) for i,j in chunks
|
816
|
-
if len(j) != 0 and (j[0] not in ["<", "[", "&", "\x15"]) and (j[-1] not in ["]"])
|
819
|
+
if len(j) != 0 and (j[0] not in ["<", "[", "&", "\x15", "(", ")"]) and (j[-1] not in ["]"])
|
817
820
|
and ("@" not in j)
|
818
821
|
and j.strip() not in ENDING_PUNCT + MOR_PUNCT + CHAT_IGNORE + ["++"]]
|
819
822
|
# hollow out anything we are trying to align, and leave everything else
|
820
823
|
chunks_backplate = [[j]
|
821
|
-
if not (len(j) != 0 and (j[0] not in ["<", "[", "&", "\x15"]) and (j[-1] not in ["]"])
|
824
|
+
if not (len(j) != 0 and (j[0] not in ["<", "[", "&", "\x15", "(", ")"]) and (j[-1] not in ["]"])
|
822
825
|
and ("@" not in j)
|
823
826
|
and j.strip() not in ENDING_PUNCT + MOR_PUNCT + CHAT_IGNORE + ["++"])
|
824
827
|
else
|
@@ -857,6 +860,7 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
|
|
857
860
|
retokenized_ut = retokenized_ut.replace(": <", ": <")
|
858
861
|
retokenized_ut = re.sub(r"@ ?w ?p", "@wp", retokenized_ut)
|
859
862
|
retokenized_ut = retokenized_ut.replace(" @", "@")
|
863
|
+
retokenized_ut = re.sub(r" ↑", "↑", retokenized_ut)
|
860
864
|
# pray to everyone that it works---this will simply crash and ignore
|
861
865
|
# the utterance if it didn't work, so we are doing this as a sanity
|
862
866
|
# check rather than needing the parsed result
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/cleanup/support/filled_pauses.eng
RENAMED
File without changes
|
{batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/pipelines/cleanup/support/replacements.eng
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/formats/chat/test_chat_generator.py
RENAMED
File without changes
|
{batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/formats/chat/test_chat_lexer.py
RENAMED
File without changes
|
{batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/formats/chat/test_chat_parser.py
RENAMED
File without changes
|
{batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/formats/chat/test_chat_utils.py
RENAMED
File without changes
|
{batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/formats/textgrid/test_textgrid.py
RENAMED
File without changes
|
{batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/pipelines/analysis/test_eval.py
RENAMED
File without changes
|
{batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/pipelines/asr/test_asr_pipeline.py
RENAMED
File without changes
|
{batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/pipelines/asr/test_asr_utils.py
RENAMED
File without changes
|
{batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/pipelines/cleanup/test_disfluency.py
RENAMED
File without changes
|
{batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/pipelines/cleanup/test_parse_support.py
RENAMED
File without changes
|
{batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/pipelines/fa/test_fa_pipeline.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.1b11 → batchalign-0.7.1b13}/batchalign/tests/pipelines/test_pipeline_models.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|