batchalign 0.7.6a11__tar.gz → 0.7.6a13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalign-0.7.6a11/batchalign.egg-info → batchalign-0.7.6a13}/PKG-INFO +1 -1
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/document.py +1 -2
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/formats/chat/utils.py +1 -1
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/morphosyntax/ud.py +14 -5
- batchalign-0.7.6a13/batchalign/version +3 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13/batchalign.egg-info}/PKG-INFO +1 -1
- batchalign-0.7.6a11/batchalign/version +0 -3
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/LICENSE +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/MANIFEST.in +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/README.md +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/__init__.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/__main__.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/cli/cli.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/cli/dispatch.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/constants.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/errors.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/formats/chat/generator.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/formats/chat/lexer.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/formats/chat/parser.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/resolve.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/__init__.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/dispatch.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/utils/dp.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign.egg-info/SOURCES.txt +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign.egg-info/requires.txt +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/setup.cfg +0 -0
- {batchalign-0.7.6a11 → batchalign-0.7.6a13}/setup.py +0 -0
@@ -324,8 +324,7 @@ class Utterance(BaseModel):
|
|
324
324
|
to_include.append(TokenType.RETRACE)
|
325
325
|
if include_fp:
|
326
326
|
to_include.append(TokenType.FP)
|
327
|
-
filtered = filter(lambda x:x.type in to_include,
|
328
|
-
self.content)
|
327
|
+
filtered = filter(lambda x:x.type in to_include, self.content)
|
329
328
|
# chain them together
|
330
329
|
if join_with_spaces:
|
331
330
|
return " ".join([i.text for i in filtered])
|
@@ -146,7 +146,7 @@ def annotation_clean(content, special=False):
|
|
146
146
|
cleaned_word = cleaned_word.replace("~","").replace("&~","")
|
147
147
|
cleaned_word = cleaned_word.replace(">","").replace("<","")
|
148
148
|
cleaned_word = cleaned_word.replace("〕","").replace("//","").replace(";","")
|
149
|
-
cleaned_word = re.sub(r"@[^
|
149
|
+
cleaned_word = re.sub(r"@[^abcefpoqsw]", '', cleaned_word)
|
150
150
|
cleaned_word = re.sub(r"&.", '', cleaned_word)
|
151
151
|
|
152
152
|
return cleaned_word
|
@@ -280,6 +280,8 @@ def handler__PUNCT(word, lang=None):
|
|
280
280
|
# instead of the lemma, which maybe entirely weird
|
281
281
|
if word.text == "もん":
|
282
282
|
return f"part|{word.text}"
|
283
|
+
if word.text == ",":
|
284
|
+
return f"cm|cm"
|
283
285
|
else:
|
284
286
|
return f"x|{word.text}"
|
285
287
|
|
@@ -835,11 +837,17 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
|
|
835
837
|
L.debug(f"Encountered an utterance that's likely devoid of morphological information; skipping... utterance='{doc.content[indx]}'")
|
836
838
|
continue
|
837
839
|
|
840
|
+
|
838
841
|
if retokenize:
|
839
842
|
# rewrite the sentence with our desired tokenizations
|
840
843
|
ut, end = chat_parse_utterance(" ".join([i.text for i in sents[0].tokens])+" "+ending,
|
841
844
|
mor, gra,
|
842
845
|
None, None)
|
846
|
+
# fix xbxxx
|
847
|
+
for i in ut:
|
848
|
+
if i.text == "xbxxx" and len(i.morphology) > 0:
|
849
|
+
i.text = i.morphology[0].lemma
|
850
|
+
|
843
851
|
# split the text up into previous chunks
|
844
852
|
chunks = list(enumerate(doc.content[indx].text.split(" ")))
|
845
853
|
# filter out everything that could not possibly align
|
@@ -876,11 +884,12 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
|
|
876
884
|
# we want to replace the morphology of forms that are not actually
|
877
885
|
# supposed to be analyzed
|
878
886
|
elif isinstance(i, Extra) and i.extra_type == ExtraType.REFERENCE:
|
879
|
-
ut[i.payload].
|
880
|
-
|
881
|
-
|
882
|
-
|
883
|
-
|
887
|
+
if ut[i.payload].text != ",":
|
888
|
+
ut[i.payload].morphology = [Morphology(
|
889
|
+
lemma = sents[0].tokens[i.payload].text if len(sents) > 0 and len(sents[0].tokens) > i.payload and sents[0].tokens[i.payload].text != "xbxxx" else ut[i.payload].text,
|
890
|
+
pos = "x",
|
891
|
+
feats = ""
|
892
|
+
)]
|
884
893
|
|
885
894
|
poses = [i.morphology[0].pos.upper() for i in ut
|
886
895
|
if i.morphology
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/support/filled_pauses.eng
RENAMED
File without changes
|
{batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/support/replacements.eng
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/pipelines/morphosyntax/ja/verbforms.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_generator.py
RENAMED
File without changes
|
{batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_lexer.py
RENAMED
File without changes
|
{batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_parser.py
RENAMED
File without changes
|
{batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_utils.py
RENAMED
File without changes
|
{batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/formats/textgrid/test_textgrid.py
RENAMED
File without changes
|
{batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/pipelines/analysis/test_eval.py
RENAMED
File without changes
|
{batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/pipelines/asr/test_asr_pipeline.py
RENAMED
File without changes
|
{batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/pipelines/asr/test_asr_utils.py
RENAMED
File without changes
|
{batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/pipelines/cleanup/test_disfluency.py
RENAMED
File without changes
|
{batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/pipelines/cleanup/test_parse_support.py
RENAMED
File without changes
|
{batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/pipelines/fa/test_fa_pipeline.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.6a11 → batchalign-0.7.6a13}/batchalign/tests/pipelines/test_pipeline_models.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|