batchalign 0.7.6a12__tar.gz → 0.7.6a13__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {batchalign-0.7.6a12/batchalign.egg-info → batchalign-0.7.6a13}/PKG-INFO +1 -1
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/document.py +1 -2
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/formats/chat/utils.py +1 -1
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/morphosyntax/ud.py +12 -5
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/version +2 -2
- {batchalign-0.7.6a12 → batchalign-0.7.6a13/batchalign.egg-info}/PKG-INFO +1 -1
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/LICENSE +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/MANIFEST.in +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/README.md +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/__init__.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/__main__.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/cli/cli.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/cli/dispatch.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/constants.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/errors.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/formats/chat/generator.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/formats/chat/lexer.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/formats/chat/parser.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/resolve.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/__init__.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/dispatch.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/utils/dp.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign.egg-info/SOURCES.txt +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign.egg-info/requires.txt +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/setup.cfg +0 -0
- {batchalign-0.7.6a12 → batchalign-0.7.6a13}/setup.py +0 -0
@@ -324,8 +324,7 @@ class Utterance(BaseModel):
|
|
324
324
|
to_include.append(TokenType.RETRACE)
|
325
325
|
if include_fp:
|
326
326
|
to_include.append(TokenType.FP)
|
327
|
-
filtered = filter(lambda x:x.type in to_include,
|
328
|
-
self.content)
|
327
|
+
filtered = filter(lambda x:x.type in to_include, self.content)
|
329
328
|
# chain them together
|
330
329
|
if join_with_spaces:
|
331
330
|
return " ".join([i.text for i in filtered])
|
@@ -146,7 +146,7 @@ def annotation_clean(content, special=False):
|
|
146
146
|
cleaned_word = cleaned_word.replace("~","").replace("&~","")
|
147
147
|
cleaned_word = cleaned_word.replace(">","").replace("<","")
|
148
148
|
cleaned_word = cleaned_word.replace("〕","").replace("//","").replace(";","")
|
149
|
-
cleaned_word = re.sub(r"@[^
|
149
|
+
cleaned_word = re.sub(r"@[^abcefpoqsw]", '', cleaned_word)
|
150
150
|
cleaned_word = re.sub(r"&.", '', cleaned_word)
|
151
151
|
|
152
152
|
return cleaned_word
|
@@ -837,11 +837,17 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
|
|
837
837
|
L.debug(f"Encountered an utterance that's likely devoid of morphological information; skipping... utterance='{doc.content[indx]}'")
|
838
838
|
continue
|
839
839
|
|
840
|
+
|
840
841
|
if retokenize:
|
841
842
|
# rewrite the sentence with our desired tokenizations
|
842
843
|
ut, end = chat_parse_utterance(" ".join([i.text for i in sents[0].tokens])+" "+ending,
|
843
844
|
mor, gra,
|
844
845
|
None, None)
|
846
|
+
# fix xbxxx
|
847
|
+
for i in ut:
|
848
|
+
if i.text == "xbxxx" and len(i.morphology) > 0:
|
849
|
+
i.text = i.morphology[0].lemma
|
850
|
+
|
845
851
|
# split the text up into previous chunks
|
846
852
|
chunks = list(enumerate(doc.content[indx].text.split(" ")))
|
847
853
|
# filter out everything that could not possibly align
|
@@ -878,11 +884,12 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
|
|
878
884
|
# we want to replace the morphology of forms that are not actually
|
879
885
|
# supposed to be analyzed
|
880
886
|
elif isinstance(i, Extra) and i.extra_type == ExtraType.REFERENCE:
|
881
|
-
ut[i.payload].
|
882
|
-
|
883
|
-
|
884
|
-
|
885
|
-
|
887
|
+
if ut[i.payload].text != ",":
|
888
|
+
ut[i.payload].morphology = [Morphology(
|
889
|
+
lemma = sents[0].tokens[i.payload].text if len(sents) > 0 and len(sents[0].tokens) > i.payload and sents[0].tokens[i.payload].text != "xbxxx" else ut[i.payload].text,
|
890
|
+
pos = "x",
|
891
|
+
feats = ""
|
892
|
+
)]
|
886
893
|
|
887
894
|
poses = [i.morphology[0].pos.upper() for i in ut
|
888
895
|
if i.morphology
|
@@ -1,3 +1,3 @@
|
|
1
|
-
0.7.6-alpha.12
|
2
|
-
October
|
1
|
+
0.7.6-alpha.13
|
2
|
+
October 13, 2024
|
3
3
|
patch bug regarding comma structure
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/support/filled_pauses.eng
RENAMED
File without changes
|
{batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/cleanup/support/replacements.eng
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/pipelines/morphosyntax/ja/verbforms.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_generator.py
RENAMED
File without changes
|
{batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_lexer.py
RENAMED
File without changes
|
{batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_parser.py
RENAMED
File without changes
|
{batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/formats/chat/test_chat_utils.py
RENAMED
File without changes
|
{batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/formats/textgrid/test_textgrid.py
RENAMED
File without changes
|
{batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/pipelines/analysis/test_eval.py
RENAMED
File without changes
|
{batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/pipelines/asr/test_asr_pipeline.py
RENAMED
File without changes
|
{batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/pipelines/asr/test_asr_utils.py
RENAMED
File without changes
|
{batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/pipelines/cleanup/test_disfluency.py
RENAMED
File without changes
|
{batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/pipelines/cleanup/test_parse_support.py
RENAMED
File without changes
|
{batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/pipelines/fa/test_fa_pipeline.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.6a12 → batchalign-0.7.6a13}/batchalign/tests/pipelines/test_pipeline_models.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|