batchalign 0.7.6a13__tar.gz → 0.7.6a15__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalign-0.7.6a13/batchalign.egg-info → batchalign-0.7.6a15}/PKG-INFO +1 -1
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/morphosyntax/ud.py +17 -3
- batchalign-0.7.6a15/batchalign/version +3 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15/batchalign.egg-info}/PKG-INFO +1 -1
- batchalign-0.7.6a13/batchalign/version +0 -3
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/LICENSE +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/MANIFEST.in +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/README.md +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/__main__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/cli/cli.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/cli/dispatch.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/constants.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/document.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/errors.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/formats/chat/generator.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/formats/chat/lexer.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/formats/chat/parser.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/formats/chat/utils.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/resolve.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/dispatch.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/utils/dp.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign.egg-info/SOURCES.txt +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign.egg-info/requires.txt +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/setup.cfg +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a15}/setup.py +0 -0
@@ -213,11 +213,15 @@ def handler__NOUN(word, lang=None):
|
|
213
213
|
if word.deprel == "obj" and case.strip() == "":
|
214
214
|
case = "Acc"
|
215
215
|
|
216
|
+
ger = ""
|
217
|
+
if word.text.endswith("ing") and lang == "en":
|
218
|
+
ger += "-Ger"
|
219
|
+
|
216
220
|
# clear defaults
|
217
221
|
if gender_str == "-Com,Neut" or gender_str == "-Com" or gender_str == "-ComNeut": gender_str=""
|
218
222
|
if number_str == "-Sing": number_str=""
|
219
223
|
|
220
|
-
return handler(word, lang)+gender_str+number_str+stringify_feats(case, type)
|
224
|
+
return handler(word, lang)+gender_str+number_str+stringify_feats(case, type)+ger
|
221
225
|
|
222
226
|
def handler__PROPN(word, lang=None):
|
223
227
|
# code as noun
|
@@ -872,6 +876,8 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
|
|
872
876
|
for i,j in enumerate(ut):
|
873
877
|
for k in j.text:
|
874
878
|
ud_chars.append(ReferenceTarget(k, payload=i))
|
879
|
+
creaky = False
|
880
|
+
collected = ""
|
875
881
|
# brrr
|
876
882
|
aligned = align(chunks_chars, ud_chars, tqdm=False)
|
877
883
|
for i in aligned:
|
@@ -879,8 +885,14 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
|
|
879
885
|
if i.reference_payload not in chunks_backplate[i.payload]:
|
880
886
|
chunks_backplate[i.payload].append(i.reference_payload)
|
881
887
|
elif isinstance(i, Extra) and i.extra_type == ExtraType.PAYLOAD:
|
882
|
-
|
883
|
-
|
888
|
+
if i.key == "*":
|
889
|
+
creaky = not creaky
|
890
|
+
chunks_backplate[i.payload].append("*"+collected+"*")
|
891
|
+
collected = ""
|
892
|
+
elif creaky:
|
893
|
+
collected += i.key
|
894
|
+
elif not creaky:
|
895
|
+
chunks_backplate[i.payload].append(i.key)
|
884
896
|
# we want to replace the morphology of forms that are not actually
|
885
897
|
# supposed to be analyzed
|
886
898
|
elif isinstance(i, Extra) and i.extra_type == ExtraType.REFERENCE:
|
@@ -915,6 +927,8 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
|
|
915
927
|
retokenized_ut = retokenized_ut.replace(" ↑", "↑")
|
916
928
|
retokenized_ut = re.sub(r"@ ?w ?p", "@wp", retokenized_ut)
|
917
929
|
retokenized_ut = retokenized_ut.replace(" @", "@")
|
930
|
+
retokenized_ut = re.sub(r"\*[* ]*", "*", retokenized_ut)
|
931
|
+
retokenized_ut = re.sub(r"\*(.*?)\*", r"*\1* ", retokenized_ut)
|
918
932
|
# pray to everyone that it works---this will simply crash and ignore
|
919
933
|
# the utterance if it didn't work, so we are doing this as a sanity
|
920
934
|
# check rather than needing the parsed result
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/cleanup/support/filled_pauses.eng
RENAMED
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/cleanup/support/replacements.eng
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/pipelines/morphosyntax/ja/verbforms.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/formats/chat/test_chat_generator.py
RENAMED
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/formats/chat/test_chat_lexer.py
RENAMED
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/formats/chat/test_chat_parser.py
RENAMED
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/formats/chat/test_chat_utils.py
RENAMED
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/formats/textgrid/test_textgrid.py
RENAMED
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/pipelines/analysis/test_eval.py
RENAMED
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/pipelines/asr/test_asr_pipeline.py
RENAMED
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/pipelines/asr/test_asr_utils.py
RENAMED
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/pipelines/cleanup/test_disfluency.py
RENAMED
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/pipelines/cleanup/test_parse_support.py
RENAMED
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/pipelines/fa/test_fa_pipeline.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a15}/batchalign/tests/pipelines/test_pipeline_models.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|