batchalign 0.7.5a5__tar.gz → 0.7.5a7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalign-0.7.5a5/batchalign.egg-info → batchalign-0.7.5a7}/PKG-INFO +1 -1
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/utterance/infer.py +22 -12
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/analysis/eval.py +14 -1
- batchalign-0.7.5a7/batchalign/version +3 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7/batchalign.egg-info}/PKG-INFO +1 -1
- batchalign-0.7.5a5/batchalign/version +0 -3
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/LICENSE +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/MANIFEST.in +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/README.md +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/__init__.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/__main__.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/cli/cli.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/cli/dispatch.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/constants.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/document.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/errors.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/formats/chat/generator.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/formats/chat/lexer.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/formats/chat/parser.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/formats/chat/utils.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/resolve.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/__init__.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/dispatch.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/morphosyntax/ud.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/utils/dp.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign.egg-info/SOURCES.txt +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign.egg-info/requires.txt +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/setup.cfg +0 -0
- {batchalign-0.7.5a5 → batchalign-0.7.5a7}/setup.py +0 -0
@@ -35,6 +35,7 @@ class BertUtteranceModel(object):
|
|
35
35
|
self.model.eval()
|
36
36
|
|
37
37
|
def __call__(self, passage):
|
38
|
+
print(passage)
|
38
39
|
# input passage words removed of all preexisting punctuation
|
39
40
|
passage = passage.lower()
|
40
41
|
passage = passage.replace('.','')
|
@@ -67,7 +68,8 @@ class BertUtteranceModel(object):
|
|
67
68
|
prev_word_idx = None
|
68
69
|
|
69
70
|
# for each word, perform the action
|
70
|
-
|
71
|
+
wids = tokd.word_ids(0)
|
72
|
+
for indx, elem in enumerate(wids):
|
71
73
|
# if it's None, append nothing, or if we have
|
72
74
|
# seen it before, do nothing
|
73
75
|
if elem is None or elem == prev_word_idx:
|
@@ -81,23 +83,31 @@ class BertUtteranceModel(object):
|
|
81
83
|
# set the working variable
|
82
84
|
w = input_tokenized[elem]
|
83
85
|
|
84
|
-
#
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
86
|
+
# fix one word hanging issue
|
87
|
+
will_action = False
|
88
|
+
if indx < len(wids)-2 and classified_targets[0][indx+1] > 0:
|
89
|
+
will_action = True
|
90
|
+
|
91
|
+
if not will_action:
|
92
|
+
# perform the edit actions
|
93
|
+
if action == 1:
|
94
|
+
w = w[0].upper() + w[1:]
|
95
|
+
elif action == 2:
|
96
|
+
w = w+'.'
|
97
|
+
elif action == 3:
|
98
|
+
w = w+'?'
|
99
|
+
elif action == 4:
|
100
|
+
w = w+'!'
|
101
|
+
elif action == 5:
|
102
|
+
w = w+','
|
103
|
+
|
95
104
|
|
96
105
|
# append
|
97
106
|
res_toks.append(w)
|
98
107
|
|
99
108
|
# compose final passage
|
100
109
|
final_passage = self.tokenizer.convert_tokens_to_string(res_toks)
|
110
|
+
print(final_passage)
|
101
111
|
try:
|
102
112
|
split_passage = sent_tokenize(final_passage)
|
103
113
|
except LookupError:
|
@@ -8,7 +8,7 @@ from batchalign.pipelines.base import *
|
|
8
8
|
from batchalign.pipelines.asr.utils import *
|
9
9
|
from batchalign.utils.config import config_read
|
10
10
|
|
11
|
-
from batchalign.utils.dp import align, ExtraType, Extra
|
11
|
+
from batchalign.utils.dp import align, ExtraType, Extra, Match
|
12
12
|
|
13
13
|
import logging
|
14
14
|
L = logging.getLogger("batchalign")
|
@@ -22,6 +22,9 @@ class EvaluationEngine(BatchalignEngine):
|
|
22
22
|
forms = [ j.text.lower() for i in doc.content for j in i.content if isinstance(i, Utterance)]
|
23
23
|
gold_forms = [ j.text.lower() for i in gold.content for j in i.content if isinstance(i, Utterance)]
|
24
24
|
|
25
|
+
forms = [i for i in forms if i.strip() not in MOR_PUNCT+ENDING_PUNCT]
|
26
|
+
gold_forms = [i for i in gold_forms if i.strip() not in MOR_PUNCT+ENDING_PUNCT]
|
27
|
+
|
25
28
|
# dp!
|
26
29
|
alignment = align(forms, gold_forms, False)
|
27
30
|
|
@@ -35,8 +38,16 @@ class EvaluationEngine(BatchalignEngine):
|
|
35
38
|
# ie: if we have <extra.payload> <extra.reference> +> substitution
|
36
39
|
# but if we have <extra.reference> <extra.reference> this is 2 insertions
|
37
40
|
|
41
|
+
cleaned_alignment = []
|
42
|
+
|
38
43
|
for i in alignment:
|
44
|
+
|
39
45
|
if isinstance(i, Extra):
|
46
|
+
if len(cleaned_alignment) > 0 and i.extra_type == ExtraType.REFERENCE and "name" in i.key and i.key[:4] != "name":
|
47
|
+
cleaned_alignment.pop(-1)
|
48
|
+
cleaned_alignment.append(Match(i.key, None, None))
|
49
|
+
continue
|
50
|
+
|
40
51
|
if prev_error != None and prev_error != i.extra_type:
|
41
52
|
# this is a substitution: we have different "extra"s in
|
42
53
|
# reference vs. payload
|
@@ -61,6 +72,8 @@ class EvaluationEngine(BatchalignEngine):
|
|
61
72
|
else:
|
62
73
|
prev_error = None
|
63
74
|
|
75
|
+
cleaned_alignment.append(i)
|
76
|
+
|
64
77
|
diff = []
|
65
78
|
for i in alignment:
|
66
79
|
if isinstance(i, Extra):
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/cleanup/support/filled_pauses.eng
RENAMED
File without changes
|
{batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/pipelines/cleanup/support/replacements.eng
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/formats/chat/test_chat_generator.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/formats/textgrid/test_textgrid.py
RENAMED
File without changes
|
File without changes
|
{batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/pipelines/asr/test_asr_pipeline.py
RENAMED
File without changes
|
File without changes
|
{batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/pipelines/cleanup/test_disfluency.py
RENAMED
File without changes
|
{batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/pipelines/cleanup/test_parse_support.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.5a5 → batchalign-0.7.5a7}/batchalign/tests/pipelines/test_pipeline_models.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|