batchalign 0.7.6a14__tar.gz → 0.7.6a16__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalign-0.7.6a14/batchalign.egg-info → batchalign-0.7.6a16}/PKG-INFO +1 -1
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/cli/dispatch.py +1 -1
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/whisper/infer_asr.py +10 -79
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/morphosyntax/ud.py +5 -1
- batchalign-0.7.6a16/batchalign/version +3 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16/batchalign.egg-info}/PKG-INFO +1 -1
- batchalign-0.7.6a14/batchalign/version +0 -3
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/LICENSE +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/MANIFEST.in +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/README.md +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/__init__.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/__main__.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/cli/cli.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/constants.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/document.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/errors.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/formats/chat/generator.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/formats/chat/lexer.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/formats/chat/parser.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/formats/chat/utils.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/resolve.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/__init__.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/dispatch.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/utils/dp.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign.egg-info/SOURCES.txt +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign.egg-info/requires.txt +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/setup.cfg +0 -0
- {batchalign-0.7.6a14 → batchalign-0.7.6a16}/setup.py +0 -0
@@ -65,7 +65,7 @@ def _dispatch(command, lang, num_speakers,
|
|
65
65
|
for basedir, _, fs in os.walk(in_dir):
|
66
66
|
for f in fs:
|
67
67
|
path = Path(os.path.join(basedir, f))
|
68
|
-
ext = path.suffix.strip(".").strip()
|
68
|
+
ext = path.suffix.strip(".").strip().lower()
|
69
69
|
|
70
70
|
# calculate input path, convert if needed
|
71
71
|
inp_path = str(path)
|
@@ -67,85 +67,16 @@ class WhisperASRModel(object):
|
|
67
67
|
self.__config = GenerationConfig.from_pretrained(base)
|
68
68
|
self.__config.no_repeat_ngram_size = 4
|
69
69
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
)
|
81
|
-
self.__config = GenerationConfig.from_model_config(self.pipe.model.config)
|
82
|
-
self.__config.no_repeat_ngram_size = 4
|
83
|
-
self.__config.use_cache = False
|
84
|
-
|
85
|
-
forced_decoder_ids = self.pipe.tokenizer.get_decoder_prompt_ids(language="yue", task="transcribe")
|
86
|
-
|
87
|
-
suppress_tokens = []
|
88
|
-
|
89
|
-
# Define other parameters
|
90
|
-
return_attention_mask = False
|
91
|
-
pad_token_id = 50257
|
92
|
-
bos_token_id = 50257
|
93
|
-
eos_token_id = 50257
|
94
|
-
decoder_start_token_id = 50258
|
95
|
-
begin_suppress_tokens = [
|
96
|
-
220,
|
97
|
-
50257
|
98
|
-
],
|
99
|
-
alignment_heads = [
|
100
|
-
[5, 3],
|
101
|
-
[5, 9],
|
102
|
-
[8, 0],
|
103
|
-
[8, 4],
|
104
|
-
[8, 8],
|
105
|
-
[9, 0],
|
106
|
-
[9, 7],
|
107
|
-
[9, 9],
|
108
|
-
[10, 5]
|
109
|
-
]
|
110
|
-
lang_to_id = {"<|yue|>": 50325}
|
111
|
-
task_to_id = {"transcribe": 50359}
|
112
|
-
is_multilingual = True
|
113
|
-
max_initial_timestamp_index = 50
|
114
|
-
no_timestamps_token_id = 50363
|
115
|
-
prev_sot_token_id = 50361
|
116
|
-
max_length = 448
|
117
|
-
|
118
|
-
# Assign values to generation config
|
119
|
-
self.__config.forced_decoder_ids = forced_decoder_ids
|
120
|
-
self.__config.suppress_tokens = suppress_tokens
|
121
|
-
self.__config.pad_token_id = pad_token_id
|
122
|
-
self.__config.bos_token_id = bos_token_id
|
123
|
-
self.__config.eos_token_id = eos_token_id
|
124
|
-
self.__config.decoder_start_token_id = decoder_start_token_id
|
125
|
-
self.__config.lang_to_id = lang_to_id
|
126
|
-
self.__config.task_to_id = task_to_id
|
127
|
-
self.__config.alignment_heads = alignment_heads
|
128
|
-
self.__config.alignment_heads = alignment_heads
|
129
|
-
self.__config.begin_suppress_tokens = begin_suppress_tokens
|
130
|
-
self.__config.is_multilingual = is_multilingual
|
131
|
-
self.__config.max_initial_timestamp_index = max_initial_timestamp_index
|
132
|
-
self.__config.no_timestamps_token_id = no_timestamps_token_id
|
133
|
-
self.__config.prev_sot_token_id = prev_sot_token_id
|
134
|
-
self.__config.max_length =max_length
|
135
|
-
|
136
|
-
self.pipe.model.generation_config = self.__config
|
137
|
-
|
138
|
-
else:
|
139
|
-
self.pipe = pipeline(
|
140
|
-
"automatic-speech-recognition",
|
141
|
-
model=model,
|
142
|
-
tokenizer=WhisperTokenizer.from_pretrained(base),
|
143
|
-
chunk_length_s=25,
|
144
|
-
stride_length_s=3,
|
145
|
-
device=DEVICE,
|
146
|
-
torch_dtype=torch.float32,
|
147
|
-
return_timestamps="word",
|
148
|
-
)
|
70
|
+
self.pipe = pipeline(
|
71
|
+
"automatic-speech-recognition",
|
72
|
+
model=model,
|
73
|
+
tokenizer=WhisperTokenizer.from_pretrained(base),
|
74
|
+
chunk_length_s=25,
|
75
|
+
stride_length_s=3,
|
76
|
+
device=DEVICE,
|
77
|
+
torch_dtype=torch.float32,
|
78
|
+
return_timestamps="word",
|
79
|
+
)
|
149
80
|
L.debug("Done, initalizing processor and config...")
|
150
81
|
processor = WhisperProcessor.from_pretrained(base)
|
151
82
|
L.debug("Whisper initialization done.")
|
@@ -213,11 +213,15 @@ def handler__NOUN(word, lang=None):
|
|
213
213
|
if word.deprel == "obj" and case.strip() == "":
|
214
214
|
case = "Acc"
|
215
215
|
|
216
|
+
ger = ""
|
217
|
+
if word.text.endswith("ing") and lang == "en":
|
218
|
+
ger += "-Ger"
|
219
|
+
|
216
220
|
# clear defaults
|
217
221
|
if gender_str == "-Com,Neut" or gender_str == "-Com" or gender_str == "-ComNeut": gender_str=""
|
218
222
|
if number_str == "-Sing": number_str=""
|
219
223
|
|
220
|
-
return handler(word, lang)+gender_str+number_str+stringify_feats(case, type)
|
224
|
+
return handler(word, lang)+gender_str+number_str+stringify_feats(case, type)+ger
|
221
225
|
|
222
226
|
def handler__PROPN(word, lang=None):
|
223
227
|
# code as noun
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/support/filled_pauses.eng
RENAMED
File without changes
|
{batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/support/replacements.eng
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/pipelines/morphosyntax/ja/verbforms.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_generator.py
RENAMED
File without changes
|
{batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_lexer.py
RENAMED
File without changes
|
{batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_parser.py
RENAMED
File without changes
|
{batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_utils.py
RENAMED
File without changes
|
{batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/formats/textgrid/test_textgrid.py
RENAMED
File without changes
|
{batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/pipelines/analysis/test_eval.py
RENAMED
File without changes
|
{batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/pipelines/asr/test_asr_pipeline.py
RENAMED
File without changes
|
{batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/pipelines/asr/test_asr_utils.py
RENAMED
File without changes
|
{batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/pipelines/cleanup/test_disfluency.py
RENAMED
File without changes
|
{batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/pipelines/cleanup/test_parse_support.py
RENAMED
File without changes
|
{batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/pipelines/fa/test_fa_pipeline.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.6a14 → batchalign-0.7.6a16}/batchalign/tests/pipelines/test_pipeline_models.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|