batchalign 0.7.17.post1__tar.gz → 0.7.17.post3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalign-0.7.17.post1/batchalign.egg-info → batchalign-0.7.17.post3}/PKG-INFO +1 -1
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/models/whisper/infer_asr.py +22 -10
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/asr/utils.py +5 -11
- batchalign-0.7.17.post3/batchalign/version +3 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3/batchalign.egg-info}/PKG-INFO +1 -1
- batchalign-0.7.17.post1/batchalign/version +0 -3
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/LICENSE +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/MANIFEST.in +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/README.md +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/__init__.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/__main__.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/cli/cli.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/cli/dispatch.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/constants.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/document.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/errors.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/formats/chat/generator.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/formats/chat/lexer.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/formats/chat/parser.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/formats/chat/utils.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/models/resolve.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/models/utterance/cantonese_infer.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/models/wave2vec/__init__.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/models/wave2vec/infer_fa.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/__init__.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/asr/num2chinese.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/dispatch.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/morphosyntax/ud.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/translate/__init__.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/translate/seamless.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/utils/dp.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign.egg-info/SOURCES.txt +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign.egg-info/requires.txt +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/setup.cfg +0 -0
- {batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/setup.py +0 -0
@@ -84,16 +84,28 @@ class WhisperASRModel(object):
|
|
84
84
|
[10, 5]
|
85
85
|
]
|
86
86
|
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
87
|
+
try:
|
88
|
+
self.pipe = pipeline(
|
89
|
+
"automatic-speech-recognition",
|
90
|
+
model=model,
|
91
|
+
tokenizer=WhisperTokenizer.from_pretrained(base),
|
92
|
+
chunk_length_s=25,
|
93
|
+
stride_length_s=3,
|
94
|
+
device=DEVICE,
|
95
|
+
torch_dtype=torch.bfloat16,
|
96
|
+
return_timestamps="word",
|
97
|
+
)
|
98
|
+
except TypeError:
|
99
|
+
self.pipe = pipeline(
|
100
|
+
"automatic-speech-recognition",
|
101
|
+
model=model,
|
102
|
+
tokenizer=WhisperTokenizer.from_pretrained(base),
|
103
|
+
chunk_length_s=25,
|
104
|
+
stride_length_s=3,
|
105
|
+
device=DEVICE,
|
106
|
+
torch_dtype=torch.float16,
|
107
|
+
return_timestamps="word",
|
108
|
+
)
|
97
109
|
L.debug("Done, initalizing processor and config...")
|
98
110
|
processor = WhisperProcessor.from_pretrained(base)
|
99
111
|
L.debug("Whisper initialization done.")
|
@@ -98,18 +98,12 @@ def retokenize_with_engine(intermediate_output, engine):
|
|
98
98
|
tmp = []
|
99
99
|
|
100
100
|
for s in new_ut:
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
else:
|
106
|
-
# Append with default timestamp if utterance is exhausted
|
107
|
-
tmp.append((s, [None, None]))
|
108
|
-
|
109
|
-
if current_index >= len(utterance):
|
110
|
-
tmp.append((delim, [None, None])) # Append the punctuation
|
101
|
+
try:
|
102
|
+
tmp.append((s, utterance.pop(0)[1]))
|
103
|
+
except IndexError:
|
104
|
+
continue
|
111
105
|
|
112
|
-
final_outputs.append((speaker, tmp))
|
106
|
+
final_outputs.append((speaker, tmp+[[delim, [None, None]]]))
|
113
107
|
|
114
108
|
return final_outputs
|
115
109
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/formats/textgrid/generator.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/models/utterance/cantonese_infer.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/analysis/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/cleanup/__init__.py
RENAMED
File without changes
|
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/cleanup/disfluencies.py
RENAMED
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/cleanup/parse_support.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/cleanup/support/test.test
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/morphosyntax/__init__.py
RENAMED
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/morphosyntax/coref.py
RENAMED
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/morphosyntax/en/irr.py
RENAMED
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/morphosyntax/fr/apm.py
RENAMED
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/morphosyntax/fr/apmn.py
RENAMED
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/morphosyntax/fr/case.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/speaker/__init__.py
RENAMED
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/speaker/nemo_speaker.py
RENAMED
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/translate/__init__.py
RENAMED
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/translate/seamless.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/utterance/__init__.py
RENAMED
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/pipelines/utterance/ud_utterance.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/tests/formats/chat/test_chat_file.py
RENAMED
File without changes
|
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/tests/formats/chat/test_chat_lexer.py
RENAMED
File without changes
|
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/tests/formats/chat/test_chat_utils.py
RENAMED
File without changes
|
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/tests/pipelines/analysis/test_eval.py
RENAMED
File without changes
|
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/tests/pipelines/asr/test_asr_utils.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign/tests/pipelines/test_pipeline.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.17.post1 → batchalign-0.7.17.post3}/batchalign.egg-info/dependency_links.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|