batchalign 0.7.6a15__tar.gz → 0.7.6a16__tar.gz
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {batchalign-0.7.6a15/batchalign.egg-info → batchalign-0.7.6a16}/PKG-INFO +1 -1
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/cli/dispatch.py +1 -1
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/whisper/infer_asr.py +10 -79
- batchalign-0.7.6a16/batchalign/version +3 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16/batchalign.egg-info}/PKG-INFO +1 -1
- batchalign-0.7.6a15/batchalign/version +0 -3
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/LICENSE +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/MANIFEST.in +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/README.md +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/__init__.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/__main__.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/cli/cli.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/constants.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/document.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/errors.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/formats/chat/generator.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/formats/chat/lexer.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/formats/chat/parser.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/formats/chat/utils.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/resolve.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/__init__.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/dispatch.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/morphosyntax/ud.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/utils/dp.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign.egg-info/SOURCES.txt +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign.egg-info/requires.txt +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/setup.cfg +0 -0
- {batchalign-0.7.6a15 → batchalign-0.7.6a16}/setup.py +0 -0
@@ -65,7 +65,7 @@ def _dispatch(command, lang, num_speakers,
|
|
65
65
|
for basedir, _, fs in os.walk(in_dir):
|
66
66
|
for f in fs:
|
67
67
|
path = Path(os.path.join(basedir, f))
|
68
|
-
ext = path.suffix.strip(".").strip()
|
68
|
+
ext = path.suffix.strip(".").strip().lower()
|
69
69
|
|
70
70
|
# calculate input path, convert if needed
|
71
71
|
inp_path = str(path)
|
@@ -67,85 +67,16 @@ class WhisperASRModel(object):
|
|
67
67
|
self.__config = GenerationConfig.from_pretrained(base)
|
68
68
|
self.__config.no_repeat_ngram_size = 4
|
69
69
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
)
|
81
|
-
self.__config = GenerationConfig.from_model_config(self.pipe.model.config)
|
82
|
-
self.__config.no_repeat_ngram_size = 4
|
83
|
-
self.__config.use_cache = False
|
84
|
-
|
85
|
-
forced_decoder_ids = self.pipe.tokenizer.get_decoder_prompt_ids(language="yue", task="transcribe")
|
86
|
-
|
87
|
-
suppress_tokens = []
|
88
|
-
|
89
|
-
# Define other parameters
|
90
|
-
return_attention_mask = False
|
91
|
-
pad_token_id = 50257
|
92
|
-
bos_token_id = 50257
|
93
|
-
eos_token_id = 50257
|
94
|
-
decoder_start_token_id = 50258
|
95
|
-
begin_suppress_tokens = [
|
96
|
-
220,
|
97
|
-
50257
|
98
|
-
],
|
99
|
-
alignment_heads = [
|
100
|
-
[5, 3],
|
101
|
-
[5, 9],
|
102
|
-
[8, 0],
|
103
|
-
[8, 4],
|
104
|
-
[8, 8],
|
105
|
-
[9, 0],
|
106
|
-
[9, 7],
|
107
|
-
[9, 9],
|
108
|
-
[10, 5]
|
109
|
-
]
|
110
|
-
lang_to_id = {"<|yue|>": 50325}
|
111
|
-
task_to_id = {"transcribe": 50359}
|
112
|
-
is_multilingual = True
|
113
|
-
max_initial_timestamp_index = 50
|
114
|
-
no_timestamps_token_id = 50363
|
115
|
-
prev_sot_token_id = 50361
|
116
|
-
max_length = 448
|
117
|
-
|
118
|
-
# Assign values to generation config
|
119
|
-
self.__config.forced_decoder_ids = forced_decoder_ids
|
120
|
-
self.__config.suppress_tokens = suppress_tokens
|
121
|
-
self.__config.pad_token_id = pad_token_id
|
122
|
-
self.__config.bos_token_id = bos_token_id
|
123
|
-
self.__config.eos_token_id = eos_token_id
|
124
|
-
self.__config.decoder_start_token_id = decoder_start_token_id
|
125
|
-
self.__config.lang_to_id = lang_to_id
|
126
|
-
self.__config.task_to_id = task_to_id
|
127
|
-
self.__config.alignment_heads = alignment_heads
|
128
|
-
self.__config.alignment_heads = alignment_heads
|
129
|
-
self.__config.begin_suppress_tokens = begin_suppress_tokens
|
130
|
-
self.__config.is_multilingual = is_multilingual
|
131
|
-
self.__config.max_initial_timestamp_index = max_initial_timestamp_index
|
132
|
-
self.__config.no_timestamps_token_id = no_timestamps_token_id
|
133
|
-
self.__config.prev_sot_token_id = prev_sot_token_id
|
134
|
-
self.__config.max_length =max_length
|
135
|
-
|
136
|
-
self.pipe.model.generation_config = self.__config
|
137
|
-
|
138
|
-
else:
|
139
|
-
self.pipe = pipeline(
|
140
|
-
"automatic-speech-recognition",
|
141
|
-
model=model,
|
142
|
-
tokenizer=WhisperTokenizer.from_pretrained(base),
|
143
|
-
chunk_length_s=25,
|
144
|
-
stride_length_s=3,
|
145
|
-
device=DEVICE,
|
146
|
-
torch_dtype=torch.float32,
|
147
|
-
return_timestamps="word",
|
148
|
-
)
|
70
|
+
self.pipe = pipeline(
|
71
|
+
"automatic-speech-recognition",
|
72
|
+
model=model,
|
73
|
+
tokenizer=WhisperTokenizer.from_pretrained(base),
|
74
|
+
chunk_length_s=25,
|
75
|
+
stride_length_s=3,
|
76
|
+
device=DEVICE,
|
77
|
+
torch_dtype=torch.float32,
|
78
|
+
return_timestamps="word",
|
79
|
+
)
|
149
80
|
L.debug("Done, initalizing processor and config...")
|
150
81
|
processor = WhisperProcessor.from_pretrained(base)
|
151
82
|
L.debug("Whisper initialization done.")
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/support/filled_pauses.eng
RENAMED
File without changes
|
{batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/cleanup/support/replacements.eng
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/pipelines/morphosyntax/ja/verbforms.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_generator.py
RENAMED
File without changes
|
{batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_lexer.py
RENAMED
File without changes
|
{batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_parser.py
RENAMED
File without changes
|
{batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/formats/chat/test_chat_utils.py
RENAMED
File without changes
|
{batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/formats/textgrid/test_textgrid.py
RENAMED
File without changes
|
{batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/pipelines/analysis/test_eval.py
RENAMED
File without changes
|
{batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/pipelines/asr/test_asr_pipeline.py
RENAMED
File without changes
|
{batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/pipelines/asr/test_asr_utils.py
RENAMED
File without changes
|
{batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/pipelines/cleanup/test_disfluency.py
RENAMED
File without changes
|
{batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/pipelines/cleanup/test_parse_support.py
RENAMED
File without changes
|
{batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/pipelines/fa/test_fa_pipeline.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.6a15 → batchalign-0.7.6a16}/batchalign/tests/pipelines/test_pipeline_models.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|