easytranscriber 0.2.3__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {easytranscriber-0.2.3/src/easytranscriber.egg-info → easytranscriber-0.2.4}/PKG-INFO +1 -1
- {easytranscriber-0.2.3 → easytranscriber-0.2.4}/pyproject.toml +1 -1
- {easytranscriber-0.2.3 → easytranscriber-0.2.4}/src/easytranscriber/asr/cohere.py +5 -3
- {easytranscriber-0.2.3 → easytranscriber-0.2.4}/src/easytranscriber/asr/ct2.py +4 -3
- {easytranscriber-0.2.3 → easytranscriber-0.2.4}/src/easytranscriber/asr/hf.py +1 -1
- {easytranscriber-0.2.3 → easytranscriber-0.2.4}/src/easytranscriber/pipelines.py +11 -3
- {easytranscriber-0.2.3 → easytranscriber-0.2.4/src/easytranscriber.egg-info}/PKG-INFO +1 -1
- {easytranscriber-0.2.3 → easytranscriber-0.2.4}/LICENSE +0 -0
- {easytranscriber-0.2.3 → easytranscriber-0.2.4}/README.md +0 -0
- {easytranscriber-0.2.3 → easytranscriber-0.2.4}/setup.cfg +0 -0
- {easytranscriber-0.2.3 → easytranscriber-0.2.4}/src/easytranscriber/audio.py +0 -0
- {easytranscriber-0.2.3 → easytranscriber-0.2.4}/src/easytranscriber/data/__init__.py +0 -0
- {easytranscriber-0.2.3 → easytranscriber-0.2.4}/src/easytranscriber/data/collators.py +0 -0
- {easytranscriber-0.2.3 → easytranscriber-0.2.4}/src/easytranscriber/data/datamodel.py +0 -0
- {easytranscriber-0.2.3 → easytranscriber-0.2.4}/src/easytranscriber/data/dataset.py +0 -0
- {easytranscriber-0.2.3 → easytranscriber-0.2.4}/src/easytranscriber/search/__init__.py +0 -0
- {easytranscriber-0.2.3 → easytranscriber-0.2.4}/src/easytranscriber/search/__main__.py +0 -0
- {easytranscriber-0.2.3 → easytranscriber-0.2.4}/src/easytranscriber/search/app.py +0 -0
- {easytranscriber-0.2.3 → easytranscriber-0.2.4}/src/easytranscriber/search/db.py +0 -0
- {easytranscriber-0.2.3 → easytranscriber-0.2.4}/src/easytranscriber/search/indexer.py +0 -0
- {easytranscriber-0.2.3 → easytranscriber-0.2.4}/src/easytranscriber/text/normalization.py +0 -0
- {easytranscriber-0.2.3 → easytranscriber-0.2.4}/src/easytranscriber/utils.py +0 -0
- {easytranscriber-0.2.3 → easytranscriber-0.2.4}/src/easytranscriber.egg-info/SOURCES.txt +0 -0
- {easytranscriber-0.2.3 → easytranscriber-0.2.4}/src/easytranscriber.egg-info/dependency_links.txt +0 -0
- {easytranscriber-0.2.3 → easytranscriber-0.2.4}/src/easytranscriber.egg-info/entry_points.txt +0 -0
- {easytranscriber-0.2.3 → easytranscriber-0.2.4}/src/easytranscriber.egg-info/requires.txt +0 -0
- {easytranscriber-0.2.3 → easytranscriber-0.2.4}/src/easytranscriber.egg-info/top_level.txt +0 -0
|
@@ -127,9 +127,11 @@ def transcribe(
|
|
|
127
127
|
transcription = processor.batch_decode(outputs, skip_special_tokens=True)
|
|
128
128
|
transcription_texts.extend(transcription)
|
|
129
129
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
130
|
+
global_chunk_idx = 0
|
|
131
|
+
for speech in metadata.speeches:
|
|
132
|
+
for chunk in speech.chunks:
|
|
133
|
+
chunk.text = transcription_texts[global_chunk_idx].strip()
|
|
134
|
+
global_chunk_idx += 1
|
|
133
135
|
|
|
134
136
|
output_path = Path(output_dir) / Path(metadata.audio_path).with_suffix(".json")
|
|
135
137
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
@@ -11,10 +11,11 @@ from pathlib import Path
|
|
|
11
11
|
import ctranslate2
|
|
12
12
|
import torch
|
|
13
13
|
from easyaligner.utils import save_metadata_json
|
|
14
|
-
from easytranscriber.data.collators import transcribe_collate_fn
|
|
15
14
|
from tqdm import tqdm
|
|
16
15
|
from transformers import WhisperProcessor
|
|
17
16
|
|
|
17
|
+
from easytranscriber.data.collators import transcribe_collate_fn
|
|
18
|
+
|
|
18
19
|
logger = logging.getLogger(__name__)
|
|
19
20
|
|
|
20
21
|
|
|
@@ -90,7 +91,7 @@ def transcribe(
|
|
|
90
91
|
batch_size=batch_size,
|
|
91
92
|
num_workers=num_workers,
|
|
92
93
|
prefetch_factor=prefetch_factor,
|
|
93
|
-
pin_memory=
|
|
94
|
+
pin_memory=False,
|
|
94
95
|
collate_fn=transcribe_collate_fn,
|
|
95
96
|
)
|
|
96
97
|
|
|
@@ -198,7 +199,7 @@ def lang_detect_only(
|
|
|
198
199
|
batch_size=batch_size,
|
|
199
200
|
num_workers=num_workers,
|
|
200
201
|
prefetch_factor=prefetch_factor,
|
|
201
|
-
pin_memory=
|
|
202
|
+
pin_memory=False,
|
|
202
203
|
collate_fn=transcribe_collate_fn,
|
|
203
204
|
)
|
|
204
205
|
|
|
@@ -76,7 +76,7 @@ def transcribe(
|
|
|
76
76
|
|
|
77
77
|
for batch in feature_dataloader:
|
|
78
78
|
with torch.inference_mode():
|
|
79
|
-
batch = batch["features"].to(device
|
|
79
|
+
batch = batch["features"].to(device=device, dtype=model.dtype)
|
|
80
80
|
predicted_ids = model.generate(
|
|
81
81
|
batch,
|
|
82
82
|
return_dict_in_generate=True,
|
|
@@ -230,6 +230,9 @@ def pipeline(
|
|
|
230
230
|
# TODO: Support msgpack throughout the pipeline
|
|
231
231
|
json_paths = [Path(p).with_suffix(".json") for p in audio_paths]
|
|
232
232
|
|
|
233
|
+
# fp16 is a GPU optimization; most fp16 ops are unimplemented or slow on CPU.
|
|
234
|
+
torch_dtype = torch.float16 if "cuda" in str(device) else torch.float32
|
|
235
|
+
|
|
233
236
|
if streaming:
|
|
234
237
|
DatasetClass = StreamingAudioFileDataset
|
|
235
238
|
else:
|
|
@@ -290,7 +293,7 @@ def pipeline(
|
|
|
290
293
|
logger.info(f"Loading Cohere ASR model from {transcription_model}...")
|
|
291
294
|
model = (
|
|
292
295
|
CohereAsrForConditionalGeneration.from_pretrained(
|
|
293
|
-
transcription_model, torch_dtype=
|
|
296
|
+
transcription_model, torch_dtype=torch_dtype, cache_dir=cache_dir
|
|
294
297
|
)
|
|
295
298
|
.to(device)
|
|
296
299
|
.eval()
|
|
@@ -320,7 +323,7 @@ def pipeline(
|
|
|
320
323
|
else:
|
|
321
324
|
logger.info(f"Loading Hugging Face model from {transcription_model}...")
|
|
322
325
|
model = WhisperForConditionalGeneration.from_pretrained(
|
|
323
|
-
transcription_model, torch_dtype=
|
|
326
|
+
transcription_model, torch_dtype=torch_dtype, cache_dir=cache_dir
|
|
324
327
|
).to(device)
|
|
325
328
|
|
|
326
329
|
processor = WhisperProcessor.from_pretrained(transcription_model, cache_dir=cache_dir)
|
|
@@ -368,7 +371,11 @@ def pipeline(
|
|
|
368
371
|
json_paths=[str(Path(output_transcriptions_dir) / p) for p in json_paths]
|
|
369
372
|
)
|
|
370
373
|
|
|
371
|
-
model =
|
|
374
|
+
model = (
|
|
375
|
+
AutoModelForCTC.from_pretrained(emissions_model, cache_dir=cache_dir)
|
|
376
|
+
.to(device)
|
|
377
|
+
.to(torch_dtype)
|
|
378
|
+
)
|
|
372
379
|
processor = Wav2Vec2Processor.from_pretrained(emissions_model, cache_dir=cache_dir)
|
|
373
380
|
|
|
374
381
|
if blank_id is None:
|
|
@@ -394,6 +401,7 @@ def pipeline(
|
|
|
394
401
|
save_emissions=save_emissions,
|
|
395
402
|
return_emissions=False,
|
|
396
403
|
output_dir=output_emissions_dir,
|
|
404
|
+
device=device,
|
|
397
405
|
)
|
|
398
406
|
|
|
399
407
|
# Step 4: Perform Alignment
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{easytranscriber-0.2.3 → easytranscriber-0.2.4}/src/easytranscriber.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{easytranscriber-0.2.3 → easytranscriber-0.2.4}/src/easytranscriber.egg-info/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|