PyPI - easytranscriber - Versions diffs - 0.2.2__tar.gz → 0.2.3__tar.gz - Mend

easytranscriber 0.2.2.tar.gz → 0.2.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

{easytranscriber-0.2.2/src/easytranscriber.egg-info → easytranscriber-0.2.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: easytranscriber
-Version: 0.2.2
+Version: 0.2.3
 Summary: Speech recognition with accurate word-level timestamps.
 Author: Faton Rekathati
 Project-URL: Repository, https://github.com/kb-labb/easytranscriber

{easytranscriber-0.2.2 → easytranscriber-0.2.3}/pyproject.toml RENAMED Viewed

@@ -3,7 +3,7 @@ requires = ["setuptools>=67.0.0"]
 build-backend = "setuptools.build_meta"
 [project]
-version = "0.2.2"
+version = "0.2.3"
 name = "easytranscriber"
 requires-python = ">= 3.10"
 description = "Speech recognition with accurate word-level timestamps."

{easytranscriber-0.2.2 → easytranscriber-0.2.3}/src/easytranscriber/asr/ct2.py RENAMED Viewed

@@ -9,9 +9,8 @@ import logging
 from pathlib import Path
 import ctranslate2
-import numpy as np
 import torch
-from easyaligner.utils import save_metadata_json, save_metadata_msgpack
+from easyaligner.utils import save_metadata_json
 from easytranscriber.data.collators import transcribe_collate_fn
 from tqdm import tqdm
 from transformers import WhisperProcessor
@@ -148,12 +147,14 @@ def transcribe(
             transcription_texts.extend(transcription)
         # Update metadata with transcriptions
-        for i, speech in enumerate(metadata.speeches):
-            for j, chunk in enumerate(speech.chunks):
-                chunk.text = transcription_texts[j].strip()
+        global_chunk_idx = 0
+        for speech in metadata.speeches:
+            for chunk in speech.chunks:
+                chunk.text = transcription_texts[global_chunk_idx].strip()
                 if len(language_detections) > 0:
-                    chunk.language = language_detections[j]["language"]
-                    chunk.language_prob = language_detections[j]["probability"]
+                    chunk.language = language_detections[global_chunk_idx]["language"]
+                    chunk.language_prob = language_detections[global_chunk_idx]["probability"]
+                global_chunk_idx += 1
         # Save transcription to file
         output_path = Path(output_dir) / Path(metadata.audio_path).with_suffix(".json")
@@ -205,12 +206,14 @@ def lang_detect_only(
             features_ct2 = batch["features"].numpy()
             features_ct2 = ctranslate2.StorageView.from_array(features_ct2)
             languages = detect_language(model, features_ct2)
-            language_detections.append(languages)
-        for i, speech in enumerate(metadata.speeches):
-            for j, chunk in enumerate(speech.chunks):
-                chunk.language = language_detections[j]["language"]
-                chunk.language_probability = language_detections[j]["probability"]
+            language_detections.extend(languages)
+        global_chunk_idx = 0
+        for speech in metadata.speeches:
+            for chunk in speech.chunks:
+                chunk.language = language_detections[global_chunk_idx]["language"]
+                chunk.language_probability = language_detections[global_chunk_idx]["probability"]
+                global_chunk_idx += 1
         # Save transcription to file
         output_path = Path(output_dir) / Path(metadata.audio_path).with_suffix(".json")

{easytranscriber-0.2.2 → easytranscriber-0.2.3}/src/easytranscriber/asr/hf.py RENAMED Viewed

@@ -96,9 +96,11 @@ def transcribe(
                 transcription_texts.extend(transcription)
-        for i, speech in enumerate(metadata.speeches):
-            for j, chunk in enumerate(speech.chunks):
-                chunk.text = transcription_texts[j].strip()
+        global_chunk_idx = 0
+        for speech in metadata.speeches:
+            for chunk in speech.chunks:
+                chunk.text = transcription_texts[global_chunk_idx].strip()
+                global_chunk_idx += 1
         # Write final transcription to file with msgspec serialization
         output_path = Path(output_dir) / Path(metadata.audio_path).with_suffix(".json")

{easytranscriber-0.2.2 → easytranscriber-0.2.3}/src/easytranscriber/pipelines.py RENAMED Viewed

@@ -80,7 +80,6 @@ def pipeline(
     word_boundary: str | None = None,
     indent: int = 2,
     ndigits: int = 5,
-    batch_size_files: int = 1,
     num_workers_files: int = 2,
     prefetch_factor_files: int = 2,
     batch_size_features: int = 8,
@@ -163,8 +162,6 @@ def pipeline(
         JSON indentation.
     ndigits : int, optional
         Number of digits for rounding.
-    batch_size_files : int, optional
-        Batch size for files. Recommended to set to 1.
     num_workers_files : int, optional
         Number of workers for file loading.
     prefetch_factor_files : int, optional
@@ -257,7 +254,6 @@ def pipeline(
         speeches=speeches,
         chunk_size=chunk_size,
         sample_rate=sample_rate,
-        batch_size=batch_size_files,
         num_workers=num_workers_files,
         prefetch_factor=prefetch_factor_files,
         save_json=save_json,
@@ -345,7 +341,7 @@ def pipeline(
     file_dataloader = torch.utils.data.DataLoader(
         file_dataset,
-        batch_size=batch_size_files,
+        batch_size=1,
         shuffle=False,
         collate_fn=audiofile_collate_fn,
         num_workers=num_workers_files,
@@ -388,7 +384,6 @@ def pipeline(
         sample_rate=sample_rate,
         chunk_size=chunk_size,
         alignment_strategy=alignment_strategy,
-        batch_size_files=batch_size_files,
         num_workers_files=num_workers_files,
         prefetch_factor_files=prefetch_factor_files,
         batch_size_features=batch_size_features,
@@ -407,7 +402,7 @@ def pipeline(
     )
     json_dataloader = torch.utils.data.DataLoader(
         json_dataset,
-        batch_size=batch_size_files,
+        batch_size=1,
         shuffle=False,
         collate_fn=metadata_collate_fn,
         num_workers=num_workers_files,

{easytranscriber-0.2.2 → easytranscriber-0.2.3/src/easytranscriber.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: easytranscriber
-Version: 0.2.2
+Version: 0.2.3
 Summary: Speech recognition with accurate word-level timestamps.
 Author: Faton Rekathati
 Project-URL: Repository, https://github.com/kb-labb/easytranscriber