easytranscriber 0.2.2__tar.gz → 0.2.3__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {easytranscriber-0.2.2/src/easytranscriber.egg-info → easytranscriber-0.2.3}/PKG-INFO +1 -1
  2. {easytranscriber-0.2.2 → easytranscriber-0.2.3}/pyproject.toml +1 -1
  3. {easytranscriber-0.2.2 → easytranscriber-0.2.3}/src/easytranscriber/asr/ct2.py +16 -13
  4. {easytranscriber-0.2.2 → easytranscriber-0.2.3}/src/easytranscriber/asr/hf.py +5 -3
  5. {easytranscriber-0.2.2 → easytranscriber-0.2.3}/src/easytranscriber/pipelines.py +2 -7
  6. {easytranscriber-0.2.2 → easytranscriber-0.2.3/src/easytranscriber.egg-info}/PKG-INFO +1 -1
  7. {easytranscriber-0.2.2 → easytranscriber-0.2.3}/LICENSE +0 -0
  8. {easytranscriber-0.2.2 → easytranscriber-0.2.3}/README.md +0 -0
  9. {easytranscriber-0.2.2 → easytranscriber-0.2.3}/setup.cfg +0 -0
  10. {easytranscriber-0.2.2 → easytranscriber-0.2.3}/src/easytranscriber/asr/cohere.py +0 -0
  11. {easytranscriber-0.2.2 → easytranscriber-0.2.3}/src/easytranscriber/audio.py +0 -0
  12. {easytranscriber-0.2.2 → easytranscriber-0.2.3}/src/easytranscriber/data/__init__.py +0 -0
  13. {easytranscriber-0.2.2 → easytranscriber-0.2.3}/src/easytranscriber/data/collators.py +0 -0
  14. {easytranscriber-0.2.2 → easytranscriber-0.2.3}/src/easytranscriber/data/datamodel.py +0 -0
  15. {easytranscriber-0.2.2 → easytranscriber-0.2.3}/src/easytranscriber/data/dataset.py +0 -0
  16. {easytranscriber-0.2.2 → easytranscriber-0.2.3}/src/easytranscriber/search/__init__.py +0 -0
  17. {easytranscriber-0.2.2 → easytranscriber-0.2.3}/src/easytranscriber/search/__main__.py +0 -0
  18. {easytranscriber-0.2.2 → easytranscriber-0.2.3}/src/easytranscriber/search/app.py +0 -0
  19. {easytranscriber-0.2.2 → easytranscriber-0.2.3}/src/easytranscriber/search/db.py +0 -0
  20. {easytranscriber-0.2.2 → easytranscriber-0.2.3}/src/easytranscriber/search/indexer.py +0 -0
  21. {easytranscriber-0.2.2 → easytranscriber-0.2.3}/src/easytranscriber/text/normalization.py +0 -0
  22. {easytranscriber-0.2.2 → easytranscriber-0.2.3}/src/easytranscriber/utils.py +0 -0
  23. {easytranscriber-0.2.2 → easytranscriber-0.2.3}/src/easytranscriber.egg-info/SOURCES.txt +0 -0
  24. {easytranscriber-0.2.2 → easytranscriber-0.2.3}/src/easytranscriber.egg-info/dependency_links.txt +0 -0
  25. {easytranscriber-0.2.2 → easytranscriber-0.2.3}/src/easytranscriber.egg-info/entry_points.txt +0 -0
  26. {easytranscriber-0.2.2 → easytranscriber-0.2.3}/src/easytranscriber.egg-info/requires.txt +0 -0
  27. {easytranscriber-0.2.2 → easytranscriber-0.2.3}/src/easytranscriber.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: easytranscriber
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: Speech recognition with accurate word-level timestamps.
5
5
  Author: Faton Rekathati
6
6
  Project-URL: Repository, https://github.com/kb-labb/easytranscriber
@@ -3,7 +3,7 @@ requires = ["setuptools>=67.0.0"]
3
3
  build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
- version = "0.2.2"
6
+ version = "0.2.3"
7
7
  name = "easytranscriber"
8
8
  requires-python = ">= 3.10"
9
9
  description = "Speech recognition with accurate word-level timestamps."
@@ -9,9 +9,8 @@ import logging
9
9
  from pathlib import Path
10
10
 
11
11
  import ctranslate2
12
- import numpy as np
13
12
  import torch
14
- from easyaligner.utils import save_metadata_json, save_metadata_msgpack
13
+ from easyaligner.utils import save_metadata_json
15
14
  from easytranscriber.data.collators import transcribe_collate_fn
16
15
  from tqdm import tqdm
17
16
  from transformers import WhisperProcessor
@@ -148,12 +147,14 @@ def transcribe(
148
147
  transcription_texts.extend(transcription)
149
148
 
150
149
  # Update metadata with transcriptions
151
- for i, speech in enumerate(metadata.speeches):
152
- for j, chunk in enumerate(speech.chunks):
153
- chunk.text = transcription_texts[j].strip()
150
+ global_chunk_idx = 0
151
+ for speech in metadata.speeches:
152
+ for chunk in speech.chunks:
153
+ chunk.text = transcription_texts[global_chunk_idx].strip()
154
154
  if len(language_detections) > 0:
155
- chunk.language = language_detections[j]["language"]
156
- chunk.language_prob = language_detections[j]["probability"]
155
+ chunk.language = language_detections[global_chunk_idx]["language"]
156
+ chunk.language_prob = language_detections[global_chunk_idx]["probability"]
157
+ global_chunk_idx += 1
157
158
 
158
159
  # Save transcription to file
159
160
  output_path = Path(output_dir) / Path(metadata.audio_path).with_suffix(".json")
@@ -205,12 +206,14 @@ def lang_detect_only(
205
206
  features_ct2 = batch["features"].numpy()
206
207
  features_ct2 = ctranslate2.StorageView.from_array(features_ct2)
207
208
  languages = detect_language(model, features_ct2)
208
- language_detections.append(languages)
209
-
210
- for i, speech in enumerate(metadata.speeches):
211
- for j, chunk in enumerate(speech.chunks):
212
- chunk.language = language_detections[j]["language"]
213
- chunk.language_probability = language_detections[j]["probability"]
209
+ language_detections.extend(languages)
210
+
211
+ global_chunk_idx = 0
212
+ for speech in metadata.speeches:
213
+ for chunk in speech.chunks:
214
+ chunk.language = language_detections[global_chunk_idx]["language"]
215
+ chunk.language_probability = language_detections[global_chunk_idx]["probability"]
216
+ global_chunk_idx += 1
214
217
 
215
218
  # Save transcription to file
216
219
  output_path = Path(output_dir) / Path(metadata.audio_path).with_suffix(".json")
@@ -96,9 +96,11 @@ def transcribe(
96
96
 
97
97
  transcription_texts.extend(transcription)
98
98
 
99
- for i, speech in enumerate(metadata.speeches):
100
- for j, chunk in enumerate(speech.chunks):
101
- chunk.text = transcription_texts[j].strip()
99
+ global_chunk_idx = 0
100
+ for speech in metadata.speeches:
101
+ for chunk in speech.chunks:
102
+ chunk.text = transcription_texts[global_chunk_idx].strip()
103
+ global_chunk_idx += 1
102
104
 
103
105
  # Write final transcription to file with msgspec serialization
104
106
  output_path = Path(output_dir) / Path(metadata.audio_path).with_suffix(".json")
@@ -80,7 +80,6 @@ def pipeline(
80
80
  word_boundary: str | None = None,
81
81
  indent: int = 2,
82
82
  ndigits: int = 5,
83
- batch_size_files: int = 1,
84
83
  num_workers_files: int = 2,
85
84
  prefetch_factor_files: int = 2,
86
85
  batch_size_features: int = 8,
@@ -163,8 +162,6 @@ def pipeline(
163
162
  JSON indentation.
164
163
  ndigits : int, optional
165
164
  Number of digits for rounding.
166
- batch_size_files : int, optional
167
- Batch size for files. Recommended to set to 1.
168
165
  num_workers_files : int, optional
169
166
  Number of workers for file loading.
170
167
  prefetch_factor_files : int, optional
@@ -257,7 +254,6 @@ def pipeline(
257
254
  speeches=speeches,
258
255
  chunk_size=chunk_size,
259
256
  sample_rate=sample_rate,
260
- batch_size=batch_size_files,
261
257
  num_workers=num_workers_files,
262
258
  prefetch_factor=prefetch_factor_files,
263
259
  save_json=save_json,
@@ -345,7 +341,7 @@ def pipeline(
345
341
 
346
342
  file_dataloader = torch.utils.data.DataLoader(
347
343
  file_dataset,
348
- batch_size=batch_size_files,
344
+ batch_size=1,
349
345
  shuffle=False,
350
346
  collate_fn=audiofile_collate_fn,
351
347
  num_workers=num_workers_files,
@@ -388,7 +384,6 @@ def pipeline(
388
384
  sample_rate=sample_rate,
389
385
  chunk_size=chunk_size,
390
386
  alignment_strategy=alignment_strategy,
391
- batch_size_files=batch_size_files,
392
387
  num_workers_files=num_workers_files,
393
388
  prefetch_factor_files=prefetch_factor_files,
394
389
  batch_size_features=batch_size_features,
@@ -407,7 +402,7 @@ def pipeline(
407
402
  )
408
403
  json_dataloader = torch.utils.data.DataLoader(
409
404
  json_dataset,
410
- batch_size=batch_size_files,
405
+ batch_size=1,
411
406
  shuffle=False,
412
407
  collate_fn=metadata_collate_fn,
413
408
  num_workers=num_workers_files,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: easytranscriber
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: Speech recognition with accurate word-level timestamps.
5
5
  Author: Faton Rekathati
6
6
  Project-URL: Repository, https://github.com/kb-labb/easytranscriber
File without changes