audio-scribe 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- audio_scribe/__init__.py +12 -20
- audio_scribe/auth.py +119 -0
- audio_scribe/config.py +24 -0
- audio_scribe/models.py +196 -0
- audio_scribe/transcriber.py +131 -0
- audio_scribe/utils.py +93 -0
- {audio_scribe-0.1.1.dist-info → audio_scribe-0.1.3.dist-info}/METADATA +9 -15
- audio_scribe-0.1.3.dist-info/RECORD +11 -0
- audio_scribe-0.1.3.dist-info/entry_points.txt +2 -0
- {audio_scribe-0.1.1.dist-info → audio_scribe-0.1.3.dist-info}/top_level.txt +0 -1
- audio_scribe/cli.py +0 -567
- audio_scribe-0.1.1.dist-info/RECORD +0 -9
- audio_scribe-0.1.1.dist-info/entry_points.txt +0 -2
- tests/__init__.py +0 -0
- tests/test_audio_scribe_main.py +0 -468
- {audio_scribe-0.1.1.dist-info → audio_scribe-0.1.3.dist-info}/WHEEL +0 -0
audio_scribe/cli.py
DELETED
@@ -1,567 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
Audio Scribe
|
4
|
-
-----------------
|
5
|
-
A command-line script for transcribing audio files with speaker diarization
|
6
|
-
using Whisper and Pyannote. The script uses a Hugging Face token for
|
7
|
-
downloading Pyannote speaker-diarization models and displays a progress bar
|
8
|
-
with resource usage while transcribing.
|
9
|
-
"""
|
10
|
-
|
11
|
-
print("Initializing environment... Please wait while we load dependencies and models.")
|
12
|
-
import sys
|
13
|
-
sys.stdout.flush()
|
14
|
-
|
15
|
-
import os
|
16
|
-
import glob
|
17
|
-
import wave
|
18
|
-
import json
|
19
|
-
import logging
|
20
|
-
import warnings
|
21
|
-
import argparse
|
22
|
-
import readline
|
23
|
-
from pathlib import Path
|
24
|
-
from datetime import datetime
|
25
|
-
from typing import Optional, Dict
|
26
|
-
from dataclasses import dataclass
|
27
|
-
import base64
|
28
|
-
|
29
|
-
# Core dependencies
|
30
|
-
from cryptography.fernet import Fernet
|
31
|
-
from cryptography.hazmat.primitives import hashes
|
32
|
-
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
|
33
|
-
import torch
|
34
|
-
import whisper
|
35
|
-
import importlib.metadata
|
36
|
-
from importlib.metadata import PackageNotFoundError
|
37
|
-
from pyannote.audio import Pipeline
|
38
|
-
|
39
|
-
# Progress bar dependencies - imported via HAVE_PROGRESS_SUPPORT from __init__
|
40
|
-
try:
|
41
|
-
from alive_progress import alive_bar
|
42
|
-
import psutil
|
43
|
-
import GPUtil
|
44
|
-
HAVE_PROGRESS_SUPPORT = True
|
45
|
-
except ImportError:
|
46
|
-
HAVE_PROGRESS_SUPPORT = False
|
47
|
-
|
48
|
-
|
49
|
-
# Configure logging
|
50
|
-
LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"

# Console logging is always enabled. The log file lives in the current working
# directory; if it cannot be opened (e.g. read-only directory) we keep going
# with console output only instead of failing at import time.
_log_handlers = [logging.StreamHandler()]
try:
    _log_handlers.append(
        logging.FileHandler("transcription.log", mode="a", encoding="utf-8")
    )
    _log_file_error = None
except OSError as exc:
    _log_file_error = exc

logging.basicConfig(
    level=logging.INFO,
    format=LOG_FORMAT,
    handlers=_log_handlers,
)
logger = logging.getLogger(__name__)

if _log_file_error is not None:
    logger.warning(
        "File logging disabled (%s); logging to console only.", _log_file_error
    )
|
60
|
-
|
61
|
-
# ---------- FILE PATH TAB-COMPLETION SNIPPET ----------
|
62
|
-
def complete_path(text, state):
    """
    Return the 'state'-th filesystem completion for 'text', or None.

    Intended as a 'readline' completer: it is called with state = 0, 1, 2, ...
    and must return None once there are no more candidates.

    - A leading '~' is expanded to the user's home directory, so returned
      candidates are expanded paths.
    - If 'text' contains '*' or '?', candidates come from glob.glob().
    - Otherwise candidates are the entries of the directory part of 'text'
      whose names start with the final path component; directories get a
      trailing path separator.
    """
    # Without expansion, "~/..." would be looked up as a literal "~" directory.
    expanded = os.path.expanduser(text)

    if '*' in expanded or '?' in expanded:
        matches = glob.glob(expanded)
    else:
        # Split off the directory name and partial file/directory name
        directory, partial = os.path.split(expanded)
        if not directory:
            directory = '.'
        try:
            entries = os.listdir(directory)
        except OSError:
            # Directory doesn't exist or we lack permission: no matches
            entries = []

        matches = []
        for entry in entries:
            if not entry.startswith(partial):
                continue
            full_path = os.path.join(directory, entry)
            # Mark directories with a trailing separator
            if os.path.isdir(full_path) and not full_path.endswith(os.path.sep):
                full_path += os.path.sep
            matches.append(full_path)

    # Sort so that successive 'state' values walk a stable order
    matches.sort()

    # Out-of-range (including negative) states mean "no more completions"
    return matches[state] if 0 <= state < len(matches) else None
|
96
|
-
|
97
|
-
|
98
|
-
@dataclass
class TranscriptionConfig:
    """
    Configuration settings for the transcription pipeline.

    Creating an instance has a side effect: both the output directory and the
    temp directory are created on disk if they do not exist.
    """
    # Where transcripts are written; a str is accepted and converted to Path.
    output_directory: Path
    # Whisper model name.
    whisper_model: str = "base.en"
    # Pyannote diarization pipeline identifier.
    diarization_model: str = "pyannote/speaker-diarization-3.1"
    # Scratch directory; defaults to "<output_directory>/temp".
    temp_directory: Optional[Path] = None
    # "cuda" or "cpu"; auto-detected via torch when left as None.
    device: Optional[str] = None

    def __post_init__(self):
        # Normalise to Path so the "/" below also works for plain strings
        self.output_directory = Path(self.output_directory)

        # Use CUDA if available, else fall back to CPU
        self.device = self.device or ("cuda" if torch.cuda.is_available() else "cpu")

        # Default temp directory inside the output directory
        if self.temp_directory:
            self.temp_directory = Path(self.temp_directory)
        else:
            self.temp_directory = self.output_directory / "temp"

        # Ensure directories exist
        self.temp_directory.mkdir(parents=True, exist_ok=True)
        self.output_directory.mkdir(parents=True, exist_ok=True)
|
117
|
-
|
118
|
-
|
119
|
-
class TokenManager:
    """
    Handles storage and retrieval of the Hugging Face authentication token.

    The token is Fernet-encrypted and kept under the "token" key of a JSON
    file at ~/.pyannote/config.json.
    """
    def __init__(self):
        # Store config in ~/.pyannote/config.json
        self.config_dir = Path.home() / ".pyannote"
        self.config_file = self.config_dir / "config.json"
        self._initialize_config()

    def _initialize_config(self) -> None:
        """
        Create the configuration directory and file if missing and, on POSIX,
        restrict them to the current user.
        """
        self.config_dir.mkdir(parents=True, exist_ok=True)
        if not self.config_file.exists():
            self._save_config({})

        # Also tightens permissions on files created by earlier runs
        if os.name == "posix":
            os.chmod(self.config_dir, 0o700)
            os.chmod(self.config_file, 0o600)

    def _get_encryption_key(self) -> bytes:
        """
        Derive the Fernet key (url-safe base64 of 32 bytes) via PBKDF2-HMAC-SHA256.

        NOTE(review): the key material is the home directory path and the salt
        is a constant, so anyone who can read the config file and knows the
        home path can recompute the key. Changing the derivation would make
        already-stored tokens undecryptable, so it is left as is.
        """
        salt = b"pyannote-audio-salt"
        kdf = PBKDF2HMAC(
            algorithm=hashes.SHA256(),
            length=32,
            salt=salt,
            iterations=100000,
        )
        key = kdf.derive(str(Path.home()).encode())
        return base64.urlsafe_b64encode(key)

    def _save_config(self, config: dict) -> None:
        """
        Write `config` as JSON to the config file, replacing its contents.

        A newly created file gets mode 0o600 from the start, so there is no
        moment at which it exists with default permissions.

        Raises:
            OSError: if the file cannot be opened or written.
        """
        flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
        fd = os.open(self.config_file, flags, 0o600)
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            json.dump(config, f)

    def _load_config(self) -> dict:
        """
        Load configuration from file.

        Returns an empty dict when the file is missing, unreadable, not valid
        JSON, or does not contain a JSON object.
        """
        try:
            with open(self.config_file, "r", encoding="utf-8") as f:
                data = json.load(f)
        except FileNotFoundError:
            return {}
        except (OSError, ValueError) as e:
            # ValueError covers JSONDecodeError and UnicodeDecodeError
            logger.warning("Could not read config file %s: %s", self.config_file, e)
            return {}
        # Callers index the result by key; anything but a dict is unusable
        return data if isinstance(data, dict) else {}

    def store_token(self, token: str) -> bool:
        """
        Encrypt `token` and save it in the config file.

        Returns True on success, False (after logging) on any failure.
        """
        try:
            fernet = Fernet(self._get_encryption_key())
            encrypted_token = fernet.encrypt(token.encode())

            config = self._load_config()
            config["token"] = encrypted_token.decode()

            self._save_config(config)
            return True
        except Exception as e:
            logger.error("Failed to store token: %s", e)
            return False

    def retrieve_token(self) -> Optional[str]:
        """
        Return the decrypted stored token, or None if there is no token or it
        cannot be decrypted (the failure is logged).
        """
        try:
            config = self._load_config()
            if "token" in config:
                fernet = Fernet(self._get_encryption_key())
                return fernet.decrypt(config["token"].encode()).decode()
        except Exception as e:
            logger.error("Failed to retrieve token: %s", e)
        return None

    def delete_token(self) -> bool:
        """
        Remove the stored token from the config file.

        Returns True when no token remains afterwards (including when none was
        stored), False (after logging) if the config could not be rewritten.
        """
        try:
            config = self._load_config()
            if "token" in config:
                del config["token"]
                self._save_config(config)
            return True
        except Exception as e:
            logger.error("Failed to delete token: %s", e)
            return False
|
216
|
-
|
217
|
-
|
218
|
-
class DependencyManager:
    """
    Checks that the packages this tool relies on are installed, using
    importlib.metadata to look up installed versions.
    """
    # Distribution name -> exact required version, or None for "any version".
    REQUIRED_PACKAGES = {
        "torch": None,
        "pyannote.audio": None,
        "openai-whisper": None,
        "pytorch-lightning": None,
        "keyring": None,
    }

    @classmethod
    def verify_dependencies(cls) -> bool:
        """
        Check every entry of REQUIRED_PACKAGES.

        Returns True when all packages are installed and, where a version is
        pinned, the installed version equals it. Otherwise logs the missing
        and mismatching packages plus a pip install hint, and returns False.
        """
        not_installed = []
        version_mismatch = []

        for name, wanted in cls.REQUIRED_PACKAGES.items():
            try:
                found = importlib.metadata.version(name)
            except PackageNotFoundError:
                not_installed.append(name)
                continue
            if wanted and found != wanted:
                version_mismatch.append(
                    f"{name} (installed: {found}, required: {wanted})"
                )

        if not (not_installed or version_mismatch):
            return True

        if not_installed:
            logger.error("Missing packages: %s", ", ".join(not_installed))
        if version_mismatch:
            logger.error("Outdated packages: %s", ", ".join(version_mismatch))

        install_specs = [
            f"{pkg}=={ver}" if ver else pkg
            for pkg, ver in cls.REQUIRED_PACKAGES.items()
        ]
        logger.info(
            "Install required packages: pip install %s", " ".join(install_specs)
        )
        return False
|
263
|
-
|
264
|
-
|
265
|
-
class AudioProcessor:
    """
    Handles audio file processing and segmentation using the `wave` module.
    """
    def __init__(self, config: "TranscriptionConfig"):
        self.config = config

    def load_audio_segment(
        self,
        audio_path: Path,
        start_time: float,
        end_time: float,
        output_path: Path,
    ) -> bool:
        """
        Copy the audio between `start_time` and `end_time` (seconds) from the
        WAV file `audio_path` into a new WAV file at `output_path`.

        The end is clamped to the end of the file. An empty or inverted range
        (end <= start) produces a zero-frame output file.

        Returns:
            True if the segment was written, False (after logging) if reading
            or writing failed, e.g. the input is not a readable WAV file or
            the start position lies outside it.
        """
        try:
            with wave.open(str(audio_path), "rb") as infile:
                params = infile.getparams()
                frame_rate = params.framerate
                start_frame = int(start_time * frame_rate)
                end_frame = min(int(end_time * frame_rate), infile.getnframes())
                # A negative count would make readframes() read to end of file
                frame_count = max(0, end_frame - start_frame)

                infile.setpos(start_frame)
                frames = infile.readframes(frame_count)

            with wave.open(str(output_path), "wb") as outfile:
                outfile.setparams(params)
                outfile.writeframes(frames)
            return True
        except Exception as e:
            logger.error("Failed to process audio segment: %s", e)
            return False
|
299
|
-
|
300
|
-
|
301
|
-
class TranscriptionPipeline:
    """
    Main pipeline for audio transcription (Whisper) and speaker diarization (Pyannote).
    """
    def __init__(self, config: "TranscriptionConfig"):
        self.config = config
        self.diarization_pipeline = None
        self.whisper_model = None
        self.token_manager = TokenManager()
        self._running = False  # loop flag for the resource monitor thread

    def initialize_models(self, auth_token: str) -> bool:
        """
        Load the Whisper model and the Pyannote diarization pipeline onto the
        configured device.

        Returns True on success; on any failure logs the error together with
        the model-conditions links and returns False.
        """
        try:
            # Load Whisper model (set download root to avoid clutter in home directory)
            self.whisper_model = whisper.load_model(
                self.config.whisper_model,
                device=self.config.device,
                download_root=str(self.config.output_directory / "models"),
            )

            # Load Pyannote diarization pipeline
            self.diarization_pipeline = Pipeline.from_pretrained(
                self.config.diarization_model, use_auth_token=auth_token
            )
            self.diarization_pipeline.to(torch.device(self.config.device))

            if self.config.device == "cpu":
                warnings.warn("Running on CPU. GPU is recommended for better performance.")

            return True
        except Exception as e:
            logger.error(f"Model initialization failed: {e}")
            logger.error("Please ensure you have accepted the model conditions at:")
            logger.error("  1. https://huggingface.co/pyannote/segmentation-3.0")
            logger.error("  2. https://huggingface.co/pyannote/speaker-diarization-3.1")
            return False

    def _update_resources(self, bar):
        """
        Refresh the progress bar text with CPU/MEM/GPU usage roughly every
        0.5 s until self._running is False. Runs in a background thread.
        """
        import time

        while self._running:
            try:
                time.sleep(0.5)

                cpu_usage = psutil.cpu_percent(interval=None) if HAVE_PROGRESS_SUPPORT else 0
                memory_usage = psutil.virtual_memory().percent if HAVE_PROGRESS_SUPPORT else 0

                # Query the GPUs once per tick and reuse the result
                gpus = GPUtil.getGPUs() if HAVE_PROGRESS_SUPPORT else []
                if gpus:
                    gpu_mem_used = f"{gpus[0].memoryUsed:.0f}"
                    gpu_mem_total = f"{gpus[0].memoryTotal:.0f}"
                    gpu_usage_text = f"{gpu_mem_used}/{gpu_mem_total} MB"
                else:
                    gpu_usage_text = "N/A"

                resource_text = f"CPU: {cpu_usage}%, MEM: {memory_usage}%, GPU Mem: {gpu_usage_text}"
                bar.text(resource_text)
            except Exception as e:
                logger.error(f"Resource monitoring error: {e}")

    def _transcribe_segments(self, audio_path, segments, audio_processor, out_file, on_segment_done=None):
        """
        Transcribe each diarized segment and append one line per segment to `out_file`.

        For every (turn, _, speaker) item the segment is cut into a temporary
        WAV file, transcribed with Whisper, and the temporary file removed.
        Segments that cannot be cut are skipped. `on_segment_done`, if given,
        is called once per segment whether or not it was transcribed.
        """
        for turn, _, speaker in segments:
            segment_path = (
                self.config.temp_directory
                / f"segment_{speaker}_{turn.start:.2f}_{turn.end:.2f}.wav"
            )
            if audio_processor.load_audio_segment(audio_path, turn.start, turn.end, segment_path):
                transcription = self.whisper_model.transcribe(str(segment_path))["text"]
                segment_path.unlink(missing_ok=True)

                line = f"[{turn.start:.2f}s - {turn.end:.2f}s] Speaker {speaker}: {transcription.strip()}\n"
                out_file.write(line)
                logger.info(line.strip())

            if on_segment_done is not None:
                on_segment_done()

    def process_file(self, audio_path: Path) -> bool:
        """
        Diarize, segment, and transcribe using Whisper + Pyannote with progress feedback.

        Writes the transcript to "transcript_<timestamp>.txt" in the configured
        output directory. Returns True on success, False (after logging) if
        any step raised.
        """
        try:
            logger.info("Starting audio processing...")
            diarization = self.diarization_pipeline(str(audio_path))
            segments = list(diarization.itertracks(yield_label=True))
            total_segments = len(segments)

            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_file = self.config.output_directory / f"transcript_{timestamp}.txt"
            audio_processor = AudioProcessor(self.config)

            if not HAVE_PROGRESS_SUPPORT:
                # No alive_progress, psutil, or GPUtil installed
                logger.info("Processing audio without progress bar (missing optional packages).")
                with output_file.open("w", encoding="utf-8") as f:
                    self._transcribe_segments(audio_path, segments, audio_processor, f)
            else:
                # Use a progress bar to track segment transcription
                from alive_progress import alive_bar
                import threading

                with output_file.open("w", encoding="utf-8") as f, alive_bar(
                    total_segments,
                    title="Transcribing Audio",
                    spinner="pulse",
                    theme="classic",
                    stats=False,
                    elapsed=True,
                    monitor=True,
                ) as bar:
                    # Start a background thread for resource monitoring
                    self._running = True
                    resource_thread = threading.Thread(target=self._update_resources, args=(bar,))
                    resource_thread.start()
                    try:
                        self._transcribe_segments(
                            audio_path, segments, audio_processor, f, on_segment_done=bar
                        )
                    finally:
                        # Always stop the monitor, even when transcription
                        # raises; otherwise the thread keeps the process alive
                        self._running = False
                        resource_thread.join()

            logger.info(f"Transcription completed. Output saved to: {output_file}")
            return True

        except Exception as e:
            logger.error(f"Processing failed: {e}")
            return False
|
443
|
-
|
444
|
-
|
445
|
-
def get_token(token_manager: TokenManager) -> Optional[str]:
    """
    Obtain a Hugging Face token interactively.

    If a token is stored, the user is asked whether to reuse it. Otherwise
    (or if they decline) the user is prompted to enter one and may choose to
    save it. Returns the token, or None if nothing was entered.
    """
    saved = token_manager.retrieve_token()
    if saved:
        answer = input("\nUse the stored Hugging Face token? (y/n): ")
        if answer.lower().strip() == "y":
            return saved

    instructions = (
        "\nA HuggingFace token is required for speaker diarization.",
        "Get your token at: https://huggingface.co/settings/tokens",
        "\nEnsure you have accepted:",
        "  1. pyannote/segmentation-3.0 conditions",
        "  2. pyannote/speaker-diarization-3.1 conditions",
    )
    for message in instructions:
        print(message)

    token = input("\nEnter HuggingFace token: ").strip()
    if not token:
        return None

    wants_save = input("Save token for future use? (y/n): ").lower().strip() == "y"
    if wants_save:
        if token_manager.store_token(token):
            print("Token saved successfully.")
        else:
            print("Failed to save token. It will be used for this session only.")
    return token
|
470
|
-
|
471
|
-
|
472
|
-
def main():
    """
    Command-line entry point.

    Parses arguments, verifies dependencies, resolves the Hugging Face token,
    loads the models, determines the audio file (prompting until an existing
    regular file is given) and runs the transcription. Exits with status 1 on
    any failure; with --delete-token it only removes the stored token and exits.
    """
    parser = argparse.ArgumentParser(
        description="Audio Transcription Pipeline using Whisper + Pyannote, with optional progress bar."
    )
    parser.add_argument(
        "--audio",
        type=Path,
        help="Path to the audio file to transcribe."
    )
    parser.add_argument(
        "--token",
        help="HuggingFace API token. Overrides any saved token."
    )
    parser.add_argument(
        "--output",
        type=Path,
        help="Path to the output directory for transcripts and temporary files.",
    )
    parser.add_argument(
        "--delete-token",
        action="store_true",
        help="Delete any stored Hugging Face token and exit.",
    )
    parser.add_argument(
        "--show-warnings",
        action="store_true",
        help="Enable user warnings (e.g., from pyannote.audio). Disabled by default.",
    )
    parser.add_argument(
        "--whisper-model",
        default="base.en",
        help="Specify the Whisper model to use (default: 'base.en').",
    )
    args = parser.parse_args()

    # Manage user warnings
    if not args.show_warnings:
        warnings.filterwarnings("ignore", category=UserWarning, module=r"pyannote\.audio")
        warnings.filterwarnings("ignore", category=FutureWarning, module="whisper")
    else:
        warnings.resetwarnings()

    # Check dependencies
    if not DependencyManager.verify_dependencies():
        sys.exit(1)

    # Initialize tab-completion for file paths (Unix-like only, or with pyreadline on Windows)
    readline.set_completer_delims(' \t\n;')
    readline.set_completer(complete_path)
    readline.parse_and_bind("tab: complete")

    # Initialize the token manager
    token_manager = TokenManager()

    # If user wants to delete the stored token, do so and exit
    if args.delete_token:
        success = token_manager.delete_token()
        sys.exit(0 if success else 1)

    # Prepare configuration
    output_dir = args.output or (Path("transcripts") / datetime.now().strftime("%Y%m%d"))
    config = TranscriptionConfig(
        output_directory=output_dir,
        whisper_model=args.whisper_model
    )

    # Initialize pipeline
    pipeline = TranscriptionPipeline(config)
    hf_token = args.token or get_token(token_manager)
    if not hf_token:
        logger.error("No Hugging Face token provided. Exiting.")
        sys.exit(1)

    # Initialize models
    if not pipeline.initialize_models(hf_token):
        logger.error("Failed to initialize pipeline. Exiting.")
        sys.exit(1)

    # Prompt user for audio file path if not passed in.
    # is_file() rather than exists(): an empty answer becomes Path("."), which
    # exists, and a directory is never a valid audio input.
    audio_path = args.audio.expanduser() if args.audio else None
    while audio_path is None or not audio_path.is_file():
        audio_path_str = input("\nEnter path to audio file (Tab for autocomplete): ").strip()
        if not audio_path_str:
            print("No path entered. Please try again.")
            audio_path = None
            continue
        audio_path = Path(audio_path_str).expanduser()
        if not audio_path.is_file():
            print(f"File '{audio_path}' not found. Please try again.")

    print("Audio file path accepted. Preparing to process the audio...")
    sys.stdout.flush()

    # Process the audio file
    if not pipeline.process_file(audio_path):
        sys.exit(1)


if __name__ == "__main__":
    main()
|
@@ -1,9 +0,0 @@
|
|
1
|
-
audio_scribe/__init__.py,sha256=19NLfiVus01TtbB1SFwJ3Q-vFvN9nLzNYGIZiNB45qM,587
|
2
|
-
audio_scribe/cli.py,sha256=LToGAiCHHXDitsXBuMqKMHkH_HzSARX0C06-Ha74jKU,20287
|
3
|
-
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
-
tests/test_audio_scribe_main.py,sha256=Jv5XixrhsK49RJBh6HlghnIxxDiCq52PUv0lf8ljpJY,15571
|
5
|
-
audio_scribe-0.1.1.dist-info/METADATA,sha256=CBHEE3qzCnRWQ-7ljDGMbY8i5awoGYYDcgbPUH6Za-M,10455
|
6
|
-
audio_scribe-0.1.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
7
|
-
audio_scribe-0.1.1.dist-info/entry_points.txt,sha256=eaO9r_zAFnrWseKyJcBpGUHQq-P7NXBw5er8sZaPfFU,55
|
8
|
-
audio_scribe-0.1.1.dist-info/top_level.txt,sha256=K08EDnZLtXcJJ9RxLnzDUz-AmnUo5vGRyYmS3wSirtE,19
|
9
|
-
audio_scribe-0.1.1.dist-info/RECORD,,
|
tests/__init__.py
DELETED
File without changes
|