audio-scribe 0.1.0__tar.gz → 0.1.2__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (25) hide show
  1. {audio_scribe-0.1.0 → audio_scribe-0.1.2}/PKG-INFO +9 -2
  2. {audio_scribe-0.1.0 → audio_scribe-0.1.2}/README.md +8 -1
  3. {audio_scribe-0.1.0 → audio_scribe-0.1.2}/setup.py +5 -6
  4. audio_scribe-0.1.2/src/audio_scribe/__init__.py +24 -0
  5. audio_scribe-0.1.2/src/audio_scribe/auth.py +119 -0
  6. audio_scribe-0.1.2/src/audio_scribe/config.py +24 -0
  7. audio_scribe-0.1.2/src/audio_scribe/models.py +196 -0
  8. audio_scribe-0.1.2/src/audio_scribe/transcriber.py +131 -0
  9. audio_scribe-0.1.2/src/audio_scribe/utils.py +93 -0
  10. {audio_scribe-0.1.0 → audio_scribe-0.1.2/src}/audio_scribe.egg-info/PKG-INFO +9 -2
  11. audio_scribe-0.1.2/src/audio_scribe.egg-info/SOURCES.txt +19 -0
  12. audio_scribe-0.1.2/src/audio_scribe.egg-info/entry_points.txt +2 -0
  13. audio_scribe-0.1.2/tests/test_auth.py +92 -0
  14. audio_scribe-0.1.2/tests/test_config.py +47 -0
  15. audio_scribe-0.1.2/tests/test_models.py +350 -0
  16. audio_scribe-0.1.2/tests/test_transcriber.py +166 -0
  17. audio_scribe-0.1.2/tests/test_utils.py +83 -0
  18. audio_scribe-0.1.0/audio_scribe/__init__.py +0 -0
  19. audio_scribe-0.1.0/audio_scribe/cli.py +0 -567
  20. audio_scribe-0.1.0/audio_scribe.egg-info/SOURCES.txt +0 -10
  21. audio_scribe-0.1.0/audio_scribe.egg-info/entry_points.txt +0 -2
  22. {audio_scribe-0.1.0 → audio_scribe-0.1.2}/setup.cfg +0 -0
  23. {audio_scribe-0.1.0 → audio_scribe-0.1.2/src}/audio_scribe.egg-info/dependency_links.txt +0 -0
  24. {audio_scribe-0.1.0 → audio_scribe-0.1.2/src}/audio_scribe.egg-info/requires.txt +0 -0
  25. {audio_scribe-0.1.0 → audio_scribe-0.1.2/src}/audio_scribe.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: audio_scribe
3
- Version: 0.1.0
3
+ Version: 0.1.2
4
4
  Summary: A command-line tool for audio transcription with Whisper and Pyannote.
5
5
  Home-page: https://gitlab.genomicops.cloud/genomicops/audio-scribe
6
6
  Author: Gurasis Osahan
@@ -46,9 +46,16 @@ Dynamic: summary
46
46
 
47
47
  # Audio Scribe
48
48
 
49
- **A Command-Line Tool for Audio Transcription (Audio Scribe) and Speaker Diarization Using OpenAI Whisper and Pyannote**
49
+ **A Command-Line Tool for Audio Transcription and Speaker Diarization Using OpenAI Whisper and Pyannote**
50
50
 
51
51
  [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE)
52
+ ![Coverage](https://img.shields.io/badge/coverage-94.3%25-brightgreen)
53
+ [![Pipeline Status](https://gitlab.genomicops.cloud/innovation-hub/audio-scribe/badges/main/pipeline.svg)](https://gitlab.genomicops.cloud/innovation-hub/audio-scribe/-/commits/main)
54
+ [![PyPI Version](https://badge.fury.io/py/audio-scribe.svg)](https://badge.fury.io/py/audio-scribe)
55
+ [![Python Versions](https://img.shields.io/pypi/pyversions/audio-scribe)](https://pypi.org/project/audio-scribe/)
56
+ [![PyPI Downloads](https://img.shields.io/pypi/dm/audio-scribe)](https://pypi.org/project/audio-scribe/)
57
+ [![PyPI License](https://img.shields.io/pypi/l/audio-scribe)](https://pypi.org/project/audio-scribe/)
58
+ <!-- [![Coverage Report](https://gitlab.genomicops.cloud/innovation-hub/audio-scribe/badges/main/coverage.svg)](https://gitlab.genomicops.cloud/innovation-hub/audio-scribe/-/commits/main) -->
52
59
 
53
60
  ## Overview
54
61
 
@@ -1,8 +1,15 @@
1
1
  # Audio Scribe
2
2
 
3
- **A Command-Line Tool for Audio Transcription (Audio Scribe) and Speaker Diarization Using OpenAI Whisper and Pyannote**
3
+ **A Command-Line Tool for Audio Transcription and Speaker Diarization Using OpenAI Whisper and Pyannote**
4
4
 
5
5
  [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE)
6
+ ![Coverage](https://img.shields.io/badge/coverage-94.3%25-brightgreen)
7
+ [![Pipeline Status](https://gitlab.genomicops.cloud/innovation-hub/audio-scribe/badges/main/pipeline.svg)](https://gitlab.genomicops.cloud/innovation-hub/audio-scribe/-/commits/main)
8
+ [![PyPI Version](https://badge.fury.io/py/audio-scribe.svg)](https://badge.fury.io/py/audio-scribe)
9
+ [![Python Versions](https://img.shields.io/pypi/pyversions/audio-scribe)](https://pypi.org/project/audio-scribe/)
10
+ [![PyPI Downloads](https://img.shields.io/pypi/dm/audio-scribe)](https://pypi.org/project/audio-scribe/)
11
+ [![PyPI License](https://img.shields.io/pypi/l/audio-scribe)](https://pypi.org/project/audio-scribe/)
12
+ <!-- [![Coverage Report](https://gitlab.genomicops.cloud/innovation-hub/audio-scribe/badges/main/coverage.svg)](https://gitlab.genomicops.cloud/innovation-hub/audio-scribe/-/commits/main) -->
6
13
 
7
14
  ## Overview
8
15
 
@@ -5,14 +5,15 @@ with open("README.md", "r", encoding="utf-8") as fh:
5
5
 
6
6
  setuptools.setup(
7
7
  name="audio_scribe",
8
- version="0.1.0",
8
+ version="0.1.2",
9
9
  author="Gurasis Osahan",
10
10
  author_email="contact@genomicops.com",
11
11
  description="A command-line tool for audio transcription with Whisper and Pyannote.",
12
12
  long_description=long_description,
13
13
  long_description_content_type="text/markdown",
14
14
  url="https://gitlab.genomicops.cloud/genomicops/audio-scribe",
15
- packages=setuptools.find_packages(),
15
+ package_dir={"": "src"},
16
+ packages=setuptools.find_packages(where="src"),
16
17
  python_requires=">=3.8",
17
18
  install_requires=[
18
19
  "torch",
@@ -26,9 +27,7 @@ setuptools.setup(
26
27
  "GPUtil",
27
28
  ],
28
29
  entry_points={
29
- "console_scripts": [
30
- "audio-scribe=audio_scribe.cli:main",
31
- ]
30
+ "console_scripts": ["audio-scribe=audio_scribe.transcriber:main"]
32
31
  },
33
32
  keywords="whisper pyannote transcription audio diarization",
34
33
  license="Apache-2.0",
@@ -49,4 +48,4 @@ setuptools.setup(
49
48
  "Programming Language :: Python :: 3.10",
50
49
  "Operating System :: OS Independent",
51
50
  ],
52
- )
51
+ )
@@ -0,0 +1,24 @@
1
+ """
2
+ Audio Scribe
3
+ -----------------
4
+ A Python package for transcribing audio files with speaker diarization
5
+ using Whisper and Pyannote.
6
+ """
7
+
8
+ from .transcriber import main
9
+ from .models import TranscriptionPipeline, AudioProcessor
10
+ from .config import TranscriptionConfig
11
+ from .auth import TokenManager
12
+ from .utils import DependencyManager, complete_path
13
+
14
+ __version__ = "0.1.2"
15
+
16
+ __all__ = [
17
+ "main",
18
+ "TranscriptionPipeline",
19
+ "TranscriptionConfig",
20
+ "AudioProcessor",
21
+ "TokenManager",
22
+ "DependencyManager",
23
+ "complete_path",
24
+ ]
@@ -0,0 +1,119 @@
1
+ """Authentication and token management for Audio Scribe."""
2
+
3
+ import os
4
+ import json
5
+ import base64
6
+ import logging
7
+ from pathlib import Path
8
+ from typing import Optional
9
+ from cryptography.fernet import Fernet
10
+ from cryptography.hazmat.primitives import hashes
11
+ from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ class TokenManager:
16
+ """Handles secure storage and retrieval of the Hugging Face authentication token."""
17
+ def __init__(self):
18
+ # Store config in ~/.pyannote/config.json
19
+ self.config_dir = Path.home() / ".pyannote"
20
+ self.config_file = self.config_dir / "config.json"
21
+ self._initialize_config()
22
+
23
+ def _initialize_config(self) -> None:
24
+ """Initialize configuration directory and file with secure permissions."""
25
+ self.config_dir.mkdir(exist_ok=True)
26
+ if not self.config_file.exists():
27
+ self._save_config({})
28
+
29
+ # Set secure file and directory permissions on POSIX systems
30
+ if os.name == "posix":
31
+ os.chmod(self.config_dir, 0o700)
32
+ os.chmod(self.config_file, 0o600)
33
+
34
+ def _get_encryption_key(self) -> bytes:
35
+ """Generate an encryption key from system-specific data."""
36
+ salt = b"pyannote-audio-salt"
37
+ kdf = PBKDF2HMAC(
38
+ algorithm=hashes.SHA256(),
39
+ length=32,
40
+ salt=salt,
41
+ iterations=100000,
42
+ )
43
+ key = kdf.derive(str(Path.home()).encode())
44
+ return base64.urlsafe_b64encode(key)
45
+
46
+ def _save_config(self, config: dict) -> None:
47
+ """Securely save configuration to file."""
48
+ with open(self.config_file, "w", encoding="utf-8") as f:
49
+ json.dump(config, f)
50
+
51
+ def _load_config(self) -> dict:
52
+ """Load configuration from file."""
53
+ try:
54
+ with open(self.config_file, "r", encoding="utf-8") as f:
55
+ return json.load(f)
56
+ except Exception:
57
+ return {}
58
+
59
+ def store_token(self, token: str) -> bool:
60
+ """Securely store authentication token."""
61
+ try:
62
+ fernet = Fernet(self._get_encryption_key())
63
+ encrypted_token = fernet.encrypt(token.encode())
64
+
65
+ config = self._load_config()
66
+ config["token"] = encrypted_token.decode()
67
+
68
+ self._save_config(config)
69
+ return True
70
+ except Exception as e:
71
+ logger.error(f"Failed to store token: {e}")
72
+ return False
73
+
74
+ def retrieve_token(self) -> Optional[str]:
75
+ """Retrieve stored authentication token."""
76
+ try:
77
+ config = self._load_config()
78
+ if "token" in config:
79
+ fernet = Fernet(self._get_encryption_key())
80
+ return fernet.decrypt(config["token"].encode()).decode()
81
+ except Exception as e:
82
+ logger.error(f"Failed to retrieve token: {e}")
83
+ return None
84
+
85
+ def delete_token(self) -> bool:
86
+ """Delete stored authentication token."""
87
+ try:
88
+ config = self._load_config()
89
+ if "token" in config:
90
+ del config["token"]
91
+ self._save_config(config)
92
+ return True
93
+ except Exception as e:
94
+ logger.error(f"Failed to delete token: {e}")
95
+ return False
96
+
97
+ def get_token(token_manager: TokenManager) -> Optional[str]:
98
+ """Get authentication token from storage or user input."""
99
+ stored_token = token_manager.retrieve_token()
100
+ if stored_token:
101
+ choice = input("\nUse the stored Hugging Face token? (y/n): ").lower().strip()
102
+ if choice == "y":
103
+ return stored_token
104
+
105
+ print("\nA HuggingFace token is required for speaker diarization.")
106
+ print("Get your token at: https://huggingface.co/settings/tokens")
107
+ print("\nEnsure you have accepted:")
108
+ print(" 1. pyannote/segmentation-3.0 conditions")
109
+ print(" 2. pyannote/speaker-diarization-3.1 conditions")
110
+
111
+ token = input("\nEnter HuggingFace token: ").strip()
112
+ if token:
113
+ choice = input("Save token for future use? (y/n): ").lower().strip()
114
+ if choice == "y":
115
+ if token_manager.store_token(token):
116
+ print("Token saved successfully.")
117
+ else:
118
+ print("Failed to save token. It will be used for this session only.")
119
+ return token if token else None
@@ -0,0 +1,24 @@
1
+ """Configuration management for Audio Scribe."""
2
+
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+ from typing import Optional
6
+ import torch
7
+
8
+ @dataclass
9
+ class TranscriptionConfig:
10
+ """Configuration settings for the transcription pipeline."""
11
+ output_directory: Path
12
+ whisper_model: str = "base.en"
13
+ diarization_model: str = "pyannote/speaker-diarization-3.1"
14
+ temp_directory: Optional[Path] = None
15
+ device: Optional[str] = None
16
+
17
+ def __post_init__(self):
18
+ # Use CUDA if available, else fall back to CPU
19
+ self.device = self.device or ("cuda" if torch.cuda.is_available() else "cpu")
20
+ # Default temp directory inside the output directory
21
+ self.temp_directory = self.temp_directory or (self.output_directory / "temp")
22
+ # Ensure directories exist
23
+ self.temp_directory.mkdir(parents=True, exist_ok=True)
24
+ self.output_directory.mkdir(parents=True, exist_ok=True)
@@ -0,0 +1,196 @@
1
+ """Model handling and audio processing for Audio Scribe."""
2
+
3
+ import wave
4
+ import torch
5
+ import whisper
6
+ import logging
7
+ import warnings
8
+ import threading
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+ from typing import Optional
12
+ from pyannote.audio import Pipeline
13
+
14
+ from .config import TranscriptionConfig
15
+ from .auth import TokenManager
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ try:
20
+ from alive_progress import alive_bar
21
+ import psutil
22
+ import GPUtil
23
+ HAVE_PROGRESS_SUPPORT = True
24
+ except ImportError:
25
+ HAVE_PROGRESS_SUPPORT = False
26
+
27
+
28
+ class AudioProcessor:
29
+ """Handles audio file processing and segmentation."""
30
+
31
+ def __init__(self, config: TranscriptionConfig):
32
+ self.config = config
33
+
34
+ def load_audio_segment(
35
+ self,
36
+ audio_path: Path,
37
+ start_time: float,
38
+ end_time: float,
39
+ output_path: Path,
40
+ ) -> bool:
41
+ """Extract and save the audio segment from start_time to end_time."""
42
+ try:
43
+ with wave.open(str(audio_path), "rb") as infile:
44
+ params = infile.getparams()
45
+ frame_rate = params.framerate
46
+ start_frame = int(start_time * frame_rate)
47
+ end_frame = min(int(end_time * frame_rate), infile.getnframes())
48
+
49
+ infile.setpos(start_frame)
50
+ frames = infile.readframes(end_frame - start_frame)
51
+
52
+ with wave.open(str(output_path), "wb") as outfile:
53
+ outfile.setparams(params)
54
+ outfile.writeframes(frames)
55
+ return True
56
+ except Exception as e:
57
+ logger.error(f"Failed to process audio segment: {e}")
58
+ return False
59
+
60
+
61
+ class TranscriptionPipeline:
62
+ """Main pipeline for audio transcription and speaker diarization."""
63
+
64
+ def __init__(self, config: TranscriptionConfig):
65
+ self.config = config
66
+ self.diarization_pipeline = None
67
+ self.whisper_model = None
68
+ self.token_manager = TokenManager()
69
+ self._running = False # used for resource monitor thread
70
+
71
+ def initialize_models(self, auth_token: str) -> bool:
72
+ """Initialize the Pyannote diarization pipeline and Whisper model."""
73
+ try:
74
+ # Load Whisper model
75
+ self.whisper_model = whisper.load_model(
76
+ self.config.whisper_model,
77
+ device=self.config.device,
78
+ download_root=str(self.config.output_directory / "models"),
79
+ )
80
+
81
+ # Load Pyannote diarization pipeline
82
+ self.diarization_pipeline = Pipeline.from_pretrained(
83
+ self.config.diarization_model,
84
+ use_auth_token=auth_token
85
+ )
86
+ self.diarization_pipeline.to(torch.device(self.config.device))
87
+
88
+ if self.config.device == "cpu":
89
+ warnings.warn("Running on CPU. GPU is recommended for better performance.")
90
+
91
+ return True
92
+ except Exception as e:
93
+ logger.error(f"Model initialization failed: {e}")
94
+ logger.error("Please ensure you have accepted the model conditions at:")
95
+ logger.error(" 1. https://huggingface.co/pyannote/segmentation-3.0")
96
+ logger.error(" 2. https://huggingface.co/pyannote/speaker-diarization-3.1")
97
+ return False
98
+
99
+ def _update_resources(self, bar):
100
+ """Update progress bar with resource usage information."""
101
+ while self._running:
102
+ try:
103
+ import time
104
+ time.sleep(0.5)
105
+
106
+ cpu_usage = psutil.cpu_percent(interval=None) if HAVE_PROGRESS_SUPPORT else 0
107
+ memory_usage = psutil.virtual_memory().percent if HAVE_PROGRESS_SUPPORT else 0
108
+
109
+ if HAVE_PROGRESS_SUPPORT and GPUtil.getGPUs():
110
+ gpus = GPUtil.getGPUs()
111
+ gpu_mem_used = f"{gpus[0].memoryUsed:.0f}"
112
+ gpu_mem_total = f"{gpus[0].memoryTotal:.0f}"
113
+ gpu_usage_text = f"{gpu_mem_used}/{gpu_mem_total} MB"
114
+ else:
115
+ gpu_usage_text = "N/A"
116
+
117
+ resource_text = f"CPU: {cpu_usage}%, MEM: {memory_usage}%, GPU Mem: {gpu_usage_text}"
118
+ bar.text(resource_text)
119
+ except Exception as e:
120
+ logger.error(f"Resource monitoring error: {e}")
121
+
122
+ def process_file(self, audio_path: Path) -> bool:
123
+ """Diarize, segment, and transcribe using Whisper + Pyannote with progress feedback."""
124
+ try:
125
+ logger.info("Starting audio processing...")
126
+ diarization = self.diarization_pipeline(str(audio_path))
127
+ segments = list(diarization.itertracks(yield_label=True))
128
+ total_segments = len(segments)
129
+
130
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
131
+ output_file = self.config.output_directory / f"transcript_{timestamp}.txt"
132
+ audio_processor = AudioProcessor(self.config)
133
+
134
+ if not HAVE_PROGRESS_SUPPORT:
135
+ # No alive_progress, psutil, or GPUtil installed
136
+ logger.info("Processing audio without progress bar (missing optional packages).")
137
+ with output_file.open("w", encoding="utf-8") as f:
138
+ for turn, _, speaker in segments:
139
+ segment_path = (
140
+ self.config.temp_directory
141
+ / f"segment_{speaker}_{turn.start:.2f}_{turn.end:.2f}.wav"
142
+ )
143
+ if audio_processor.load_audio_segment(audio_path, turn.start, turn.end, segment_path):
144
+ transcription = self.whisper_model.transcribe(str(segment_path))["text"]
145
+ segment_path.unlink(missing_ok=True)
146
+
147
+ line = f"[{turn.start:.2f}s - {turn.end:.2f}s] Speaker {speaker}: {transcription.strip()}\n"
148
+ f.write(line)
149
+ logger.info(line.strip())
150
+ return True
151
+ else:
152
+ # Use a progress bar to track segment transcription
153
+ from alive_progress import alive_bar
154
+ import threading
155
+
156
+ self._running = True
157
+ with output_file.open("w", encoding="utf-8") as f, alive_bar(
158
+ total_segments,
159
+ title="Transcribing Audio",
160
+ spinner="pulse",
161
+ theme="classic",
162
+ stats=False,
163
+ elapsed=True,
164
+ monitor=True,
165
+ ) as bar:
166
+
167
+ # Start a background thread for resource monitoring
168
+ resource_thread = threading.Thread(target=self._update_resources, args=(bar,))
169
+ resource_thread.start()
170
+
171
+ for turn, _, speaker in segments:
172
+ segment_path = (
173
+ self.config.temp_directory
174
+ / f"segment_{speaker}_{turn.start:.2f}_{turn.end:.2f}.wav"
175
+ )
176
+ if audio_processor.load_audio_segment(audio_path, turn.start, turn.end, segment_path):
177
+ transcription = self.whisper_model.transcribe(str(segment_path))["text"]
178
+ segment_path.unlink(missing_ok=True)
179
+
180
+ line = f"[{turn.start:.2f}s - {turn.end:.2f}s] Speaker {speaker}: {transcription.strip()}\n"
181
+ f.write(line)
182
+ logger.info(line.strip())
183
+
184
+ # Update the progress bar
185
+ bar()
186
+
187
+ # Stop resource monitoring
188
+ self._running = False
189
+ resource_thread.join()
190
+
191
+ logger.info(f"Transcription completed. Output saved to: {output_file}")
192
+ return True
193
+
194
+ except Exception as e:
195
+ logger.error(f"Processing failed: {e}")
196
+ return False
@@ -0,0 +1,131 @@
1
+ """
2
+ Main entry point for Audio Scribe transcription tool.
3
+ Handles CLI interface and orchestrates the transcription process.
4
+ """
5
+
6
+ import sys
7
+ import logging
8
+ import warnings
9
+ import argparse
10
+ import readline
11
+ from pathlib import Path
12
+ from datetime import datetime
13
+
14
+ from .config import TranscriptionConfig
15
+ from .models import TranscriptionPipeline
16
+ from .auth import TokenManager, get_token
17
+ from .utils import DependencyManager, complete_path
18
+
19
+ # Configure logging
20
+ LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
21
+ logging.basicConfig(
22
+ level=logging.INFO,
23
+ format=LOG_FORMAT,
24
+ handlers=[
25
+ logging.StreamHandler(),
26
+ logging.FileHandler("transcription.log", mode="a", encoding="utf-8"),
27
+ ],
28
+ )
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
+ def main():
33
+ """Main entry point for the Audio Scribe CLI."""
34
+ print("Initializing environment... Please wait while we load dependencies and models.")
35
+ sys.stdout.flush()
36
+
37
+ parser = argparse.ArgumentParser(
38
+ description="Audio Transcription Pipeline using Whisper + Pyannote, with optional progress bar."
39
+ )
40
+ parser.add_argument(
41
+ "--audio",
42
+ type=Path,
43
+ help="Path to the audio file to transcribe."
44
+ )
45
+ parser.add_argument(
46
+ "--token",
47
+ help="HuggingFace API token. Overrides any saved token."
48
+ )
49
+ parser.add_argument(
50
+ "--output",
51
+ type=Path,
52
+ help="Path to the output directory for transcripts and temporary files.",
53
+ )
54
+ parser.add_argument(
55
+ "--delete-token",
56
+ action="store_true",
57
+ help="Delete any stored Hugging Face token and exit.",
58
+ )
59
+ parser.add_argument(
60
+ "--show-warnings",
61
+ action="store_true",
62
+ help="Enable user warnings (e.g., from pyannote.audio). Disabled by default.",
63
+ )
64
+ parser.add_argument(
65
+ "--whisper-model",
66
+ default="base.en",
67
+ help="Specify the Whisper model to use (default: 'base.en').",
68
+ )
69
+ args = parser.parse_args()
70
+
71
+ # Manage user warnings
72
+ if not args.show_warnings:
73
+ warnings.filterwarnings("ignore", category=UserWarning, module=r"pyannote\.audio")
74
+ warnings.filterwarnings("ignore", category=FutureWarning, module="whisper")
75
+ else:
76
+ warnings.resetwarnings()
77
+
78
+ # Check dependencies
79
+ if not DependencyManager.verify_dependencies():
80
+ sys.exit(1)
81
+
82
+ # Initialize tab-completion for file paths
83
+ readline.set_completer_delims(' \t\n;')
84
+ readline.set_completer(complete_path)
85
+ readline.parse_and_bind("tab: complete")
86
+
87
+ # Initialize the token manager
88
+ token_manager = TokenManager()
89
+
90
+ # If user wants to delete the stored token, do so and exit
91
+ if args.delete_token:
92
+ success = token_manager.delete_token()
93
+ sys.exit(0 if success else 1)
94
+
95
+ # Prepare configuration
96
+ output_dir = args.output or (Path("transcripts") / datetime.now().strftime("%Y%m%d"))
97
+ config = TranscriptionConfig(
98
+ output_directory=output_dir,
99
+ whisper_model=args.whisper_model
100
+ )
101
+
102
+ # Initialize pipeline
103
+ pipeline = TranscriptionPipeline(config)
104
+ hf_token = args.token or get_token(token_manager)
105
+ if not hf_token:
106
+ logger.error("No Hugging Face token provided. Exiting.")
107
+ sys.exit(1)
108
+
109
+ # Initialize models
110
+ if not pipeline.initialize_models(hf_token):
111
+ logger.error("Failed to initialize pipeline. Exiting.")
112
+ sys.exit(1)
113
+
114
+ # Prompt user for audio file path if not passed in
115
+ audio_path = args.audio
116
+ while not audio_path or not audio_path.exists():
117
+ audio_path_str = input("\nEnter path to audio file (Tab for autocomplete): ").strip()
118
+ audio_path = Path(audio_path_str)
119
+ if not audio_path.exists():
120
+ print(f"File '{audio_path}' not found. Please try again.")
121
+
122
+ print("Audio file path accepted. Preparing to process the audio...")
123
+ sys.stdout.flush()
124
+
125
+ # Process the audio file
126
+ if not pipeline.process_file(audio_path):
127
+ sys.exit(1)
128
+
129
+
130
+ if __name__ == "__main__":
131
+ main()
@@ -0,0 +1,93 @@
1
+ """Utility functions and classes for Audio Scribe."""
2
+
3
+ import os
4
+ import glob
5
+ import logging
6
+ import importlib.metadata
7
+ from importlib.metadata import PackageNotFoundError
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+ def complete_path(text, state):
12
+ """
13
+ Return the 'state'-th completion for 'text'.
14
+ This function will be used by 'readline' to enable file path autocompletion.
15
+ """
16
+ # If the user typed a glob pattern (with * or ?)
17
+ if '*' in text or '?' in text:
18
+ matches = sorted(glob.glob(text))
19
+ else:
20
+ # Split off the directory name and partial file/directory name
21
+ directory, partial = os.path.split(text)
22
+ if not directory:
23
+ directory = '.'
24
+ try:
25
+ # List everything in 'directory' that starts with 'partial'
26
+ entries = sorted(os.listdir(directory))
27
+ except OSError:
28
+ # If directory doesn't exist or we lack permission, no matches
29
+ entries = []
30
+
31
+ matches = []
32
+ for entry in entries:
33
+ if entry.startswith(partial):
34
+ if directory == '.':
35
+ # Don't prefix current directory paths
36
+ full_path = entry
37
+ else:
38
+ # Keep the directory prefix for subdirectories
39
+ full_path = os.path.join(directory, entry)
40
+
41
+ # If it's a directory, add a trailing slash to indicate that
42
+ if os.path.isdir(full_path) and not full_path.endswith(os.path.sep):
43
+ full_path += os.path.sep
44
+ matches.append(full_path)
45
+
46
+ # If 'state' is beyond last match, return None
47
+ return matches[state] if state < len(matches) else None
48
+
49
+
50
+ class DependencyManager:
51
+ """Manages and verifies system dependencies."""
52
+
53
+ REQUIRED_PACKAGES = {
54
+ "torch": None,
55
+ "pyannote.audio": None,
56
+ "openai-whisper": None,
57
+ "pytorch-lightning": None,
58
+ "keyring": None,
59
+ }
60
+
61
+ @classmethod
62
+ def verify_dependencies(cls) -> bool:
63
+ """
64
+ Verify all required dependencies are installed with correct versions.
65
+ Returns True if all are installed and correct, False otherwise.
66
+ """
67
+ missing = []
68
+ outdated = []
69
+
70
+ for package, required_version in cls.REQUIRED_PACKAGES.items():
71
+ try:
72
+ installed_version = importlib.metadata.version(package)
73
+ if required_version and installed_version != required_version:
74
+ outdated.append(
75
+ f"{package} (installed: {installed_version}, required: {required_version})"
76
+ )
77
+ except PackageNotFoundError:
78
+ missing.append(package)
79
+
80
+ if missing or outdated:
81
+ if missing:
82
+ logger.error("Missing packages: %s", ", ".join(missing))
83
+ if outdated:
84
+ logger.error("Outdated packages: %s", ", ".join(outdated))
85
+ logger.info(
86
+ "Install required packages: pip install %s",
87
+ " ".join(
88
+ f"{pkg}=={ver}" if ver else pkg
89
+ for pkg, ver in cls.REQUIRED_PACKAGES.items()
90
+ ),
91
+ )
92
+ return False
93
+ return True