audio-scribe 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- audio_scribe/__init__.py +6 -6
- audio_scribe/models.py +2 -2
- audio_scribe/transcriber.py +296 -58
- {audio_scribe-0.1.5.dist-info → audio_scribe-0.1.6.dist-info}/METADATA +96 -23
- audio_scribe-0.1.6.dist-info/RECORD +12 -0
- {audio_scribe-0.1.5.dist-info → audio_scribe-0.1.6.dist-info}/WHEEL +1 -1
- {audio_scribe-0.1.5.dist-info → audio_scribe-0.1.6.dist-info}/entry_points.txt +1 -0
- audio_scribe-0.1.5.dist-info/RECORD +0 -12
- {audio_scribe-0.1.5.dist-info → audio_scribe-0.1.6.dist-info/licenses}/LICENSE +0 -0
- {audio_scribe-0.1.5.dist-info → audio_scribe-0.1.6.dist-info}/top_level.txt +0 -0
audio_scribe/__init__.py
CHANGED
@@ -5,13 +5,13 @@ A Python package for transcribing audio files with speaker diarization
|
|
5
5
|
using Whisper and Pyannote.
|
6
6
|
"""
|
7
7
|
|
8
|
-
from .transcriber import main
|
9
|
-
from .models import TranscriptionPipeline, AudioProcessor
|
10
|
-
from .config import TranscriptionConfig
|
11
|
-
from .auth import TokenManager
|
12
|
-
from .utils import DependencyManager, complete_path
|
8
|
+
# Define the version before importing any submodule.  ``transcriber`` executes
# ``from audio_scribe import __version__`` while this package is still being
# initialised; if the attribute does not exist yet, that import fails and the
# CLI reports its version as "unknown".
__version__ = "0.1.6"

from audio_scribe.transcriber import main
from audio_scribe.models import TranscriptionPipeline, AudioProcessor
from audio_scribe.config import TranscriptionConfig
from audio_scribe.auth import TokenManager
from audio_scribe.utils import DependencyManager, complete_path
|
15
15
|
|
16
16
|
__all__ = [
|
17
17
|
"main",
|
audio_scribe/models.py
CHANGED
@@ -11,8 +11,8 @@ from datetime import datetime
|
|
11
11
|
from pathlib import Path
|
12
12
|
from pyannote.audio import Pipeline # type: ignore
|
13
13
|
|
14
|
-
from .config import TranscriptionConfig
|
15
|
-
from .auth import TokenManager
|
14
|
+
from audio_scribe.config import TranscriptionConfig
|
15
|
+
from audio_scribe.auth import TokenManager
|
16
16
|
|
17
17
|
logger = logging.getLogger(__name__)
|
18
18
|
|
audio_scribe/transcriber.py
CHANGED
@@ -1,6 +1,13 @@
|
|
1
1
|
"""
|
2
|
-
|
3
|
-
|
2
|
+
Audio Scribe - Professional Audio Transcription Tool
|
3
|
+
|
4
|
+
Main entry point for the Audio Scribe transcription system.
|
5
|
+
Provides a comprehensive CLI interface for audio transcription using
|
6
|
+
Whisper speech recognition and Pyannote speaker diarization.
|
7
|
+
|
8
|
+
Author: Gurasis Osahan
|
9
|
+
Organization: GenomicOps
|
10
|
+
License: Apache-2.0
|
4
11
|
"""
|
5
12
|
|
6
13
|
import sys
|
@@ -11,123 +18,354 @@ import readline
|
|
11
18
|
from pathlib import Path
|
12
19
|
from datetime import datetime
|
13
20
|
|
14
|
-
from .config import TranscriptionConfig
|
15
|
-
from .models import TranscriptionPipeline
|
16
|
-
from .auth import TokenManager, get_token
|
17
|
-
from .utils import DependencyManager, complete_path
|
21
|
+
from audio_scribe.config import TranscriptionConfig
|
22
|
+
from audio_scribe.models import TranscriptionPipeline
|
23
|
+
from audio_scribe.auth import TokenManager, get_token
|
24
|
+
from audio_scribe.utils import DependencyManager, complete_path
|
25
|
+
|
26
|
+
# Resolve the version string used by --version and the startup banner.
try:
    from audio_scribe import __version__
except ImportError:
    # Reached whenever the package does not (yet) expose ``__version__`` --
    # for example when this module is imported while the package ``__init__``
    # is still executing.  Ask the installed distribution metadata before
    # giving up, so the CLI does not report "unknown" for a normal install.
    try:
        from importlib.metadata import version as _distribution_version

        __version__ = _distribution_version("audio_scribe")
    except Exception:  # best-effort lookup; never block start-up on it
        __version__ = "unknown"
|
31
|
+
|
32
|
+
# Configure professional logging
LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
LOG_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
LOG_FILE = "audio_scribe.log"


def _build_log_handlers(log_file=LOG_FILE):
    """
    Create the handlers passed to ``logging.basicConfig``.

    Args:
        log_file: Path of the log file to append to.

    Returns:
        tuple: ``(handlers, error)``.  ``handlers`` always contains a stdout
        stream handler and additionally a UTF-8 file handler when the log
        file could be opened.  ``error`` is the ``OSError`` raised while
        opening the file, or ``None``.
    """
    handlers = [logging.StreamHandler(sys.stdout)]
    try:
        handlers.append(logging.FileHandler(log_file, mode="a", encoding="utf-8"))
    except OSError as exc:
        # This code runs at import time; an unwritable working directory must
        # not make the whole package unimportable, so log to stdout only.
        return handlers, exc
    return handlers, None


_log_handlers, _log_file_error = _build_log_handlers()

logging.basicConfig(
    level=logging.INFO,
    format=LOG_FORMAT,
    datefmt=LOG_DATE_FORMAT,
    handlers=_log_handlers,
)

logger = logging.getLogger(__name__)

if _log_file_error is not None:
    logger.warning("File logging disabled: %s", _log_file_error)
|
30
47
|
|
31
48
|
|
32
|
-
def
|
33
|
-
"""
|
34
|
-
|
35
|
-
"Initializing environment... Please wait while we load dependencies and models."
|
36
|
-
)
|
37
|
-
sys.stdout.flush()
|
49
|
+
# Model names accepted by --whisper-model.  Besides the short aliases this
# includes the versioned "large" checkpoints published by openai-whisper, so
# users who pin e.g. "large-v3" are not rejected by the parser.
_WHISPER_MODELS = (
    "tiny",
    "tiny.en",
    "base",
    "base.en",
    "small",
    "small.en",
    "medium",
    "medium.en",
    "large",
    "large-v1",
    "large-v2",
    "large-v3",
    "large-v3-turbo",
    "turbo",
)


def setup_argument_parser():
    """
    Configure and return the command-line argument parser.

    The parser defines ``--version``, ``--audio``, ``--output``, ``--token``,
    ``--delete-token``, ``--whisper-model``, ``--show-warnings``,
    ``--verbose``/``-v`` and ``--quiet``/``-q``.

    Returns:
        argparse.ArgumentParser: Configured argument parser
    """
    # The default help formatter is used on purpose: the description is a
    # single long string, and a "raw" formatter would print it unwrapped.
    parser = argparse.ArgumentParser(
        prog="audio-scribe",
        description=(
            "Audio Scribe - Professional audio transcription tool utilizing "
            "OpenAI Whisper for speech recognition and Pyannote for speaker diarization. "
            "Processes audio files to generate accurate, timestamped transcripts with "
            "speaker identification."
        ),
        epilog=(
            "For more information and documentation, visit: "
            "https://gitlab.genomicops.cloud/innovation-hub/audio-scribe"
        ),
    )

    # Version information
    parser.add_argument(
        "--version",
        action="version",
        version=f"%(prog)s {__version__}",
        help="Display version information and exit",
    )

    # Core functionality arguments
    parser.add_argument(
        "--audio",
        type=Path,
        metavar="PATH",
        help=(
            "Path to the input audio file for transcription. "
            "Supports common audio formats (WAV, MP3, MP4, FLAC, etc.). "
            "If not provided, you will be prompted to enter the path interactively."
        ),
    )

    parser.add_argument(
        "--output",
        type=Path,
        metavar="DIRECTORY",
        help=(
            "Output directory for transcription results and temporary files. "
            "Creates timestamped subdirectories to organize outputs. "
            "Default: ./transcripts/YYYYMMDD/"
        ),
    )

    # Authentication and configuration
    parser.add_argument(
        "--token",
        metavar="TOKEN",
        help=(
            "HuggingFace API token for accessing Pyannote models. "
            "Required for speaker diarization functionality. "
            "Overrides any previously saved token. "
            "Obtain from: https://huggingface.co/settings/tokens"
        ),
    )

    parser.add_argument(
        "--delete-token",
        action="store_true",
        help=(
            "Remove any stored HuggingFace token from the system keyring and exit. "
            "Useful for switching between different HuggingFace accounts or "
            "clearing credentials for security purposes."
        ),
    )

    # Model and processing options
    parser.add_argument(
        "--whisper-model",
        default="base.en",
        choices=list(_WHISPER_MODELS),
        metavar="MODEL",
        help=(
            "Whisper model for speech recognition (default: base.en). "
            "Larger models provide better accuracy but require more processing time and memory. "
            "English-specific models (.en) are optimized for English-only content. "
            # Generated from the choices so the two can never drift apart.
            "Available: " + ", ".join(_WHISPER_MODELS)
        ),
    )

    # Debug and development options
    parser.add_argument(
        "--show-warnings",
        action="store_true",
        help=(
            "Enable display of library warnings during processing. "
            "Warnings are suppressed by default to provide cleaner output. "
            "Enable for debugging or development purposes."
        ),
    )

    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Enable verbose logging output for detailed processing information",
    )

    parser.add_argument(
        "--quiet",
        "-q",
        action="store_true",
        help="Suppress non-essential output messages",
    )

    return parser
|
175
|
+
|
176
|
+
|
177
|
+
def configure_logging(verbose: bool, quiet: bool):
    """
    Set the root logger level from the --verbose / --quiet flags.

    Verbose selects DEBUG, quiet selects WARNING, and neither selects INFO.
    When both flags are set a warning is logged and verbose takes precedence.

    Args:
        verbose (bool): Enable verbose logging
        quiet (bool): Enable quiet mode
    """
    root_logger = logging.getLogger()

    if verbose:
        if quiet:
            # Conflicting flags: report it, then carry on as verbose.
            logger.warning("Both --quiet and --verbose specified. Using verbose mode.")
        root_logger.setLevel(logging.DEBUG)
        logger.debug("Verbose logging enabled")
        return

    root_logger.setLevel(logging.WARNING if quiet else logging.INFO)
|
196
|
+
|
197
|
+
|
198
|
+
def _enable_path_completion():
    """Bind Tab to path completion for interactive prompts (best effort)."""
    try:
        readline.set_completer_delims(" \t\n;")
        readline.set_completer(complete_path)
        readline.parse_and_bind("tab: complete")
        logger.debug("Tab completion configured successfully")
    except Exception as e:
        # Completion is a convenience only; report the failure and carry on.
        logger.warning(f"Failed to configure tab completion: {e}")


def initialize_environment():
    """
    Prepare the runtime environment for a transcription run.

    Verifies dependencies through ``DependencyManager`` and then tries to
    enable Tab completion; a completion failure is logged but not fatal.

    Returns:
        bool: True if initialization successful, False otherwise
    """
    logger.info("Initializing Audio Scribe environment")

    dependencies_ok = DependencyManager.verify_dependencies()
    if dependencies_ok:
        _enable_path_completion()
        return True

    logger.error("Dependency verification failed")
    return False
|
222
|
+
|
223
|
+
|
224
|
+
def _is_existing_file(path) -> bool:
    """Return True when *path* points at an existing regular file."""
    try:
        return path.is_file()
    except (OSError, ValueError):
        # e.g. permission problems or an embedded NUL byte: treat as "no file"
        return False


# The parameter annotation is quoted on purpose: the ``X | None`` syntax is
# only valid at runtime on Python 3.10+, and an unquoted annotation would make
# this module fail to import on older interpreters.
def get_audio_file_path(provided_path: "Path | None" = None) -> Path:
    """
    Get and validate the audio file path from user input or arguments.

    If ``provided_path`` names an existing file it is returned as is.
    Otherwise the user is prompted until an existing file is entered;
    ``~`` in typed paths is expanded to the home directory.

    Args:
        provided_path (Path, optional): Path provided via command line

    Returns:
        Path: Validated audio file path

    Raises:
        SystemExit: With status 0 when the user presses Ctrl+C at the prompt,
            and with status 1 when standard input is closed so that no path
            can be read.
    """
    audio_path = Path(provided_path) if provided_path is not None else None

    # Explain why a path given on the command line leads to a prompt.
    if audio_path is not None and not _is_existing_file(audio_path):
        logger.error(
            f"File '{audio_path}' not found or is not a file. "
            "Please verify the path and try again."
        )

    while audio_path is None or not _is_existing_file(audio_path):
        try:
            audio_path_str = input(
                "\nEnter path to audio file (Tab for autocomplete): "
            ).strip()
        except KeyboardInterrupt:
            logger.info("\nOperation cancelled by user")
            sys.exit(0)
        except EOFError:
            # Without this, a closed stdin would make the loop spin forever.
            logger.error("No audio file path available: standard input is closed.")
            sys.exit(1)

        if not audio_path_str:
            logger.warning("No path provided. Please enter a valid file path.")
            continue

        try:
            audio_path = Path(audio_path_str).expanduser()
        except Exception as e:
            logger.error(f"Error processing file path: {e}")
            audio_path = None
            continue

        if not _is_existing_file(audio_path):
            logger.error(
                f"File '{audio_path}' not found or is not a file. "
                "Please verify the path and try again."
            )

    logger.info(f"Audio file validated: {audio_path}")
    return audio_path
|
261
|
+
|
262
|
+
|
263
|
+
def main():
    """
    Main entry point for the Audio Scribe CLI application.

    Orchestrates the complete transcription workflow including:
    - Argument parsing and validation
    - Environment initialization
    - Authentication management
    - Model initialization
    - Audio file processing

    The process exits with status 1 when environment initialization, token
    retrieval, model initialization or transcription fails, and with status 0
    when transcription is interrupted with Ctrl+C.  ``--delete-token`` exits
    with 0 or 1 depending on whether the token was removed.
    """
    # Parse command line arguments
    parser = setup_argument_parser()
    args = parser.parse_args()

    # Configure logging based on user preferences
    configure_logging(args.verbose, args.quiet)

    # configure_logging() lets --verbose win when both flags are given; apply
    # the same rule to the console messages printed below.
    quiet = args.quiet and not args.verbose

    # Handle token deletion request
    if args.delete_token:
        token_manager = TokenManager()
        success = token_manager.delete_token()
        if success:
            logger.info("HuggingFace token successfully removed")
        else:
            logger.error("Failed to remove HuggingFace token")
        sys.exit(0 if success else 1)

    # Display startup information
    if not quiet:
        print(f"Audio Scribe v{__version__}")
        print("Initializing transcription environment...")
        sys.stdout.flush()

    # Configure warning display
    if not args.show_warnings:
        warnings.filterwarnings(
            "ignore", category=UserWarning, module=r"pyannote\.audio"
        )
        warnings.filterwarnings("ignore", category=FutureWarning, module="whisper")
        logger.debug("Library warnings suppressed")
    else:
        warnings.resetwarnings()
        logger.debug("Library warnings enabled")

    # Initialize environment
    if not initialize_environment():
        logger.critical("Environment initialization failed")
        sys.exit(1)

    # Configure output directory (defaults to ./transcripts/YYYYMMDD)
    output_dir = args.output or (
        Path("transcripts") / datetime.now().strftime("%Y%m%d")
    )
    logger.info(f"Output directory: {output_dir}")

    # Initialize transcription configuration
    config = TranscriptionConfig(
        output_directory=output_dir, whisper_model=args.whisper_model
    )

    # Initialize transcription pipeline
    logger.info("Initializing transcription pipeline")
    pipeline = TranscriptionPipeline(config)

    # Handle authentication: an explicit --token takes precedence over
    # whatever get_token() returns.
    token_manager = TokenManager()
    hf_token = args.token or get_token(token_manager)

    if not hf_token:
        logger.error(
            "HuggingFace token required for speaker diarization. "
            "Provide via --token argument or interactive prompt."
        )
        sys.exit(1)

    # Initialize models
    logger.info("Loading speech recognition and diarization models")
    if not pipeline.initialize_models(hf_token):
        logger.error("Model initialization failed")
        sys.exit(1)

    # Get and validate audio file
    audio_path = get_audio_file_path(args.audio)

    # Process the audio file
    logger.info("Starting transcription process")
    if not quiet:
        print("Processing audio file. This may take several minutes...")
        sys.stdout.flush()

    try:
        success = pipeline.process_file(audio_path)
        if success:
            logger.info("Transcription completed successfully")
            if not quiet:
                print(f"Transcription completed. Results saved to: {output_dir}")
        else:
            logger.error("Transcription process failed")
            # SystemExit is not an Exception subclass, so the handlers below
            # do not intercept this exit.
            sys.exit(1)

    except KeyboardInterrupt:
        logger.info("Transcription interrupted by user")
        sys.exit(0)
    except Exception as e:
        logger.critical(f"Unexpected error during transcription: {e}")
        # Keep the traceback available in --verbose runs instead of dropping it.
        logger.debug("Traceback for the unexpected error", exc_info=True)
        sys.exit(1)
|
132
370
|
|
133
371
|
|
@@ -1,13 +1,13 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: audio_scribe
|
3
|
-
Version: 0.1.5
|
3
|
+
Version: 0.1.6
|
4
4
|
Summary: A command-line tool for audio transcription with Whisper and Pyannote.
|
5
|
-
Home-page: https://gitlab.genomicops.cloud/
|
5
|
+
Home-page: https://gitlab.genomicops.cloud/innovation-hub/audio-scribe
|
6
6
|
Author: Gurasis Osahan
|
7
7
|
Author-email: contact@genomicops.com
|
8
8
|
License: Apache-2.0
|
9
|
-
Project-URL: Source, https://gitlab.genomicops.cloud/
|
10
|
-
Project-URL: Tracker, https://gitlab.genomicops.cloud/
|
9
|
+
Project-URL: Source, https://gitlab.genomicops.cloud/innovation-hub/audio-scribe
|
10
|
+
Project-URL: Tracker, https://gitlab.genomicops.cloud/innovation-hub/audio-scribe/-/issues
|
11
11
|
Keywords: whisper pyannote transcription audio diarization
|
12
12
|
Classifier: Development Status :: 3 - Alpha
|
13
13
|
Classifier: Intended Audience :: Developers
|
@@ -23,15 +23,15 @@ Classifier: Operating System :: OS Independent
|
|
23
23
|
Requires-Python: >=3.8
|
24
24
|
Description-Content-Type: text/markdown
|
25
25
|
License-File: LICENSE
|
26
|
-
Requires-Dist: torch
|
26
|
+
Requires-Dist: torch>=2.7.1
|
27
27
|
Requires-Dist: openai-whisper
|
28
|
-
Requires-Dist: pyannote.audio
|
28
|
+
Requires-Dist: pyannote.audio>=3.3.2
|
29
29
|
Requires-Dist: pytorch-lightning
|
30
|
-
Requires-Dist: keyring
|
30
|
+
Requires-Dist: keyring>=25.6.0
|
31
31
|
Requires-Dist: cryptography
|
32
|
-
Requires-Dist: alive-progress
|
33
|
-
Requires-Dist: psutil
|
34
|
-
Requires-Dist: GPUtil
|
32
|
+
Requires-Dist: alive-progress>=3.2.0
|
33
|
+
Requires-Dist: psutil>=7.0.0
|
34
|
+
Requires-Dist: GPUtil>=1.4.0
|
35
35
|
Dynamic: author
|
36
36
|
Dynamic: author-email
|
37
37
|
Dynamic: classifier
|
@@ -40,6 +40,7 @@ Dynamic: description-content-type
|
|
40
40
|
Dynamic: home-page
|
41
41
|
Dynamic: keywords
|
42
42
|
Dynamic: license
|
43
|
+
Dynamic: license-file
|
43
44
|
Dynamic: project-url
|
44
45
|
Dynamic: requires-dist
|
45
46
|
Dynamic: requires-python
|
@@ -107,11 +108,13 @@ This repository is licensed under the [Apache License 2.0](#license).
|
|
107
108
|
- [Usage](#usage)
|
108
109
|
- [Dependencies](#dependencies)
|
109
110
|
- [Sample `requirements.txt`](#sample-requirementstxt)
|
111
|
+
- [Troubleshooting](#troubleshooting)
|
112
|
+
- [IndexError: list index out of range](#indexerror-list-index-out-of-range)
|
113
|
+
- [Option 1: System-level Installation (requires sudo access)](#option-1-system-level-installation-requires-sudo-access)
|
114
|
+
- [Option 2: Conda-only Installation (no sudo required)](#option-2-conda-only-installation-no-sudo-required)
|
110
115
|
- [Contributing](#contributing)
|
111
116
|
- [License](#license)
|
112
117
|
|
113
|
-
---
|
114
|
-
|
115
118
|
## Features
|
116
119
|
|
117
120
|
- **Whisper Transcription**
|
@@ -127,8 +130,6 @@ This repository is licensed under the [Apache License 2.0](#license).
|
|
127
130
|
- **Configurable Models**
|
128
131
|
Default is `base.en` but you can specify any other Whisper model using `--whisper-model`.
|
129
132
|
|
130
|
-
---
|
131
|
-
|
132
133
|
## Installation
|
133
134
|
|
134
135
|
### Installing from PyPI
|
@@ -157,8 +158,6 @@ pip install -r requirements.txt
|
|
157
158
|
|
158
159
|
This approach is particularly useful if you want the newest changes or plan to contribute.
|
159
160
|
|
160
|
-
---
|
161
|
-
|
162
161
|
## Quick Start
|
163
162
|
|
164
163
|
1. **Obtain a Hugging Face Token**
|
@@ -174,7 +173,6 @@ This approach is particularly useful if you want the newest changes or plan to c
|
|
174
173
|
3. **Watch the Progress Bar**
|
175
174
|
- The tool displays a progress bar for each diarized speaker turn, along with real-time CPU, GPU, and memory usage.
|
176
175
|
|
177
|
-
---
|
178
176
|
|
179
177
|
## Usage
|
180
178
|
|
@@ -222,7 +220,6 @@ optional arguments:
|
|
222
220
|
# When prompted for an audio file path, press Tab to autocomplete
|
223
221
|
```
|
224
222
|
|
225
|
-
---
|
226
223
|
|
227
224
|
## Dependencies
|
228
225
|
|
@@ -258,11 +255,89 @@ GPUtil
|
|
258
255
|
pyreadline3; sys_platform == "win32"
|
259
256
|
```
|
260
257
|
|
261
|
-
> Note:
|
258
|
+
> Note:
|
262
259
|
> - `pyreadline3` is appended with a [PEP 508 marker](https://peps.python.org/pep-0508/) (`; sys_platform == "win32"`) so it only installs on Windows.
|
263
260
|
> - For GPU support, ensure you install a compatible PyTorch version with CUDA.
|
264
261
|
|
265
|
-
|
262
|
+
## Troubleshooting
|
263
|
+
|
264
|
+
### IndexError: list index out of range
|
265
|
+
|
266
|
+
**Symptom**
|
267
|
+
|
268
|
+
You encounter the following error when running `audio-scribe` or importing `pyannote.audio`:
|
269
|
+
|
270
|
+
```
|
271
|
+
IndexError: list index out of range
|
272
|
+
File ".../pyannote/audio/core/io.py", line 214, in __init__
|
273
|
+
backend = "soundfile" if "soundfile" in backends else backends[0]
|
274
|
+
```
|
275
|
+
|
276
|
+
This occurs when `pyannote.audio` is unable to detect any supported audio backend. Most commonly, the `soundfile` module is missing or its dependency `libsndfile` is not properly installed.
|
277
|
+
|
278
|
+
**Solution**
|
279
|
+
|
280
|
+
You have two ways to resolve this issue:
|
281
|
+
|
282
|
+
#### Option 1: System-level Installation (requires sudo access)
|
283
|
+
|
284
|
+
Install the system-level audio backend library:
|
285
|
+
|
286
|
+
```bash
|
287
|
+
sudo apt-get update
|
288
|
+
sudo apt-get install libsndfile1
|
289
|
+
```
|
290
|
+
|
291
|
+
Then reinstall the `soundfile` Python package inside your environment:
|
292
|
+
|
293
|
+
```bash
|
294
|
+
# If using conda
|
295
|
+
conda activate your-environment-name
|
296
|
+
pip uninstall soundfile -y
|
297
|
+
pip install soundfile
|
298
|
+
|
299
|
+
# If using pip/virtualenv
|
300
|
+
source your-venv/bin/activate # or equivalent activation command
|
301
|
+
pip uninstall soundfile -y
|
302
|
+
pip install soundfile
|
303
|
+
```
|
304
|
+
|
305
|
+
#### Option 2: Conda-only Installation (no sudo required)
|
306
|
+
|
307
|
+
Inside your Conda environment:
|
308
|
+
|
309
|
+
```bash
|
310
|
+
conda activate your-environment-name
|
311
|
+
conda install -c conda-forge libsndfile
|
312
|
+
```
|
313
|
+
|
314
|
+
Then ensure Python uses the correct bindings:
|
315
|
+
|
316
|
+
```bash
|
317
|
+
pip uninstall soundfile -y
|
318
|
+
pip install soundfile
|
319
|
+
```
|
320
|
+
|
321
|
+
**Verification**
|
322
|
+
|
323
|
+
Test that audio backends are now available:
|
324
|
+
|
325
|
+
```bash
|
326
|
+
python -c "import soundfile as sf; print(sf.available_formats())"
|
327
|
+
```
|
328
|
+
|
329
|
+
Expected output:
|
330
|
+
```python
|
331
|
+
{'WAV': 'Microsoft WAV format (little endian)', 'FLAC': 'FLAC format', ...}
|
332
|
+
```
|
333
|
+
|
334
|
+
Then re-run `audio-scribe`:
|
335
|
+
|
336
|
+
```bash
|
337
|
+
audio-scribe --audio path/to/your/audio.wav
|
338
|
+
```
|
339
|
+
|
340
|
+
The tool should now initialize without error.
|
266
341
|
|
267
342
|
## Contributing
|
268
343
|
|
@@ -275,8 +350,6 @@ We welcome contributions to **Audio Scribe**!
|
|
275
350
|
|
276
351
|
Please read any available guidelines or templates in our repository (such as `CONTRIBUTING.md` or `CODE_OF_CONDUCT.md`) before submitting.
|
277
352
|
|
278
|
-
---
|
279
|
-
|
280
353
|
## License
|
281
354
|
|
282
355
|
This project is licensed under the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0).
|
@@ -0,0 +1,12 @@
|
|
1
|
+
audio_scribe/__init__.py,sha256=uhgTrVKmnNPgbtuHM3J7mj649-toDl5A7rHH4cwFSmU,604
|
2
|
+
audio_scribe/auth.py,sha256=XR26nTvhof9yvkNgKESy5oWjZRS8mmGuZ-MJ7UysTHE,4355
|
3
|
+
audio_scribe/config.py,sha256=lKiBamkPf7YEx04P6zQX9uJydRkids8h2kWmuxMJWYM,938
|
4
|
+
audio_scribe/models.py,sha256=v-q22gEro6fwE486sxrzDVe8Zr76l3aj3EN4vylQjps,8447
|
5
|
+
audio_scribe/transcriber.py,sha256=QHEFj1VnFUQgDtY1E1QekJ7oQ100oLl3WF64yWhgySM,11236
|
6
|
+
audio_scribe/utils.py,sha256=LYoTqFBwYMgYs0-BtE4Aq_271vWYhRyjhlKB26SzIOI,3386
|
7
|
+
audio_scribe-0.1.6.dist-info/licenses/LICENSE,sha256=TiyEjWfqvme6M3rcTYz949_eYuikTXC6RWry7vcCiCQ,11343
|
8
|
+
audio_scribe-0.1.6.dist-info/METADATA,sha256=lZdekBVUSLBvRTIB9C49PCv8MkzV_nH2LbYjv9XNDqM,14476
|
9
|
+
audio_scribe-0.1.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
10
|
+
audio_scribe-0.1.6.dist-info/entry_points.txt,sha256=fnxalHDqeJicftPQsvjjqaMKuN9vBBJOx6qUB2MzINg,107
|
11
|
+
audio_scribe-0.1.6.dist-info/top_level.txt,sha256=L1mltKt-5HrbTXPpAXwht8SXQCgcCceoqpCq4OCZRsk,13
|
12
|
+
audio_scribe-0.1.6.dist-info/RECORD,,
|
@@ -1,12 +0,0 @@
|
|
1
|
-
audio_scribe/__init__.py,sha256=zctCLubb6rhGLrH6UECTi8Sif3S9kc0lAUbk_EiSg_c,544
|
2
|
-
audio_scribe/auth.py,sha256=XR26nTvhof9yvkNgKESy5oWjZRS8mmGuZ-MJ7UysTHE,4355
|
3
|
-
audio_scribe/config.py,sha256=lKiBamkPf7YEx04P6zQX9uJydRkids8h2kWmuxMJWYM,938
|
4
|
-
audio_scribe/models.py,sha256=4N2MoLL9ZeU5ojp2JJq0tD54-yxNcgLp2-CAFwzg2w0,8423
|
5
|
-
audio_scribe/transcriber.py,sha256=Du8V9q9YhXXFZjKyd-Brs-8mH2FQtpjxGWyzXAkuJnw,4064
|
6
|
-
audio_scribe/utils.py,sha256=LYoTqFBwYMgYs0-BtE4Aq_271vWYhRyjhlKB26SzIOI,3386
|
7
|
-
audio_scribe-0.1.5.dist-info/LICENSE,sha256=TiyEjWfqvme6M3rcTYz949_eYuikTXC6RWry7vcCiCQ,11343
|
8
|
-
audio_scribe-0.1.5.dist-info/METADATA,sha256=TKRaavXN69-Ntx3Wn4VV9PSfEhzNJm0KXUdDs4pCri0,12296
|
9
|
-
audio_scribe-0.1.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
10
|
-
audio_scribe-0.1.5.dist-info/entry_points.txt,sha256=Bj7Co8Er22Ux59Vs2_S63ds2bnwDURvhHYNXVviZdPM,63
|
11
|
-
audio_scribe-0.1.5.dist-info/top_level.txt,sha256=L1mltKt-5HrbTXPpAXwht8SXQCgcCceoqpCq4OCZRsk,13
|
12
|
-
audio_scribe-0.1.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|