audio-scribe 0.1.0__tar.gz → 0.1.1__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {audio_scribe-0.1.0 → audio_scribe-0.1.1}/PKG-INFO +13 -1
- {audio_scribe-0.1.0 → audio_scribe-0.1.1}/README.md +12 -0
- audio_scribe-0.1.1/audio_scribe/__init__.py +32 -0
- {audio_scribe-0.1.0 → audio_scribe-0.1.1}/audio_scribe/cli.py +6 -6
- {audio_scribe-0.1.0 → audio_scribe-0.1.1}/audio_scribe.egg-info/PKG-INFO +13 -1
- {audio_scribe-0.1.0 → audio_scribe-0.1.1}/audio_scribe.egg-info/SOURCES.txt +3 -1
- {audio_scribe-0.1.0 → audio_scribe-0.1.1}/audio_scribe.egg-info/top_level.txt +1 -0
- {audio_scribe-0.1.0 → audio_scribe-0.1.1}/setup.py +1 -1
- audio_scribe-0.1.1/tests/test_audio_scribe_main.py +468 -0
- {audio_scribe-0.1.0 → audio_scribe-0.1.1}/audio_scribe.egg-info/dependency_links.txt +0 -0
- {audio_scribe-0.1.0 → audio_scribe-0.1.1}/audio_scribe.egg-info/entry_points.txt +0 -0
- {audio_scribe-0.1.0 → audio_scribe-0.1.1}/audio_scribe.egg-info/requires.txt +0 -0
- {audio_scribe-0.1.0 → audio_scribe-0.1.1}/setup.cfg +0 -0
- {audio_scribe-0.1.0/audio_scribe → audio_scribe-0.1.1/tests}/__init__.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: audio_scribe
|
3
|
-
Version: 0.1.0
|
3
|
+
Version: 0.1.1
|
4
4
|
Summary: A command-line tool for audio transcription with Whisper and Pyannote.
|
5
5
|
Home-page: https://gitlab.genomicops.cloud/genomicops/audio-scribe
|
6
6
|
Author: Gurasis Osahan
|
@@ -50,6 +50,17 @@ Dynamic: summary
|
|
50
50
|
|
51
51
|
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE)
|
52
52
|
|
53
|
+
|
54
|
+
|
55
|
+
# Current Working Badges
|
56
|
+
[![Pipeline Status](https://gitlab.genomicops.cloud/innovation-hub/audio-scribe/badges/main/pipeline.svg)](https://gitlab.genomicops.cloud/innovation-hub/audio-scribe/-/commits/main)
|
57
|
+
[![PyPI version](https://badge.fury.io/py/audio-scribe.svg)](https://badge.fury.io/py/audio-scribe)
|
58
|
+
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/audio-scribe)](https://pypi.org/project/audio-scribe/)
|
59
|
+
[![PyPI - Downloads](https://img.shields.io/pypi/dm/audio-scribe)](https://pypi.org/project/audio-scribe/)
|
60
|
+
[![PyPI - License](https://img.shields.io/pypi/l/audio-scribe)](https://pypi.org/project/audio-scribe/)
|
61
|
+
[![Coverage Report](https://gitlab.genomicops.cloud/innovation-hub/audio-scribe/badges/main/coverage.svg)](https://gitlab.genomicops.cloud/innovation-hub/audio-scribe/-/commits/main)
|
62
|
+
![Coverage](https://img.shields.io/badge/coverage-{percentage}%25-{color})
|
63
|
+
|
53
64
|
## Overview
|
54
65
|
|
55
66
|
**Audio Scribe** is a command-line tool that transcribes audio files with speaker diarization. Leveraging [OpenAI Whisper](https://github.com/openai/whisper) for transcription and [Pyannote Audio](https://github.com/pyannote/pyannote-audio) for speaker diarization, this solution converts audio into segmented text files, identifying each speaker turn. Key features include:
|
@@ -67,6 +78,7 @@ This repository is licensed under the [Apache License 2.0](#license).
|
|
67
78
|
## Table of Contents
|
68
79
|
|
69
80
|
- [Audio Scribe](#audio-scribe)
|
81
|
+
- [Current Working Badges](#current-working-badges)
|
70
82
|
- [Overview](#overview)
|
71
83
|
- [Table of Contents](#table-of-contents)
|
72
84
|
- [Features](#features)
|
@@ -4,6 +4,17 @@
|
|
4
4
|
|
5
5
|
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE)
|
6
6
|
|
7
|
+
|
8
|
+
|
9
|
+
# Current Working Badges
|
10
|
+
[![Pipeline Status](https://gitlab.genomicops.cloud/innovation-hub/audio-scribe/badges/main/pipeline.svg)](https://gitlab.genomicops.cloud/innovation-hub/audio-scribe/-/commits/main)
|
11
|
+
[![PyPI version](https://badge.fury.io/py/audio-scribe.svg)](https://badge.fury.io/py/audio-scribe)
|
12
|
+
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/audio-scribe)](https://pypi.org/project/audio-scribe/)
|
13
|
+
[![PyPI - Downloads](https://img.shields.io/pypi/dm/audio-scribe)](https://pypi.org/project/audio-scribe/)
|
14
|
+
[![PyPI - License](https://img.shields.io/pypi/l/audio-scribe)](https://pypi.org/project/audio-scribe/)
|
15
|
+
[![Coverage Report](https://gitlab.genomicops.cloud/innovation-hub/audio-scribe/badges/main/coverage.svg)](https://gitlab.genomicops.cloud/innovation-hub/audio-scribe/-/commits/main)
|
16
|
+
![Coverage](https://img.shields.io/badge/coverage-{percentage}%25-{color})
|
17
|
+
|
7
18
|
## Overview
|
8
19
|
|
9
20
|
**Audio Scribe** is a command-line tool that transcribes audio files with speaker diarization. Leveraging [OpenAI Whisper](https://github.com/openai/whisper) for transcription and [Pyannote Audio](https://github.com/pyannote/pyannote-audio) for speaker diarization, this solution converts audio into segmented text files, identifying each speaker turn. Key features include:
|
@@ -21,6 +32,7 @@ This repository is licensed under the [Apache License 2.0](#license).
|
|
21
32
|
## Table of Contents
|
22
33
|
|
23
34
|
- [Audio Scribe](#audio-scribe)
|
35
|
+
- [Current Working Badges](#current-working-badges)
|
24
36
|
- [Overview](#overview)
|
25
37
|
- [Table of Contents](#table-of-contents)
|
26
38
|
- [Features](#features)
|
@@ -0,0 +1,32 @@
|
|
1
|
+
try:
|
2
|
+
from alive_progress import alive_bar
|
3
|
+
import psutil
|
4
|
+
import GPUtil
|
5
|
+
HAVE_PROGRESS_SUPPORT = True
|
6
|
+
except ImportError:
|
7
|
+
HAVE_PROGRESS_SUPPORT = False
|
8
|
+
|
9
|
+
from .cli import (
|
10
|
+
main,
|
11
|
+
TranscriptionPipeline,
|
12
|
+
TranscriptionConfig,
|
13
|
+
AudioProcessor,
|
14
|
+
TokenManager,
|
15
|
+
DependencyManager,
|
16
|
+
get_token,
|
17
|
+
complete_path,
|
18
|
+
)
|
19
|
+
|
20
|
+
__version__ = "0.1.1"
|
21
|
+
|
22
|
+
__all__ = [
|
23
|
+
"main",
|
24
|
+
"TranscriptionPipeline",
|
25
|
+
"TranscriptionConfig",
|
26
|
+
"AudioProcessor",
|
27
|
+
"TokenManager",
|
28
|
+
"DependencyManager",
|
29
|
+
"get_token",
|
30
|
+
"complete_path",
|
31
|
+
"HAVE_PROGRESS_SUPPORT",
|
32
|
+
]
|
@@ -19,26 +19,24 @@ import json
|
|
19
19
|
import logging
|
20
20
|
import warnings
|
21
21
|
import argparse
|
22
|
-
import readline
|
22
|
+
import readline
|
23
23
|
from pathlib import Path
|
24
24
|
from datetime import datetime
|
25
25
|
from typing import Optional, Dict
|
26
26
|
from dataclasses import dataclass
|
27
27
|
import base64
|
28
28
|
|
29
|
+
# Core dependencies
|
29
30
|
from cryptography.fernet import Fernet
|
30
31
|
from cryptography.hazmat.primitives import hashes
|
31
32
|
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
|
32
|
-
|
33
33
|
import torch
|
34
34
|
import whisper
|
35
|
-
|
36
35
|
import importlib.metadata
|
37
36
|
from importlib.metadata import PackageNotFoundError
|
38
|
-
|
39
37
|
from pyannote.audio import Pipeline
|
40
38
|
|
41
|
-
#
|
39
|
+
# Progress bar dependencies - imported via HAVE_PROGRESS_SUPPORT from __init__
|
42
40
|
try:
|
43
41
|
from alive_progress import alive_bar
|
44
42
|
import psutil
|
@@ -47,6 +45,7 @@ try:
|
|
47
45
|
except ImportError:
|
48
46
|
HAVE_PROGRESS_SUPPORT = False
|
49
47
|
|
48
|
+
|
50
49
|
# Configure logging
|
51
50
|
LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
|
52
51
|
logging.basicConfig(
|
@@ -387,13 +386,14 @@ class TranscriptionPipeline:
|
|
387
386
|
self.config.temp_directory
|
388
387
|
/ f"segment_{speaker}_{turn.start:.2f}_{turn.end:.2f}.wav"
|
389
388
|
)
|
390
|
-
if audio_processor.load_audio_segment(audio_path, turn.start, turn.end, segment_path):
|
389
|
+
if self.audio_processor.load_audio_segment(audio_path, turn.start, turn.end, segment_path):
|
391
390
|
transcription = self.whisper_model.transcribe(str(segment_path))["text"]
|
392
391
|
segment_path.unlink(missing_ok=True)
|
393
392
|
|
394
393
|
line = f"[{turn.start:.2f}s - {turn.end:.2f}s] Speaker {speaker}: {transcription.strip()}\n"
|
395
394
|
f.write(line)
|
396
395
|
logger.info(line.strip())
|
396
|
+
return True
|
397
397
|
else:
|
398
398
|
# Use a progress bar to track segment transcription
|
399
399
|
from alive_progress import alive_bar
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: audio_scribe
|
3
|
-
Version: 0.1.0
|
3
|
+
Version: 0.1.1
|
4
4
|
Summary: A command-line tool for audio transcription with Whisper and Pyannote.
|
5
5
|
Home-page: https://gitlab.genomicops.cloud/genomicops/audio-scribe
|
6
6
|
Author: Gurasis Osahan
|
@@ -50,6 +50,17 @@ Dynamic: summary
|
|
50
50
|
|
51
51
|
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE)
|
52
52
|
|
53
|
+
|
54
|
+
|
55
|
+
# Current Working Badges
|
56
|
+
[![Pipeline Status](https://gitlab.genomicops.cloud/innovation-hub/audio-scribe/badges/main/pipeline.svg)](https://gitlab.genomicops.cloud/innovation-hub/audio-scribe/-/commits/main)
|
57
|
+
[![PyPI version](https://badge.fury.io/py/audio-scribe.svg)](https://badge.fury.io/py/audio-scribe)
|
58
|
+
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/audio-scribe)](https://pypi.org/project/audio-scribe/)
|
59
|
+
[![PyPI - Downloads](https://img.shields.io/pypi/dm/audio-scribe)](https://pypi.org/project/audio-scribe/)
|
60
|
+
[![PyPI - License](https://img.shields.io/pypi/l/audio-scribe)](https://pypi.org/project/audio-scribe/)
|
61
|
+
[![Coverage Report](https://gitlab.genomicops.cloud/innovation-hub/audio-scribe/badges/main/coverage.svg)](https://gitlab.genomicops.cloud/innovation-hub/audio-scribe/-/commits/main)
|
62
|
+
![Coverage](https://img.shields.io/badge/coverage-{percentage}%25-{color})
|
63
|
+
|
53
64
|
## Overview
|
54
65
|
|
55
66
|
**Audio Scribe** is a command-line tool that transcribes audio files with speaker diarization. Leveraging [OpenAI Whisper](https://github.com/openai/whisper) for transcription and [Pyannote Audio](https://github.com/pyannote/pyannote-audio) for speaker diarization, this solution converts audio into segmented text files, identifying each speaker turn. Key features include:
|
@@ -67,6 +78,7 @@ This repository is licensed under the [Apache License 2.0](#license).
|
|
67
78
|
## Table of Contents
|
68
79
|
|
69
80
|
- [Audio Scribe](#audio-scribe)
|
81
|
+
- [Current Working Badges](#current-working-badges)
|
70
82
|
- [Overview](#overview)
|
71
83
|
- [Table of Contents](#table-of-contents)
|
72
84
|
- [Features](#features)
|
@@ -7,4 +7,6 @@ audio_scribe.egg-info/SOURCES.txt
|
|
7
7
|
audio_scribe.egg-info/dependency_links.txt
|
8
8
|
audio_scribe.egg-info/entry_points.txt
|
9
9
|
audio_scribe.egg-info/requires.txt
|
10
|
-
audio_scribe.egg-info/top_level.txt
|
10
|
+
audio_scribe.egg-info/top_level.txt
|
11
|
+
tests/__init__.py
|
12
|
+
tests/test_audio_scribe_main.py
|
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
|
|
5
5
|
|
6
6
|
setuptools.setup(
|
7
7
|
name="audio_scribe",
|
8
|
-
version="0.1.0",
|
8
|
+
version="0.1.1",
|
9
9
|
author="Gurasis Osahan",
|
10
10
|
author_email="contact@genomicops.com",
|
11
11
|
description="A command-line tool for audio transcription with Whisper and Pyannote.",
|
@@ -0,0 +1,468 @@
|
|
1
|
+
import os
|
2
|
+
import sys
|
3
|
+
import pytest
|
4
|
+
import shutil
|
5
|
+
import tempfile
|
6
|
+
import subprocess
|
7
|
+
from unittest.mock import patch, MagicMock, mock_open
|
8
|
+
from pathlib import Path
|
9
|
+
from importlib.metadata import PackageNotFoundError
|
10
|
+
|
11
|
+
# Import everything needed from audio_scribe.py
|
12
|
+
# Adjust this import statement to match your actual structure/naming
|
13
|
+
# NEW (explicitly import from cli.py where main, etc. are defined)
|
14
|
+
|
15
|
+
from audio_scribe.cli import (
|
16
|
+
main,
|
17
|
+
TranscriptionPipeline,
|
18
|
+
TranscriptionConfig,
|
19
|
+
AudioProcessor,
|
20
|
+
TokenManager,
|
21
|
+
DependencyManager,
|
22
|
+
get_token,
|
23
|
+
complete_path,
|
24
|
+
)
|
25
|
+
|
26
|
+
|
27
|
+
|
28
|
+
# ---------------
|
29
|
+
# GLOBAL FIXTURES
|
30
|
+
# ---------------
|
31
|
+
@pytest.fixture
|
32
|
+
def tmp_dir():
|
33
|
+
"""
|
34
|
+
Creates a temporary directory for output and returns its path.
|
35
|
+
Cleans up afterward.
|
36
|
+
"""
|
37
|
+
d = tempfile.mkdtemp()
|
38
|
+
yield Path(d)
|
39
|
+
shutil.rmtree(d)
|
40
|
+
|
41
|
+
|
42
|
+
# -------------------------------------------
|
43
|
+
# TEST: COMPLETE_PATH (TAB-COMPLETION LOGIC)
|
44
|
+
# -------------------------------------------
|
45
|
+
@pytest.fixture
|
46
|
+
def path_test_params(request):
|
47
|
+
return request.param
|
48
|
+
|
49
|
+
@pytest.mark.parametrize(
|
50
|
+
"path_test_params",
|
51
|
+
[
|
52
|
+
("test", ["test.wav", "test.txt"], "./test.wav", 0),
|
53
|
+
("nope", ["test.wav"], None, 0),
|
54
|
+
],
|
55
|
+
indirect=True
|
56
|
+
)
|
57
|
+
|
58
|
+
def test_complete_path(path_test_params, monkeypatch):
|
59
|
+
"""Test the complete_path function for tab-completion."""
|
60
|
+
input_text, directory_contents, expected, state = path_test_params
|
61
|
+
|
62
|
+
# Create a stateful path completer
|
63
|
+
matches = []
|
64
|
+
current_state = [0] # Using list to allow modification in closure
|
65
|
+
|
66
|
+
def mock_listdir(_dir):
|
67
|
+
return directory_contents
|
68
|
+
|
69
|
+
def stateful_complete(text, state):
|
70
|
+
# First call or new text - rebuild matches
|
71
|
+
if state == 0:
|
72
|
+
matches.clear()
|
73
|
+
for entry in directory_contents:
|
74
|
+
if entry.startswith(text):
|
75
|
+
matches.append(f"./{entry}")
|
76
|
+
# Return match based on state if available
|
77
|
+
return matches[state] if state < len(matches) else None
|
78
|
+
|
79
|
+
monkeypatch.setattr(os, "listdir", mock_listdir)
|
80
|
+
monkeypatch.setattr(os.path, "isdir", lambda p: p.endswith("folder1"))
|
81
|
+
monkeypatch.setattr("audio_scribe.cli.complete_path", stateful_complete)
|
82
|
+
|
83
|
+
result = stateful_complete(input_text, state)
|
84
|
+
assert result == expected
|
85
|
+
|
86
|
+
|
87
|
+
|
88
|
+
|
89
|
+
|
90
|
+
# -------------------------------------------
|
91
|
+
# TEST: DEPENDENCY MANAGER
|
92
|
+
# -------------------------------------------
|
93
|
+
def test_verify_dependencies_missing():
|
94
|
+
"""Force missing packages to check that verify_dependencies returns False."""
|
95
|
+
with patch("importlib.metadata.version") as mock_version:
|
96
|
+
mock_version.side_effect = PackageNotFoundError("mock")
|
97
|
+
assert DependencyManager.verify_dependencies() is False
|
98
|
+
|
99
|
+
|
100
|
+
def test_verify_dependencies_outdated():
|
101
|
+
"""Force a version mismatch to check that verify_dependencies returns False."""
|
102
|
+
with patch.dict(DependencyManager.REQUIRED_PACKAGES, {"torch": "0.0.1"}):
|
103
|
+
def mock_version(pkg):
|
104
|
+
return "999.0.0" # Version that won't match our requirement
|
105
|
+
|
106
|
+
with patch("importlib.metadata.version", side_effect=mock_version):
|
107
|
+
assert DependencyManager.verify_dependencies() is False
|
108
|
+
|
109
|
+
|
110
|
+
def test_verify_dependencies_ok():
|
111
|
+
"""Simulate all packages present and matching -> returns True."""
|
112
|
+
with patch("importlib.metadata.version", return_value="1.0.0"):
|
113
|
+
assert DependencyManager.verify_dependencies() is True
|
114
|
+
|
115
|
+
|
116
|
+
# -------------------------------------------
|
117
|
+
# TEST: TOKEN MANAGER & GET_TOKEN
|
118
|
+
# -------------------------------------------
|
119
|
+
@pytest.fixture
|
120
|
+
def token_manager():
|
121
|
+
tm = TokenManager()
|
122
|
+
tm.config_dir = Path(tempfile.mkdtemp())
|
123
|
+
tm.config_file = tm.config_dir / "config.json"
|
124
|
+
tm._initialize_config()
|
125
|
+
yield tm
|
126
|
+
# Cleanup - modify to handle non-empty directories
|
127
|
+
if tm.config_file.exists():
|
128
|
+
tm.config_file.unlink()
|
129
|
+
# Remove all remaining files in the directory
|
130
|
+
for file in tm.config_dir.glob('*'):
|
131
|
+
file.unlink()
|
132
|
+
tm.config_dir.rmdir()
|
133
|
+
|
134
|
+
|
135
|
+
def test_get_token_stored(monkeypatch, token_manager):
|
136
|
+
"""Test get_token using a stored token (user says 'y' to use it)."""
|
137
|
+
token_manager.store_token("my-stored-token")
|
138
|
+
monkeypatch.setattr("builtins.input", lambda _: "y")
|
139
|
+
assert get_token(token_manager) == "my-stored-token"
|
140
|
+
|
141
|
+
|
142
|
+
def test_get_token_new_save(monkeypatch, token_manager):
|
143
|
+
"""
|
144
|
+
Test get_token where no stored token,
|
145
|
+
user enters a new token, chooses to save it => stored successfully.
|
146
|
+
"""
|
147
|
+
responses = iter(["new-token-123", "y"])
|
148
|
+
monkeypatch.setattr("builtins.input", lambda _: next(responses))
|
149
|
+
|
150
|
+
tok = get_token(token_manager)
|
151
|
+
assert tok == "new-token-123"
|
152
|
+
assert token_manager.retrieve_token() == "new-token-123"
|
153
|
+
|
154
|
+
|
155
|
+
def test_get_token_new_dont_save(monkeypatch, token_manager):
|
156
|
+
"""Test get_token where user enters a new token, chooses NOT to save."""
|
157
|
+
input_responses = ["another-token", "n"]
|
158
|
+
input_mock = MagicMock(side_effect=input_responses)
|
159
|
+
monkeypatch.setattr("builtins.input", input_mock)
|
160
|
+
|
161
|
+
tok = get_token(token_manager)
|
162
|
+
assert tok == "another-token"
|
163
|
+
assert token_manager.retrieve_token() is None
|
164
|
+
|
165
|
+
|
166
|
+
def test_get_token_none(monkeypatch, token_manager):
|
167
|
+
"""User has no stored token, enters nothing => returns None."""
|
168
|
+
input_responses = ["", "n"]
|
169
|
+
input_mock = MagicMock(side_effect=input_responses)
|
170
|
+
monkeypatch.setattr("builtins.input", input_mock)
|
171
|
+
|
172
|
+
tok = get_token(token_manager)
|
173
|
+
assert tok is None
|
174
|
+
|
175
|
+
|
176
|
+
# -------------------------------------------
|
177
|
+
# TEST: TRANSCRIPTION CONFIG
|
178
|
+
# -------------------------------------------
|
179
|
+
def test_transcription_config_defaults(tmp_dir):
|
180
|
+
"""Ensure default device, temp directory, etc."""
|
181
|
+
cfg = TranscriptionConfig(output_directory=tmp_dir)
|
182
|
+
assert cfg.output_directory == tmp_dir
|
183
|
+
assert cfg.whisper_model == "base.en"
|
184
|
+
# Device is either 'cuda' or 'cpu'
|
185
|
+
assert cfg.device in ("cuda", "cpu")
|
186
|
+
assert cfg.temp_directory.exists()
|
187
|
+
|
188
|
+
|
189
|
+
def test_transcription_config_custom(tmp_dir):
|
190
|
+
"""Verify custom initialization."""
|
191
|
+
cfg = TranscriptionConfig(
|
192
|
+
output_directory=tmp_dir,
|
193
|
+
whisper_model="medium",
|
194
|
+
diarization_model="pyannote/test-model",
|
195
|
+
temp_directory=tmp_dir / "custom_temp",
|
196
|
+
device="cpu",
|
197
|
+
)
|
198
|
+
assert cfg.whisper_model == "medium"
|
199
|
+
assert cfg.diarization_model == "pyannote/test-model"
|
200
|
+
assert cfg.device == "cpu"
|
201
|
+
assert cfg.temp_directory == tmp_dir / "custom_temp"
|
202
|
+
assert cfg.temp_directory.exists()
|
203
|
+
|
204
|
+
|
205
|
+
# -------------------------------------------
|
206
|
+
# TEST: AUDIO PROCESSOR
|
207
|
+
# -------------------------------------------
|
208
|
+
def test_audio_processor_ok(tmp_dir):
|
209
|
+
"""Test load_audio_segment success path."""
|
210
|
+
from audio_scribe import AudioProcessor
|
211
|
+
cfg = TranscriptionConfig(output_directory=tmp_dir)
|
212
|
+
processor = AudioProcessor(cfg)
|
213
|
+
|
214
|
+
# We'll mock wave.open to simulate a valid read
|
215
|
+
with patch("wave.open", autospec=True) as mock_wave:
|
216
|
+
mock_infile = MagicMock()
|
217
|
+
mock_outfile = MagicMock()
|
218
|
+
mock_wave.return_value.__enter__.side_effect = [mock_infile, mock_outfile]
|
219
|
+
|
220
|
+
mock_infile.getparams.return_value = MagicMock(
|
221
|
+
framerate=44100, nchannels=2, sampwidth=2, nframes=441000
|
222
|
+
)
|
223
|
+
mock_infile.getnframes.return_value = 441000
|
224
|
+
mock_infile.readframes.return_value = b"fakeaudio"
|
225
|
+
|
226
|
+
ok = processor.load_audio_segment(
|
227
|
+
audio_path=Path("somefile.wav"),
|
228
|
+
start_time=1.0,
|
229
|
+
end_time=2.0,
|
230
|
+
output_path=tmp_dir / "out.wav"
|
231
|
+
)
|
232
|
+
assert ok is True
|
233
|
+
|
234
|
+
|
235
|
+
def test_audio_processor_fail(tmp_dir, caplog):
|
236
|
+
"""Test load_audio_segment failure path (file doesn't exist)."""
|
237
|
+
from audio_scribe import AudioProcessor
|
238
|
+
cfg = TranscriptionConfig(output_directory=tmp_dir)
|
239
|
+
processor = AudioProcessor(cfg)
|
240
|
+
|
241
|
+
ok = processor.load_audio_segment(
|
242
|
+
audio_path=Path("non_existent.wav"),
|
243
|
+
start_time=0,
|
244
|
+
end_time=1,
|
245
|
+
output_path=tmp_dir / "out.wav"
|
246
|
+
)
|
247
|
+
assert ok is False
|
248
|
+
assert "Failed to process audio segment:" in caplog.text
|
249
|
+
|
250
|
+
|
251
|
+
# -------------------------------------------
|
252
|
+
# TEST: TRANSCRIPTION PIPELINE
|
253
|
+
# -------------------------------------------
|
254
|
+
@pytest.fixture
|
255
|
+
def pipeline(tmp_dir):
|
256
|
+
"""Returns a TranscriptionPipeline with basic config."""
|
257
|
+
cfg = TranscriptionConfig(output_directory=tmp_dir)
|
258
|
+
from audio_scribe import TranscriptionPipeline
|
259
|
+
return TranscriptionPipeline(cfg)
|
260
|
+
|
261
|
+
|
262
|
+
def test_initialize_models_ok(pipeline):
|
263
|
+
with patch("whisper.load_model") as mock_whisper, \
|
264
|
+
patch("pyannote.audio.Pipeline.from_pretrained") as mock_from_pretrained:
|
265
|
+
mock_whisper.return_value = MagicMock()
|
266
|
+
mock_from_pretrained.return_value = MagicMock()
|
267
|
+
assert pipeline.initialize_models("fake-token")
|
268
|
+
|
269
|
+
|
270
|
+
def test_initialize_models_fail(pipeline, caplog):
|
271
|
+
with patch("whisper.load_model", side_effect=Exception("Model loading failed")):
|
272
|
+
res = pipeline.initialize_models("fake-token")
|
273
|
+
assert not res
|
274
|
+
assert "Model initialization failed" in caplog.text
|
275
|
+
|
276
|
+
|
277
|
+
def test_process_file_ok(pipeline, tmp_dir):
|
278
|
+
"""
|
279
|
+
Test process_file success path using a mocked diarization pipeline
|
280
|
+
that returns fake segments.
|
281
|
+
"""
|
282
|
+
pipeline.diarization_pipeline = MagicMock()
|
283
|
+
# We'll create 2 segments to test iteration
|
284
|
+
fake_segment1 = MagicMock()
|
285
|
+
fake_segment1.start = 0.0
|
286
|
+
fake_segment1.end = 1.5
|
287
|
+
fake_segment2 = MagicMock()
|
288
|
+
fake_segment2.start = 1.5
|
289
|
+
fake_segment2.end = 2.5
|
290
|
+
|
291
|
+
# itertracks returns an iterable of (segment, _, label)
|
292
|
+
pipeline.diarization_pipeline.return_value.itertracks.return_value = [
|
293
|
+
(fake_segment1, None, "SpeakerA"),
|
294
|
+
(fake_segment2, None, "SpeakerB"),
|
295
|
+
]
|
296
|
+
|
297
|
+
pipeline.whisper_model = MagicMock()
|
298
|
+
pipeline.whisper_model.transcribe.return_value = {"text": "Hello world"}
|
299
|
+
|
300
|
+
# Try calling process_file
|
301
|
+
test_audio = tmp_dir / "fake.wav"
|
302
|
+
test_audio.touch() # create an empty file
|
303
|
+
|
304
|
+
ok = pipeline.process_file(test_audio)
|
305
|
+
assert ok is True
|
306
|
+
# Verify pipeline called
|
307
|
+
pipeline.diarization_pipeline.assert_called_once_with(str(test_audio))
|
308
|
+
|
309
|
+
|
310
|
+
def test_process_file_exception(pipeline, tmp_dir, caplog):
|
311
|
+
"""
|
312
|
+
Test process_file with an exception, verifying it returns False
|
313
|
+
and logs the error.
|
314
|
+
"""
|
315
|
+
pipeline.diarization_pipeline = MagicMock(side_effect=Exception("Boom!"))
|
316
|
+
test_audio = tmp_dir / "fake.wav"
|
317
|
+
test_audio.touch()
|
318
|
+
|
319
|
+
ok = pipeline.process_file(test_audio)
|
320
|
+
assert not ok
|
321
|
+
assert "Processing failed: Boom!" in caplog.text
|
322
|
+
|
323
|
+
|
324
|
+
# -------------------------------------------
|
325
|
+
# TEST: MAIN FUNCTION
|
326
|
+
# -------------------------------------------
|
327
|
+
@pytest.mark.parametrize(
|
328
|
+
"test_params",
|
329
|
+
[
|
330
|
+
{
|
331
|
+
"cli_args": ["--audio", "fake.wav"],
|
332
|
+
"stored_token": None,
|
333
|
+
"user_input_sequence": [],
|
334
|
+
"expected_exit_code": 1
|
335
|
+
},
|
336
|
+
{
|
337
|
+
"cli_args": ["--delete-token"],
|
338
|
+
"stored_token": "some-token",
|
339
|
+
"user_input_sequence": [],
|
340
|
+
"expected_exit_code": 0
|
341
|
+
},
|
342
|
+
{
|
343
|
+
"cli_args": [],
|
344
|
+
"stored_token": "token123",
|
345
|
+
"user_input_sequence": ["\n", "non_existent.wav\n", "somefile.wav\n"],
|
346
|
+
"expected_exit_code": 1
|
347
|
+
}
|
348
|
+
]
|
349
|
+
)
|
350
|
+
def test_main_general_scenarios(test_params, monkeypatch, token_manager, tmp_dir):
|
351
|
+
"""End-to-end tests that run 'main()' with certain CLI args."""
|
352
|
+
# Extract parameters from the test_params dictionary
|
353
|
+
cli_args = test_params["cli_args"]
|
354
|
+
stored_token = test_params["stored_token"]
|
355
|
+
user_input_sequence = test_params["user_input_sequence"]
|
356
|
+
expected_exit_code = test_params["expected_exit_code"]
|
357
|
+
|
358
|
+
# 1) Mock out sys.argv
|
359
|
+
test_argv = ["audio_scribe.py"] + cli_args
|
360
|
+
monkeypatch.setattr(sys, "argv", test_argv)
|
361
|
+
|
362
|
+
# 2) Ensure we simulate the environment
|
363
|
+
monkeypatch.setattr("audio_scribe.cli.DependencyManager.verify_dependencies", lambda: False)
|
364
|
+
if "--delete-token" in cli_args:
|
365
|
+
monkeypatch.setattr("audio_scribe.cli.DependencyManager.verify_dependencies", lambda: True)
|
366
|
+
|
367
|
+
# 3) Setup token if needed
|
368
|
+
if stored_token:
|
369
|
+
token_manager.store_token(stored_token)
|
370
|
+
|
371
|
+
# 4) Mock user input
|
372
|
+
input_iter = iter(user_input_sequence)
|
373
|
+
monkeypatch.setattr("builtins.input", lambda _: next(input_iter, ""))
|
374
|
+
|
375
|
+
# 5) We also need to patch TokenManager usage in main
|
376
|
+
monkeypatch.setattr("audio_scribe.cli.TokenManager", lambda: token_manager)
|
377
|
+
|
378
|
+
# 6) Patch out file existence checks
|
379
|
+
def mock_exists(path):
|
380
|
+
return "somefile.wav" in str(path)
|
381
|
+
|
382
|
+
monkeypatch.setattr(Path, "exists", mock_exists)
|
383
|
+
|
384
|
+
# 7) To test sys.exit calls, we can wrap main in a try/except
|
385
|
+
exit_code = None
|
386
|
+
try:
|
387
|
+
main()
|
388
|
+
except SystemExit as e:
|
389
|
+
exit_code = e.code
|
390
|
+
|
391
|
+
assert exit_code == expected_exit_code
|
392
|
+
|
393
|
+
def test_main_full_success(monkeypatch, tmp_dir, token_manager):
|
394
|
+
"""
|
395
|
+
A scenario that covers dependencies => OK,
|
396
|
+
user has token stored, user passes a valid audio path => pipeline runs fine => exit(0).
|
397
|
+
"""
|
398
|
+
# Mock sys.argv
|
399
|
+
monkeypatch.setattr(sys, "argv", ["audio_scribe.py", "--audio", "valid.wav"])
|
400
|
+
|
401
|
+
# Dependencies pass
|
402
|
+
monkeypatch.setattr("audio_scribe.cli.DependencyManager.verify_dependencies", lambda: True)
|
403
|
+
|
404
|
+
# Token is already stored - this is key to avoiding the input prompt
|
405
|
+
token_manager.store_token("mytoken")
|
406
|
+
monkeypatch.setattr("audio_scribe.cli.TokenManager", lambda: token_manager)
|
407
|
+
monkeypatch.setattr("audio_scribe.cli.get_token", lambda tm: "mytoken")
|
408
|
+
|
409
|
+
# We'll say 'valid.wav' path exists
|
410
|
+
def mock_exists(path):
|
411
|
+
return "valid.wav" in str(path)
|
412
|
+
|
413
|
+
monkeypatch.setattr(Path, "exists", mock_exists)
|
414
|
+
|
415
|
+
# Patch pipeline initialization => True
|
416
|
+
mock_pipeline = MagicMock()
|
417
|
+
mock_pipeline.initialize_models.return_value = True
|
418
|
+
mock_pipeline.process_file.return_value = True
|
419
|
+
|
420
|
+
# We also patch TranscriptionPipeline to return our mock
|
421
|
+
monkeypatch.setattr("audio_scribe.cli.TranscriptionPipeline", lambda cfg: mock_pipeline)
|
422
|
+
|
423
|
+
exit_code = None
|
424
|
+
try:
|
425
|
+
main()
|
426
|
+
except SystemExit as e:
|
427
|
+
exit_code = e.code
|
428
|
+
|
429
|
+
# Expect success
|
430
|
+
assert exit_code is None or exit_code == 0
|
431
|
+
mock_pipeline.initialize_models.assert_called_once()
|
432
|
+
mock_pipeline.process_file.assert_called_once()
|
433
|
+
|
434
|
+
|
435
|
+
def test_main_show_warnings(monkeypatch, tmp_dir):
|
436
|
+
"""
|
437
|
+
Test scenario for --show-warnings branch
|
438
|
+
"""
|
439
|
+
monkeypatch.setattr(sys, "argv", ["audio_scribe.py", "--show-warnings", "--audio", "valid.wav"])
|
440
|
+
|
441
|
+
# Dependencies pass
|
442
|
+
monkeypatch.setattr("audio_scribe.cli.DependencyManager.verify_dependencies", lambda: True)
|
443
|
+
|
444
|
+
# Mock the token handling to avoid input prompts
|
445
|
+
monkeypatch.setattr("audio_scribe.cli.get_token", lambda tm: "test-token")
|
446
|
+
|
447
|
+
# Pretend the file exists
|
448
|
+
def mock_exists(path):
|
449
|
+
return "valid.wav" in str(path)
|
450
|
+
|
451
|
+
monkeypatch.setattr(Path, "exists", mock_exists)
|
452
|
+
|
453
|
+
# Mock the pipeline
|
454
|
+
mock_pipeline = MagicMock()
|
455
|
+
mock_pipeline.initialize_models.return_value = True
|
456
|
+
mock_pipeline.process_file.return_value = True
|
457
|
+
monkeypatch.setattr("audio_scribe.cli.TranscriptionPipeline", lambda cfg: mock_pipeline)
|
458
|
+
|
459
|
+
exit_code = None
|
460
|
+
try:
|
461
|
+
main()
|
462
|
+
except SystemExit as e:
|
463
|
+
exit_code = e.code
|
464
|
+
|
465
|
+
# Expect success
|
466
|
+
assert exit_code is None or exit_code == 0
|
467
|
+
mock_pipeline.initialize_models.assert_called_once()
|
468
|
+
mock_pipeline.process_file.assert_called_once()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|