audio-scribe 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,468 +0,0 @@
1
- import os
2
- import sys
3
- import pytest
4
- import shutil
5
- import tempfile
6
- import subprocess
7
- from unittest.mock import patch, MagicMock, mock_open
8
- from pathlib import Path
9
- from importlib.metadata import PackageNotFoundError
10
-
11
- # Import everything needed from audio_scribe.py
12
- # Adjust this import statement to match your actual structure/naming
13
- # NEW (explicitly import from cli.py where main, etc. are defined)
14
-
15
- from audio_scribe.cli import (
16
- main,
17
- TranscriptionPipeline,
18
- TranscriptionConfig,
19
- AudioProcessor,
20
- TokenManager,
21
- DependencyManager,
22
- get_token,
23
- complete_path,
24
- )
25
-
26
-
27
-
28
- # ---------------
29
- # GLOBAL FIXTURES
30
- # ---------------
31
@pytest.fixture
def tmp_dir():
    """Yield a freshly created temporary directory as a ``Path``.

    The whole directory tree is removed once the requesting test is done.
    """
    scratch = Path(tempfile.mkdtemp())
    yield scratch
    shutil.rmtree(scratch)
40
-
41
-
42
- # -------------------------------------------
43
- # TEST: COMPLETE_PATH (TAB-COMPLETION LOGIC)
44
- # -------------------------------------------
45
@pytest.fixture
def path_test_params(request):
    """Hand the indirect parametrize value to the requesting test unchanged."""
    params = request.param
    return params
48
-
49
@pytest.mark.parametrize(
    "path_test_params",
    [
        ("test", ["test.wav", "test.txt"], "./test.wav", 0),
        ("nope", ["test.wav"], None, 0),
    ],
    indirect=True,
)
def test_complete_path(path_test_params, monkeypatch):
    """Check prefix-based tab completion against a fake directory listing.

    Each parameter tuple is ``(input_text, directory_contents, expected,
    state)``.

    NOTE(review): the assertion runs against the local ``stateful_complete``
    stand-in, which is also installed over ``audio_scribe.cli.complete_path``;
    the packaged implementation itself is never invoked here -- confirm
    whether that is intended.
    """
    input_text, directory_contents, expected, state = path_test_params

    # Shared across calls so a later state > 0 can index earlier results.
    matches = []

    def mock_listdir(_dir):
        return directory_contents

    def stateful_complete(text, state):
        # state == 0 marks a new completion request: rebuild the candidates.
        if state == 0:
            matches[:] = [
                f"./{entry}"
                for entry in directory_contents
                if entry.startswith(text)
            ]
        # Return the match for this state, or None once they are exhausted.
        return matches[state] if state < len(matches) else None

    monkeypatch.setattr(os, "listdir", mock_listdir)
    monkeypatch.setattr(os.path, "isdir", lambda p: p.endswith("folder1"))
    monkeypatch.setattr("audio_scribe.cli.complete_path", stateful_complete)

    result = stateful_complete(input_text, state)
    assert result == expected
85
-
86
-
87
-
88
-
89
-
90
- # -------------------------------------------
91
- # TEST: DEPENDENCY MANAGER
92
- # -------------------------------------------
93
def test_verify_dependencies_missing():
    """verify_dependencies must return False when every version lookup fails."""
    not_found = PackageNotFoundError("mock")
    with patch("importlib.metadata.version", side_effect=not_found):
        assert DependencyManager.verify_dependencies() is False
98
-
99
-
100
def test_verify_dependencies_outdated():
    """verify_dependencies must return False on a forced version mismatch."""
    pinned = {"torch": "0.0.1"}
    # Every lookup reports 999.0.0, which will not match the 0.0.1 requirement.
    fake_version = patch(
        "importlib.metadata.version",
        side_effect=lambda pkg: "999.0.0",
    )
    with patch.dict(DependencyManager.REQUIRED_PACKAGES, pinned):
        with fake_version:
            assert DependencyManager.verify_dependencies() is False
108
-
109
-
110
def test_verify_dependencies_ok():
    """verify_dependencies must return True when all packages look present and matching."""
    with patch("importlib.metadata.version") as fake_version:
        fake_version.return_value = "1.0.0"
        assert DependencyManager.verify_dependencies() is True
114
-
115
-
116
- # -------------------------------------------
117
- # TEST: TOKEN MANAGER & GET_TOKEN
118
- # -------------------------------------------
119
@pytest.fixture
def token_manager():
    """Yield a TokenManager redirected to a throwaway config directory.

    ``config_dir`` and ``config_file`` are pointed at a fresh temporary
    directory before ``_initialize_config()`` runs. The directory is removed
    afterwards, even if setup itself raises.
    """
    config_dir = Path(tempfile.mkdtemp())
    try:
        tm = TokenManager()
        tm.config_dir = config_dir
        tm.config_file = config_dir / "config.json"
        tm._initialize_config()
        yield tm
    finally:
        # rmtree handles nested directories and does not rely on Path.exists,
        # which some tests in this module monkeypatch; the earlier
        # unlink()/rmdir() sequence broke on any sub-directory.
        shutil.rmtree(config_dir)
133
-
134
-
135
def test_get_token_stored(monkeypatch, token_manager):
    """get_token returns the stored token when the prompt is answered 'y'."""
    saved = "my-stored-token"
    token_manager.store_token(saved)

    def always_yes(_prompt):
        return "y"

    monkeypatch.setattr("builtins.input", always_yes)
    assert get_token(token_manager) == saved
140
-
141
-
142
def test_get_token_new_save(monkeypatch, token_manager):
    """get_token with no stored token: a new token is entered and saved.

    The scripted answers are the token itself followed by 'y' for the save
    prompt; afterwards the token must be retrievable from the manager.
    """
    answers = iter(["new-token-123", "y"])

    def scripted_input(_prompt):
        return next(answers)

    monkeypatch.setattr("builtins.input", scripted_input)

    returned = get_token(token_manager)
    assert returned == "new-token-123"
    assert token_manager.retrieve_token() == "new-token-123"
153
-
154
-
155
def test_get_token_new_dont_save(monkeypatch, token_manager):
    """get_token returns a newly entered token without storing it when the user answers 'n'."""
    scripted_input = MagicMock()
    scripted_input.side_effect = ["another-token", "n"]
    monkeypatch.setattr("builtins.input", scripted_input)

    returned = get_token(token_manager)
    assert returned == "another-token"
    assert token_manager.retrieve_token() is None
164
-
165
-
166
def test_get_token_none(monkeypatch, token_manager):
    """get_token returns None when nothing is stored and the user enters nothing."""
    scripted_input = MagicMock()
    scripted_input.side_effect = ["", "n"]
    monkeypatch.setattr("builtins.input", scripted_input)

    returned = get_token(token_manager)
    assert returned is None
174
-
175
-
176
- # -------------------------------------------
177
- # TEST: TRANSCRIPTION CONFIG
178
- # -------------------------------------------
179
def test_transcription_config_defaults(tmp_dir):
    """A config built from only an output directory exposes the expected defaults."""
    config = TranscriptionConfig(output_directory=tmp_dir)

    assert config.output_directory == tmp_dir
    assert config.whisper_model == "base.en"
    # The device must be one of these two values.
    allowed_devices = ("cuda", "cpu")
    assert config.device in allowed_devices
    assert config.temp_directory.exists()
187
-
188
-
189
def test_transcription_config_custom(tmp_dir):
    """Explicitly supplied settings are stored and the temp directory exists."""
    custom_temp = tmp_dir / "custom_temp"
    settings = {
        "output_directory": tmp_dir,
        "whisper_model": "medium",
        "diarization_model": "pyannote/test-model",
        "temp_directory": custom_temp,
        "device": "cpu",
    }
    config = TranscriptionConfig(**settings)

    assert config.whisper_model == "medium"
    assert config.diarization_model == "pyannote/test-model"
    assert config.device == "cpu"
    assert config.temp_directory == custom_temp
    assert config.temp_directory.exists()
203
-
204
-
205
- # -------------------------------------------
206
- # TEST: AUDIO PROCESSOR
207
- # -------------------------------------------
208
def test_audio_processor_ok(tmp_dir):
    """load_audio_segment returns True when wave.open is mocked to succeed.

    Uses the module-level ``AudioProcessor`` imported from
    ``audio_scribe.cli`` rather than re-importing it locally from the package
    root, so the class under test matches the one used by the rest of this
    module.
    """
    cfg = TranscriptionConfig(output_directory=tmp_dir)
    processor = AudioProcessor(cfg)

    # Mock wave.open so no real audio file is needed.
    with patch("wave.open", autospec=True) as mock_wave:
        mock_infile = MagicMock()
        mock_outfile = MagicMock()
        # Successive context-manager entries yield the input handle first,
        # then the output handle.
        mock_wave.return_value.__enter__.side_effect = [mock_infile, mock_outfile]

        mock_infile.getparams.return_value = MagicMock(
            framerate=44100, nchannels=2, sampwidth=2, nframes=441000
        )
        mock_infile.getnframes.return_value = 441000
        mock_infile.readframes.return_value = b"fakeaudio"

        ok = processor.load_audio_segment(
            audio_path=Path("somefile.wav"),
            start_time=1.0,
            end_time=2.0,
            output_path=tmp_dir / "out.wav",
        )
        assert ok is True
233
-
234
-
235
def test_audio_processor_fail(tmp_dir, caplog):
    """load_audio_segment returns False and logs when the input file is missing.

    Uses the module-level ``AudioProcessor`` imported from
    ``audio_scribe.cli`` rather than re-importing it locally from the package
    root.
    """
    cfg = TranscriptionConfig(output_directory=tmp_dir)
    processor = AudioProcessor(cfg)

    ok = processor.load_audio_segment(
        audio_path=Path("non_existent.wav"),
        start_time=0,
        end_time=1,
        output_path=tmp_dir / "out.wav",
    )
    assert ok is False
    assert "Failed to process audio segment:" in caplog.text
249
-
250
-
251
- # -------------------------------------------
252
- # TEST: TRANSCRIPTION PIPELINE
253
- # -------------------------------------------
254
@pytest.fixture
def pipeline(tmp_dir):
    """Return a TranscriptionPipeline built from a default config in ``tmp_dir``.

    Relies on the module-level ``TranscriptionPipeline`` import from
    ``audio_scribe.cli`` instead of a second, function-local import from the
    package root.
    """
    cfg = TranscriptionConfig(output_directory=tmp_dir)
    return TranscriptionPipeline(cfg)
260
-
261
-
262
def test_initialize_models_ok(pipeline):
    """initialize_models is truthy when both model loaders are mocked out."""
    whisper_patch = patch("whisper.load_model")
    diarizer_patch = patch("pyannote.audio.Pipeline.from_pretrained")
    with whisper_patch as load_model, diarizer_patch as from_pretrained:
        load_model.return_value = MagicMock()
        from_pretrained.return_value = MagicMock()
        assert pipeline.initialize_models("fake-token")
268
-
269
-
270
def test_initialize_models_fail(pipeline, caplog):
    """initialize_models is falsy and logs when whisper.load_model raises."""
    load_error = Exception("Model loading failed")
    with patch("whisper.load_model", side_effect=load_error):
        outcome = pipeline.initialize_models("fake-token")
        assert not outcome
        assert "Model initialization failed" in caplog.text
275
-
276
-
277
def test_process_file_ok(pipeline, tmp_dir):
    """process_file returns True with mocked diarization and whisper models.

    The mocked diarization result exposes two fake segments through
    ``itertracks`` so that iteration over more than one segment is covered.
    """
    first_segment = MagicMock(start=0.0, end=1.5)
    second_segment = MagicMock(start=1.5, end=2.5)

    pipeline.diarization_pipeline = MagicMock()
    # itertracks yields (segment, _, label) triples.
    tracks = [
        (first_segment, None, "SpeakerA"),
        (second_segment, None, "SpeakerB"),
    ]
    pipeline.diarization_pipeline.return_value.itertracks.return_value = tracks

    pipeline.whisper_model = MagicMock()
    pipeline.whisper_model.transcribe.return_value = {"text": "Hello world"}

    # An empty file is enough because both models are mocked.
    audio_file = tmp_dir / "fake.wav"
    audio_file.touch()

    succeeded = pipeline.process_file(audio_file)
    assert succeeded is True
    # The diarization pipeline must be called exactly once with the path string.
    pipeline.diarization_pipeline.assert_called_once_with(str(audio_file))
308
-
309
-
310
def test_process_file_exception(pipeline, tmp_dir, caplog):
    """process_file is falsy and logs the error when diarization raises."""
    failing_diarizer = MagicMock(side_effect=Exception("Boom!"))
    pipeline.diarization_pipeline = failing_diarizer

    audio_file = tmp_dir / "fake.wav"
    audio_file.touch()

    succeeded = pipeline.process_file(audio_file)
    assert not succeeded
    assert "Processing failed: Boom!" in caplog.text
322
-
323
-
324
- # -------------------------------------------
325
- # TEST: MAIN FUNCTION
326
- # -------------------------------------------
327
@pytest.mark.parametrize(
    "test_params",
    [
        {
            "cli_args": ["--audio", "fake.wav"],
            "stored_token": None,
            "user_input_sequence": [],
            "expected_exit_code": 1,
        },
        {
            "cli_args": ["--delete-token"],
            "stored_token": "some-token",
            "user_input_sequence": [],
            "expected_exit_code": 0,
        },
        {
            "cli_args": [],
            "stored_token": "token123",
            "user_input_sequence": ["\n", "non_existent.wav\n", "somefile.wav\n"],
            "expected_exit_code": 1,
        },
    ],
)
def test_main_general_scenarios(test_params, monkeypatch, token_manager, tmp_dir):
    """Run main() under several CLI/token/input combinations and check its exit code."""
    cli_args = test_params["cli_args"]
    stored_token = test_params["stored_token"]
    scripted_inputs = iter(test_params["user_input_sequence"])
    expected_exit_code = test_params["expected_exit_code"]

    # Replace the process arguments seen by main().
    monkeypatch.setattr(sys, "argv", ["audio_scribe.py", *cli_args])

    # The dependency check reports success only for the --delete-token scenario.
    deps_ok = "--delete-token" in cli_args
    monkeypatch.setattr(
        "audio_scribe.cli.DependencyManager.verify_dependencies",
        lambda: deps_ok,
    )

    # Seed the token store when the scenario specifies a token.
    if stored_token:
        token_manager.store_token(stored_token)

    # Prompts consume the scripted answers, then fall back to an empty string.
    monkeypatch.setattr("builtins.input", lambda _: next(scripted_inputs, ""))

    # main() gets the fixture's manager instead of constructing its own.
    monkeypatch.setattr("audio_scribe.cli.TokenManager", lambda: token_manager)

    # Only paths containing "somefile.wav" are reported as existing.
    monkeypatch.setattr(Path, "exists", lambda path: "somefile.wav" in str(path))

    # Capture the SystemExit code; it stays None if main() returns normally.
    exit_code = None
    try:
        main()
    except SystemExit as exc:
        exit_code = exc.code

    assert exit_code == expected_exit_code
392
-
393
def test_main_full_success(monkeypatch, tmp_dir, token_manager):
    """main() succeeds end to end with every collaborator mocked.

    Dependencies pass, a token is available, ``valid.wav`` is reported as
    existing, and the pipeline mock succeeds, so main() must either return
    normally or exit with code 0.
    """
    fake_pipeline = MagicMock()
    fake_pipeline.initialize_models.return_value = True
    fake_pipeline.process_file.return_value = True

    # Store the token and patch get_token so no input prompt is reached.
    token_manager.store_token("mytoken")

    monkeypatch.setattr(sys, "argv", ["audio_scribe.py", "--audio", "valid.wav"])
    monkeypatch.setattr(
        "audio_scribe.cli.DependencyManager.verify_dependencies", lambda: True
    )
    monkeypatch.setattr("audio_scribe.cli.TokenManager", lambda: token_manager)
    monkeypatch.setattr("audio_scribe.cli.get_token", lambda tm: "mytoken")
    # Only paths containing "valid.wav" are reported as existing.
    monkeypatch.setattr(Path, "exists", lambda path: "valid.wav" in str(path))
    # main() receives the mock whenever it builds a TranscriptionPipeline.
    monkeypatch.setattr(
        "audio_scribe.cli.TranscriptionPipeline", lambda cfg: fake_pipeline
    )

    exit_code = None
    try:
        main()
    except SystemExit as exc:
        exit_code = exc.code

    # Success is either a normal return or an explicit exit(0).
    assert exit_code is None or exit_code == 0
    fake_pipeline.initialize_models.assert_called_once()
    fake_pipeline.process_file.assert_called_once()
433
-
434
-
435
def test_main_show_warnings(monkeypatch, tmp_dir, token_manager):
    """main() succeeds when run with --show-warnings and mocked collaborators.

    Like the other main() tests in this module, ``audio_scribe.cli.TokenManager``
    is replaced with the ``token_manager`` fixture so main() does not build
    its own TokenManager while the test runs.
    """
    monkeypatch.setattr(
        sys, "argv", ["audio_scribe.py", "--show-warnings", "--audio", "valid.wav"]
    )

    # Dependencies pass.
    monkeypatch.setattr(
        "audio_scribe.cli.DependencyManager.verify_dependencies", lambda: True
    )

    # Use the fixture's manager and bypass the interactive token prompt.
    monkeypatch.setattr("audio_scribe.cli.TokenManager", lambda: token_manager)
    monkeypatch.setattr("audio_scribe.cli.get_token", lambda tm: "test-token")

    # Only paths containing "valid.wav" are reported as existing.
    def mock_exists(path):
        return "valid.wav" in str(path)

    monkeypatch.setattr(Path, "exists", mock_exists)

    # Pipeline mock that reports success for both stages.
    mock_pipeline = MagicMock()
    mock_pipeline.initialize_models.return_value = True
    mock_pipeline.process_file.return_value = True
    monkeypatch.setattr(
        "audio_scribe.cli.TranscriptionPipeline", lambda cfg: mock_pipeline
    )

    exit_code = None
    try:
        main()
    except SystemExit as e:
        exit_code = e.code

    # Success is either a normal return or an explicit exit(0).
    assert exit_code is None or exit_code == 0
    mock_pipeline.initialize_models.assert_called_once()
    mock_pipeline.process_file.assert_called_once()