slurpai 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- slurpai-0.1.0/.env.example +11 -0
- slurpai-0.1.0/.github/workflows/test.yml +31 -0
- slurpai-0.1.0/.gitignore +29 -0
- slurpai-0.1.0/CONTRIBUTING.md +37 -0
- slurpai-0.1.0/LICENSE +21 -0
- slurpai-0.1.0/PKG-INFO +143 -0
- slurpai-0.1.0/README.md +113 -0
- slurpai-0.1.0/pyproject.toml +55 -0
- slurpai-0.1.0/src/ingestible/__init__.py +1 -0
- slurpai-0.1.0/src/ingestible/cli.py +106 -0
- slurpai-0.1.0/src/ingestible/ffmpeg.py +65 -0
- slurpai-0.1.0/src/ingestible/log.py +23 -0
- slurpai-0.1.0/src/ingestible/process.py +98 -0
- slurpai-0.1.0/src/ingestible/transcribe.py +83 -0
- slurpai-0.1.0/tests/__init__.py +0 -0
- slurpai-0.1.0/tests/conftest.py +43 -0
- slurpai-0.1.0/tests/test_cli.py +38 -0
- slurpai-0.1.0/tests/test_ffmpeg.py +32 -0
- slurpai-0.1.0/tests/test_process.py +70 -0
- slurpai-0.1.0/tests/test_transcribe.py +62 -0
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# Required for OpenAI backend
|
|
2
|
+
OPENAI_API_KEY=sk-...
|
|
3
|
+
|
|
4
|
+
# Optional: default backend (openai or faster-whisper)
|
|
5
|
+
# INGESTIBLE_BACKEND=openai
|
|
6
|
+
|
|
7
|
+
# Optional: OpenAI model override
|
|
8
|
+
# OPENAI_WHISPER_MODEL=whisper-1
|
|
9
|
+
|
|
10
|
+
# Optional: local Whisper model size (base, small, medium, large)
|
|
11
|
+
# INGESTIBLE_WHISPER_MODEL=base
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
name: Tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Install ffmpeg
|
|
20
|
+
run: sudo apt-get update && sudo apt-get install -y ffmpeg
|
|
21
|
+
|
|
22
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
23
|
+
uses: actions/setup-python@v5
|
|
24
|
+
with:
|
|
25
|
+
python-version: ${{ matrix.python-version }}
|
|
26
|
+
|
|
27
|
+
- name: Install dependencies
|
|
28
|
+
run: pip install -e ".[dev]"
|
|
29
|
+
|
|
30
|
+
- name: Run tests
|
|
31
|
+
run: pytest -v
|
slurpai-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
dist/
|
|
6
|
+
build/
|
|
7
|
+
*.egg
|
|
8
|
+
|
|
9
|
+
# Virtual environments
|
|
10
|
+
.venv/
|
|
11
|
+
venv/
|
|
12
|
+
|
|
13
|
+
# Environment
|
|
14
|
+
.env
|
|
15
|
+
|
|
16
|
+
# IDE
|
|
17
|
+
.idea/
|
|
18
|
+
.vscode/
|
|
19
|
+
*.swp
|
|
20
|
+
*.swo
|
|
21
|
+
|
|
22
|
+
# OS
|
|
23
|
+
.DS_Store
|
|
24
|
+
Thumbs.db
|
|
25
|
+
|
|
26
|
+
# Test
|
|
27
|
+
.pytest_cache/
|
|
28
|
+
.coverage
|
|
29
|
+
htmlcov/
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Contributing to ingestible
|
|
2
|
+
|
|
3
|
+
Thanks for wanting to help. Here's how to get set up.
|
|
4
|
+
|
|
5
|
+
## Development setup
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
git clone https://github.com/grahamrowe82/ingestible.git
|
|
9
|
+
cd ingestible
|
|
10
|
+
python -m venv .venv
|
|
11
|
+
source .venv/bin/activate
|
|
12
|
+
pip install -e ".[dev]"
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
You'll also need [ffmpeg](https://ffmpeg.org/) installed.
|
|
16
|
+
|
|
17
|
+
## Running tests
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pytest -v
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Tests use ffmpeg to generate tiny test audio/video files — no API keys needed.
|
|
24
|
+
|
|
25
|
+
## Submitting changes
|
|
26
|
+
|
|
27
|
+
1. Fork the repo and create a branch
|
|
28
|
+
2. Make your changes
|
|
29
|
+
3. Run `pytest` and make sure everything passes
|
|
30
|
+
4. Open a pull request
|
|
31
|
+
|
|
32
|
+
## Reporting bugs
|
|
33
|
+
|
|
34
|
+
Open an issue at https://github.com/grahamrowe82/ingestible/issues with:
|
|
35
|
+
- What you ran (`ingest ...`)
|
|
36
|
+
- What happened (error message or unexpected output)
|
|
37
|
+
- Your OS and Python version
|
slurpai-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Graham Rowe
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
slurpai-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: slurpai
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Convert voice notes, videos, and audio files into AI-ready text and images
|
|
5
|
+
Project-URL: Repository, https://github.com/grahamrowe82/ingestible
|
|
6
|
+
Project-URL: Issues, https://github.com/grahamrowe82/ingestible/issues
|
|
7
|
+
Author-email: Graham Rowe <graham@phasetransitions.ai>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: ai,audio-to-text,cli,ffmpeg,openai,transcription,video-to-text,voice-notes,whisper
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
|
|
20
|
+
Classifier: Topic :: Text Processing
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Requires-Dist: click>=8.0
|
|
23
|
+
Requires-Dist: openai>=1.0
|
|
24
|
+
Requires-Dist: python-dotenv>=1.0
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
27
|
+
Provides-Extra: local
|
|
28
|
+
Requires-Dist: faster-whisper>=0.10; extra == 'local'
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
|
|
31
|
+
# ingestible
|
|
32
|
+
|
|
33
|
+
Convert voice notes, videos, and audio files into AI-ready text and images.
|
|
34
|
+
|
|
35
|
+
Consultants, researchers, and anyone who works with AI tools faces the same problem: clients and colleagues send voice notes, screen recordings, and video walkthroughs — but your AI workflow needs text and images. Ingestible bridges that gap with a single command.
|
|
36
|
+
|
|
37
|
+
## Quick start
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip install ingestible
|
|
41
|
+
export OPENAI_API_KEY=sk-...
|
|
42
|
+
ingest client-feedback.opus
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
That's it. You get a folder with `transcript.txt` and you're ready to feed it into whatever AI tool you're using.
|
|
46
|
+
|
|
47
|
+
## Install
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pip install ingestible
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
You also need [ffmpeg](https://ffmpeg.org/) on your PATH:
|
|
54
|
+
|
|
55
|
+
| OS | Command |
|
|
56
|
+
|----|---------|
|
|
57
|
+
| macOS | `brew install ffmpeg` |
|
|
58
|
+
| Ubuntu/Debian | `sudo apt install ffmpeg` |
|
|
59
|
+
| Windows | `choco install ffmpeg` or download from [ffmpeg.org](https://ffmpeg.org/download.html) |
|
|
60
|
+
|
|
61
|
+
## Usage
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
# Transcribe a voice note
|
|
65
|
+
ingest recording.opus
|
|
66
|
+
|
|
67
|
+
# Process a video (transcript + frame grabs every 15 seconds)
|
|
68
|
+
ingest feedback.mp4
|
|
69
|
+
|
|
70
|
+
# Batch process everything in a folder
|
|
71
|
+
ingest *.opus *.mp4
|
|
72
|
+
|
|
73
|
+
# Grab frames more frequently
|
|
74
|
+
ingest --frame-interval 5 demo.mp4
|
|
75
|
+
|
|
76
|
+
# Use local Whisper instead of OpenAI API
|
|
77
|
+
pip install ingestible[local]
|
|
78
|
+
ingest --backend faster-whisper recording.opus
|
|
79
|
+
|
|
80
|
+
# Preview what would be processed
|
|
81
|
+
ingest --dry-run *.opus
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Output
|
|
85
|
+
|
|
86
|
+
Each file produces a folder alongside it:
|
|
87
|
+
|
|
88
|
+
```
|
|
89
|
+
recording/
|
|
90
|
+
├── transcript.txt # Plain text transcription
|
|
91
|
+
├── frames/ # Video frame grabs (video only)
|
|
92
|
+
│ ├── frame_001.jpg
|
|
93
|
+
│ ├── frame_002.jpg
|
|
94
|
+
│ └── ...
|
|
95
|
+
└── process.log # Timestamped processing log
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Re-running the same command skips already-completed files (idempotent).
|
|
99
|
+
|
|
100
|
+
## Privacy notice
|
|
101
|
+
|
|
102
|
+
**By default, ingestible sends your audio to [OpenAI's Whisper API](https://platform.openai.com/docs/guides/speech-to-text) for transcription.** Your audio is transmitted to OpenAI's servers. Review [OpenAI's data usage policy](https://openai.com/policies/api-data-usage-policies) to understand how your data is handled.
|
|
103
|
+
|
|
104
|
+
If you need fully local, private transcription — no data leaves your machine:
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
pip install ingestible[local]
|
|
108
|
+
ingest --backend faster-whisper recording.opus
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
This uses [faster-whisper](https://github.com/SYSTRAN/faster-whisper) running entirely on your CPU. It's slower but nothing leaves your computer.
|
|
112
|
+
|
|
113
|
+
## Configuration
|
|
114
|
+
|
|
115
|
+
Set `OPENAI_API_KEY` in your environment or a `.env` file in the current directory.
|
|
116
|
+
|
|
117
|
+
| Variable | Default | Description |
|
|
118
|
+
|----------|---------|-------------|
|
|
119
|
+
| `OPENAI_API_KEY` | — | Required for OpenAI backend |
|
|
120
|
+
| `INGESTIBLE_BACKEND` | `openai` | Default backend (`openai` or `faster-whisper`) |
|
|
121
|
+
| `OPENAI_WHISPER_MODEL` | `whisper-1` | OpenAI model to use |
|
|
122
|
+
| `INGESTIBLE_WHISPER_MODEL` | `base` | Local Whisper model size (`base`, `small`, `medium`, `large`) |
|
|
123
|
+
|
|
124
|
+
## Supported formats
|
|
125
|
+
|
|
126
|
+
**Audio:** `.opus`, `.m4a`, `.ogg`, `.mp3`, `.wav`
|
|
127
|
+
|
|
128
|
+
**Video:** `.mp4`, `.mkv`, `.mov`, `.webm`
|
|
129
|
+
|
|
130
|
+
All formats are normalised to MP3 before transcription — this ensures consistent behaviour regardless of input format.
|
|
131
|
+
|
|
132
|
+
## Requirements
|
|
133
|
+
|
|
134
|
+
- Python 3.10+
|
|
135
|
+
- [ffmpeg](https://ffmpeg.org/) on your PATH
|
|
136
|
+
|
|
137
|
+
## Contributing
|
|
138
|
+
|
|
139
|
+
Found a bug or want to add a format? See [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
140
|
+
|
|
141
|
+
## License
|
|
142
|
+
|
|
143
|
+
MIT
|
slurpai-0.1.0/README.md
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# ingestible
|
|
2
|
+
|
|
3
|
+
Convert voice notes, videos, and audio files into AI-ready text and images.
|
|
4
|
+
|
|
5
|
+
Consultants, researchers, and anyone who works with AI tools faces the same problem: clients and colleagues send voice notes, screen recordings, and video walkthroughs — but your AI workflow needs text and images. Ingestible bridges that gap with a single command.
|
|
6
|
+
|
|
7
|
+
## Quick start
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install ingestible
|
|
11
|
+
export OPENAI_API_KEY=sk-...
|
|
12
|
+
ingest client-feedback.opus
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
That's it. You get a folder with `transcript.txt` and you're ready to feed it into whatever AI tool you're using.
|
|
16
|
+
|
|
17
|
+
## Install
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install slurpai
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
You also need [ffmpeg](https://ffmpeg.org/) on your PATH:
|
|
24
|
+
|
|
25
|
+
| OS | Command |
|
|
26
|
+
|----|---------|
|
|
27
|
+
| macOS | `brew install ffmpeg` |
|
|
28
|
+
| Ubuntu/Debian | `sudo apt install ffmpeg` |
|
|
29
|
+
| Windows | `choco install ffmpeg` or download from [ffmpeg.org](https://ffmpeg.org/download.html) |
|
|
30
|
+
|
|
31
|
+
## Usage
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
# Transcribe a voice note
|
|
35
|
+
ingest recording.opus
|
|
36
|
+
|
|
37
|
+
# Process a video (transcript + frame grabs every 15 seconds)
|
|
38
|
+
ingest feedback.mp4
|
|
39
|
+
|
|
40
|
+
# Batch process everything in a folder
|
|
41
|
+
ingest *.opus *.mp4
|
|
42
|
+
|
|
43
|
+
# Grab frames more frequently
|
|
44
|
+
ingest --frame-interval 5 demo.mp4
|
|
45
|
+
|
|
46
|
+
# Use local Whisper instead of OpenAI API
|
|
47
|
+
pip install ingestible[local]
|
|
48
|
+
ingest --backend faster-whisper recording.opus
|
|
49
|
+
|
|
50
|
+
# Preview what would be processed
|
|
51
|
+
ingest --dry-run *.opus
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Output
|
|
55
|
+
|
|
56
|
+
Each file produces a folder alongside it:
|
|
57
|
+
|
|
58
|
+
```
|
|
59
|
+
recording/
|
|
60
|
+
├── transcript.txt # Plain text transcription
|
|
61
|
+
├── frames/ # Video frame grabs (video only)
|
|
62
|
+
│ ├── frame_001.jpg
|
|
63
|
+
│ ├── frame_002.jpg
|
|
64
|
+
│ └── ...
|
|
65
|
+
└── process.log # Timestamped processing log
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Re-running the same command skips already-completed files (idempotent).
|
|
69
|
+
|
|
70
|
+
## Privacy notice
|
|
71
|
+
|
|
72
|
+
**By default, ingestible sends your audio to [OpenAI's Whisper API](https://platform.openai.com/docs/guides/speech-to-text) for transcription.** Your audio is transmitted to OpenAI's servers. Review [OpenAI's data usage policy](https://openai.com/policies/api-data-usage-policies) to understand how your data is handled.
|
|
73
|
+
|
|
74
|
+
If you need fully local, private transcription — no data leaves your machine:
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
pip install slurpai[local]
|
|
78
|
+
ingest --backend faster-whisper recording.opus
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
This uses [faster-whisper](https://github.com/SYSTRAN/faster-whisper) running entirely on your CPU. It's slower but nothing leaves your computer.
|
|
82
|
+
|
|
83
|
+
## Configuration
|
|
84
|
+
|
|
85
|
+
Set `OPENAI_API_KEY` in your environment or a `.env` file in the current directory.
|
|
86
|
+
|
|
87
|
+
| Variable | Default | Description |
|
|
88
|
+
|----------|---------|-------------|
|
|
89
|
+
| `OPENAI_API_KEY` | — | Required for OpenAI backend |
|
|
90
|
+
| `INGESTIBLE_BACKEND` | `openai` | Default backend (`openai` or `faster-whisper`) |
|
|
91
|
+
| `OPENAI_WHISPER_MODEL` | `whisper-1` | OpenAI model to use |
|
|
92
|
+
| `INGESTIBLE_WHISPER_MODEL` | `base` | Local Whisper model size (`base`, `small`, `medium`, `large`) |
|
|
93
|
+
|
|
94
|
+
## Supported formats
|
|
95
|
+
|
|
96
|
+
**Audio:** `.opus`, `.m4a`, `.ogg`, `.mp3`, `.wav`
|
|
97
|
+
|
|
98
|
+
**Video:** `.mp4`, `.mkv`, `.mov`, `.webm`
|
|
99
|
+
|
|
100
|
+
All formats are normalised to MP3 before transcription — this ensures consistent behaviour regardless of input format.
|
|
101
|
+
|
|
102
|
+
## Requirements
|
|
103
|
+
|
|
104
|
+
- Python 3.10+
|
|
105
|
+
- [ffmpeg](https://ffmpeg.org/) on your PATH
|
|
106
|
+
|
|
107
|
+
## Contributing
|
|
108
|
+
|
|
109
|
+
Found a bug or want to add a format? See [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
110
|
+
|
|
111
|
+
## License
|
|
112
|
+
|
|
113
|
+
MIT
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "slurpai"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Convert voice notes, videos, and audio files into AI-ready text and images"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Graham Rowe", email = "graham@phasetransitions.ai" },
|
|
14
|
+
]
|
|
15
|
+
keywords = [
|
|
16
|
+
"whisper", "transcription", "voice-notes", "audio-to-text",
|
|
17
|
+
"video-to-text", "ffmpeg", "openai", "ai", "cli",
|
|
18
|
+
]
|
|
19
|
+
classifiers = [
|
|
20
|
+
"Development Status :: 4 - Beta",
|
|
21
|
+
"Environment :: Console",
|
|
22
|
+
"Intended Audience :: Developers",
|
|
23
|
+
"License :: OSI Approved :: MIT License",
|
|
24
|
+
"Programming Language :: Python :: 3.10",
|
|
25
|
+
"Programming Language :: Python :: 3.11",
|
|
26
|
+
"Programming Language :: Python :: 3.12",
|
|
27
|
+
"Programming Language :: Python :: 3.13",
|
|
28
|
+
"Topic :: Multimedia :: Sound/Audio :: Speech",
|
|
29
|
+
"Topic :: Text Processing",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
dependencies = [
|
|
33
|
+
"click>=8.0",
|
|
34
|
+
"python-dotenv>=1.0",
|
|
35
|
+
"openai>=1.0",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
[project.urls]
|
|
39
|
+
Repository = "https://github.com/grahamrowe82/ingestible"
|
|
40
|
+
Issues = "https://github.com/grahamrowe82/ingestible/issues"
|
|
41
|
+
|
|
42
|
+
[project.optional-dependencies]
|
|
43
|
+
local = ["faster-whisper>=0.10"]
|
|
44
|
+
dev = [
|
|
45
|
+
"pytest>=7.0",
|
|
46
|
+
]
|
|
47
|
+
|
|
48
|
+
[project.scripts]
|
|
49
|
+
ingest = "ingestible.cli:ingest"
|
|
50
|
+
|
|
51
|
+
[tool.hatch.build.targets.wheel]
|
|
52
|
+
packages = ["src/ingestible"]
|
|
53
|
+
|
|
54
|
+
[tool.pytest.ini_options]
|
|
55
|
+
testpaths = ["tests"]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""CLI entry point for ingestible."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
import click
|
|
10
|
+
from dotenv import load_dotenv
|
|
11
|
+
|
|
12
|
+
from . import __version__
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@click.command()
@click.argument("files", nargs=-1, required=True, type=click.Path(exists=True))
@click.option(
    "-b",
    "--backend",
    type=click.Choice(["openai", "faster-whisper"]),
    default=None,
    help="Transcription backend (default: env or openai)",
)
@click.option(
    "-f",
    "--frame-interval",
    type=int,
    default=15,
    help="Seconds between video frame grabs (default: 15)",
)
@click.option(
    "-o",
    "--output-dir",
    type=click.Path(),
    default=None,
    help="Base output directory (default: next to input file)",
)
@click.option(
    "-l",
    "--language",
    type=str,
    default="en",
    help="Language hint for transcription (default: en)",
)
@click.option("--dry-run", is_flag=True, help="Show what would be processed")
@click.version_option(version=__version__)
def ingest(
    files: tuple[str, ...],
    backend: str | None,
    frame_interval: int,
    output_dir: str | None,
    language: str,
    dry_run: bool,
) -> None:
    """Convert voice notes, audio files, and videos into text and images.

    Filters the given FILES to supported extensions, then runs each through
    the process pipeline. Exits with status 1 when ffmpeg is missing or no
    supported file was given. With --dry-run, only lists what would run.
    """
    # Load .env before reading any environment variables, so a project-local
    # .env can supply OPENAI_API_KEY / INGESTIBLE_BACKEND.
    load_dotenv()

    # CLI flag wins; otherwise the INGESTIBLE_BACKEND env var; otherwise "openai".
    backend = backend or os.getenv("INGESTIBLE_BACKEND", "openai")

    # Deferred imports keep `ingest --help` fast and independent of the
    # heavier pipeline modules.
    from .ffmpeg import check_ffmpeg
    from .process import SUPPORTED_EXTENSIONS, process_file

    if not check_ffmpeg():
        click.echo("Error: ffmpeg not found. Install it: brew install ffmpeg", err=True)
        sys.exit(1)

    output_base = Path(output_dir) if output_dir else None
    paths = [Path(f) for f in files]

    # Filter to supported formats
    supported = []
    for p in paths:
        if p.suffix.lower() in SUPPORTED_EXTENSIONS:
            supported.append(p)
        else:
            click.echo(f"Skipping unsupported format: {p.name}")

    if not supported:
        click.echo("No supported files to process.")
        sys.exit(1)

    if dry_run:
        click.echo(f"Would process {len(supported)} file(s) with backend={backend}:")
        for p in supported:
            click.echo(f"  {p}")
        return

    # Process each file independently; one failure must not stop the batch.
    success = 0
    failed = 0
    for p in supported:
        try:
            result = process_file(
                p,
                backend=backend,
                frame_interval=frame_interval,
                output_dir=output_base,
                language=language,
            )
            click.echo(f"Done: {result}")
            success += 1
        except Exception as e:
            click.echo(f"Failed: {p.name} — {e}", err=True)
            failed += 1

    # Summary line only makes sense for a multi-file batch.
    if len(supported) > 1:
        click.echo(f"\n{success} succeeded, {failed} failed out of {len(supported)}")
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""FFmpeg wrappers for audio extraction and frame capture."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import shutil
|
|
6
|
+
import subprocess
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def check_ffmpeg() -> bool:
    """Report whether the ffmpeg executable can be located on PATH."""
    located = shutil.which("ffmpeg")
    return located is not None
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def extract_audio(input_path: Path, output_path: Path) -> Path:
    """Extract audio from video as compressed MP3 for API upload.

    Uses mono, 16kHz, 64kbps — compresses a 10-min video from ~60MB to ~5MB,
    staying under the 25MB Whisper API limit.

    Args:
        input_path: Source audio or video file.
        output_path: Destination MP3 path (overwritten if it exists).

    Returns:
        `output_path`, for call chaining.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status.
    """
    # Bug fix: the original placed "-loglevel warning" AFTER the output file,
    # where ffmpeg treats it as a trailing option and ignores it. Global
    # options must precede the files they apply to.
    cmd = [
        "ffmpeg", "-y", "-loglevel", "warning",
        "-i", str(input_path),
        "-vn", "-ac", "1", "-ar", "16000", "-b:a", "64k",
        str(output_path),
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"ffmpeg audio extraction failed: {result.stderr.strip()}")
    return output_path
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def extract_frames(input_path: Path, output_dir: Path, *, interval: int = 15) -> int:
    """Extract video frames every `interval` seconds as JPEG.

    Args:
        input_path: Source video file.
        output_dir: Directory for frame_NNN.jpg files (created if missing).
        interval: Seconds between captured frames.

    Returns:
        The number of frames extracted (counted from the output directory).

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    pattern = str(output_dir / "frame_%03d.jpg")

    # Bug fixes: "-loglevel warning" originally trailed the output pattern,
    # where ffmpeg ignores it; and without "-y" ffmpeg prompts on stdin when
    # a frame file already exists (e.g. after an interrupted run), which
    # hangs a non-interactive invocation.
    cmd = [
        "ffmpeg", "-y", "-loglevel", "warning",
        "-i", str(input_path),
        "-vf", f"fps=1/{interval}",
        "-q:v", "2",
        pattern,
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"ffmpeg frame extraction failed: {result.stderr.strip()}")

    return len(list(output_dir.glob("frame_*.jpg")))
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def has_video_stream(input_path: Path) -> bool:
    """Check if file contains a video stream using ffprobe."""
    probe_cmd = [
        "ffprobe", "-v", "error",
        "-select_streams", "v",
        "-show_entries", "stream=codec_type",
        "-of", "csv=p=0",
        str(input_path),
    ]
    probe = subprocess.run(probe_cmd, capture_output=True, text=True)
    # ffprobe prints one codec_type per selected stream; any "video" line
    # means the container carries at least one video stream.
    return "video" in probe.stdout.lower()
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Simple dual logger — writes to stdout and a process.log file."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ProcessLog:
    """Logger that writes timestamped messages to both stdout and a file."""

    def __init__(self, log_path: Path):
        # Create the containing directory up front so every later append
        # can simply open the file.
        self.log_path = log_path
        self.log_path.parent.mkdir(parents=True, exist_ok=True)

    def log(self, message: str) -> None:
        """Echo *message* with an HH:MM:SS stamp and append it to the log file."""
        stamp = datetime.now().strftime("%H:%M:%S")
        entry = f"[{stamp}] {message}"
        print(entry)
        with self.log_path.open("a", encoding="utf-8") as handle:
            handle.write(f"{entry}\n")

    def skip(self, message: str) -> None:
        """Record a skipped step, prefixed with "[skip]"."""
        self.log(f"[skip] {message}")
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Core orchestrator — processes a single file through the ingest pipeline."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from .ffmpeg import extract_audio, extract_frames, has_video_stream
|
|
8
|
+
from .log import ProcessLog
|
|
9
|
+
from .transcribe import transcribe
|
|
10
|
+
|
|
11
|
+
AUDIO_EXTENSIONS = {".opus", ".m4a", ".ogg", ".mp3", ".wav"}
|
|
12
|
+
VIDEO_EXTENSIONS = {".mp4", ".mkv", ".mov", ".webm"}
|
|
13
|
+
SUPPORTED_EXTENSIONS = AUDIO_EXTENSIONS | VIDEO_EXTENSIONS
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def process_file(
    input_path: Path,
    *,
    backend: str,
    frame_interval: int = 15,
    output_dir: Path | None = None,
    language: str = "en",
) -> Path:
    """Process a single audio/video file. Returns the output directory.

    Pipeline: extract audio to MP3, transcribe it to transcript.txt, and for
    video inputs grab frames into frames/. Each step is skipped when its
    output already exists, making re-runs resumable/idempotent.

    Args:
        input_path: Source file; must have a supported extension.
        backend: Transcription backend name passed to `transcribe`.
        frame_interval: Seconds between grabbed video frames.
        output_dir: Base directory for output; defaults to the input's parent.
        language: Language hint forwarded to the transcription backend.

    Raises:
        ValueError: If the file extension is not in SUPPORTED_EXTENSIONS.
    """
    input_path = input_path.resolve()
    ext = input_path.suffix.lower()

    if ext not in SUPPORTED_EXTENSIONS:
        raise ValueError(f"Unsupported format: {ext}")

    out = _resolve_output_dir(input_path, output_dir)
    out.mkdir(parents=True, exist_ok=True)

    log = ProcessLog(out / "process.log")
    # NOTE(review): version is hard-coded here rather than read from
    # ingestible.__version__ — will drift on the next release; confirm.
    log.log(f"=== Ingestible v0.1.0 ===")
    log.log(f"Input: {input_path}")
    log.log(f"Output: {out}/")
    log.log(f"Backend: {backend}")

    transcript_path = out / "transcript.txt"
    # Video handling needs both a video extension AND an actual video stream
    # (e.g. an .mp4 that is audio-only gets no frame pass).
    is_video = ext in VIDEO_EXTENSIONS and has_video_stream(input_path)

    # --- Step 1: Transcribe ---
    # Always convert to MP3 first — normalises all formats into one known-good
    # path. This matches the proven bash script behaviour: no conditionals,
    # no format-compatibility surprises.
    if transcript_path.exists():
        log.skip(f"Transcript already exists: {transcript_path.name}")
    else:
        audio_tmp = out / "audio.mp3"
        # A leftover audio.mp3 from an interrupted run is reused as-is.
        if audio_tmp.exists():
            log.skip(f"Audio already extracted: {audio_tmp.name}")
        else:
            log.log("Extracting audio...")
            extract_audio(input_path, audio_tmp)
            log.log(f"Audio extracted: {_file_size(audio_tmp)}")

        log.log(f"Transcribing with {backend}...")
        text = transcribe(audio_tmp, backend=backend, language=language)
        transcript_path.write_text(text, encoding="utf-8")
        word_count = len(text.split())
        log.log(f"Transcript: {word_count} words")

        # Clean up intermediate audio
        audio_tmp.unlink(missing_ok=True)

    # --- Step 2: Extract frames (video only) ---
    frames_dir = out / "frames"
    if not is_video:
        log.skip("Audio-only file — no frames to extract")
    elif list(frames_dir.glob("frame_*.jpg")):
        existing = len(list(frames_dir.glob("frame_*.jpg")))
        log.skip(f"Frames already exist: {existing} frames")
    else:
        log.log(f"Extracting frames every {frame_interval}s...")
        count = extract_frames(input_path, frames_dir, interval=frame_interval)
        log.log(f"Extracted {count} frames")

    log.log("=== Done ===")
    return out
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _resolve_output_dir(input_path: Path, output_dir: Path | None) -> Path:
|
|
84
|
+
"""Derive output directory: <parent>/<stem>/ or <output_dir>/<stem>/."""
|
|
85
|
+
stem = input_path.stem
|
|
86
|
+
if output_dir:
|
|
87
|
+
return output_dir / stem
|
|
88
|
+
return input_path.parent / stem
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _file_size(path: Path) -> str:
|
|
92
|
+
"""Human-readable file size."""
|
|
93
|
+
size = path.stat().st_size
|
|
94
|
+
for unit in ("B", "KB", "MB", "GB"):
|
|
95
|
+
if size < 1024:
|
|
96
|
+
return f"{size:.1f} {unit}"
|
|
97
|
+
size /= 1024
|
|
98
|
+
return f"{size:.1f} TB"
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Transcription backends — OpenAI Whisper API and faster-whisper local."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def transcribe(audio_path: Path, *, backend: str, language: str = "en") -> str:
    """Transcribe an audio file, returning the text.

    Routes to the appropriate backend based on the `backend` argument.
    """
    # Guard-clause dispatch: each known backend returns immediately,
    # anything else falls through to the error.
    if backend == "faster-whisper":
        return _transcribe_faster_whisper(audio_path, language=language)
    if backend == "openai":
        return _transcribe_openai(audio_path, language=language)
    raise ValueError(f"Unknown backend: {backend!r}. Use 'openai' or 'faster-whisper'.")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _extract_text(payload: Any) -> str:
|
|
24
|
+
"""Defensively extract text from an OpenAI transcription response."""
|
|
25
|
+
text = getattr(payload, "text", None)
|
|
26
|
+
if isinstance(text, str) and text.strip():
|
|
27
|
+
return text.strip()
|
|
28
|
+
if isinstance(payload, dict):
|
|
29
|
+
candidate = payload.get("text")
|
|
30
|
+
if isinstance(candidate, str) and candidate.strip():
|
|
31
|
+
return candidate.strip()
|
|
32
|
+
raise ValueError("Transcription response missing text output")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _transcribe_openai(audio_path: Path, *, language: str) -> str:
|
|
36
|
+
"""Transcribe using the OpenAI Whisper API via the SDK."""
|
|
37
|
+
try:
|
|
38
|
+
from openai import OpenAI
|
|
39
|
+
except ImportError:
|
|
40
|
+
raise ImportError(
|
|
41
|
+
"OpenAI backend requires the openai package. "
|
|
42
|
+
"Install with: pip install ingestible[openai]"
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
api_key = os.getenv("OPENAI_API_KEY")
|
|
46
|
+
if not api_key:
|
|
47
|
+
raise RuntimeError(
|
|
48
|
+
"OPENAI_API_KEY not set. Add it to your .env file or environment."
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
client = OpenAI()
|
|
52
|
+
model = os.getenv("OPENAI_WHISPER_MODEL", "whisper-1").strip() or "whisper-1"
|
|
53
|
+
|
|
54
|
+
with audio_path.open("rb") as f:
|
|
55
|
+
response = client.audio.transcriptions.create(
|
|
56
|
+
file=f,
|
|
57
|
+
model=model,
|
|
58
|
+
language=language,
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
return _extract_text(response)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _transcribe_faster_whisper(audio_path: Path, *, language: str) -> str:
    """Transcribe ``audio_path`` locally using faster-whisper on CPU.

    Args:
        audio_path: Audio file to transcribe.
        language: Language hint forwarded to the model.

    Returns:
        The transcript text, stripped of surrounding whitespace.

    Raises:
        ImportError: If the optional ``faster-whisper`` package is missing.
        ValueError: If the model produced no text.
    """
    try:
        from faster_whisper import WhisperModel
    except ImportError as err:
        # Chain the original error so the underlying missing-module detail
        # is preserved in the traceback.
        raise ImportError(
            "Local backend requires faster-whisper. "
            "Install with: pip install ingestible[local]"
        ) from err

    model_size = os.getenv("INGESTIBLE_WHISPER_MODEL", "base").strip() or "base"
    # int8 keeps memory usage modest while remaining CPU-only.
    model = WhisperModel(model_size, device="cpu", compute_type="int8")

    segments, _info = model.transcribe(str(audio_path), language=language)
    # Join per-segment text; strip once instead of re-stripping on return.
    text = " ".join(segment.text.strip() for segment in segments).strip()

    if not text:
        raise ValueError("Transcription produced no text")

    return text
|
|
File without changes
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""Shared test fixtures."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import subprocess
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import pytest
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@pytest.fixture
def sample_audio(tmp_path: Path) -> Path:
    """Generate one second of silent MP3 audio for tests."""
    target = tmp_path / "test.mp3"
    command = [
        "ffmpeg", "-y",
        "-f", "lavfi", "-i", "anullsrc=r=16000:cl=mono",
        "-t", "1", "-q:a", "9",
        str(target),
    ]
    subprocess.run(command, capture_output=True, check=True)
    return target
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@pytest.fixture
def sample_video(tmp_path: Path) -> Path:
    """Generate a three-second test-pattern video with a silent audio track."""
    target = tmp_path / "test.mp4"
    command = [
        "ffmpeg", "-y",
        "-f", "lavfi", "-i", "testsrc=duration=3:size=320x240:rate=1",
        "-f", "lavfi", "-i", "anullsrc=r=16000:cl=mono",
        "-t", "3", "-shortest",
        str(target),
    ]
    subprocess.run(command, capture_output=True, check=True)
    return target
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Tests for the CLI entry point."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from click.testing import CliRunner
|
|
8
|
+
|
|
9
|
+
from ingestible.cli import ingest
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_help():
    """--help exits cleanly and shows the command description."""
    result = CliRunner().invoke(ingest, ["--help"])
    assert result.exit_code == 0
    assert "Convert voice notes" in result.output
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_version():
    """--version exits cleanly and reports the package version."""
    result = CliRunner().invoke(ingest, ["--version"])
    assert result.exit_code == 0
    assert "0.1.0" in result.output
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def test_dry_run(sample_audio: Path):
    """--dry-run reports the file count without processing anything."""
    result = CliRunner().invoke(ingest, ["--dry-run", str(sample_audio)])
    assert result.exit_code == 0
    assert "Would process 1 file(s)" in result.output
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_unsupported_file_skipped(tmp_path: Path):
    """Files with unknown extensions are skipped with a message."""
    unsupported = tmp_path / "notes.txt"
    unsupported.write_text("hello")
    result = CliRunner().invoke(ingest, [str(unsupported)])
    assert "Skipping unsupported format" in result.output
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Tests for the ffmpeg module."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from ingestible.ffmpeg import check_ffmpeg, extract_audio, extract_frames, has_video_stream
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_check_ffmpeg():
    """The ffmpeg binary must be discoverable for the rest of the suite."""
    available = check_ffmpeg()
    assert available is True
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_has_video_stream_with_video(sample_video: Path):
    """A video file is detected as carrying a video stream."""
    detected = has_video_stream(sample_video)
    assert detected is True
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_has_video_stream_with_audio(sample_audio: Path):
    """An audio-only file is detected as having no video stream."""
    detected = has_video_stream(sample_audio)
    assert detected is False
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_extract_audio(sample_video: Path, tmp_path: Path):
    """Audio extracted from video lands at the requested path and is non-empty."""
    destination = tmp_path / "out.mp3"
    returned = extract_audio(sample_video, destination)
    assert returned == destination
    assert destination.exists()
    assert destination.stat().st_size > 0
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_extract_frames(sample_video: Path, tmp_path: Path):
    """Frame extraction writes exactly the reported number of JPEG files."""
    out_dir = tmp_path / "frames"
    reported = extract_frames(sample_video, out_dir, interval=1)
    assert reported >= 1
    written = list(out_dir.glob("frame_*.jpg"))
    assert len(written) == reported
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Tests for the process orchestrator."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from unittest.mock import patch
|
|
7
|
+
|
|
8
|
+
from ingestible.process import SUPPORTED_EXTENSIONS, process_file
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def test_supported_extensions_include_common_formats():
    """Sanity-check the formats users most commonly feed the tool."""
    for ext in (".opus", ".m4a", ".mp4", ".mp3"):
        assert ext in SUPPORTED_EXTENSIONS
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def test_process_audio_file(sample_audio: Path):
    """Process an audio file with mocked transcription.

    Verifies the output directory layout: transcript, log, and no frames
    for an audio-only input.
    """
    with patch("ingestible.process.transcribe", return_value="Hello from the test"):
        out = process_file(sample_audio, backend="openai")

    assert out.is_dir()
    transcript = out / "transcript.txt"
    assert transcript.exists()
    assert transcript.read_text() == "Hello from the test"

    log = out / "process.log"
    assert log.exists()
    assert "Done" in log.read_text()

    # Audio-only input: no frames should have been extracted. (Rewritten
    # from `assert X if C else True`, whose precedence-dependent conditional
    # expression obscured the intended check.)
    frames = out / "frames"
    if frames.exists():
        assert not list(frames.glob("frame_*.jpg"))
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_process_video_file(sample_video: Path):
    """A video input yields both a transcript and extracted frames."""
    with patch("ingestible.process.transcribe", return_value="Video transcript here"):
        result_dir = process_file(sample_video, backend="openai", frame_interval=1)

    assert result_dir.is_dir()

    transcript_path = result_dir / "transcript.txt"
    assert transcript_path.exists()
    assert transcript_path.read_text() == "Video transcript here"

    frames_dir = result_dir / "frames"
    assert frames_dir.is_dir()
    assert len(list(frames_dir.glob("frame_*.jpg"))) >= 1
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def test_idempotent_skip(sample_audio: Path):
    """A second run over the same input must not transcribe again."""
    with patch("ingestible.process.transcribe", return_value="First run") as first:
        process_file(sample_audio, backend="openai")
        assert first.call_count == 1

    with patch("ingestible.process.transcribe", return_value="Second run") as second:
        process_file(sample_audio, backend="openai")
        # Existing output short-circuits the pipeline, so no new call.
        assert second.call_count == 0
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def test_unsupported_format(tmp_path: Path):
    """Unsupported extensions raise ValueError."""
    bad_file = tmp_path / "notes.txt"
    bad_file.write_text("hello")

    import pytest  # kept local, mirroring the file's lazy pytest usage

    with pytest.raises(ValueError, match="Unsupported format"):
        process_file(bad_file, backend="openai")
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""Tests for the transcription module (mocked — no API calls)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from unittest.mock import MagicMock, patch
|
|
7
|
+
|
|
8
|
+
import pytest
|
|
9
|
+
|
|
10
|
+
from ingestible.transcribe import _extract_text, transcribe
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TestExtractText:
    """Unit tests for the defensive transcript-text extraction helper."""

    def test_from_object_attribute(self):
        response = MagicMock(text="Hello world")
        assert _extract_text(response) == "Hello world"

    def test_from_dict(self):
        assert _extract_text({"text": "Hello world"}) == "Hello world"

    def test_strips_whitespace(self):
        assert _extract_text({"text": " Hello world "}) == "Hello world"

    def test_raises_on_empty(self):
        with pytest.raises(ValueError, match="missing text"):
            _extract_text({"text": ""})

    def test_raises_on_missing(self):
        with pytest.raises(ValueError, match="missing text"):
            _extract_text({})
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class TestTranscribeOpenAI:
    """Tests for the OpenAI dispatch path (fully mocked — no API calls)."""

    def test_calls_api(self, sample_audio: Path, monkeypatch):
        """transcribe() routes the 'openai' backend to _transcribe_openai.

        The previous version also mocked the OpenAI client class (twice),
        but those mocks were shadowed by patching _transcribe_openai itself,
        so only backend dispatch is — and was — actually exercised.
        """
        monkeypatch.setenv("OPENAI_API_KEY", "sk-test")

        with patch(
            "ingestible.transcribe._transcribe_openai",
            return_value="This is the transcript",
        ) as mock_backend:
            result = transcribe(sample_audio, backend="openai")

        assert result == "This is the transcript"
        mock_backend.assert_called_once()

    def test_raises_without_api_key(self, sample_audio: Path, monkeypatch):
        """Without OPENAI_API_KEY the OpenAI backend fails fast."""
        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
        with pytest.raises(RuntimeError, match="OPENAI_API_KEY"):
            transcribe(sample_audio, backend="openai")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def test_unknown_backend(sample_audio: Path):
    """An unrecognized backend name raises ValueError."""
    with pytest.raises(ValueError) as excinfo:
        transcribe(sample_audio, backend="nonexistent")
    assert "Unknown backend" in str(excinfo.value)
|