asub 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- asub-1.0.0/LICENSE +21 -0
- asub-1.0.0/PKG-INFO +256 -0
- asub-1.0.0/README.md +226 -0
- asub-1.0.0/pyproject.toml +56 -0
- asub-1.0.0/setup.cfg +4 -0
- asub-1.0.0/src/asub/__init__.py +3 -0
- asub-1.0.0/src/asub/__main__.py +7 -0
- asub-1.0.0/src/asub/cli.py +206 -0
- asub-1.0.0/src/asub/progress.py +65 -0
- asub-1.0.0/src/asub/subtitle.py +135 -0
- asub-1.0.0/src/asub/transcriber.py +174 -0
- asub-1.0.0/src/asub/translator.py +120 -0
- asub-1.0.0/src/asub.egg-info/PKG-INFO +256 -0
- asub-1.0.0/src/asub.egg-info/SOURCES.txt +18 -0
- asub-1.0.0/src/asub.egg-info/dependency_links.txt +1 -0
- asub-1.0.0/src/asub.egg-info/entry_points.txt +2 -0
- asub-1.0.0/src/asub.egg-info/requires.txt +7 -0
- asub-1.0.0/src/asub.egg-info/top_level.txt +1 -0
- asub-1.0.0/tests/test_cli.py +36 -0
- asub-1.0.0/tests/test_subtitle.py +95 -0
asub-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 asub contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
asub-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: asub
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Generate and translate subtitles from audio/video files using Whisper.
|
|
5
|
+
Author: asub contributors
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/simoneraffaelli/subtitle-generator
|
|
8
|
+
Project-URL: Repository, https://github.com/simoneraffaelli/subtitle-generator
|
|
9
|
+
Project-URL: Issues, https://github.com/simoneraffaelli/subtitle-generator/issues
|
|
10
|
+
Keywords: subtitles,whisper,transcription,translation,srt,vtt
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
|
|
19
|
+
Classifier: Topic :: Multimedia :: Video
|
|
20
|
+
Requires-Python: >=3.10
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Requires-Dist: faster-whisper<2.0.0,>=1.0.0
|
|
24
|
+
Requires-Dist: deep-translator<2.0.0,>=1.11.0
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
27
|
+
Requires-Dist: ruff>=0.4.0; extra == "dev"
|
|
28
|
+
Requires-Dist: pyinstaller>=6.0; extra == "dev"
|
|
29
|
+
Dynamic: license-file
|
|
30
|
+
|
|
31
|
+
# asub
|
|
32
|
+
|
|
33
|
+
Generate and translate subtitles from any audio or video file — powered by
|
|
34
|
+
[faster-whisper](https://github.com/SYSTRAN/faster-whisper) and
|
|
35
|
+
[deep-translator](https://github.com/nidhaloff/deep-translator).
|
|
36
|
+
|
|
37
|
+
## Features
|
|
38
|
+
|
|
39
|
+
- **Fast transcription** — up to 4× faster than OpenAI Whisper with the same
|
|
40
|
+
accuracy, using CTranslate2.
|
|
41
|
+
- **Automatic language detection** — or specify the source language manually.
|
|
42
|
+
- **Translation** — translate subtitles to 100+ languages via Google Translate
|
|
43
|
+
(free, no API key).
|
|
44
|
+
- **Multiple output formats** — SRT and WebVTT.
|
|
45
|
+
- **VAD filtering** — Silero VAD removes silence and reduces hallucination.
|
|
46
|
+
- **Model choice** — from `tiny` (fast, less accurate) to `large-v3`
|
|
47
|
+
(slow, most accurate).
|
|
48
|
+
- **CPU & GPU** — works on both, with int8 quantisation for low-memory setups.
|
|
49
|
+
- **Packageable as .exe** — single-file Windows executable via PyInstaller.
|
|
50
|
+
|
|
51
|
+
## Installation
|
|
52
|
+
|
|
53
|
+
### From source (recommended for development)
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
git clone https://github.com/simoneraffaelli/subtitle-generator.git
|
|
57
|
+
cd subtitle-generator
|
|
58
|
+
pip install -e ".[dev]"
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### From PyPI
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
pip install asub
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Quick start
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
# Transcribe a video and generate subtitles (auto-detect language)
|
|
71
|
+
asub video.mp4
|
|
72
|
+
|
|
73
|
+
# Use a specific model and output format
|
|
74
|
+
asub video.mp4 -m large-v3 -f vtt
|
|
75
|
+
|
|
76
|
+
# Transcribe and translate to Italian
|
|
77
|
+
asub video.mp4 -t it
|
|
78
|
+
|
|
79
|
+
# Specify source language, translate to German, verbose output
|
|
80
|
+
asub podcast.mp3 -l en -t de -v
|
|
81
|
+
|
|
82
|
+
# Use CPU with int8 quantisation
|
|
83
|
+
asub interview.wav --device cpu --compute-type int8
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## CLI reference
|
|
87
|
+
|
|
88
|
+
```
|
|
89
|
+
usage: asub [-h] [-o OUTPUT] [-f {srt,vtt}] [-m MODEL] [--device {auto,cpu,cuda}]
|
|
90
|
+
[--compute-type TYPE] [-l LANG] [--no-vad] [-t LANG] [-v] [--version]
|
|
91
|
+
[--list-languages]
|
|
92
|
+
input
|
|
93
|
+
|
|
94
|
+
positional arguments:
|
|
95
|
+
input Path to an audio or video file.
|
|
96
|
+
|
|
97
|
+
options:
|
|
98
|
+
-o, --output Output subtitle file path (default: <input>.srt)
|
|
99
|
+
-f, --format Subtitle format: srt, vtt
|
|
100
|
+
-v, --verbose Increase verbosity (-v INFO, -vv DEBUG)
|
|
101
|
+
--version Show version and exit
|
|
102
|
+
--list-languages Print supported translation languages and exit
|
|
103
|
+
|
|
104
|
+
transcription:
|
|
105
|
+
-m, --model Whisper model size (default: medium)
|
|
106
|
+
--device auto | cpu | cuda (default: auto)
|
|
107
|
+
--compute-type Quantisation type (auto-selected if omitted)
|
|
108
|
+
-l, --language Source language code (auto-detected if omitted)
|
|
109
|
+
--no-vad Disable Voice Activity Detection
|
|
110
|
+
|
|
111
|
+
translation:
|
|
112
|
+
-t, --translate LANG Translate subtitles to this language code
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Python API
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
from asub.transcriber import load_model, transcribe
|
|
119
|
+
from asub.translator import translate_segments
|
|
120
|
+
from asub.subtitle import write_subtitle_file, SubtitleFormat
|
|
121
|
+
|
|
122
|
+
# 1. Transcribe
|
|
123
|
+
model = load_model("medium", device="auto")
|
|
124
|
+
result = transcribe(model, "video.mp4")
|
|
125
|
+
|
|
126
|
+
# 2. Translate (optional)
|
|
127
|
+
translated = translate_segments(result.segments, source=result.language, target="it")
|
|
128
|
+
|
|
129
|
+
# 3. Write subtitle file
|
|
130
|
+
write_subtitle_file(translated, "video_it.srt")
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
## Building a Windows .exe
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
pip install ".[dev]"
|
|
137
|
+
pyinstaller asub.spec
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
The executable will be in `dist/asub.exe`.
|
|
141
|
+
|
|
142
|
+
> **Note:** The .exe does not bundle Whisper model weights. Models are downloaded
|
|
143
|
+
> on first run and cached in the default Hugging Face cache directory.
|
|
144
|
+
|
|
145
|
+
## Hugging Face token (optional)
|
|
146
|
+
|
|
147
|
+
On first run, Whisper model weights are downloaded from the Hugging Face Hub.
|
|
148
|
+
Without authentication you may see this warning:
|
|
149
|
+
|
|
150
|
+
> You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN
|
|
151
|
+
> to enable higher rate limits and faster downloads
|
|
152
|
+
|
|
153
|
+
This is **not an error** — the download still works, just at lower rate limits.
|
|
154
|
+
To silence the warning and get faster downloads:
|
|
155
|
+
|
|
156
|
+
1. Create a free account at <https://huggingface.co>.
|
|
157
|
+
2. Go to **Settings → Access Tokens** and generate a token.
|
|
158
|
+
3. Set the token before running asub:
|
|
159
|
+
|
|
160
|
+
```bash
|
|
161
|
+
# Linux / macOS
|
|
162
|
+
export HF_TOKEN="hf_your_token_here"
|
|
163
|
+
|
|
164
|
+
# Windows PowerShell
|
|
165
|
+
$env:HF_TOKEN = "hf_your_token_here"
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
To make this permanent, add the variable to your shell profile or set it via
|
|
169
|
+
**System → Environment Variables** on Windows.
|
|
170
|
+
|
|
171
|
+
## Available models
|
|
172
|
+
|
|
173
|
+
| Model | Parameters | Relative speed | VRAM |
|
|
174
|
+
| ---------------- | ---------- | -------------- | ------ |
|
|
175
|
+
| `tiny` | 39 M | ~10× | ~1 GB |
|
|
176
|
+
| `base` | 74 M | ~7× | ~1 GB |
|
|
177
|
+
| `small` | 244 M | ~4× | ~2 GB |
|
|
178
|
+
| `medium` | 769 M | ~2× | ~5 GB |
|
|
179
|
+
| `large-v3` | 1550 M | 1× | ~10 GB |
|
|
180
|
+
| `turbo` | 809 M | ~8× | ~6 GB |
|
|
181
|
+
| `distil-large-v3`| 756 M | ~6× | ~6 GB |
|
|
182
|
+
|
|
183
|
+
### Choosing the right model
|
|
184
|
+
|
|
185
|
+
Not every model is the best choice for every situation. Here's a breakdown to
|
|
186
|
+
help you pick:
|
|
187
|
+
|
|
188
|
+
- **`tiny`** — Fastest model by far. Good for quick previews or testing your
|
|
189
|
+
pipeline. Accuracy is noticeably lower, especially on non-English audio or
|
|
190
|
+
noisy recordings. Use it when speed matters more than quality.
|
|
191
|
+
- **`base`** — A small step up from `tiny`. Slightly more accurate, still very
|
|
192
|
+
fast. Suitable for clear speech in common languages.
|
|
193
|
+
- **`small`** — A solid mid-range option. Handles most languages well and runs
|
|
194
|
+
comfortably on CPU. Good balance for everyday use when you don't have a GPU.
|
|
195
|
+
- **`medium`** — The default. Significantly more accurate than `small`,
|
|
196
|
+
especially for accented speech, niche languages, and overlapping speakers.
|
|
197
|
+
Slower on CPU, but a great choice with a GPU.
|
|
198
|
+
- **`large-v3`** — The most accurate model. Best for professional-quality
|
|
199
|
+
subtitles, rare languages, or heavily accented audio. Requires a CUDA GPU
|
|
200
|
+
with at least 10 GB VRAM for practical use.
|
|
201
|
+
- **`turbo`** — Near `large-v3` accuracy at roughly 8× the speed. This is the
|
|
202
|
+
best "quality per second" option if you have a GPU with ≥6 GB VRAM.
|
|
203
|
+
- **`distil-large-v3`** — A distilled version of `large-v3`. Similar accuracy
|
|
204
|
+
on English, slightly worse on other languages. Fast and memory-efficient.
|
|
205
|
+
Best for English-heavy workloads on a GPU.
|
|
206
|
+
|
|
207
|
+
### Recommended commands
|
|
208
|
+
|
|
209
|
+
**Fastest result** — use `tiny` when you just need a rough draft quickly:
|
|
210
|
+
|
|
211
|
+
```bash
|
|
212
|
+
asub video.mp4 -m tiny
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
**Best result** — use `large-v3` (GPU required) for maximum accuracy:
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
asub video.mp4 -m large-v3
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
**Best compromise** — use `turbo` on GPU for near-best accuracy at high speed,
|
|
222
|
+
or `small` on CPU for a good quality-to-speed ratio:
|
|
223
|
+
|
|
224
|
+
```bash
|
|
225
|
+
# With a CUDA GPU (recommended)
|
|
226
|
+
asub video.mp4 -m turbo
|
|
227
|
+
|
|
228
|
+
# CPU only
|
|
229
|
+
asub video.mp4 -m small
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
> **Tip:** The device and compute type are auto-detected. If you have a CUDA
|
|
233
|
+
> GPU, asub will use it with `float16` automatically. On CPU it falls back
|
|
234
|
+
> to `int8` quantisation.
|
|
235
|
+
|
|
236
|
+
## Upgrading dependencies
|
|
237
|
+
|
|
238
|
+
```bash
|
|
239
|
+
pip install --upgrade faster-whisper deep-translator
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
## Contributing
|
|
243
|
+
|
|
244
|
+
1. Fork the repo and create a feature branch.
|
|
245
|
+
2. Install dev dependencies: `pip install -e ".[dev]"`
|
|
246
|
+
3. Run tests: `python -m pytest`
|
|
247
|
+
4. Lint: `ruff check src/ tests/`
|
|
248
|
+
5. Open a pull request.
|
|
249
|
+
|
|
250
|
+
## License
|
|
251
|
+
|
|
252
|
+
[MIT](LICENSE)
|
|
253
|
+
|
|
254
|
+
## Acknowledgements
|
|
255
|
+
|
|
256
|
+
Built with the great help of [Claude Opus 4.6](https://www.anthropic.com/) by Anthropic.
|
asub-1.0.0/README.md
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
# asub
|
|
2
|
+
|
|
3
|
+
Generate and translate subtitles from any audio or video file — powered by
|
|
4
|
+
[faster-whisper](https://github.com/SYSTRAN/faster-whisper) and
|
|
5
|
+
[deep-translator](https://github.com/nidhaloff/deep-translator).
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- **Fast transcription** — up to 4× faster than OpenAI Whisper with the same
|
|
10
|
+
accuracy, using CTranslate2.
|
|
11
|
+
- **Automatic language detection** — or specify the source language manually.
|
|
12
|
+
- **Translation** — translate subtitles to 100+ languages via Google Translate
|
|
13
|
+
(free, no API key).
|
|
14
|
+
- **Multiple output formats** — SRT and WebVTT.
|
|
15
|
+
- **VAD filtering** — Silero VAD removes silence and reduces hallucination.
|
|
16
|
+
- **Model choice** — from `tiny` (fast, less accurate) to `large-v3`
|
|
17
|
+
(slow, most accurate).
|
|
18
|
+
- **CPU & GPU** — works on both, with int8 quantisation for low-memory setups.
|
|
19
|
+
- **Packageable as .exe** — single-file Windows executable via PyInstaller.
|
|
20
|
+
|
|
21
|
+
## Installation
|
|
22
|
+
|
|
23
|
+
### From source (recommended for development)
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
git clone https://github.com/simoneraffaelli/subtitle-generator.git
|
|
27
|
+
cd subtitle-generator
|
|
28
|
+
pip install -e ".[dev]"
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
### From PyPI
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pip install asub
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Quick start
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
# Transcribe a video and generate subtitles (auto-detect language)
|
|
41
|
+
asub video.mp4
|
|
42
|
+
|
|
43
|
+
# Use a specific model and output format
|
|
44
|
+
asub video.mp4 -m large-v3 -f vtt
|
|
45
|
+
|
|
46
|
+
# Transcribe and translate to Italian
|
|
47
|
+
asub video.mp4 -t it
|
|
48
|
+
|
|
49
|
+
# Specify source language, translate to German, verbose output
|
|
50
|
+
asub podcast.mp3 -l en -t de -v
|
|
51
|
+
|
|
52
|
+
# Use CPU with int8 quantisation
|
|
53
|
+
asub interview.wav --device cpu --compute-type int8
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## CLI reference
|
|
57
|
+
|
|
58
|
+
```
|
|
59
|
+
usage: asub [-h] [-o OUTPUT] [-f {srt,vtt}] [-m MODEL] [--device {auto,cpu,cuda}]
|
|
60
|
+
[--compute-type TYPE] [-l LANG] [--no-vad] [-t LANG] [-v] [--version]
|
|
61
|
+
[--list-languages]
|
|
62
|
+
input
|
|
63
|
+
|
|
64
|
+
positional arguments:
|
|
65
|
+
input Path to an audio or video file.
|
|
66
|
+
|
|
67
|
+
options:
|
|
68
|
+
-o, --output Output subtitle file path (default: <input>.srt)
|
|
69
|
+
-f, --format Subtitle format: srt, vtt
|
|
70
|
+
-v, --verbose Increase verbosity (-v INFO, -vv DEBUG)
|
|
71
|
+
--version Show version and exit
|
|
72
|
+
--list-languages Print supported translation languages and exit
|
|
73
|
+
|
|
74
|
+
transcription:
|
|
75
|
+
-m, --model Whisper model size (default: medium)
|
|
76
|
+
--device auto | cpu | cuda (default: auto)
|
|
77
|
+
--compute-type Quantisation type (auto-selected if omitted)
|
|
78
|
+
-l, --language Source language code (auto-detected if omitted)
|
|
79
|
+
--no-vad Disable Voice Activity Detection
|
|
80
|
+
|
|
81
|
+
translation:
|
|
82
|
+
-t, --translate LANG Translate subtitles to this language code
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Python API
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
from asub.transcriber import load_model, transcribe
|
|
89
|
+
from asub.translator import translate_segments
|
|
90
|
+
from asub.subtitle import write_subtitle_file, SubtitleFormat
|
|
91
|
+
|
|
92
|
+
# 1. Transcribe
|
|
93
|
+
model = load_model("medium", device="auto")
|
|
94
|
+
result = transcribe(model, "video.mp4")
|
|
95
|
+
|
|
96
|
+
# 2. Translate (optional)
|
|
97
|
+
translated = translate_segments(result.segments, source=result.language, target="it")
|
|
98
|
+
|
|
99
|
+
# 3. Write subtitle file
|
|
100
|
+
write_subtitle_file(translated, "video_it.srt")
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## Building a Windows .exe
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
pip install ".[dev]"
|
|
107
|
+
pyinstaller asub.spec
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
The executable will be in `dist/asub.exe`.
|
|
111
|
+
|
|
112
|
+
> **Note:** The .exe does not bundle Whisper model weights. Models are downloaded
|
|
113
|
+
> on first run and cached in the default Hugging Face cache directory.
|
|
114
|
+
|
|
115
|
+
## Hugging Face token (optional)
|
|
116
|
+
|
|
117
|
+
On first run, Whisper model weights are downloaded from the Hugging Face Hub.
|
|
118
|
+
Without authentication you may see this warning:
|
|
119
|
+
|
|
120
|
+
> You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN
|
|
121
|
+
> to enable higher rate limits and faster downloads
|
|
122
|
+
|
|
123
|
+
This is **not an error** — the download still works, just at lower rate limits.
|
|
124
|
+
To silence the warning and get faster downloads:
|
|
125
|
+
|
|
126
|
+
1. Create a free account at <https://huggingface.co>.
|
|
127
|
+
2. Go to **Settings → Access Tokens** and generate a token.
|
|
128
|
+
3. Set the token before running asub:
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
# Linux / macOS
|
|
132
|
+
export HF_TOKEN="hf_your_token_here"
|
|
133
|
+
|
|
134
|
+
# Windows PowerShell
|
|
135
|
+
$env:HF_TOKEN = "hf_your_token_here"
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
To make this permanent, add the variable to your shell profile or set it via
|
|
139
|
+
**System → Environment Variables** on Windows.
|
|
140
|
+
|
|
141
|
+
## Available models
|
|
142
|
+
|
|
143
|
+
| Model | Parameters | Relative speed | VRAM |
|
|
144
|
+
| ---------------- | ---------- | -------------- | ------ |
|
|
145
|
+
| `tiny` | 39 M | ~10× | ~1 GB |
|
|
146
|
+
| `base` | 74 M | ~7× | ~1 GB |
|
|
147
|
+
| `small` | 244 M | ~4× | ~2 GB |
|
|
148
|
+
| `medium` | 769 M | ~2× | ~5 GB |
|
|
149
|
+
| `large-v3` | 1550 M | 1× | ~10 GB |
|
|
150
|
+
| `turbo` | 809 M | ~8× | ~6 GB |
|
|
151
|
+
| `distil-large-v3`| 756 M | ~6× | ~6 GB |
|
|
152
|
+
|
|
153
|
+
### Choosing the right model
|
|
154
|
+
|
|
155
|
+
Not every model is the best choice for every situation. Here's a breakdown to
|
|
156
|
+
help you pick:
|
|
157
|
+
|
|
158
|
+
- **`tiny`** — Fastest model by far. Good for quick previews or testing your
|
|
159
|
+
pipeline. Accuracy is noticeably lower, especially on non-English audio or
|
|
160
|
+
noisy recordings. Use it when speed matters more than quality.
|
|
161
|
+
- **`base`** — A small step up from `tiny`. Slightly more accurate, still very
|
|
162
|
+
fast. Suitable for clear speech in common languages.
|
|
163
|
+
- **`small`** — A solid mid-range option. Handles most languages well and runs
|
|
164
|
+
comfortably on CPU. Good balance for everyday use when you don't have a GPU.
|
|
165
|
+
- **`medium`** — The default. Significantly more accurate than `small`,
|
|
166
|
+
especially for accented speech, niche languages, and overlapping speakers.
|
|
167
|
+
Slower on CPU, but a great choice with a GPU.
|
|
168
|
+
- **`large-v3`** — The most accurate model. Best for professional-quality
|
|
169
|
+
subtitles, rare languages, or heavily accented audio. Requires a CUDA GPU
|
|
170
|
+
with at least 10 GB VRAM for practical use.
|
|
171
|
+
- **`turbo`** — Near `large-v3` accuracy at roughly 8× the speed. This is the
|
|
172
|
+
best "quality per second" option if you have a GPU with ≥6 GB VRAM.
|
|
173
|
+
- **`distil-large-v3`** — A distilled version of `large-v3`. Similar accuracy
|
|
174
|
+
on English, slightly worse on other languages. Fast and memory-efficient.
|
|
175
|
+
Best for English-heavy workloads on a GPU.
|
|
176
|
+
|
|
177
|
+
### Recommended commands
|
|
178
|
+
|
|
179
|
+
**Fastest result** — use `tiny` when you just need a rough draft quickly:
|
|
180
|
+
|
|
181
|
+
```bash
|
|
182
|
+
asub video.mp4 -m tiny
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
**Best result** — use `large-v3` (GPU required) for maximum accuracy:
|
|
186
|
+
|
|
187
|
+
```bash
|
|
188
|
+
asub video.mp4 -m large-v3
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
**Best compromise** — use `turbo` on GPU for near-best accuracy at high speed,
|
|
192
|
+
or `small` on CPU for a good quality-to-speed ratio:
|
|
193
|
+
|
|
194
|
+
```bash
|
|
195
|
+
# With a CUDA GPU (recommended)
|
|
196
|
+
asub video.mp4 -m turbo
|
|
197
|
+
|
|
198
|
+
# CPU only
|
|
199
|
+
asub video.mp4 -m small
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
> **Tip:** The device and compute type are auto-detected. If you have a CUDA
|
|
203
|
+
> GPU, asub will use it with `float16` automatically. On CPU it falls back
|
|
204
|
+
> to `int8` quantisation.
|
|
205
|
+
|
|
206
|
+
## Upgrading dependencies
|
|
207
|
+
|
|
208
|
+
```bash
|
|
209
|
+
pip install --upgrade faster-whisper deep-translator
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
## Contributing
|
|
213
|
+
|
|
214
|
+
1. Fork the repo and create a feature branch.
|
|
215
|
+
2. Install dev dependencies: `pip install -e ".[dev]"`
|
|
216
|
+
3. Run tests: `python -m pytest`
|
|
217
|
+
4. Lint: `ruff check src/ tests/`
|
|
218
|
+
5. Open a pull request.
|
|
219
|
+
|
|
220
|
+
## License
|
|
221
|
+
|
|
222
|
+
[MIT](LICENSE)
|
|
223
|
+
|
|
224
|
+
## Acknowledgements
|
|
225
|
+
|
|
226
|
+
Built with the great help of [Claude Opus 4.6](https://www.anthropic.com/) by Anthropic.
|
asub-1.0.0/pyproject.toml
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "asub"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "Generate and translate subtitles from audio/video files using Whisper."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [{ name = "asub contributors" }]
|
|
13
|
+
keywords = ["subtitles", "whisper", "transcription", "translation", "srt", "vtt"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Intended Audience :: End Users/Desktop",
|
|
17
|
+
"Operating System :: OS Independent",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.10",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Topic :: Multimedia :: Sound/Audio :: Speech",
|
|
23
|
+
"Topic :: Multimedia :: Video",
|
|
24
|
+
]
|
|
25
|
+
dependencies = [
|
|
26
|
+
"faster-whisper>=1.0.0,<2.0.0",
|
|
27
|
+
"deep-translator>=1.11.0,<2.0.0",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[project.optional-dependencies]
|
|
31
|
+
dev = [
|
|
32
|
+
"pytest>=7.0",
|
|
33
|
+
"ruff>=0.4.0",
|
|
34
|
+
"pyinstaller>=6.0",
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
[project.scripts]
|
|
38
|
+
asub = "asub.cli:main"
|
|
39
|
+
|
|
40
|
+
[project.urls]
|
|
41
|
+
Homepage = "https://github.com/simoneraffaelli/subtitle-generator"
|
|
42
|
+
Repository = "https://github.com/simoneraffaelli/subtitle-generator"
|
|
43
|
+
Issues = "https://github.com/simoneraffaelli/subtitle-generator/issues"
|
|
44
|
+
|
|
45
|
+
[tool.setuptools.packages.find]
|
|
46
|
+
where = ["src"]
|
|
47
|
+
|
|
48
|
+
[tool.ruff]
|
|
49
|
+
target-version = "py310"
|
|
50
|
+
line-length = 100
|
|
51
|
+
|
|
52
|
+
[tool.ruff.lint]
|
|
53
|
+
select = ["E", "F", "W", "I", "N", "UP", "B", "SIM", "RUF"]
|
|
54
|
+
|
|
55
|
+
[tool.pytest.ini_options]
|
|
56
|
+
testpaths = ["tests"]
|
asub-1.0.0/setup.cfg
ADDED