asub 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
asub-1.0.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 asub contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
asub-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,256 @@
1
+ Metadata-Version: 2.4
2
+ Name: asub
3
+ Version: 1.0.0
4
+ Summary: Generate and translate subtitles from audio/video files using Whisper.
5
+ Author: asub contributors
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/simoneraffaelli/subtitle-generator
8
+ Project-URL: Repository, https://github.com/simoneraffaelli/subtitle-generator
9
+ Project-URL: Issues, https://github.com/simoneraffaelli/subtitle-generator/issues
10
+ Keywords: subtitles,whisper,transcription,translation,srt,vtt
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: End Users/Desktop
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
19
+ Classifier: Topic :: Multimedia :: Video
20
+ Requires-Python: >=3.10
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Requires-Dist: faster-whisper<2.0.0,>=1.0.0
24
+ Requires-Dist: deep-translator<2.0.0,>=1.11.0
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest>=7.0; extra == "dev"
27
+ Requires-Dist: ruff>=0.4.0; extra == "dev"
28
+ Requires-Dist: pyinstaller>=6.0; extra == "dev"
29
+ Dynamic: license-file
30
+
31
+ # asub
32
+
33
+ Generate and translate subtitles from any audio or video file — powered by
34
+ [faster-whisper](https://github.com/SYSTRAN/faster-whisper) and
35
+ [deep-translator](https://github.com/nidhaloff/deep-translator).
36
+
37
+ ## Features
38
+
39
+ - **Fast transcription** — up to 4× faster than OpenAI Whisper with the same
40
+ accuracy, using CTranslate2.
41
+ - **Automatic language detection** — or specify the source language manually.
42
+ - **Translation** — translate subtitles to 100+ languages via Google Translate
43
+ (free, no API key).
44
+ - **Multiple output formats** — SRT and WebVTT.
45
+ - **VAD filtering** — Silero VAD removes silence and reduces hallucination.
46
+ - **Model choice** — from `tiny` (fast, less accurate) to `large-v3`
47
+ (slow, most accurate).
48
+ - **CPU & GPU** — works on both, with int8 quantisation for low-memory setups.
49
+ - **Packageable as .exe** — single-file Windows executable via PyInstaller.
50
+
51
+ ## Installation
52
+
53
+ ### From source (recommended for development)
54
+
55
+ ```bash
56
+ git clone https://github.com/simoneraffaelli/subtitle-generator.git
57
+ cd subtitle-generator
58
+ pip install -e ".[dev]"
59
+ ```
60
+
61
+ ### From PyPI
62
+
63
+ ```bash
64
+ pip install asub
65
+ ```
66
+
67
+ ## Quick start
68
+
69
+ ```bash
70
+ # Transcribe a video and generate subtitles (auto-detect language)
71
+ asub video.mp4
72
+
73
+ # Use a specific model and output format
74
+ asub video.mp4 -m large-v3 -f vtt
75
+
76
+ # Transcribe and translate to Italian
77
+ asub video.mp4 -t it
78
+
79
+ # Specify source language, translate to German, verbose output
80
+ asub podcast.mp3 -l en -t de -v
81
+
82
+ # Use CPU with int8 quantisation
83
+ asub interview.wav --device cpu --compute-type int8
84
+ ```
85
+
86
+ ## CLI reference
87
+
88
+ ```
89
+ usage: asub [-h] [-o OUTPUT] [-f {srt,vtt}] [-m MODEL] [--device {auto,cpu,cuda}]
90
+ [--compute-type TYPE] [-l LANG] [--no-vad] [-t LANG] [-v] [--version]
91
+ [--list-languages]
92
+ input
93
+
94
+ positional arguments:
95
+ input Path to an audio or video file.
96
+
97
+ options:
98
+ -o, --output Output subtitle file path (default: <input>.srt)
99
+ -f, --format Subtitle format: srt, vtt
100
+ -v, --verbose Increase verbosity (-v INFO, -vv DEBUG)
101
+ --version Show version and exit
102
+ --list-languages Print supported translation languages and exit
103
+
104
+ transcription:
105
+ -m, --model Whisper model size (default: medium)
106
+ --device auto | cpu | cuda (default: auto)
107
+ --compute-type Quantisation type (auto-selected if omitted)
108
+ -l, --language Source language code (auto-detected if omitted)
109
+ --no-vad Disable Voice Activity Detection
110
+
111
+ translation:
112
+ -t, --translate LANG Translate subtitles to this language code
113
+ ```
114
+
115
+ ## Python API
116
+
117
+ ```python
118
+ from asub.transcriber import load_model, transcribe
119
+ from asub.translator import translate_segments
120
+ from asub.subtitle import write_subtitle_file, SubtitleFormat
121
+
122
+ # 1. Transcribe
123
+ model = load_model("medium", device="auto")
124
+ result = transcribe(model, "video.mp4")
125
+
126
+ # 2. Translate (optional)
127
+ translated = translate_segments(result.segments, source=result.language, target="it")
128
+
129
+ # 3. Write subtitle file
130
+ write_subtitle_file(translated, "video_it.srt")
131
+ ```
132
+
133
+ ## Building a Windows .exe
134
+
135
+ ```bash
136
+ pip install ".[dev]"
137
+ pyinstaller asub.spec
138
+ ```
139
+
140
+ The executable will be in `dist/asub.exe`.
141
+
142
+ > **Note:** The .exe does not bundle Whisper model weights. Models are downloaded
143
+ > on first run and cached in the default Hugging Face cache directory.
144
+
145
+ ## Hugging Face token (optional)
146
+
147
+ On first run, Whisper model weights are downloaded from the Hugging Face Hub.
148
+ Without authentication you may see this warning:
149
+
150
+ > You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN
151
+ > to enable higher rate limits and faster downloads
152
+
153
+ This is **not an error** — the download still works, just at lower rate limits.
154
+ To silence the warning and get faster downloads:
155
+
156
+ 1. Create a free account at <https://huggingface.co>.
157
+ 2. Go to **Settings → Access Tokens** and generate a token.
158
+ 3. Set the token before running asub:
159
+
160
+ ```bash
161
+ # Linux / macOS
162
+ export HF_TOKEN="hf_your_token_here"
163
+
164
+ # Windows PowerShell
165
+ $env:HF_TOKEN = "hf_your_token_here"
166
+ ```
167
+
168
+ To make this permanent, add the variable to your shell profile or set it via
169
+ **System → Environment Variables** on Windows.
170
+
171
+ ## Available models
172
+
173
+ | Model | Parameters | Relative speed | VRAM |
174
+ | ---------------- | ---------- | -------------- | ------ |
175
+ | `tiny` | 39 M | ~10× | ~1 GB |
176
+ | `base` | 74 M | ~7× | ~1 GB |
177
+ | `small` | 244 M | ~4× | ~2 GB |
178
+ | `medium` | 769 M | ~2× | ~5 GB |
179
+ | `large-v3` | 1550 M | 1× | ~10 GB |
180
+ | `turbo` | 809 M | ~8× | ~6 GB |
181
+ | `distil-large-v3`| 756 M | ~6× | ~6 GB |
182
+
183
+ ### Choosing the right model
184
+
185
+ Not every model is the best choice for every situation. Here's a breakdown to
186
+ help you pick:
187
+
188
+ - **`tiny`** — Fastest model by far. Good for quick previews or testing your
189
+ pipeline. Accuracy is noticeably lower, especially on non-English audio or
190
+ noisy recordings. Use it when speed matters more than quality.
191
+ - **`base`** — A small step up from `tiny`. Slightly more accurate, still very
192
+ fast. Suitable for clear speech in common languages.
193
+ - **`small`** — A solid mid-range option. Handles most languages well and runs
194
+ comfortably on CPU. Good balance for everyday use when you don't have a GPU.
195
+ - **`medium`** — The default. Significantly more accurate than `small`,
196
+ especially for accented speech, niche languages, and overlapping speakers.
197
+ Slower on CPU, but a great choice with a GPU.
198
+ - **`large-v3`** — The most accurate model. Best for professional-quality
199
+ subtitles, rare languages, or heavily accented audio. Requires a CUDA GPU
200
+ with at least 10 GB VRAM for practical use.
201
+ - **`turbo`** — Near `large-v3` accuracy at roughly 8× the speed. This is the
202
+ best "quality per second" option if you have a GPU with ≥6 GB VRAM.
203
+ - **`distil-large-v3`** — A distilled version of `large-v3`. Similar accuracy
204
+ on English, slightly worse on other languages. Fast and memory-efficient.
205
+ Best for English-heavy workloads on a GPU.
206
+
207
+ ### Recommended commands
208
+
209
+ **Fastest result** — use `tiny` when you just need a rough draft quickly:
210
+
211
+ ```bash
212
+ asub video.mp4 -m tiny
213
+ ```
214
+
215
+ **Best result** — use `large-v3` (GPU required) for maximum accuracy:
216
+
217
+ ```bash
218
+ asub video.mp4 -m large-v3
219
+ ```
220
+
221
+ **Best compromise** — use `turbo` on GPU for near-best accuracy at high speed,
222
+ or `small` on CPU for a good quality-to-speed ratio:
223
+
224
+ ```bash
225
+ # With a CUDA GPU (recommended)
226
+ asub video.mp4 -m turbo
227
+
228
+ # CPU only
229
+ asub video.mp4 -m small
230
+ ```
231
+
232
+ > **Tip:** The device and compute type are auto-detected. If you have a CUDA
233
+ > GPU, asub will use it with `float16` automatically. On CPU it falls back
234
+ > to `int8` quantisation.
235
+
236
+ ## Upgrading dependencies
237
+
238
+ ```bash
239
+ pip install --upgrade faster-whisper deep-translator
240
+ ```
241
+
242
+ ## Contributing
243
+
244
+ 1. Fork the repo and create a feature branch.
245
+ 2. Install dev dependencies: `pip install -e ".[dev]"`
246
+ 3. Run tests: `python -m pytest`
247
+ 4. Lint: `ruff check src/ tests/`
248
+ 5. Open a pull request.
249
+
250
+ ## License
251
+
252
+ [MIT](LICENSE)
253
+
254
+ ## Acknowledgements
255
+
256
+ Built with the great help of [Claude Opus 4.6](https://www.anthropic.com/) by Anthropic.
asub-1.0.0/README.md ADDED
@@ -0,0 +1,226 @@
1
+ # asub
2
+
3
+ Generate and translate subtitles from any audio or video file — powered by
4
+ [faster-whisper](https://github.com/SYSTRAN/faster-whisper) and
5
+ [deep-translator](https://github.com/nidhaloff/deep-translator).
6
+
7
+ ## Features
8
+
9
+ - **Fast transcription** — up to 4× faster than OpenAI Whisper with the same
10
+ accuracy, using CTranslate2.
11
+ - **Automatic language detection** — or specify the source language manually.
12
+ - **Translation** — translate subtitles to 100+ languages via Google Translate
13
+ (free, no API key).
14
+ - **Multiple output formats** — SRT and WebVTT.
15
+ - **VAD filtering** — Silero VAD removes silence and reduces hallucination.
16
+ - **Model choice** — from `tiny` (fast, less accurate) to `large-v3`
17
+ (slow, most accurate).
18
+ - **CPU & GPU** — works on both, with int8 quantisation for low-memory setups.
19
+ - **Packageable as .exe** — single-file Windows executable via PyInstaller.
20
+
21
+ ## Installation
22
+
23
+ ### From source (recommended for development)
24
+
25
+ ```bash
26
+ git clone https://github.com/simoneraffaelli/subtitle-generator.git
27
+ cd subtitle-generator
28
+ pip install -e ".[dev]"
29
+ ```
30
+
31
+ ### From PyPI
32
+
33
+ ```bash
34
+ pip install asub
35
+ ```
36
+
37
+ ## Quick start
38
+
39
+ ```bash
40
+ # Transcribe a video and generate subtitles (auto-detect language)
41
+ asub video.mp4
42
+
43
+ # Use a specific model and output format
44
+ asub video.mp4 -m large-v3 -f vtt
45
+
46
+ # Transcribe and translate to Italian
47
+ asub video.mp4 -t it
48
+
49
+ # Specify source language, translate to German, verbose output
50
+ asub podcast.mp3 -l en -t de -v
51
+
52
+ # Use CPU with int8 quantisation
53
+ asub interview.wav --device cpu --compute-type int8
54
+ ```
55
+
56
+ ## CLI reference
57
+
58
+ ```
59
+ usage: asub [-h] [-o OUTPUT] [-f {srt,vtt}] [-m MODEL] [--device {auto,cpu,cuda}]
60
+ [--compute-type TYPE] [-l LANG] [--no-vad] [-t LANG] [-v] [--version]
61
+ [--list-languages]
62
+ input
63
+
64
+ positional arguments:
65
+ input Path to an audio or video file.
66
+
67
+ options:
68
+ -o, --output Output subtitle file path (default: <input>.srt)
69
+ -f, --format Subtitle format: srt, vtt
70
+ -v, --verbose Increase verbosity (-v INFO, -vv DEBUG)
71
+ --version Show version and exit
72
+ --list-languages Print supported translation languages and exit
73
+
74
+ transcription:
75
+ -m, --model Whisper model size (default: medium)
76
+ --device auto | cpu | cuda (default: auto)
77
+ --compute-type Quantisation type (auto-selected if omitted)
78
+ -l, --language Source language code (auto-detected if omitted)
79
+ --no-vad Disable Voice Activity Detection
80
+
81
+ translation:
82
+ -t, --translate LANG Translate subtitles to this language code
83
+ ```
84
+
85
+ ## Python API
86
+
87
+ ```python
88
+ from asub.transcriber import load_model, transcribe
89
+ from asub.translator import translate_segments
90
+ from asub.subtitle import write_subtitle_file, SubtitleFormat
91
+
92
+ # 1. Transcribe
93
+ model = load_model("medium", device="auto")
94
+ result = transcribe(model, "video.mp4")
95
+
96
+ # 2. Translate (optional)
97
+ translated = translate_segments(result.segments, source=result.language, target="it")
98
+
99
+ # 3. Write subtitle file
100
+ write_subtitle_file(translated, "video_it.srt")
101
+ ```
102
+
103
+ ## Building a Windows .exe
104
+
105
+ ```bash
106
+ pip install ".[dev]"
107
+ pyinstaller asub.spec
108
+ ```
109
+
110
+ The executable will be in `dist/asub.exe`.
111
+
112
+ > **Note:** The .exe does not bundle Whisper model weights. Models are downloaded
113
+ > on first run and cached in the default Hugging Face cache directory.
114
+
115
+ ## Hugging Face token (optional)
116
+
117
+ On first run, Whisper model weights are downloaded from the Hugging Face Hub.
118
+ Without authentication you may see this warning:
119
+
120
+ > You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN
121
+ > to enable higher rate limits and faster downloads
122
+
123
+ This is **not an error** — the download still works, just at lower rate limits.
124
+ To silence the warning and get faster downloads:
125
+
126
+ 1. Create a free account at <https://huggingface.co>.
127
+ 2. Go to **Settings → Access Tokens** and generate a token.
128
+ 3. Set the token before running asub:
129
+
130
+ ```bash
131
+ # Linux / macOS
132
+ export HF_TOKEN="hf_your_token_here"
133
+
134
+ # Windows PowerShell
135
+ $env:HF_TOKEN = "hf_your_token_here"
136
+ ```
137
+
138
+ To make this permanent, add the variable to your shell profile or set it via
139
+ **System → Environment Variables** on Windows.
140
+
141
+ ## Available models
142
+
143
+ | Model | Parameters | Relative speed | VRAM |
144
+ | ---------------- | ---------- | -------------- | ------ |
145
+ | `tiny` | 39 M | ~10× | ~1 GB |
146
+ | `base` | 74 M | ~7× | ~1 GB |
147
+ | `small` | 244 M | ~4× | ~2 GB |
148
+ | `medium` | 769 M | ~2× | ~5 GB |
149
+ | `large-v3` | 1550 M | 1× | ~10 GB |
150
+ | `turbo` | 809 M | ~8× | ~6 GB |
151
+ | `distil-large-v3`| 756 M | ~6× | ~6 GB |
152
+
153
+ ### Choosing the right model
154
+
155
+ Not every model is the best choice for every situation. Here's a breakdown to
156
+ help you pick:
157
+
158
+ - **`tiny`** — Fastest model by far. Good for quick previews or testing your
159
+ pipeline. Accuracy is noticeably lower, especially on non-English audio or
160
+ noisy recordings. Use it when speed matters more than quality.
161
+ - **`base`** — A small step up from `tiny`. Slightly more accurate, still very
162
+ fast. Suitable for clear speech in common languages.
163
+ - **`small`** — A solid mid-range option. Handles most languages well and runs
164
+ comfortably on CPU. Good balance for everyday use when you don't have a GPU.
165
+ - **`medium`** — The default. Significantly more accurate than `small`,
166
+ especially for accented speech, niche languages, and overlapping speakers.
167
+ Slower on CPU, but a great choice with a GPU.
168
+ - **`large-v3`** — The most accurate model. Best for professional-quality
169
+ subtitles, rare languages, or heavily accented audio. Requires a CUDA GPU
170
+ with at least 10 GB VRAM for practical use.
171
+ - **`turbo`** — Near `large-v3` accuracy at roughly 8× the speed. This is the
172
+ best "quality per second" option if you have a GPU with ≥6 GB VRAM.
173
+ - **`distil-large-v3`** — A distilled version of `large-v3`. Similar accuracy
174
+ on English, slightly worse on other languages. Fast and memory-efficient.
175
+ Best for English-heavy workloads on a GPU.
176
+
177
+ ### Recommended commands
178
+
179
+ **Fastest result** — use `tiny` when you just need a rough draft quickly:
180
+
181
+ ```bash
182
+ asub video.mp4 -m tiny
183
+ ```
184
+
185
+ **Best result** — use `large-v3` (GPU required) for maximum accuracy:
186
+
187
+ ```bash
188
+ asub video.mp4 -m large-v3
189
+ ```
190
+
191
+ **Best compromise** — use `turbo` on GPU for near-best accuracy at high speed,
192
+ or `small` on CPU for a good quality-to-speed ratio:
193
+
194
+ ```bash
195
+ # With a CUDA GPU (recommended)
196
+ asub video.mp4 -m turbo
197
+
198
+ # CPU only
199
+ asub video.mp4 -m small
200
+ ```
201
+
202
+ > **Tip:** The device and compute type are auto-detected. If you have a CUDA
203
+ > GPU, asub will use it with `float16` automatically. On CPU it falls back
204
+ > to `int8` quantisation.
205
+
206
+ ## Upgrading dependencies
207
+
208
+ ```bash
209
+ pip install --upgrade faster-whisper deep-translator
210
+ ```
211
+
212
+ ## Contributing
213
+
214
+ 1. Fork the repo and create a feature branch.
215
+ 2. Install dev dependencies: `pip install -e ".[dev]"`
216
+ 3. Run tests: `python -m pytest`
217
+ 4. Lint: `ruff check src/ tests/`
218
+ 5. Open a pull request.
219
+
220
+ ## License
221
+
222
+ [MIT](LICENSE)
223
+
224
+ ## Acknowledgements
225
+
226
+ Built with the great help of [Claude Opus 4.6](https://www.anthropic.com/) by Anthropic.
asub-1.0.0/pyproject.toml ADDED
@@ -0,0 +1,56 @@
1
+ [build-system]
2
+ requires = ["setuptools>=77.0.0", "wheel"]  # SPDX string `license = "MIT"` (PEP 639) needs setuptools 77+
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "asub"
7
+ version = "1.0.0"
8
+ description = "Generate and translate subtitles from audio/video files using Whisper."
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.10"
12
+ authors = [{ name = "asub contributors" }]
13
+ keywords = ["subtitles", "whisper", "transcription", "translation", "srt", "vtt"]
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Intended Audience :: End Users/Desktop",
17
+ "Operating System :: OS Independent",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.10",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Topic :: Multimedia :: Sound/Audio :: Speech",
23
+ "Topic :: Multimedia :: Video",
24
+ ]
25
+ dependencies = [
26
+ "faster-whisper>=1.0.0,<2.0.0",
27
+ "deep-translator>=1.11.0,<2.0.0",
28
+ ]
29
+
30
+ [project.optional-dependencies]
31
+ dev = [
32
+ "pytest>=7.0",
33
+ "ruff>=0.4.0",
34
+ "pyinstaller>=6.0",
35
+ ]
36
+
37
+ [project.scripts]
38
+ asub = "asub.cli:main"
39
+
40
+ [project.urls]
41
+ Homepage = "https://github.com/simoneraffaelli/subtitle-generator"
42
+ Repository = "https://github.com/simoneraffaelli/subtitle-generator"
43
+ Issues = "https://github.com/simoneraffaelli/subtitle-generator/issues"
44
+
45
+ [tool.setuptools.packages.find]
46
+ where = ["src"]
47
+
48
+ [tool.ruff]
49
+ target-version = "py310"
50
+ line-length = 100
51
+
52
+ [tool.ruff.lint]
53
+ select = ["E", "F", "W", "I", "N", "UP", "B", "SIM", "RUF"]
54
+
55
+ [tool.pytest.ini_options]
56
+ testpaths = ["tests"]
asub-1.0.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
asub-1.0.0/src/asub/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """asub — generate and translate subtitles from audio/video files."""
2
+
3
+ __version__ = "1.0.0"
asub-1.0.0/src/asub/__main__.py ADDED
@@ -0,0 +1,7 @@
1
+ """Allow running asub as ``python -m asub``."""
2
+
3
+ import sys
4
+
5
+ from asub.cli import main
6
+
7
+ sys.exit(main())