fscript 0.2.5__py3-none-macosx_10_13_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
# Re-export the CLI entry point so the name advertised in ``__all__`` really
# exists on the package; without this import, ``from fscript import *`` fails
# with AttributeError because ``main`` is never bound here.
from ._cli import main

__all__ = ["main"]
@@ -0,0 +1,4 @@
1
+ from ._cli import main
2
+
3
+
4
# ``python -m fscript``: run the CLI and exit with the status it returns.
exit_status = main()
raise SystemExit(exit_status)
@@ -0,0 +1,35 @@
1
+ import os
2
+ import stat
3
+ import subprocess
4
+ import sys
5
+ from importlib.resources import files
6
+ from pathlib import Path
7
+
8
+
9
def bundled_binary_name() -> str:
    """Return the file name of the bundled executable for the current OS.

    Returns:
        ``"fscript.exe"`` when ``os.name`` is ``"nt"``, otherwise ``"fscript"``.
    """
    if os.name == "nt":
        return "fscript.exe"
    return "fscript"
11
+
12
+
13
def bundled_binary_path() -> Path:
    """Return the path of the executable shipped inside the package.

    The binary is looked up as ``bin/<name>`` under the ``fscript`` package,
    where ``<name>`` is whatever ``bundled_binary_name()`` returns.

    Returns:
        A ``Path`` pointing at the bundled binary. The file is not checked
        for existence here.
    """
    package_root = files("fscript")
    # Join one segment per call: passing several segments to
    # ``Traversable.joinpath`` is only part of the documented protocol from
    # Python 3.11 onwards, and older implementations may accept just one.
    resource = package_root.joinpath("bin").joinpath(bundled_binary_name())
    # NOTE(review): Path() assumes the resource is a real file on disk
    # (os.PathLike) -- confirm zipped installs are not a supported layout.
    return Path(resource)
15
+
16
+
17
def ensure_executable(path: Path) -> None:
    """Set the owner-execute permission bit on *path* if it is missing.

    Args:
        path: File whose mode is inspected and, if needed, updated.

    The existing mode bits are preserved; only ``stat.S_IXUSR`` is added.
    When the bit is already present no ``chmod`` call is made.
    """
    current_mode = path.stat().st_mode
    owner_can_execute = bool(current_mode & stat.S_IXUSR)
    if not owner_can_execute:
        path.chmod(current_mode | stat.S_IXUSR)
22
+
23
+
24
def main() -> int:
    """Run the bundled ``fscript`` binary with this process's CLI arguments.

    The binary is located with ``bundled_binary_path()``, made executable
    with ``ensure_executable()``, and then run with ``sys.argv[1:]``
    forwarded unchanged.

    Returns:
        The status to exit with: ``1`` when the bundled binary is missing
        (a message is printed to stderr), ``130`` when the run is
        interrupted with Ctrl-C, ``128 + N`` when the child was terminated
        by signal ``N``, and otherwise the child's own exit code.
    """
    binary = bundled_binary_path()
    if not binary.exists():
        print(
            f"fscript wheel is missing its bundled binary: {binary}",
            file=sys.stderr,
        )
        return 1

    ensure_executable(binary)
    try:
        completed = subprocess.run([str(binary), *sys.argv[1:]], check=False)
    except KeyboardInterrupt:
        # Ctrl-C interrupts this wrapper as well as the child; report the
        # conventional 128 + SIGINT status instead of a Python traceback.
        return 130

    returncode = completed.returncode
    if returncode < 0:
        # subprocess reports "terminated by signal N" as -N (POSIX only).
        # Passed to SystemExit as-is, a negative value would be truncated to
        # an unrelated 8-bit status, so follow the shell convention 128 + N.
        return 128 - returncode
    return returncode
@@ -0,0 +1 @@
1
+ # Packaged binary container for platform wheels.
@@ -0,0 +1,247 @@
1
+ Metadata-Version: 2.4
2
+ Name: fscript
3
+ Version: 0.2.5
4
+ Summary: Fast local transcription for large lectures with NVIDIA Parakeet ONNX
5
+ Home-page: https://github.com/brenorb/fast-transcript
6
+ Author: Breno Brito
7
+ License: MIT
8
+ Project-URL: Homepage, https://github.com/brenorb/fast-transcript
9
+ Project-URL: Repository, https://github.com/brenorb/fast-transcript
10
+ Project-URL: Issues, https://github.com/brenorb/fast-transcript/issues
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Operating System :: MacOS
15
+ Classifier: Operating System :: POSIX :: Linux
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Rust
18
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
19
+ Classifier: Topic :: Utilities
20
+ Requires-Python: >=3.9
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Dynamic: author
24
+ Dynamic: classifier
25
+ Dynamic: description
26
+ Dynamic: description-content-type
27
+ Dynamic: home-page
28
+ Dynamic: license
29
+ Dynamic: license-file
30
+ Dynamic: project-url
31
+ Dynamic: requires-python
32
+ Dynamic: summary
33
+
34
+ # fast-transcript
35
+
36
+ **`fast-transcript` is a local lecture transcription CLI built to beat the usual Apple Silicon tradeoff: either fast but flaky, or accurate but painfully slow.**
37
+
38
+ On the development machine, this project transcribed **30 minutes of audio in about 2 minutes**\* while staying around **2.51 GB RSS** on the long run. In the same local test set, it beat **`mlx-whisper`**, **`insanely-fast-whisper`**, and **`parakeet-mlx`**.
39
+
40
+ <sub>* Benchmark run on a MacBook Pro M1. The exact long-run measurement was **29m47s** of Portuguese lecture audio transcribed in about **2m14s** (**13.38x real-time**).</sub>
41
+
42
+ The CLI binary is called **`fscript`**:
43
+
44
+ ```bash
45
+ fscript lecture.mp3
46
+ ```
47
+
48
+ That is the whole point of this project. One command. Large audio. No babysitting.
49
+
50
+ ## Why this exists
51
+
52
+ I wanted a tool for **transcribing long classes and lectures quickly on a laptop while still using the computer for normal work**.
53
+
54
+ The existing options I tested had clear problems for this use case:
55
+
56
+ - **`insanely-fast-whisper`** was far too slow on this Mac once it fell back to CPU
57
+ - **`mlx-whisper`** was solid, but slower than I wanted for long lecture workflows
58
+ - **`parakeet-mlx`** had excellent memory numbers, but drifted into English on longer Portuguese segments unless heavily tuned
59
+
60
+ `fast-transcript` packages the ONNX Parakeet path that held up best in practice.
61
+
62
+ ## What it does
63
+
64
+ - downloads the default **Parakeet TDT 0.6B v3 int8** model automatically if it is missing
65
+ - stores the extracted model in a persistent per-user application data directory
66
+ - keeps the downloaded tarball in the user cache directory
67
+ - accepts `mp3`, `wav`, and other audio formats supported by `ffmpeg`
68
+ - auto-converts audio that is not already in the expected format to **16 kHz mono PCM16 WAV**
69
+ - uses **120s chunks** with **2s overlap** by default
70
+ - writes `<audio>.transcript.json` next to the input unless you choose a different output path
71
+
72
+ ## Install
73
+
74
+ ### Requirements
75
+
76
+ - `ffmpeg`
77
+ - `ffprobe`
78
+
79
+ ### Install with Homebrew
80
+
81
+ ```bash
82
+ brew install brenorb/fast-transcript/fast-transcript
83
+ ```
84
+
85
+ On Apple Silicon macOS, Homebrew now installs `fast-transcript` from a proper bottle.
86
+ On Linux x86_64, the formula still installs from the published release binary.
87
+
88
+ ### PyPI / uv
89
+
90
+ The PyPI package name for this project is **`fscript`**, so the target UX is:
91
+
92
+ ```bash
93
+ uvx fscript lecture.mp3
94
+ uv tool install fscript
95
+ ```
96
+
97
+ The repo already includes platform wheel builds for:
98
+
99
+ - macOS arm64
100
+ - Linux x86_64
101
+
102
+ PyPI publishing is currently enabled for:
103
+
104
+ - macOS arm64
105
+
106
+ The remaining step is registering the PyPI project/publisher on the PyPI side.
107
+ See [`docs/pypi-publishing.md`](./docs/pypi-publishing.md) for the exact fields.
108
+
109
+ ### Install a prebuilt binary directly
110
+
111
+ Download the archive for your platform from the [GitHub Releases page](https://github.com/brenorb/fast-transcript/releases), then put `fscript` on your `PATH`.
112
+
113
+ ### Build from source
114
+
115
+ ```bash
116
+ cargo install --git https://github.com/brenorb/fast-transcript
117
+ ```
118
+
119
+ Or from a local clone:
120
+
121
+ ```bash
122
+ cargo install --path .
123
+ ```
124
+
125
+ ## Quick start
126
+
127
+ ```bash
128
+ fscript lecture.mp3
129
+ ```
130
+
131
+ This will:
132
+
133
+ 1. ensure the default model exists
134
+ 2. normalize the audio if needed
135
+ 3. transcribe with the default chunking strategy
136
+ 4. write `lecture.transcript.json`
137
+
138
+ ## Usage
139
+
140
+ ```bash
141
+ fscript <audio> [output.json]
142
+ ```
143
+
144
+ Optional overrides:
145
+
146
+ ```bash
147
+ fscript lecture.wav custom-output.json
148
+ fscript lecture.wav --chunk-seconds 180 --chunk-overlap-seconds 3
149
+ fscript lecture.wav --chunk-seconds 0
150
+ fscript lecture.wav --model-dir ./models/parakeet/custom-copy
151
+ fscript lecture.wav --model-package ./models/parakeet-v3-int8.tar.gz
152
+ fscript lecture.wav --model-url https://example.com/parakeet-v3-int8.tar.gz
153
+ ```
154
+
155
+ Environment overrides:
156
+
157
+ - `FSCRIPT_MODEL_DIR`
158
+ - `FSCRIPT_MODEL_PACKAGE`
159
+ - `FSCRIPT_MODEL_URL`
160
+
161
+ ## Defaults
162
+
163
+ - model dir:
164
+ - macOS: `~/Library/Application Support/fast-transcript/models/parakeet-tdt-0.6b-v3-int8`
165
+ - Linux: `~/.local/share/fast-transcript/models/parakeet-tdt-0.6b-v3-int8`
166
+ - model package cache:
167
+ - macOS: `~/Library/Caches/fast-transcript/parakeet-v3-int8.tar.gz`
168
+ - Linux: `~/.cache/fast-transcript/parakeet-v3-int8.tar.gz`
169
+ - model URL: `https://huggingface.co/brenorb/parakeet-tdt-0.6b-v3-int8-onnx-bundle/resolve/main/parakeet-v3-int8.tar.gz?download=1`
170
+ - chunk seconds: `120`
171
+ - chunk overlap seconds: `2`
172
+ - output path: `<audio>.transcript.json`
173
+
174
+ ## Benchmarks
175
+
176
+ These are **local development benchmarks**, not universal claims. They were run on the same Apple Silicon Mac used during development, using a Portuguese lecture clip and the same broader workflow comparison.
177
+
178
+ ### 2-minute lecture clip
179
+
180
+ | Engine | Setup | Speed | Peak RSS | Notes |
181
+ | --- | --- | ---: | ---: | --- |
182
+ | **fast-transcript** | Parakeet ONNX | **13.06x** real-time | **2.25 GB** | Best balance of speed and reliability |
183
+ | `mlx-whisper` | `whisper-large-v3-turbo` | `5.25x` | `1.70 GB` | Good quality, slower |
184
+ | `parakeet-mlx` | tuned for quality | `4.92x` | `1.29 GB` | Needed substantial tuning |
185
+ | `parakeet-mlx` | raw greedy | `10.16x` | `0.57 GB` | Faster on short audio, drifted into English on longer PT-BR |
186
+ | `insanely-fast-whisper` | `whisper-large-v3` CPU | `0.30x` | `6.18 GB` | Accurate, but too slow here |
187
+ | `insanely-fast-whisper` | MPS + fallback | `0.31x` | `3.04 GB` | Small gain, same general problem |
188
+
189
+ ### Long lecture run
190
+
191
+ | Engine | Audio | Speed | Peak RSS | Notes |
192
+ | --- | --- | ---: | ---: | --- |
193
+ | **fast-transcript** | `29m47s` lecture | **13.38x** real-time | **2.51 GB** | Stable long run with default chunking |
194
+
195
+ ### Practical reading
196
+
197
+ - `fast-transcript` was not the absolute fastest thing we saw in every synthetic case
198
+ - it **was** the best result once long Portuguese lecture audio, transcript quality, and unattended runs all mattered at the same time
199
+ - that is the target workload for this repo
200
+
201
+ ## Output format
202
+
203
+ The output is JSON and includes:
204
+
205
+ - merged transcript text
206
+ - model path
207
+ - original input path
208
+ - prepared WAV path
209
+ - whether `ffmpeg` normalization was used
210
+ - load time
211
+ - transcribe time
212
+ - chunk configuration
213
+ - per-chunk timing
214
+
215
+ ## Motivation
216
+
217
+ This project is optimized for **large lectures and classes**, including files in the **30-minute to 2-hour** range, where:
218
+
219
+ - startup friction matters
220
+ - background CPU usage matters
221
+ - memory spikes matter
222
+ - brittle hand-tuned command lines become a tax
223
+
224
+ The design goal is not “highest benchmark on a cherry-picked GPU server”.
225
+ The goal is “transcribe big local lecture audio fast enough that you actually keep using it”.
226
+
227
+ ## Inspiration
228
+
229
+ This project was heavily informed by:
230
+
231
+ - [Handy](https://github.com/cjpais/Handy)
232
+ - [GLaDOS](https://github.com/dnhkng/GLaDOS)
233
+ - [transcribe-rs](https://github.com/cjpais/transcribe-rs)
234
+
235
+ In particular, the ONNX Parakeet path here was shaped by the packaging and implementation ideas used in Handy and GLaDOS.
236
+
237
+ ## Default model bundle
238
+
239
+ The default auto-download bundle is published in our own Hugging Face model repository:
240
+
241
+ - [brenorb/parakeet-tdt-0.6b-v3-int8-onnx-bundle](https://huggingface.co/brenorb/parakeet-tdt-0.6b-v3-int8-onnx-bundle)
242
+
243
+ This keeps the default install path tied to the exact validated tarball instead of an app-specific blob host.
244
+
245
+ ## License
246
+
247
+ MIT
@@ -0,0 +1,11 @@
1
+ fscript-0.2.5.data/purelib/fscript/__init__.py,sha256=fXR6_CuIYynV4wmoflnujz6yIvrCEDUcebsHQu4RxCY,19
2
+ fscript-0.2.5.data/purelib/fscript/__main__.py,sha256=LCUdzGRmKutUgmErR7q7a7K1v12-OkRwDtkjL-HDyGY,50
3
+ fscript-0.2.5.data/purelib/fscript/_cli.py,sha256=EOa5VtVxFs6f9OdLGVYiz3kJdKlmM2sZpPWJCdXXBi8,836
4
+ fscript-0.2.5.data/purelib/fscript/bin/__init__.py,sha256=ivTwE_3ACa4QzJ1l4O7jmLFojlnf8nvEENYum6gguHg,49
5
+ fscript-0.2.5.data/purelib/fscript/bin/fscript,sha256=EY0x5C8VQei-zIejiVIfGUUk7TKA8qce_FQBKaDdflQ,29868928
6
+ fscript-0.2.5.dist-info/licenses/LICENSE,sha256=Bs4m-RSIXZzQBDKe3zAu9qM2dTqPBXcmb2-AIlj46vw,1068
7
+ fscript-0.2.5.dist-info/METADATA,sha256=ihIPVYsIygeGXKVdog7Ozb4BKzOw36A9beA6dE_2BBo,8282
8
+ fscript-0.2.5.dist-info/WHEEL,sha256=y62QLMRYueUMr6wFCkpgoRBou_pT5wl6sE1iogoecpg,112
9
+ fscript-0.2.5.dist-info/entry_points.txt,sha256=ZxHYcRdH77lufpsXr3TG6KP8Ir_-5hrkBaHHtTqCsnU,46
10
+ fscript-0.2.5.dist-info/top_level.txt,sha256=EeaLo_7Bt4GoBfdbKiAK6s9O45N_tSCeng1s48ROjbg,8
11
+ fscript-0.2.5.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: false
4
+ Tag: py3-none-macosx_10_13_universal2
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ fscript = fscript._cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Breno Brito
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ fscript