scribe-cli 0.18.0__tar.gz → 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/.gitignore +1 -0
  2. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/PKG-INFO +58 -22
  3. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/README.md +35 -20
  4. scribe_cli-1.0.0/docs/app-tray-menu.png +0 -0
  5. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/docs/backends.md +125 -40
  6. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/docs/cli.md +67 -16
  7. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/docs/desktop-install.md +1 -1
  8. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/docs/installation.md +1 -1
  9. scribe_cli-0.18.0/docs/keyboard.md → scribe_cli-1.0.0/docs/output.md +98 -36
  10. scribe_cli-1.0.0/docs/tray.md +127 -0
  11. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/pyproject.toml +27 -6
  12. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/_version.py +3 -3
  13. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/app.py +298 -132
  14. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/backends/groq.py +4 -3
  15. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/backends/openai_api.py +11 -3
  16. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/backends/openai_realtime.py +59 -5
  17. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/backends/vosk.py +20 -4
  18. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/backends/whisper.py +12 -3
  19. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/backends/whisper_futo.py +10 -3
  20. scribe_cli-1.0.0/scribe/dialog.py +56 -0
  21. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/menu.py +698 -163
  22. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/models.py +145 -75
  23. scribe_cli-1.0.0/scribe/output.py +237 -0
  24. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/session.py +29 -4
  25. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe_cli.egg-info/PKG-INFO +58 -22
  26. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe_cli.egg-info/SOURCES.txt +6 -1
  27. scribe_cli-1.0.0/tests/test_backend_matrix.py +295 -0
  28. scribe_cli-1.0.0/tests/test_output.py +165 -0
  29. scribe_cli-1.0.0/tests/test_output_file_picker.py +57 -0
  30. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/tests/test_pseudo_streaming.py +120 -43
  31. scribe_cli-0.18.0/docs/app-tray-menu.png +0 -0
  32. scribe_cli-0.18.0/docs/tray.md +0 -97
  33. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/.github/FUNDING.yml +0 -0
  34. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/.github/workflows/pypi.yml +0 -0
  35. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/LICENSE +0 -0
  36. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/docs/roadmap-libei.md +0 -0
  37. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/icon.xcf +0 -0
  38. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/__init__.py +0 -0
  39. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/audio.py +0 -0
  40. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/backends/__init__.py +0 -0
  41. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/install_desktop.py +0 -0
  42. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/keyboard.py +0 -0
  43. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/models.toml +0 -0
  44. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/saverecording.py +0 -0
  45. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/testpynput.py +0 -0
  46. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/typers/__init__.py +0 -0
  47. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/typers/base.py +0 -0
  48. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/typers/eitype.py +0 -0
  49. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/typers/pynput.py +0 -0
  50. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/typers/wtype.py +0 -0
  51. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/typers/ydotool.py +0 -0
  52. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe/util.py +0 -0
  53. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe_cli.egg-info/dependency_links.txt +0 -0
  54. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe_cli.egg-info/entry_points.txt +0 -0
  55. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe_cli.egg-info/requires.txt +0 -0
  56. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe_cli.egg-info/top_level.txt +0 -0
  57. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe_data/__init__.py +0 -0
  58. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe_data/share/icon.png +0 -0
  59. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe_data/share/icon_recording.png +0 -0
  60. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe_data/share/icon_writing.png +0 -0
  61. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe_data/silero_vad.LICENSE +0 -0
  62. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe_data/silero_vad.onnx +0 -0
  63. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scribe_data/templates/scribe.desktop +0 -0
  64. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scripts/bench_whisper_local.py +0 -0
  65. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/scripts/test_python_versions_install.sh +0 -0
  66. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/setup.cfg +0 -0
  67. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/tests/test_openai_realtime_coalesce.py +0 -0
  68. {scribe_cli-0.18.0 → scribe_cli-1.0.0}/tests/test_whisper_futo.py +0 -0
@@ -7,3 +7,4 @@ scribe/_version.py
7
7
 
8
8
  # Autonomous roadmap workflows (local coordination artifacts; never committed)
9
9
  workflows/
10
+ .worktrees/
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scribe-cli
3
- Version: 0.18.0
3
+ Version: 1.0.0
4
4
  Summary: Speech-to-text CLI and system-tray app for dictating into any focused window. Local (vosk, faster-whisper) or cloud (groq, openai) backends, batch or streaming.
5
5
  Author-email: Mahé Perrette <mahe.perrette@gmail.com>
6
6
  License: MIT License
@@ -33,13 +33,34 @@ License: MIT License
33
33
  licenses of all dependencies before using or distributing this software to
34
34
  ensure compliance with their respective terms.
35
35
  Project-URL: Homepage, https://github.com/perrette/scribe
36
- Keywords: speech-to-text,speech recognition,transcription,dictation,voice-typing,voice-to-text,realtime,streaming,language,AI,local,API,cli,tray,vosk,whisper,openai,groq,gpt-4o,linux,wayland,keyboard,clipboard
36
+ Project-URL: Source, https://github.com/perrette/scribe
37
+ Project-URL: Issues, https://github.com/perrette/scribe/issues
38
+ Project-URL: Changelog, https://github.com/perrette/scribe/releases
39
+ Project-URL: Funding, https://github.com/sponsors/perrette
40
+ Keywords: speech-to-text,stt,transcription,dictation,voice-typing,voice-recognition,multilingual,realtime,streaming,cli,tray,vosk,whisper,faster-whisper,openai,groq,gpt-4o,linux,wayland,keyboard,clipboard,microphone,audio
41
+ Classifier: Development Status :: 5 - Production/Stable
42
+ Classifier: Intended Audience :: End Users/Desktop
43
+ Classifier: Intended Audience :: Developers
44
+ Classifier: License :: OSI Approved :: MIT License
37
45
  Classifier: Programming Language :: Python :: 3.9
38
46
  Classifier: Programming Language :: Python :: 3.10
39
47
  Classifier: Programming Language :: Python :: 3.11
40
48
  Classifier: Programming Language :: Python :: 3.12
41
49
  Classifier: Programming Language :: Python :: 3.13
42
50
  Classifier: Operating System :: OS Independent
51
+ Classifier: Environment :: Console
52
+ Classifier: Environment :: X11 Applications
53
+ Classifier: Environment :: MacOS X
54
+ Classifier: Environment :: Win32 (MS Windows)
55
+ Classifier: Natural Language :: English
56
+ Classifier: Natural Language :: French
57
+ Classifier: Natural Language :: German
58
+ Classifier: Natural Language :: Italian
59
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
60
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
61
+ Classifier: Topic :: Office/Business
62
+ Classifier: Topic :: Text Processing :: Linguistic
63
+ Classifier: Topic :: Utilities
43
64
  Requires-Python: >=3.9
44
65
  Description-Content-Type: text/markdown
45
66
  License-File: LICENSE
@@ -92,11 +113,13 @@ cloud-based APIs, batch and streaming workflows.
92
113
 
93
114
  ## What it does
94
115
 
95
- - Records from your mic and transcribes via one of four backends —
96
- **Vosk** (local, streaming), **Whisper** (local, batch), **OpenAI**
97
- (cloud, batch *or* streaming), **Groq** (cloud, batch).
98
- - Delivers the transcript three ways: paste into the focused window
99
- (default), copy to clipboard, or print to the terminal.
116
+ - Records from your mic and transcribes via one of five backends —
117
+ **Vosk** (local, streaming), **Whisper** (local, batch),
118
+ **Whisper FUTO** (local, batch, ACFT-tuned for short dictations),
119
+ **OpenAI** (cloud, batch *or* streaming), **Groq** (cloud, batch).
120
+ - Delivers the transcript four ways: paste into the focused window
121
+ (default), copy to clipboard, print to the terminal, or write to
122
+ a file.
100
123
  - Runs as a **system tray icon** with a single Record button, or as an
101
124
  interactive **terminal TUI** — same menu in both.
102
125
  - Hooks into your DE's keyboard shortcuts via `SIGUSR1` (toggle
@@ -126,8 +149,8 @@ scribe
126
149
  This launches the system tray icon. Press Record, speak, press Stop —
127
150
  the transcription lands in the focused window. Scribe picks the first
128
151
  backend whose key / dependency is present, in order **`groq` →
129
- `openai` → `whisper` → `vosk`**, so with `GROQ_API_KEY` set the
130
- command above is equivalent to:
152
+ `openai` → `whisper-futo` → `whisper` → `vosk`**, so with `GROQ_API_KEY`
153
+ set the command above is equivalent to:
131
154
 
132
155
  ```bash
133
156
  scribe --backend groq --model whisper-large-v3-turbo
@@ -142,15 +165,17 @@ scribe --backend openai --model gpt-4o-mini-transcribe # OpenAI sweet spot
142
165
  scribe --backend openai --model gpt-realtime-whisper # OpenAI streaming
143
166
  scribe --backend whisper --model small # local, no API key
144
167
  scribe --frontend terminal # interactive TUI menu
145
- scribe --frontend terminal --no-interactive # record immediately, no menu
168
+ scribe --record # start recording immediately on launch (works in tray or terminal)
169
+ scribe --record --frontend terminal --mode file # one-shot batched dictation → file
170
+ scribe --record --frontend terminal --mode file --stream # streamed: chunks appended live as you speak
146
171
  scribe --mode clipboard # copy to clipboard, no keystroke
147
172
  scribe --mode terminal # only print to stdout
148
- scribe -o transcript.txt # also append to a file
173
+ scribe --mode file -o transcript.txt # append to a file (no keystroke / clipboard)
149
174
  ```
150
175
 
151
176
  With `--no-interactive` (terminal frontend only), scribe skips the
152
177
  interactive menu and starts recording right away — handy for scripted,
153
- one-shot transcriptions. `--no-prompt` is kept as a deprecated alias.
178
+ one-shot transcriptions.
154
179
 
155
180
  Bias the recogniser toward names, jargon, or a domain glossary with
156
181
  `--prompt "free text hint"` and `--words word1 word2 ...` (each also
@@ -161,12 +186,13 @@ for what each backend does with them.
161
186
 
162
187
  ## Backends at a glance
163
188
 
164
- | Backend | `--backend` | Default model | Streaming model(s) | Requires |
165
- |-----------------|-------------|----------------------------|---------------------------|-------------------------------------|
166
- | Groq (cloud) | `groq` | `whisper-large-v3-turbo` | — | `GROQ_API_KEY` |
167
- | OpenAI (cloud) | `openai` | `gpt-4o-mini-transcribe` | `gpt-realtime-whisper` | `OPENAI_API_KEY` |
168
- | Whisper (local) | `whisper` | `small` | — | `pip install scribe-cli[whisper]` |
169
- | Vosk (local) | `vosk` | language-dependent | all Vosk models | `pip install scribe-cli[vosk]` |
189
+ | Backend | `--backend` | Default model | Streaming model(s) | Requires |
190
+ |----------------------|-----------------|----------------------------|---------------------------|----------------------------------------|
191
+ | Groq (cloud) | `groq` | `whisper-large-v3-turbo` | — | `GROQ_API_KEY` |
192
+ | OpenAI (cloud) | `openai` | `gpt-4o-mini-transcribe` | `gpt-realtime-whisper` | `OPENAI_API_KEY` |
193
+ | Whisper FUTO (local) | `whisper-futo` | `small` | — | `pip install scribe-cli[whisper-futo]` |
194
+ | Whisper (local) | `whisper` | `small` | — | `pip install scribe-cli[whisper]` |
195
+ | Vosk (local) | `vosk` | language-dependent | all Vosk models | `pip install scribe-cli[vosk]` |
170
196
 
171
197
  Whether a transcription appears live as you speak or all at once when
172
198
  you stop depends on the **model** picked — see
@@ -175,8 +201,11 @@ you stop depends on the **model** picked — see
175
201
 
176
202
  ### Getting an API key
177
203
 
178
- Groq is a good cloud backend to start with very fast, quite accurate, and the
179
- **free tier** is generous enough for everyday dictation. Sign up at
204
+ Groq is the **recommended cloud backend by default** — extremely fast
205
+ (by a wide margin compared to other cloud STT options, especially in
206
+ **Stream** mode where the per-chunk roundtrip latency dominates the
207
+ perceived speed), quite accurate, and the **free tier** is generous
208
+ enough for everyday dictation. Sign up at
180
209
  [console.groq.com](https://console.groq.com/), create an API key
181
210
  under **Settings → API Keys**, and export it as `GROQ_API_KEY`.
182
211
 
@@ -189,7 +218,7 @@ I personally use [OpenAI](https://openai.com/api/) with `gpt-4o-mini-transcribe`
189
218
  extras, Ubuntu / GNOME tray libs.
190
219
  - [Backends in detail](docs/backends.md) — model lists, when to pick
191
220
  which, the realtime model.
192
- - [Keyboard modes & typer backends](docs/keyboard.md) — keystroke vs
221
+ - [Output modes & typer backends](docs/output.md) — keystroke vs
193
222
  clipboard, Wayland / `eitype`, `--type-direct`.
194
223
  - [System tray & global hotkeys](docs/tray.md) — menu tree, icon
195
224
  states, `SIGUSR1`/`SIGUSR2`.
@@ -198,10 +227,17 @@ I personally use [OpenAI](https://openai.com/api/) with `gpt-4o-mini-transcribe`
198
227
  - [Fine tuning & CLI reference](docs/cli.md) — every `scribe --help`
199
228
  flag with examples.
200
229
 
230
+ ## Related projects
231
+
232
+ - **[bard](https://github.com/perrette/bard)** — TTS sibling of scribe,
233
+ same tray/CLI architecture in reverse: highlight text, hear it
234
+ spoken. Shares the [`desktop-ai-core`](https://github.com/perrette/desktop-ai-core)
235
+ backbone (frontends, providers, dialog helpers).
236
+
201
237
  ## Compatibility
202
238
 
203
239
  Initially developed for Python 3 on Ubuntu 24.04 (GNOME + Wayland);
204
240
  works on macOS and Windows too. Wayland keystroke injection is
205
- convoluted but [solved](docs/keyboard.md). For dependencies of
241
+ convoluted but [solved](docs/output.md). For dependencies of
206
242
  individual subsystems, check `pynput` (keyboard) and `pystray` (tray
207
243
  icon).
@@ -9,11 +9,13 @@ cloud-based APIs, batch and streaming workflows.
9
9
 
10
10
  ## What it does
11
11
 
12
- - Records from your mic and transcribes via one of four backends —
13
- **Vosk** (local, streaming), **Whisper** (local, batch), **OpenAI**
14
- (cloud, batch *or* streaming), **Groq** (cloud, batch).
15
- - Delivers the transcript three ways: paste into the focused window
16
- (default), copy to clipboard, or print to the terminal.
12
+ - Records from your mic and transcribes via one of five backends —
13
+ **Vosk** (local, streaming), **Whisper** (local, batch),
14
+ **Whisper FUTO** (local, batch, ACFT-tuned for short dictations),
15
+ **OpenAI** (cloud, batch *or* streaming), **Groq** (cloud, batch).
16
+ - Delivers the transcript four ways: paste into the focused window
17
+ (default), copy to clipboard, print to the terminal, or write to
18
+ a file.
17
19
  - Runs as a **system tray icon** with a single Record button, or as an
18
20
  interactive **terminal TUI** — same menu in both.
19
21
  - Hooks into your DE's keyboard shortcuts via `SIGUSR1` (toggle
@@ -43,8 +45,8 @@ scribe
43
45
  This launches the system tray icon. Press Record, speak, press Stop —
44
46
  the transcription lands in the focused window. Scribe picks the first
45
47
  backend whose key / dependency is present, in order **`groq` →
46
- `openai` → `whisper` → `vosk`**, so with `GROQ_API_KEY` set the
47
- command above is equivalent to:
48
+ `openai` → `whisper-futo` → `whisper` → `vosk`**, so with `GROQ_API_KEY`
49
+ set the command above is equivalent to:
48
50
 
49
51
  ```bash
50
52
  scribe --backend groq --model whisper-large-v3-turbo
@@ -59,15 +61,17 @@ scribe --backend openai --model gpt-4o-mini-transcribe # OpenAI sweet spot
59
61
  scribe --backend openai --model gpt-realtime-whisper # OpenAI streaming
60
62
  scribe --backend whisper --model small # local, no API key
61
63
  scribe --frontend terminal # interactive TUI menu
62
- scribe --frontend terminal --no-interactive # record immediately, no menu
64
+ scribe --record # start recording immediately on launch (works in tray or terminal)
65
+ scribe --record --frontend terminal --mode file # one-shot batched dictation → file
66
+ scribe --record --frontend terminal --mode file --stream # streamed: chunks appended live as you speak
63
67
  scribe --mode clipboard # copy to clipboard, no keystroke
64
68
  scribe --mode terminal # only print to stdout
65
- scribe -o transcript.txt # also append to a file
69
+ scribe --mode file -o transcript.txt # append to a file (no keystroke / clipboard)
66
70
  ```
67
71
 
68
72
  With `--no-interactive` (terminal frontend only), scribe skips the
69
73
  interactive menu and starts recording right away — handy for scripted,
70
- one-shot transcriptions. `--no-prompt` is kept as a deprecated alias.
74
+ one-shot transcriptions.
71
75
 
72
76
  Bias the recogniser toward names, jargon, or a domain glossary with
73
77
  `--prompt "free text hint"` and `--words word1 word2 ...` (each also
@@ -78,12 +82,13 @@ for what each backend does with them.
78
82
 
79
83
  ## Backends at a glance
80
84
 
81
- | Backend | `--backend` | Default model | Streaming model(s) | Requires |
82
- |-----------------|-------------|----------------------------|---------------------------|-------------------------------------|
83
- | Groq (cloud) | `groq` | `whisper-large-v3-turbo` | — | `GROQ_API_KEY` |
84
- | OpenAI (cloud) | `openai` | `gpt-4o-mini-transcribe` | `gpt-realtime-whisper` | `OPENAI_API_KEY` |
85
- | Whisper (local) | `whisper` | `small` | — | `pip install scribe-cli[whisper]` |
86
- | Vosk (local) | `vosk` | language-dependent | all Vosk models | `pip install scribe-cli[vosk]` |
85
+ | Backend | `--backend` | Default model | Streaming model(s) | Requires |
86
+ |----------------------|-----------------|----------------------------|---------------------------|----------------------------------------|
87
+ | Groq (cloud) | `groq` | `whisper-large-v3-turbo` | — | `GROQ_API_KEY` |
88
+ | OpenAI (cloud) | `openai` | `gpt-4o-mini-transcribe` | `gpt-realtime-whisper` | `OPENAI_API_KEY` |
89
+ | Whisper FUTO (local) | `whisper-futo` | `small` | — | `pip install scribe-cli[whisper-futo]` |
90
+ | Whisper (local) | `whisper` | `small` | — | `pip install scribe-cli[whisper]` |
91
+ | Vosk (local) | `vosk` | language-dependent | all Vosk models | `pip install scribe-cli[vosk]` |
87
92
 
88
93
  Whether a transcription appears live as you speak or all at once when
89
94
  you stop depends on the **model** picked — see
@@ -92,8 +97,11 @@ you stop depends on the **model** picked — see
92
97
 
93
98
  ### Getting an API key
94
99
 
95
- Groq is a good cloud backend to start with very fast, quite accurate, and the
96
- **free tier** is generous enough for everyday dictation. Sign up at
100
+ Groq is the **recommended cloud backend by default** — extremely fast
101
+ (by a wide margin compared to other cloud STT options, especially in
102
+ **Stream** mode where the per-chunk roundtrip latency dominates the
103
+ perceived speed), quite accurate, and the **free tier** is generous
104
+ enough for everyday dictation. Sign up at
97
105
  [console.groq.com](https://console.groq.com/), create an API key
98
106
  under **Settings → API Keys**, and export it as `GROQ_API_KEY`.
99
107
 
@@ -106,7 +114,7 @@ I personally use [OpenAI](https://openai.com/api/) with `gpt-4o-mini-transcribe`
106
114
  extras, Ubuntu / GNOME tray libs.
107
115
  - [Backends in detail](docs/backends.md) — model lists, when to pick
108
116
  which, the realtime model.
109
- - [Keyboard modes & typer backends](docs/keyboard.md) — keystroke vs
117
+ - [Output modes & typer backends](docs/output.md) — keystroke vs
110
118
  clipboard, Wayland / `eitype`, `--type-direct`.
111
119
  - [System tray & global hotkeys](docs/tray.md) — menu tree, icon
112
120
  states, `SIGUSR1`/`SIGUSR2`.
@@ -115,10 +123,17 @@ I personally use [OpenAI](https://openai.com/api/) with `gpt-4o-mini-transcribe`
115
123
  - [Fine tuning & CLI reference](docs/cli.md) — every `scribe --help`
116
124
  flag with examples.
117
125
 
126
+ ## Related projects
127
+
128
+ - **[bard](https://github.com/perrette/bard)** — TTS sibling of scribe,
129
+ same tray/CLI architecture in reverse: highlight text, hear it
130
+ spoken. Shares the [`desktop-ai-core`](https://github.com/perrette/desktop-ai-core)
131
+ backbone (frontends, providers, dialog helpers).
132
+
118
133
  ## Compatibility
119
134
 
120
135
  Initially developed for Python 3 on Ubuntu 24.04 (GNOME + Wayland);
121
136
  works on macOS and Windows too. Wayland keystroke injection is
122
- convoluted but [solved](docs/keyboard.md). For dependencies of
137
+ convoluted but [solved](docs/output.md). For dependencies of
123
138
  individual subsystems, check `pynput` (keyboard) and `pystray` (tray
124
139
  icon).
Binary file
@@ -70,7 +70,7 @@ Vosk transcribes in real time and is very good at one language at a
70
70
  time, but tends to make more mistakes than Whisper and does not produce
71
71
  punctuation. It becomes really useful in longer, interactive sessions
72
72
  where the live "appears as you speak" UX matters — see
73
- [keyboard.md](keyboard.md) for how the keystroke mode interacts with
73
+ [output.md](output.md) for how the keystroke mode interacts with
74
74
  streaming models.
75
75
 
76
76
  There are many [Vosk models](https://alphacephei.com/vosk/models)
@@ -117,12 +117,15 @@ for the full picture.
117
117
  ## `groq` (Groq cloud)
118
118
 
119
119
  Talks to Groq's OpenAI-compatible API and defaults to
120
- `whisper-large-v3-turbo`. Typically the fastest cloud option for
121
- full-utterance transcription:
120
+ `whisper-large-v3-turbo`. **Extremely fast** thanks to Groq's
121
+ inference hardware — the recommended cloud backend by default, and
122
+ the natural pick for `--stream` mode where per-chunk roundtrip
123
+ latency dominates perceived speed:
122
124
 
123
125
  ```bash
124
126
  export GROQ_API_KEY=YOURAPIKEY
125
- scribe --backend groq
127
+ scribe --backend groq # Clip mode (default)
128
+ scribe --backend groq --stream # live transcription, per-chunk
126
129
  ```
127
130
 
128
131
  The `groq` backend reuses the `openai` Python client under the hood, so
@@ -146,14 +149,14 @@ style, domain, or word list. The concept is generic across the
146
149
  whisper-family backends but each backend exposes it slightly
147
150
  differently:
148
151
 
149
- | Backend | `--prompt` | `--words` |
150
- |--------------------------------------|-------------------------------|--------------------------------------------------------|
151
- | `whisper` (faster-whisper, local) | passed as `initial_prompt=` | passed as `hotwords=` — a **dedicated biasing channel** separate from the prompt |
152
- | `whisper-futo` (pywhispercpp, local) | passed as `initial_prompt=` | joined onto the prompt string (no separate hotwords channel here) |
153
- | `openai` batch (`gpt-4o*-transcribe`) | passed as `prompt=` | joined onto the prompt string |
154
- | `groq` (`whisper-large-v3-turbo`) | passed as `prompt=` | joined onto the prompt string |
155
- | `openai` realtime (`gpt-realtime-whisper`) | *silently ignored* — the model rejects the prompt parameter server-side (HTTP 400 *"The 'prompt' parameter is not supported for this model."*). The kwarg stays accepted for plumbing compatibility but never reaches the API. | same — joined into the (ignored) prompt |
156
- | `vosk` | *ignored* (no soft prompt) | *ignored* (Vosk only supports a hard `grammar` allowlist; not yet exposed) |
152
+ | Backend | `--prompt` | `--words` | `--language` |
153
+ |--------------------------------------|-------------------------------|--------------------------------------------------------|---------------------------------------------------------|
154
+ | `whisper` (faster-whisper, local) | passed as `initial_prompt=` | passed as `hotwords=` — a **dedicated biasing channel** separate from the prompt | passed as `language=` (ISO 639-1); `-l en` also auto-substitutes `small.en` etc. |
155
+ | `whisper-futo` (pywhispercpp, local) | passed as `initial_prompt=` | joined onto the prompt string (no separate hotwords channel here) | passed as `language=` (ISO 639-1); `-l en` auto-substitutes `small.en` etc. |
156
+ | `openai` batch (`gpt-4o*-transcribe`) | passed as `prompt=` | joined onto the prompt string | passed as `language=` hint (ISO 639-1) |
157
+ | `groq` (`whisper-large-v3-turbo`) | passed as `prompt=` | joined onto the prompt string | passed as `language=` hint (ISO 639-1) |
158
+ | `openai` realtime (`gpt-realtime-whisper`) | *silently ignored* — the model rejects the prompt parameter server-side (HTTP 400 *"The 'prompt' parameter is not supported for this model."*). The kwarg stays accepted for plumbing compatibility but never reaches the API. | same — joined into the (ignored) prompt | passed as `language=` (ISO 639-1) |
159
+ | `vosk` | *ignored* (no soft prompt) | *ignored* (Vosk only supports a hard `grammar` allowlist; not yet exposed) | picks a per-language model from `scribe/models.toml`; no runtime parameter |
157
160
 
158
161
  The whisper-family APIs cap the prompt around ~224 tokens; longer
159
162
  hints are silently truncated. Faster-whisper's `hotwords` channel is
@@ -184,34 +187,117 @@ invocation, pass an explicit empty value: `--prompt ""` (or
184
187
  arguments (or `--words-file ""`) suppresses the words default. Each
185
188
  side is independent.
186
189
 
187
- ## Pseudo-streaming (experimental)
188
-
189
- `--pseudo-streaming` makes a batch backend behave streaming-like by
190
- cutting the running buffer into chunks driven by silence:
190
+ ## Language
191
+
192
+ `-l / --language LANG` tells the backend which language to expect.
193
+ What that means in practice varies by backend (see the per-backend
194
+ column in the table above):
195
+
196
+ - **Whisper-family** (`whisper`, `whisper-futo`, `openai` batch +
197
+ realtime, `groq`) — the language is passed to the model as a hard
198
+ lock: the decoder generates that language regardless of what it
199
+ hears acoustically. Accepts any [ISO 639-1 short code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes)
200
+ Whisper recognises (~99 languages). When unset, Whisper auto-detects
201
+ per chunk.
202
+ - **English-only model variants** — for `whisper` and `whisper-futo`,
203
+ `-l en` *also* auto-substitutes the English-only model when one
204
+ exists (`small` → `small.en`, etc.). These variants trade
205
+ multilingual coverage for English accuracy.
206
+ - **Vosk** — language isn't a runtime parameter; vosk ships a
207
+ separate model per language. `-l fr` looks up the vosk model
208
+ pre-mapped to French in [`scribe/models.toml`](../scribe/models.toml)
209
+ and instantiates that one. Vosk has no auto-detect path, so the
210
+ Language menu's `Auto` entry on vosk falls back to a sensible
211
+ default — the tray shows `Auto (🇬🇧 en)` to make this explicit
212
+ without mutating the stored `language=None`.
213
+
214
+ The tray's **Language** submenu exposes the four curated languages
215
+ (`en` / `fr` / `de` / `it`) with origin-country flag prefixes
216
+ (🇬🇧 / 🇫🇷 / 🇩🇪 / 🇮🇹). The CLI accepts these plus any other ISO 639-1
217
+ code the active backend recognises.
218
+
219
+ ## Stream mode (works with any backend)
220
+
221
+ `--stream` (or **Mode: Stream** in the tray) emits transcribed text
222
+ **live as you speak**, regardless of which backend you picked. This
223
+ is the headline v1.0.0 improvement: scribe abstracts over the two
224
+ different mechanisms that backends use to deliver live output, so
225
+ `--stream` works uniformly across every supported backend.
226
+
227
+ - **Native streaming backends** (Vosk, `gpt-realtime-whisper`) push
228
+ partial results from the server as audio is received — scribe just
229
+ forwards them to the chosen output (focused window / clipboard /
230
+ terminal / file). These backends are *always* in Stream mode; the
231
+ Mode toggle reads "Mode: Stream (native)" for them and is read-only.
232
+ - **Batch backends** (Whisper local, Whisper FUTO, OpenAI
233
+ `gpt-4o-*-transcribe`, Groq `whisper-large-v3-turbo`) don't accept
234
+ partial audio. scribe instead cuts the recording buffer on
235
+ detected silence and issues a separate transcription request for
236
+ each chunk — internally called *pseudo-streaming*. The user sees
237
+ the same live experience.
191
238
 
192
239
  ```bash
193
- scribe --pseudo-streaming --streaming-window 5
240
+ scribe --stream # any backend, live transcription
241
+ scribe --stream --backend groq # Groq + Stream is the sweet spot
242
+ scribe --stream --backend whisper # local, live, no API key
194
243
  ```
195
244
 
196
- After `--streaming-window` seconds of buffered audio, scribe cuts at
197
- the first silence of at least `--silence-duration` and transcribes the
198
- chunk; if no silence arrives by `2 × --streaming-window`, it
199
- force-cuts. The session continues until you stop it. Default `5` s
200
- trades a little Whisper context for snappier "text appears as you
201
- speak" UX; raise it (10–30 s) if accuracy on long sentences matters
202
- more than latency.
203
-
204
- This is experimental and off by default. The tray menu surfaces the
205
- same toggle under Options ▶ Advanced ▶ Pseudo-streaming.
245
+ ### How pseudo-streaming carves up a recording
246
+
247
+ Once the buffer has grown to at least `--stream-chunk-min` (default
248
+ 1.5 s), silence of at least `--stream-chunk-silence-break` (default
249
+ 0.6 s) triggers a chunk cut. A force-cut fires at `--stream-chunk-max`
250
+ (default 10 s) regardless of silence, to cap latency. The session
251
+ continues until you stop it manually.
252
+
253
+ ### Does pseudo-streaming change the API cost?
254
+
255
+ For cloud backends, going from one big transcription to N chunked
256
+ requests **does not normally change the bill**:
257
+
258
+ - **Groq** (`whisper-large-v3-turbo`) is billed per second of audio.
259
+ Total audio is unchanged → same cost.
260
+ - **OpenAI `whisper-1`** (legacy) is billed per minute of audio. Same
261
+ logic, same cost.
262
+ - **OpenAI `gpt-4o-transcribe` / `gpt-4o-mini-transcribe`** are token-
263
+ billed (audio-in + text-out + prompt-in). Audio and output stay
264
+ identical; the only delta is the rolling cross-chunk *prompt*
265
+ context (~200 chars ≈ 50–60 tokens per chunk after the first).
266
+ At gpt-4o-mini-transcribe input rates this is negligible — well
267
+ under a cent per long session.
268
+
269
+ That said, your real cost depends on your usage and your account's
270
+ pricing tier — **verify on your provider's billing dashboard** if
271
+ cost is a hard constraint.
272
+
273
+ Two special values for `--stream-chunk-silence-break` (set via the
274
+ tray's **Silence break** picker or `--stream-chunk-silence-break 0`
275
+ at the CLI):
276
+
277
+ - **Auto** (`0`) — disables the fixed-threshold trigger. At force-cut
278
+ time scribe picks the *longest* silence interval within the window
279
+ whose start position is at least `--stream-chunk-min` into the chunk,
280
+ re-cutting there for a more natural word boundary. Falls back to a
281
+ brute force-cut if no qualifying silence is found.
282
+ - **Max** — disables silence-based cuts entirely; only the force-cut at
283
+ `--stream-chunk-max` fires. Useful when you want uniform chunk sizes
284
+ regardless of speech patterns. (Only selectable from the tray picker.)
285
+
286
+ Stream mode is off by default — the default `Clip` mode transcribes the
287
+ whole recording at the end (`--clip`). The tray menu surfaces the same
288
+ toggle as the top-level **Mode: Stream / Clip** item. Native
289
+ streamers (vosk, `gpt-realtime-whisper`) are always streaming and the
290
+ menu shows **Mode: Stream (native)** for them.
206
291
 
207
292
  ### Cross-chunk prompt context
208
293
 
209
- In pseudo-streaming mode scribe automatically augments each chunk's
210
- prompt with the trailing ~200 characters of the *previous* chunk's
211
- transcription. This rolling tail is concatenated onto whatever static
212
- `--prompt` / `--words` you configured and reaches the backend through
213
- the same channel as the static prompt (the vocabulary biasing table
214
- above). The motivation is cross-chunk continuity:
294
+ In Stream mode (pseudo-streaming) scribe automatically augments
295
+ each chunk's prompt with the trailing ~200 characters of the
296
+ *previous* chunk's transcription. This rolling tail is concatenated
297
+ onto whatever static `--prompt` / `--words` you configured and
298
+ reaches the backend through the same channel as the static prompt
299
+ (the vocabulary biasing table above). The motivation is cross-chunk
300
+ continuity:
215
301
 
216
302
  - **Capitalization drift** — without context, a chunk that starts
217
303
  right after a period might come back lowercased.
@@ -226,13 +312,12 @@ sits well under that and leaves room for your static prompt + words
226
312
  list.
227
313
 
228
314
  The rolling tail is **dropped** when the silence between two
229
- utterances exceeds 1.5 seconds — a long pause is treated as a new
230
- sentence/idea boundary, where carrying a possibly-bad prior chunk
231
- forward biases the next one more than it helps. This mirrors
232
- `whisper.cpp`'s `--keep-context off` default: prior-text conditioning
233
- can self-reinforce errors (hallucinations, decoder repetition loops)
234
- more readily than it provides useful continuity, so we cap it at
235
- natural sentence boundaries.
315
+ utterances exceeds `--stream-context-reset-silence` ×
316
+ `--stream-chunk-silence-break` (default 3 × 0.6 s = 1.8 s) — a long
317
+ pause is treated as a new sentence/idea boundary, where carrying a
318
+ possibly-bad prior chunk forward biases the next one more than it
319
+ helps. Use `--stream-context-reset-silence inf` to keep context across
320
+ arbitrarily long pauses.
236
321
 
237
322
  Short pauses (mid-sentence punctuation) keep the context; the cut at
238
323
  the start of every new recording also clears it.
@@ -13,10 +13,11 @@ The flags are grouped to mirror the source-of-truth in
13
13
 
14
14
  | Flag | Purpose |
15
15
  |---------------------------------|-------------------------------------------------------------------------|
16
- | `--backend {vosk,whisper,openai,groq}` | Speech-recognition backend (prompted if omitted). |
16
+ | `--backend {vosk,whisper,whisper-futo,openai,groq}` | Speech-recognition backend (prompted if omitted). |
17
17
  | `--model NAME` | Model name for the chosen backend. Auto-routes to the right backend for known model names (e.g. `--model gpt-realtime-whisper` selects `openai`). |
18
- | `-l, --language LANG` | Language alias selecting a preset Vosk model (`en`/`fr`/`de`/`it`), or `en` for English-only Whisper models. |
18
+ | `-l, --language LANG` | Language alias selecting a preset Vosk model (`en`/`fr`/`de`/`it`), or `en` for English-only Whisper / Whisper-FUTO models. |
19
19
  | `--download-folder-whisper DIR` | Folder to store Whisper models. |
20
+ | `--download-folder-whisper-futo DIR` | Folder to store Whisper-FUTO ACFT ggml models (default: `$XDG_CACHE_HOME/whisper-futo`). |
20
21
  | `--download-folder-vosk DIR` | Folder to store Vosk models. |
21
22
 
22
23
  ## Prompting & vocabulary biasing
@@ -55,22 +56,23 @@ flag suppresses only its own side (giving `--prompt ""` still loads
55
56
  | Flag | Purpose |
56
57
  |-----------------------|----------------------------------------------------------|
57
58
  | `--input-device N` | Microphone device index (see `python -m sounddevice`). |
59
+ | `--dry-run` | Short-circuit the STT request boundary in every backend: model load is skipped and the network/SDK call returns a canned `[dry-run transcript]`. Used by the backend × mode smoke-test matrix; handy for exercising the plumbing without network access. |
58
60
 
59
61
  ## Output
60
62
 
61
63
  | Flag | Purpose |
62
64
  |-----------------------------|---------------------------------------------------------------------------------------------|
63
- | `-m, --mode {keystroke,clipboard,terminal}` | Where transcribed text goes (default `keystroke`). See [keyboard.md](keyboard.md). |
65
+ | `-m, --mode {keystroke,clipboard,terminal,file}` | Where transcribed text goes (default `keystroke`). `file` routes the transcript exclusively to `--output-file` and suppresses keyboard/clipboard output. See [output.md](output.md). |
64
66
  | `--typer {auto,eitype,pynput,wtype,ydotool}` | Keystroke-injection backend (default `auto`). |
65
67
  | `--type-direct` | In keystroke mode, type the transcription as keystrokes instead of synthesising Ctrl+V. |
66
- | `-o, --output-file FILE` | Also append the transcription to this file. |
68
+ | `-o, --output-file FILE` | Path the transcription is appended to when `--mode file`. Defaults to `<user-desktop>/scribe-notes.txt` (the platform Desktop folder — `~/Desktop` on Linux/macOS, `%USERPROFILE%\Desktop` on Windows; falls back to home dir if Desktop is absent). Ignored when `--mode` is anything other than `file` (the four output modes are mutually exclusive). |
67
69
 
68
70
  ## Silence detection
69
71
 
70
- | Flag | Default | Purpose |
71
- |----------------------------|---------|------------------------------------------------------------------------|
72
- | `--duration SECS` | `120` | Max recording duration in seconds. |
73
- | `--silence-duration SECS` | `0.6` | How long silence must persist before triggering a backend's silence behavior (realtime auto-commit, pseudo-streaming cut). |
72
+ > **Deprecated aliases** (still accepted, hidden from `--help`):
73
+ > `--duration N` maps to `--clip-timeout N`; `--silence-duration N`
74
+ > sets both `--stream-chunk-silence-break` and `--realtime-commit-silence`
75
+ > to `N`. Existing scripts using these flags continue to work.
74
76
 
75
77
  ## Voice activity detection
76
78
 
@@ -94,22 +96,53 @@ mode's knobs are ignored.
94
96
 
95
97
  ## Realtime (`gpt-realtime-whisper`)
96
98
 
97
- | Flag | Default | Purpose |
98
- |---------------------------------------------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
99
- | `--realtime-delay {minimal,low,medium,high,xhigh}` | `medium` | Trade off latency vs accuracy on `gpt-realtime-whisper`. Lower = faster partials but more paste churn in the focused window. |
100
- | `--realtime-gate` / `--no-realtime-gate` | on | Drop silent frames (per the active `--vad-mode`) before sending them over the WebSocket so silent audio isn't billed as input tokens. After `--silence-duration` of silence, also commit mid-session so trailing words flush live. |
99
+ | Flag | Default | Purpose |
100
+ |---------------------------------------------------|--------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
101
+ | `--realtime-delay {minimal,low,medium,high,xhigh}` | `medium` | Trade off latency vs accuracy on `gpt-realtime-whisper`. Lower = faster partials but more paste churn in the focused window. |
102
+ | `--realtime-gate` / `--no-realtime-gate` | on | Drop silent frames (per the active `--vad-mode`) before sending them over the WebSocket so silent audio isn't billed as input tokens. |
103
+ | `--realtime-commit-silence SECS` | `0.6` | Seconds of silence before a mid-session commit flushes trailing words to the server (default `0.6`). Set to `0` to rely solely on the server's turn detection. |
104
+
105
+ The tray's **Stream (advanced) › Stream** picker unifies `--realtime-gate`
106
+ and `--realtime-commit-silence` into a single choice: **Live** (gate
107
+ off, commit disabled — server turn detection only) or **Offline after
108
+ Xs** (gate on, commit after X seconds of silence). At the CLI level the
109
+ two flags remain independent. The auto-stop is documented under
110
+ **Listening mode → `--stream-timeout`** below (covers both native
111
+ streamers and pseudo-streaming on batch backends).
101
112
 
102
113
  Streaming models (Vosk, `gpt-realtime-whisper`) ignore the batch
103
114
  silence-chunking knobs; they have their own end-of-utterance signal.
104
115
 
116
+ ## Listening mode
117
+
118
+ | Flag | Default | Purpose |
119
+ |-----------------------------------|---------|-------------------------------------------------------------------------------------------|
120
+ | `--stream` | — | Force a batch backend (whisper, whisper-futo, openai non-realtime, groq) into pseudo-streaming — live chunks driven by `--stream-chunk-max` and `--stream-chunk-silence-break`. Same as the tray's **Mode: Stream**. |
121
+ | `--clip` | default | Transcribe the whole recording at end. Same as the tray's **Mode: Clip**. |
122
+ | `--stream-chunk-max SECS` | `10` | Maximum chunk duration in seconds. Force-cut fires at this threshold when no silence pause has been detected (default `10`). |
123
+ | `--stream-chunk-min SECS` | `1.5` | Minimum chunk size before a silence-cut is allowed (default `1.5`). Prevents very short clips that cause Whisper hallucinations. |
124
+ | `--stream-chunk-silence-break SECS` | `0.6` | Silence duration that triggers a chunk cut (default `0.6`). Special value `0` enables Auto mode (best-silence-in-window at force-cut time). |
125
+ | `--stream-context-reset-silence X` | `3.0` | Multiplier applied to `--stream-chunk-silence-break`: a pause longer than the product discards the rolling cross-chunk prompt context (default `3.0`, i.e. 1.8 s at the default silence-break). Use `inf` to never reset. |
126
+ | `--clip-timeout SECS` | `120` | Auto-stop after this many seconds in Clip mode (default `120`). |
127
+ | `--stream-timeout SECS` | `None` | Auto-stop after this many seconds in Stream mode (`None` = Always On, no auto-stop). Tray equivalent: **Stream timeout** in the Stream (advanced) submenu. |
128
+
129
+ Native streamers (vosk, `gpt-realtime-whisper`) are always streaming
130
+ and ignore `--clip`. `--realtime`, `--pseudo-streaming`,
131
+ `--streaming-window`, and `--realtime-timeout` are kept as hidden
132
+ back-compat aliases (`--streaming-window N` maps to
133
+ `--stream-chunk-max 2N` to preserve the old effective force-cut
134
+ threshold; `--realtime-timeout` maps to `--stream-timeout`).
135
+
105
136
  ## Frontend
106
137
 
107
138
  | Flag | Purpose |
108
139
  |-----------------------------|----------------------------------------------------------------------|
109
140
  | `--frontend {tray,terminal}` | UI to launch (default `tray`). |
110
- | `--no-interactive` | In terminal mode, skip the interactive menu and record immediately. (`--no-prompt` is kept as a deprecated alias.) |
141
+ | `--no-interactive` | In terminal mode, skip the interactive menu and record immediately. |
142
+ | `--record` | Start recording immediately on launch, frontend-agnostic. In the terminal frontend it's a one-line shortcut for `--no-interactive`; in the tray frontend it auto-fires the Record action ~0.5 s after the icon comes up. Useful for hotkey bindings (`scribe --record` triggers a recording from anywhere) and batched / scripted invocations. |
111
143
  | `--vosk-models M [M ...]` | Vosk models offered in the tray menu. |
112
144
  | `--whisper-models M [M ...]` | Whisper models offered in the tray menu. |
145
+ | `--whisper-futo-models M [M ...]` | Whisper-FUTO ACFT models offered in the tray menu. |
113
146
 
114
147
  ## Examples
115
148
 
@@ -134,13 +167,31 @@ environment) — you'll pay for silent audio while the session is open:
134
167
  scribe --model gpt-realtime-whisper --no-realtime-gate
135
168
  ```
136
169
 
137
- Run scribe headlessly into a file without touching the clipboard or
138
- focused window:
170
+ **Batched / scripted use** — record one dictation headlessly, write
171
+ it where you want, exit. No tray, no menu, no clipboard:
139
172
 
140
173
  ```bash
141
- scribe --frontend terminal --no-interactive --mode terminal -o session.txt
174
+ # Append to a file (default <Desktop>/scribe-notes.txt; override with -o)
175
+ scribe --record --frontend terminal --mode file
176
+
177
+ # Same with a custom path
178
+ scribe --record --frontend terminal --mode file -o /tmp/notes.txt
179
+
180
+ # Pipe-friendly: transcript on stdout
181
+ scribe --record --frontend terminal --mode terminal
182
+
183
+ # Streamed: chunks appended live (as you speak) instead of all-at-once
184
+ # at end-of-recording. Useful for long dictations and tail-following:
185
+ # tail -f /tmp/notes.txt
186
+ scribe --record --frontend terminal --mode file --stream -o /tmp/notes.txt
142
187
  ```
143
188
 
189
+ `--record` starts the recording immediately, `--frontend terminal`
190
+ skips the tray icon, `--mode file` (or `terminal`) picks where the
191
+ transcript lands, `--stream` (optional) emits chunks live instead of
192
+ the default Clip-mode all-at-once. Combine with a hotkey or cron for
193
+ one-shot capture.
194
+
144
195
  Bias the recogniser toward domain jargon (medical terms, proper names):
145
196
 
146
197
  ```bash