@iamsamuelrodda/dictate 2026.5.18-1 → 2026.5.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE CHANGED
@@ -1,21 +1,201 @@
1
- MIT License
2
-
3
- Copyright (c) 2026 Samuel Rodda
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ https://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ https://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
package/NOTICE ADDED
@@ -0,0 +1,3 @@
1
+ Dictate
2
+
3
+ This product includes software from the Dictate project.
package/README.md CHANGED
@@ -1,175 +1,367 @@
1
- # 🎙️ Dictate
2
-
3
- Desktop dictation that types into the focused app.
4
-
5
- `dictate` runs as a small tray app. Press your configured push-to-talk shortcut,
6
- speak, and it transcribes into whatever app you are already using.
7
-
8
- Current status: early desktop app. Linux and Windows 11 installs, tray controls,
9
- startup integration, recent history, model selection, API key storage, update,
10
- and uninstall paths are implemented. Signed Windows installer packaging is still
11
- future work.
1
+ # dictate
2
+
3
+ Local voice-to-text for desktop dictation: hold `Right Ctrl`, speak, release to transcribe and type into the focused window.
4
+
5
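+ Linux is the primary tray desktop. Windows 11 is supported as a separate compatibility stream with its own tray app (started from the `Dictate` Start Menu shortcut) plus headless push-to-talk and one-shot modes.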
6
+
7
+ ## Features
8
+
9
+ - Speech-to-text backends kept intentionally small:
10
+ - `faster-whisper` with `turbo` (default local Whisper path)
11
+ - `openai` with `gpt-4o-mini-transcribe`
12
+ - `xai` with `grok-speech-to-text`
13
+ - `gemini` with `gemini-3-flash-preview`
14
+ - Capability-aware backend contract (`hotwords`, prompt bias, language hint handling) so unsupported options fail soft with clear warnings.
15
+ - Backend-agnostic lexical adaptation modes: `native`, `prompt`, `post`, `hybrid`.
16
+ - Push-to-talk daemon: `Right Ctrl` hold/release to record/transcribe/type.
17
+ - Configurable push-to-talk key (`ctrl_r` default, `ctrl_l` supported for Wayland/laptop compatibility).
18
+ - System tray toggle (pause/resume dictation).
19
+ - Tray backend switcher (change local/API transcription route without restart).
20
+ - Recent history with copy/paste actions and pagination for up to 20 dictations.
21
+ - One-shot mode for terminal workflows (print to stdout or copy to clipboard).
22
+ - Typing backend auto-selection (`xdotool` on X11, `wtype`/`ydotool` on Wayland, `pynput` on Windows).
23
+ - Explicit backend resource release on switch.
24
+
25
+ ## Requirements
26
+
27
+ - Linux (X11 recommended; Wayland supported depending on typing backend and hotkey support).
28
+ - Windows 11 for headless push-to-talk and one-shot modes.
29
+ - Python 3.11 or 3.12.
30
+ - Microphone/audio: `sounddevice` + a working PortAudio setup.
31
+ - Typing backend (for daemon modes):
32
+ - X11: `xdotool` (recommended)
33
+ - Wayland: `wtype` or `ydotool`
34
+ - Windows 11: `pynput`
35
+ - Clipboard (for `--once --copy`): `xclip` on Linux or `pyperclip` on Windows.
36
+ - Tray icon dependencies (for default tray mode):
37
+ - GTK + GI bindings (`python3-gi`)
38
+ - Ayatana indicator bindings (`gir1.2-ayatanaappindicator3-0.1` or equivalent for your distro)
12
39
 
13
40
  ## Install
14
41
 
15
- Windows 11 normal install:
16
-
17
- ```powershell
18
- powershell -ExecutionPolicy Bypass -Command "iwr -useb https://cdn.jsdelivr.net/npm/@iamsamuelrodda/dictate@latest/install.ps1 | iex"
19
- ```
20
-
21
- Open **Dictate** from the Start Menu after install.
22
-
23
- Ubuntu/Debian source install:
42
+ Ubuntu/Debian install with system packages, seeded default hotwords config, and prepared `faster-whisper/turbo`:
24
43
 
25
44
  ```bash
26
45
  ./install-ubuntu.sh
27
46
  ```
28
47
 
29
- Generic Linux source install:
48
+ Generic repo install (assumes OS packages and `uv` are already present):
30
49
 
31
50
  ```bash
32
51
  ./install.sh
33
52
  ```
34
53
 
35
- Open **Dictate** from the app launcher after install.
54
+ `install.sh` seeds the Linux config file from [`config/default-config.yaml`](config/default-config.yaml) on first install and prepares the `faster-whisper/turbo` model by default. Existing user config is left untouched.
36
55
 
37
- Windows source install, from the repo/source directory:
56
+ Installer verification/model preparation can be skipped if needed:
38
57
 
39
- ```powershell
40
- powershell -ExecutionPolicy Bypass -File .\install-windows-wizard.ps1
58
+ ```bash
59
+ ./install.sh --no-verify --no-prepare-turbo
41
60
  ```
42
61
 
43
- The local `.ps1` installer scripts must be run from a checkout or extracted
44
- source directory. They will not work from `C:\Windows\System32`.
45
-
46
- Node/npm users can also run:
62
+ Windows 11 install from PowerShell:
47
63
 
48
64
  ```powershell
49
- npx @iamsamuelrodda/dictate install
65
+ powershell -ExecutionPolicy Bypass -File .\install-windows.ps1
50
66
  ```
51
67
 
52
- ## Workflow
68
+ Windows setup wizard:
53
69
 
54
- ```text
55
- Open Dictate
56
- Select Model
57
- Set API key if using a hosted model
58
- Set push-to-talk shortcut if desired
59
- Hold shortcut, speak, release
60
- Review Recent History when needed
70
+ ```powershell
71
+ powershell -ExecutionPolicy Bypass -File .\install-windows-wizard.ps1
61
72
  ```
62
73
 
63
- By default, Dictate installs a normal app launcher entry and starts on sign-in.
64
- Startup can be changed from Settings.
65
-
66
- ## Update And Uninstall
67
-
68
- Windows installed from the hosted installer:
74
+ Hosted Windows one-liner:
69
75
 
70
76
  ```powershell
71
- powershell -ExecutionPolicy Bypass -Command "iwr -useb https://cdn.jsdelivr.net/npm/@iamsamuelrodda/dictate@latest/update.ps1 | iex"
72
- powershell -ExecutionPolicy Bypass -Command "iwr -useb https://cdn.jsdelivr.net/npm/@iamsamuelrodda/dictate@latest/uninstall.ps1 | iex"
77
+ powershell -ExecutionPolicy Bypass -Command "iwr -useb https://cdn.jsdelivr.net/npm/@iamsamuelrodda/dictate@latest/install.ps1 | iex"
73
78
  ```
74
79
 
75
- Windows from source:
80
+ Node/npm users can also run:
76
81
 
77
82
  ```powershell
78
- powershell -ExecutionPolicy Bypass -File .\update-windows.ps1
79
- powershell -ExecutionPolicy Bypass -File .\uninstall-windows.ps1
83
+ npx @iamsamuelrodda/dictate install
80
84
  ```
81
85
 
82
- Linux:
86
+ This creates `.venv`, installs the Windows dependencies, seeds config, writes launcher scripts, prepares the default model, runs diagnostics, and installs a Start Menu shortcut named `Dictate`. The `Dictate` shortcut starts the Windows tray app. See [Windows 11 support](docs/windows-11.md) for details.
87
+ The Windows installer also verifies the Microsoft Visual C++ runtime needed by the native transcription wheels and installs it when it is missing.
88
+
89
+ Update or uninstall from a source checkout:
83
90
 
84
91
  ```bash
85
92
  ./update.sh
86
93
  ./uninstall.sh
87
94
  ```
88
95
 
89
- Use `-RemoveUserData` on Windows or `--remove-user-data` on Linux only when you
90
- also want to remove config, logs, history, and downloaded model data.
91
-
92
- ## What It Does Today
96
+ ```powershell
97
+ powershell -ExecutionPolicy Bypass -File .\update-windows.ps1
98
+ powershell -ExecutionPolicy Bypass -File .\uninstall-windows.ps1
99
+ ```
93
100
 
94
- - Starts from the Windows Start Menu or Linux app launcher
95
- - Runs as a tray app
96
- - Types dictated text into the focused app
97
- - Supports configurable push-to-talk
98
- - Shows selected model/status in the app UI
99
- - Supports launch on startup
100
- - Stores hosted-provider API keys in the OS secret store
101
- - Keeps a small Recent History for copy/paste recovery
102
- - Provides installer, updater, uninstaller, and doctor paths
101
+ Use `--remove-user-data` on Linux or `-RemoveUserData` on Windows only when you also want to remove config, logs, history, and downloaded model data.
103
102
 
104
- ## Models
103
+ Dictate is built to be useful, but it has real-world risks. Support and maintenance are best-effort, and important output should be checked before you rely on it. Report issues so they can be fixed; if Dictate saves you time and you have the means, paid support helps keep the work moving. Arc Forge terms are at <https://arcforge.au/terms>.
105
104
 
106
- Supported provider defaults:
105
+ ## Usage
107
106
 
108
- - `faster-whisper/turbo` for local transcription
109
- - `openai/gpt-4o-mini-transcribe`
110
- - `xai/grok-speech-to-text`
111
- - `gemini/gemini-3-flash-preview`
107
+ Tray mode (default):
112
108
 
113
- Local transcription can use CPU or GPU where supported. Hosted providers require
114
- an API key before they can be selected.
109
+ ```bash
110
+ dictate
111
+ ```
115
112
 
116
- ## Commands
113
+ Headless daemon (no tray icon):
117
114
 
118
115
  ```bash
119
- dictate
120
116
  dictate --no-tray
117
+ ```
118
+
119
+ One-shot (record until Enter, print to stdout):
120
+
121
+ ```bash
121
122
  dictate --once
123
+ ```
124
+
125
+ One-shot (record until Enter, copy to clipboard):
126
+
127
+ ```bash
122
128
  dictate --once --copy
129
+ ```
130
+
131
+ Run diagnostics:
132
+
133
+ ```bash
123
134
  dictate doctor --quick
124
135
  dictate doctor --quick --fix
136
+ dictate doctor --quick --update-paths
125
137
  dictate doctor --check-model-load
126
138
  ```
127
139
 
128
- Hotword and model options are available from Settings. CLI flags still exist for
129
- automation and testing:
140
+ Select model/device/compute-type/language:
130
141
 
131
142
  ```bash
132
143
  dictate --stt-backend faster-whisper --model turbo
133
144
  dictate --stt-backend openai --model gpt-4o-mini-transcribe
134
145
  dictate --stt-backend xai --model grok-speech-to-text
135
146
  dictate --stt-backend gemini --model gemini-3-flash-preview
136
- dictate --add-hotword AcmeWidget
147
+ dictate --device cpu
148
+ dictate --compute-type float16
149
+ dictate --language en
150
+ dictate --lexicon-mode hybrid
151
+ ```
152
+
153
+ `--compute-type` affects `faster-whisper` only. Hosted API backends ignore local device and compute settings.
154
+ For hosted backends, use the tray **API Key** provider menu or Windows controls to store keys in the OS secret store
155
+ (Secret Service/libsecret on Linux, Windows Credential Manager on Windows). Dictate never writes raw API keys
156
+ to `config.yaml`. Environment variables such as `DICTATE_OPENAI_API_KEY`, `DICTATE_XAI_API_KEY`, and
157
+ `DICTATE_GEMINI_API_KEY` still take priority, and advanced users can keep using `*_api_key_command` config
158
+ entries that call their own secret manager.
159
+
160
+ Manage lexical post-corrections:
161
+
162
+ ```bash
163
+ dictate --add-lexicon-replacement kinneri=canary
164
+ dictate --remove-lexicon-replacement kinneri
165
+ dictate --list-lexicon-replacements
166
+ ```
167
+
168
+ ## STT Backends
169
+
170
+ Supported defaults for low-latency dictation:
171
+
172
+ - Local default: `faster-whisper` with `turbo`
173
+ - Hosted OpenAI route: `openai` with `gpt-4o-mini-transcribe`
174
+ - Hosted xAI route: `xai` with `grok-speech-to-text`
175
+ - Hosted Gemini route: `gemini` with `gemini-3-flash-preview`
176
+
177
+ Examples:
178
+
179
+ ```bash
180
+ # Local Whisper Turbo
181
+ dictate --stt-backend faster-whisper --model turbo --language en
182
+
183
+ # Hosted transcription path, useful when local GPU should be reserved for other work
184
+ OPENAI_API_KEY=... dictate --stt-backend openai --model gpt-4o-mini-transcribe --language en
185
+
186
+ # Hosted xAI path with keyterm biasing from configured hotwords
187
+ XAI_API_KEY=... dictate --stt-backend xai --model grok-speech-to-text --language en
188
+
189
+ # Hosted Gemini path using Gemini audio understanding
190
+ GEMINI_API_KEY=... dictate --stt-backend gemini --model gemini-3-flash-preview --language en
191
+
192
+ # Desktop autostart can use keys stored from the tray API Key provider menu.
193
+ # Advanced external secret-manager helpers are still supported:
194
+ # stt_backend: openai
195
+ # stt_model: gpt-4o-mini-transcribe
196
+ # openai_api_key_command: /home/samuelrodda/.local/bin/dictate-openai-key
197
+ # stt_backend: xai
198
+ # stt_model: grok-speech-to-text
199
+ # xai_api_key_command: /home/samuelrodda/.local/bin/dictate-xai-key
200
+ # stt_backend: gemini
201
+ # stt_model: gemini-3-flash-preview
202
+ # gemini_api_key_command: /home/samuelrodda/.local/bin/dictate-gemini-key
203
+ ```
204
+
205
+ The app UI exposes one local Whisper option: `faster-whisper/turbo`.
206
+
207
+ Force typing backend (daemon modes):
208
+
209
+ ```bash
210
+ dictate --type-backend xdotool
211
+ dictate --type-backend wtype
212
+ dictate --type-backend ydotool
213
+ dictate --type-backend pynput
214
+ ```
215
+
216
+ ## Hotwords
217
+
218
+ Hotwords improve recognition of custom vocabulary (project names, technical terms, etc.).
219
+
220
+ Manage saved hotwords:
221
+
222
+ ```bash
223
+ dictate --add-hotword Kubernetes
224
+ dictate --add-hotword OpenBao,Vikunja
225
+ dictate --remove-hotword Vikunja
137
226
  dictate --list-hotwords
138
227
  ```
139
228
 
140
- ## State
229
+ Hotwords are saved to the platform config file. On Linux this is usually `~/.config/dictate/config.yaml`; on Windows this is `%APPDATA%\dictate\config.yaml`. Fresh installs created through the repo installer seed this file from [`config/default-config.yaml`](config/default-config.yaml).
230
+
231
+ You can also pass one-off hotwords without saving them:
232
+
233
+ ```bash
234
+ dictate --hotwords "Kubernetes,OpenBao"
235
+ ```
236
+
237
+ CLI `--hotwords` and saved hotwords are merged at startup.
238
+
239
+ Lexical adaptation modes (backend-agnostic):
240
+
241
+ - `native` (default): use backend-native hotword biasing (effective on `faster-whisper`)
242
+ - `prompt`: use prompt/context biasing (effective on prompt-capable API backends)
243
+ - `post`: run lightweight post-correction against hotwords/replacements
244
+ - `hybrid`: apply all supported strategies
245
+
246
+ Mode behavior by backend:
247
+
248
+ - `faster-whisper`: `native`/`hybrid` applies decode-time hotword bias.
249
+ - `openai` and `gemini`: use `prompt` or `hybrid` for prompt/context biasing, and/or `post`/`hybrid` for post-correction.
250
+ - `xai`: `native`/`hybrid` passes keyterms to the hosted STT API.
251
+ - In `native` mode on backends without native hotwords, hotwords are ignored with a warning.
252
+
253
+ Examples:
254
+
255
+ ```bash
256
+ # Use prompt biasing with your hotwords list on a prompt-capable API backend
257
+ dictate --stt-backend gemini --lexicon-mode prompt
258
+
259
+ # Hybrid mode combines native/prompt/post where available
260
+ dictate --lexicon-mode hybrid
261
+
262
+ # Add explicit post-correction replacements
263
+ dictate --add-lexicon-replacement kinneri=canary
264
+ dictate --remove-lexicon-replacement kinneri
265
+ dictate --list-lexicon-replacements
266
+ ```
267
+
268
+ ## How It Works
269
+
270
+ **Hold Right Ctrl** to record, **release** to transcribe and type into the focused window.
271
+
272
+ If your desktop or keyboard reports `Right Ctrl` unreliably, set this in your platform config file:
273
+
274
+ ```yaml
275
+ push_to_talk_combo: ctrl_l
276
+ ```
277
+
278
+ In tray mode, a microphone icon appears in the system tray with a right-click menu:
279
+
280
+ - **Active** — checkbox to pause/resume listening for the hotkey. The icon switches to a muted microphone when paused.
281
+ - **Select Model** — switch between Local, OpenAI, xAI, and Gemini live.
282
+ - **Hotwords** — manage saved vocabulary when the selected backend can use it.
283
+ - **API Key** — store or clear OpenAI, xAI, and Gemini keys in the OS secret store.
284
+ - **Hotkeys** — configure the recording shortcut.
285
+ - **Recent History** — copy or paste a previous dictation. History keeps up to 20 entries and paginates the list.
286
+ - If switching models fails, Dictate keeps the previous backend active and shows an error dialog.
287
+ - **Quit** — stops the daemon.
288
+
289
+ You can also quit from the terminal with `Ctrl+C`.
290
+
291
+ ## Notes And Troubleshooting
292
+
293
+ - First local run will likely download Whisper model files. Network is required once per model.
294
+ - Tray backend selections are persisted in the platform config file and used on startup unless CLI flags override them.
295
+ - Persisted STT selection keys:
296
+ - `stt_backend`
297
+ - `stt_model`
298
+ - `stt_device`
299
+ - `stt_compute_type`
300
+ - Additional recognized config keys:
301
+ - `push_to_talk_combo` (examples: `ctrl_r`, `ctrl_l`, `ctrl+space`, `ctrl+shift`)
302
+ - `lexicon_mode` (optional startup default; set manually in config)
303
+ - `lexicon_replacements` (managed by CLI replacement commands)
304
+ - `openai_api_key_command`, `xai_api_key_command`, `gemini_api_key_command`
305
+ - Preflight checks STT backend readiness (dependency imports + CUDA visibility) before model load.
306
+ - Startup stderr is mirrored to logs:
307
+ - Linux latest run: `~/.local/share/dictate/logs/latest.log`
308
+ - Linux last non-zero exit: `~/.local/share/dictate/logs/last_failure.log`
309
+ - Windows logs: `%LOCALAPPDATA%\dictate\logs\`
310
+ - Fallback when the home path is not writable: a `dictate-logs` directory under the system temp directory
311
+ - On Wayland:
312
+ - `xdotool` generally will not work for native Wayland apps.
313
+ - Prefer `wtype` (simple) or `ydotool` (may require extra setup/permissions).
314
+ - Global hotkeys can be restricted on some Wayland compositors; if your combo does not fire, try `push_to_talk_combo: ctrl_l` or `push_to_talk_combo: ctrl+space`, use `--once`, or run an X11 session.
315
+ - On Windows 11, the Start Menu shortcut named `Dictate` starts the native tray app. Use `dictate --no-tray --type-backend pynput` only when you explicitly want a headless daemon.
316
+ - If preflight reports missing tools, install them via your distro package manager (e.g. `xdotool`, `xclip`, `wtype`) or install the Windows extra with `pip install -e ".[windows]"`.
317
+ - Dictation uses the system default microphone input device. If your default input is misconfigured, fix it in your OS audio settings.
318
+ - If the app does not launch from GUI, run `dictate doctor --quick` and inspect the reported active log directory.
319
+
320
+ ## Benchmarking
321
+
322
+ Use the local benchmark harness to compare backends/models on your own accent and vocabulary:
141
323
 
142
- User state is local:
324
+ ```bash
325
+ dictate benchmark \
326
+ --manifest benchmarks/example_manifest.csv \
327
+ --audio-root benchmarks \
328
+ --stt-backend faster-whisper \
329
+ --model turbo \
330
+ --device auto \
331
+ --language en
332
+ ```
143
333
 
144
- ```text
145
- Linux:
146
- ~/.config/dictate/config.yaml
147
- ~/.local/share/dictate/
334
+ Legacy wrapper still works:
148
335
 
149
- Windows:
150
- %APPDATA%\dictate\config.yaml
151
- %LOCALAPPDATA%\dictate\
336
+ ```bash
337
+ uv run python scripts/benchmark_stt.py --help
152
338
  ```
153
339
 
154
- Repo defaults intentionally ship with `hotwords: []`. Hotwords are user-specific
155
- and should not be packaged into the public repo default config.
340
+ Create your own manifest with Australian-accent phrases and proper nouns. Format docs: `benchmarks/README.md`.
156
341
 
157
- ## Safety
342
+ ## Testing
158
343
 
159
- - Dictate does not intentionally write raw API keys to `config.yaml`.
160
- - API keys configured in the app use the OS secret store.
161
- - Dictation text can be sensitive; check logs and issue reports before sharing.
162
- - Important transcriptions should be verified before relying on them.
163
- - Support and maintenance are best-effort.
344
+ Run regression tests:
345
+
346
+ ```bash
347
+ uv run python -m unittest discover -s tests
348
+ ```
164
349
 
165
- ## Docs
350
+ ## Development
166
351
 
167
- - [Windows 11 support](docs/windows-11.md)
168
- - [Recent History spec](docs/recent-dictation-history-spec.md)
169
- - [Release/versioning](docs/release-versioning.md)
170
- - [Development streams](docs/development-streams.md)
171
- - [Security policy](SECURITY.md)
352
+ - Entry point: `dictate` is `dictate.__main__:main_with_logging` (see `src/dictate/__main__.py`).
353
+ - Core pipeline modules:
354
+ - audio capture: `src/dictate/audio.py`
355
+ - transcription engine: `src/dictate/engine.py`
356
+ - typing/clipboard outputs: `src/dictate/outputs.py`
357
+ - environment checks: `src/dictate/preflight.py`
358
+ - STT backends + registry: `src/dictate/stt/`
359
+ - Platform docs:
360
+ - Windows 11 stream: `docs/windows-11.md`
361
+ - Development streams: `docs/development-streams.md`
362
+ - CalVer releases: `docs/release-versioning.md`
172
363
 
173
364
  ## License
174
365
 
175
- MIT. See [LICENSE](LICENSE).
366
+ Apache-2.0 (see `LICENSE`). Preserve `NOTICE` when redistributing the project.
367
+ Contributions are accepted under the terms in `CONTRIBUTING.md`.
@@ -1,4 +1,32 @@
1
- hotwords: []
1
+ hotwords:
2
+ - Todoist
3
+ - Joplin
4
+ - Kubernetes
5
+ - Arc
6
+ - Bitwarden
7
+ - OpenBao
8
+ - Claude Code
9
+ - Prometheus
10
+ - Grafana
11
+ - Pi
12
+ - Arc Forge
13
+ - ASIC
14
+ - GIMP
15
+ - Ghostty
16
+ - Inkscape
17
+ - See
18
+ - see
19
+ - Seazona
20
+ - OpenClaw
21
+ - Aegis
22
+ - Cato
23
+ - Rodda
24
+ - Sure
25
+ - Moasure
26
+ - Schemantics
27
+ - root
28
+ - Xero
29
+ - Pixel Forge
2
30
  push_to_talk_combo: ctrl_r
3
31
  stt_backend: faster-whisper
4
32
  stt_compute_type: int8
@@ -179,7 +179,6 @@ function New-DictateShortcut {
179
179
  param(
180
180
  [string]$ShortcutPath,
181
181
  [string]$TargetPath,
182
- [string]$Arguments = "",
183
182
  [string]$WorkingDirectory,
184
183
  [string]$Description
185
184
  )
@@ -188,7 +187,6 @@ function New-DictateShortcut {
188
187
  $shell = New-Object -ComObject WScript.Shell
189
188
  $shortcut = $shell.CreateShortcut($ShortcutPath)
190
189
  $shortcut.TargetPath = $TargetPath
191
- $shortcut.Arguments = $Arguments
192
190
  $shortcut.WorkingDirectory = $WorkingDirectory
193
191
  $shortcut.Description = $Description
194
192
  if (Test-Path $iconPath) {
@@ -200,7 +198,6 @@ function New-DictateShortcut {
200
198
  function Install-StartMenuShortcut {
201
199
  param(
202
200
  [string]$TargetPath,
203
- [string]$Arguments = "",
204
201
  [string]$WorkingDirectory
205
202
  )
206
203
 
@@ -217,7 +214,7 @@ function Install-StartMenuShortcut {
217
214
  }
218
215
 
219
216
  Remove-Item -Force -ErrorAction SilentlyContinue -Path $legacyShortcutPath
220
- New-DictateShortcut -ShortcutPath $shortcutPath -TargetPath $TargetPath -Arguments $Arguments -WorkingDirectory $WorkingDirectory -Description "Start Dictate push-to-talk tray"
217
+ New-DictateShortcut -ShortcutPath $shortcutPath -TargetPath $TargetPath -WorkingDirectory $WorkingDirectory -Description "Start Dictate push-to-talk tray"
221
218
 
222
219
  Write-Host "==> Installed Start Menu shortcut: $shortcutPath"
223
220
  }
@@ -225,7 +222,6 @@ function Install-StartMenuShortcut {
225
222
  function Install-StartupShortcut {
226
223
  param(
227
224
  [string]$TargetPath,
228
- [string]$Arguments = "",
229
225
  [string]$WorkingDirectory
230
226
  )
231
227
 
@@ -243,7 +239,7 @@ function Install-StartupShortcut {
243
239
  New-Item -ItemType Directory -Force -Path $startupDir | Out-Null
244
240
 
245
241
  $shortcutPath = Join-Path $startupDir "Dictate.lnk"
246
- New-DictateShortcut -ShortcutPath $shortcutPath -TargetPath $TargetPath -Arguments $Arguments -WorkingDirectory $WorkingDirectory -Description "Start Dictate automatically at sign-in"
242
+ New-DictateShortcut -ShortcutPath $shortcutPath -TargetPath $TargetPath -WorkingDirectory $WorkingDirectory -Description "Start Dictate automatically at sign-in"
247
243
 
248
244
  Write-Host "==> Installed startup shortcut: $shortcutPath"
249
245
  }
@@ -257,7 +253,7 @@ function Register-InstalledApp {
257
253
  $keyPath = "HKCU:\Software\Microsoft\Windows\CurrentVersion\Uninstall\Dictate"
258
254
  New-Item -Force -Path $keyPath | Out-Null
259
255
  New-ItemProperty -Force -Path $keyPath -Name "DisplayName" -Value "Dictate" -PropertyType String | Out-Null
260
- New-ItemProperty -Force -Path $keyPath -Name "DisplayVersion" -Value "2026.5.18-1" -PropertyType String | Out-Null
256
+ New-ItemProperty -Force -Path $keyPath -Name "DisplayVersion" -Value "2026.5.18" -PropertyType String | Out-Null
261
257
  New-ItemProperty -Force -Path $keyPath -Name "Publisher" -Value "Arc Forge Labs" -PropertyType String | Out-Null
262
258
  New-ItemProperty -Force -Path $keyPath -Name "InstallLocation" -Value $InstallLocation -PropertyType String | Out-Null
263
259
  if (Test-Path $DisplayIcon) {
@@ -295,11 +291,8 @@ Invoke-Checked -Exe $venvPython -ArgumentList @("-m", "pip", "install", "-e", "$
295
291
 
296
292
  Seed-Config
297
293
  Write-LauncherScripts -ScriptsDir $scriptsDir
298
- $trayVbs = Join-Path $scriptsDir "dictate-tray.vbs"
299
- $wscript = Join-Path $env:WINDIR "System32\wscript.exe"
300
- $trayArgs = "`"$trayVbs`""
301
- Install-StartMenuShortcut -TargetPath $wscript -Arguments $trayArgs -WorkingDirectory $PSScriptRoot
302
- Install-StartupShortcut -TargetPath $wscript -Arguments $trayArgs -WorkingDirectory $PSScriptRoot
294
+ Install-StartMenuShortcut -TargetPath (Join-Path $scriptsDir "dictate-tray.vbs") -WorkingDirectory $PSScriptRoot
295
+ Install-StartupShortcut -TargetPath (Join-Path $scriptsDir "dictate-tray.vbs") -WorkingDirectory $PSScriptRoot
303
296
  Register-InstalledApp -InstallLocation $PSScriptRoot -DisplayIcon (Join-Path $PSScriptRoot "assets\dictate.ico")
304
297
 
305
298
  if (-not $NoPrepareTurbo) {
package/install.ps1 CHANGED
@@ -10,7 +10,7 @@ param(
10
10
  )
11
11
 
12
12
  $ErrorActionPreference = "Stop"
13
- $DictateVersion = "2026.5.18-1"
13
+ $DictateVersion = "2026.5.18"
14
14
 
15
15
  if (-not $ArchiveUrl) {
16
16
  $ArchiveUrl = "https://github.com/arcforgelabs/dictate/archive/refs/tags/v$DictateVersion.zip"
package/install.sh CHANGED
@@ -127,7 +127,7 @@ rm -f "$DESKTOP_DIR/dictate-settings.desktop"
127
127
  cat > "$DESKTOP_DIR/dictate.desktop" <<EOF
128
128
  [Desktop Entry]
129
129
  Name=Dictate
130
- Comment=Dictate into the focused app
130
+ Comment=Local voice-to-text with push-to-talk
131
131
  Exec=$HOME/.local/bin/dictate
132
132
  Icon=$ICON_PATH
133
133
  Type=Application
@@ -144,7 +144,7 @@ if [ "$STARTUP" -eq 1 ]; then
144
144
  cat > "$AUTOSTART_DIR/dictate.desktop" <<EOF
145
145
  [Desktop Entry]
146
146
  Name=Dictate
147
- Comment=Dictate into the focused app
147
+ Comment=Local voice-to-text with push-to-talk
148
148
  Exec=$HOME/.local/bin/dictate
149
149
  Icon=$ICON_PATH
150
150
  Type=Application
@@ -24,7 +24,7 @@ const scripts = isWindows
24
24
  ? {
25
25
  install: ["powershell.exe", ["-NoProfile", "-ExecutionPolicy", "Bypass", "-File", join(packageRoot, "install.ps1")]],
26
26
  update: ["powershell.exe", ["-NoProfile", "-ExecutionPolicy", "Bypass", "-File", join(packageRoot, "update.ps1")]],
27
- uninstall: ["powershell.exe", ["-NoProfile", "-ExecutionPolicy", "Bypass", "-File", join(packageRoot, "uninstall.ps1")]],
27
+ uninstall: ["powershell.exe", ["-NoProfile", "-ExecutionPolicy", "Bypass", "-File", join(packageRoot, "uninstall-windows.ps1")]],
28
28
  wizard: ["powershell.exe", ["-NoProfile", "-ExecutionPolicy", "Bypass", "-File", join(packageRoot, "install.ps1"), "-Wizard"]],
29
29
  }
30
30
  : {
package/package.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "@iamsamuelrodda/dictate",
3
- "version": "2026.5.18-1",
4
- "description": "Installer shim for Dictate desktop dictation.",
5
- "license": "MIT",
3
+ "version": "2026.5.18",
4
+ "description": "Installer shim for Dictate desktop voice-to-text.",
5
+ "license": "Apache-2.0",
6
6
  "type": "module",
7
7
  "homepage": "https://github.com/arcforgelabs/dictate#readme",
8
8
  "repository": {
@@ -23,7 +23,6 @@
23
23
  "install-windows.ps1",
24
24
  "install-windows-wizard.ps1",
25
25
  "update-windows.ps1",
26
- "uninstall.ps1",
27
26
  "uninstall-windows.ps1",
28
27
  "install.sh",
29
28
  "update.sh",
@@ -35,7 +34,8 @@
35
34
  "assets/dictate-listening.png",
36
35
  "npm/",
37
36
  "README.md",
38
- "LICENSE"
37
+ "LICENSE",
38
+ "NOTICE"
39
39
  ],
40
40
  "publishConfig": {
41
41
  "access": "public",
package/update.ps1 CHANGED
@@ -10,7 +10,7 @@ param(
10
10
  )
11
11
 
12
12
  $ErrorActionPreference = "Stop"
13
- $DictateVersion = "2026.5.18-1"
13
+ $DictateVersion = "2026.5.18"
14
14
 
15
15
  if (-not $ArchiveUrl) {
16
16
  $ArchiveUrl = "https://github.com/arcforgelabs/dictate/archive/refs/tags/v$DictateVersion.zip"
package/uninstall.ps1 DELETED
@@ -1,51 +0,0 @@
1
- param(
2
- [string]$InstallRoot,
3
- [switch]$Quiet,
4
- [switch]$RemoveUserData
5
- )
6
-
7
- $ErrorActionPreference = "Stop"
8
-
9
- if (-not $InstallRoot) {
10
- $base = $env:LOCALAPPDATA
11
- if (-not $base) {
12
- $base = Join-Path $HOME "AppData\Local"
13
- }
14
- $InstallRoot = Join-Path $base "Dictate"
15
- }
16
-
17
- $installRootPath = [System.IO.Path]::GetFullPath($InstallRoot)
18
- $sourceDir = Join-Path $installRootPath "source"
19
- $uninstaller = Join-Path $sourceDir "uninstall-windows.ps1"
20
-
21
- if (Test-Path $uninstaller) {
22
- $args = @("-NoProfile", "-ExecutionPolicy", "Bypass", "-File", $uninstaller)
23
- if ($Quiet) { $args += "-Quiet" }
24
- if ($RemoveUserData) { $args += "-RemoveUserData" }
25
- & powershell @args
26
- if ($LASTEXITCODE -ne 0) {
27
- throw "Dictate Windows uninstaller failed with exit code $LASTEXITCODE."
28
- }
29
- } else {
30
- if (-not $Quiet) {
31
- Write-Host "Dictate source uninstaller not found: $uninstaller"
32
- Write-Host "Removing known Start Menu, startup, and Installed Apps entries."
33
- }
34
- $programsDir = Join-Path $env:APPDATA "Microsoft\Windows\Start Menu\Programs"
35
- if (-not $env:APPDATA) {
36
- $programsDir = Join-Path $HOME "AppData\Roaming\Microsoft\Windows\Start Menu\Programs"
37
- }
38
- Remove-Item -Force -ErrorAction SilentlyContinue -Path `
39
- (Join-Path $programsDir "Dictate.lnk"), `
40
- (Join-Path $programsDir "Dictate Controls.lnk"), `
41
- (Join-Path $programsDir "Startup\Dictate.lnk")
42
- Remove-Item -Recurse -Force -ErrorAction SilentlyContinue -Path "HKCU:\Software\Microsoft\Windows\CurrentVersion\Uninstall\Dictate"
43
- }
44
-
45
- if (Test-Path $installRootPath) {
46
- Remove-Item -Recurse -Force -Path $installRootPath
47
- }
48
-
49
- if (-not $Quiet) {
50
- Write-Host "Dictate removed from: $installRootPath"
51
- }