dikt 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/cli.mjs +971 -0
  4. package/package.json +35 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 johxyz
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,83 @@
1
+ # dikt
2
+
3
+ Voice dictation for the terminal. Record, transcribe, copy — zero npm dependencies.
4
+
5
+ Uses [Mistral's Voxtral](https://docs.mistral.ai/capabilities/audio/) for speech-to-text.
6
+
7
+ ## Install
8
+
9
+ ```
10
+ npm install -g dikt
11
+ ```
12
+
13
+ Requires [sox](https://sox.sourceforge.net/) for audio recording:
14
+
15
+ ```bash
16
+ # macOS
17
+ brew install sox
18
+
19
+ # Ubuntu/Debian
20
+ sudo apt install sox
21
+
22
+ # Arch
23
+ sudo pacman -S sox
24
+ ```
25
+
26
+ ## Setup
27
+
28
+ On first run, dikt prompts you for your Mistral API key and model preferences. To change them later, run:
29
+
30
+ ```
31
+ dikt setup
32
+ ```
33
+
34
+ Config is stored in `~/.config/dikt/config.json` (or `$XDG_CONFIG_HOME/dikt/config.json` when `XDG_CONFIG_HOME` is set).
35
+
36
+ ## Usage
37
+
38
+ ```
39
+ dikt
40
+ ```
41
+
42
+ This opens an interactive TUI where you can record, transcribe, and copy text.
43
+
44
+ ### Keys
45
+
46
+ | Key | Action |
47
+ |---|---|
48
+ | `Space` | Start / stop recording |
49
+ | `c` / `Enter` | Copy transcript to clipboard |
50
+ | `a` | Toggle auto-copy |
51
+ | `h` | Cycle through history |
52
+ | `r` | Re-transcribe last recording |
53
+ | `Esc` | Cancel recording |
54
+ | `s` | Re-run setup |
55
+ | `?` | Show keybindings |
56
+ | `q` | Quit |
57
+
58
+ ### Single-shot mode
59
+
60
+ ```bash
61
+ # Print transcript to stdout
62
+ dikt -q
63
+
64
+ # Output JSON
65
+ dikt --json
66
+
67
+ # Pipe to another tool
68
+ dikt -q | claude
69
+ ```
70
+
71
+ ## Environment variables
72
+
73
+ | Variable | Description |
74
+ |---|---|
75
+ | `DIKT_API_KEY` | Override API key |
76
+ | `DIKT_MODEL` | Override model (default: `voxtral-mini-latest`) |
77
+ | `DIKT_LANGUAGE` | Override language (default: auto) |
78
+ | `DIKT_TEMPERATURE` | Override temperature |
79
+ | `DIKT_CONTEXT_BIAS` | Override context bias |
80
+
81
+ ## License
82
+
83
+ MIT
package/cli.mjs ADDED
@@ -0,0 +1,971 @@
1
+ #!/usr/bin/env node
2
+ // dikt — voice dictation for the terminal
3
+ // Zero npm dependencies. Node.js built-ins only.
4
+
5
+ import fs from 'node:fs';
6
+ import path from 'node:path';
7
+ import os from 'node:os';
8
+ import readline from 'node:readline';
9
+ import { spawn, execFileSync } from 'node:child_process';
10
+
11
+ // ── ANSI helpers ──────────────────────────────────────────────────────────────
12
+
13
const ESC = '\x1b[';

// Colour and text attributes are blanked when NO_COLOR is set (to any value),
// TERM is "dumb", or --no-color appears on the command line.
const COLOR_OFF =
  process.env.NO_COLOR != null ||
  process.env.TERM === 'dumb' ||
  process.argv.includes('--no-color');

// Builds the attribute sequence for `code`, or '' when colour is disabled.
const sgr = (code) => (COLOR_OFF ? '' : `${ESC}${code}m`);

const RESET = sgr(0);
const BOLD = sgr(1);
const DIM = sgr(2);
const RED = sgr(31);
const GREEN = sgr(32);
const YELLOW = sgr(33);
const GREY = sgr(90);
const WHITE = sgr(37);
const RED_BG = sgr(41);

// Cursor and erase sequences are emitted regardless of the colour setting.
const HIDE_CURSOR = `${ESC}?25l`;
const SHOW_CURSOR = `${ESC}?25h`;
const CLEAR_LINE = `${ESC}2K`;
const CLEAR_DOWN = `${ESC}J`;
const CLEAR_SCREEN = `${ESC}2J${ESC}H`;

// Cursor-position sequence for (row, col); col defaults to the first column.
const moveTo = (row, col = 1) => `${ESC}${row};${col}H`;
34
+
35
+ // ── Constants ─────────────────────────────────────────────────────────────────
36
+
37
const VERSION = '1.0.0';

// Config lives under $XDG_CONFIG_HOME when that is set, otherwise ~/.config.
const CONFIG_BASE = process.env.XDG_CONFIG_HOME || path.join(os.homedir(), '.config');
const CONFIG_DIR = path.join(CONFIG_BASE, 'dikt');
const CONFIG_FILE = path.join(CONFIG_DIR, 'config.json');

const MAX_HISTORY = 10; // transcripts kept in the in-memory history
const MIN_RECORDING_MS = 500; // shorter recordings are rejected as "too short"
const COST_PER_MIN = 0.003; // per recorded minute; shown with a "$" prefix in the meta line
const SPINNER = [...'⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏']; // one frame per character

// Process exit codes (listed under "Exit codes" in --help).
const EXIT_OK = 0;
const EXIT_DEPENDENCY = 1;
const EXIT_NO_TTY = 2;
const EXIT_CONFIG = 3;
const EXIT_TRANSCRIPTION = 4;
51
+
52
+ // ── Config ────────────────────────────────────────────────────────────────────
53
+
54
/**
 * Reads and parses the config file.
 *
 * @returns {object|null} The parsed config object, or null when the file is
 *   missing, unreadable, not valid JSON, or does not contain a JSON object.
 */
function loadConfig() {
  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(CONFIG_FILE, 'utf8'));
  } catch {
    // Missing or malformed config is reported as "no config" to the caller.
    return null;
  }
  // Callers assign properties onto the result, so anything that is not a
  // plain JSON object (string, number, array, null) is treated as absent.
  const isObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
  return isObject ? parsed : null;
}
61
+
62
/**
 * Writes the config as pretty-printed JSON, creating the config directory
 * if needed.
 *
 * @param {object} cfg - Config object to persist (contains the API key).
 */
function saveConfig(cfg) {
  fs.mkdirSync(CONFIG_DIR, { recursive: true });
  fs.writeFileSync(CONFIG_FILE, JSON.stringify(cfg, null, 2) + '\n', { mode: 0o600 });
  // The `mode` option only takes effect when the file is created; tighten an
  // already-existing file too, since it holds the API key.
  try {
    fs.chmodSync(CONFIG_FILE, 0o600);
  } catch {
    // Best effort: some filesystems do not support chmod. The config itself
    // has been written, so saving still counts as successful.
  }
}
66
+
67
/**
 * Copies any non-empty DIKT_* environment variables onto the config object,
 * mutating it in place. DIKT_TEMPERATURE is parsed with parseFloat.
 *
 * @param {object} cfg - Config object to update.
 */
function applyEnvOverrides(cfg) {
  const asIs = (value) => value;
  const overrides = [
    ['DIKT_API_KEY', 'apiKey', asIs],
    ['DIKT_MODEL', 'model', asIs],
    ['DIKT_LANGUAGE', 'language', asIs],
    ['DIKT_TEMPERATURE', 'temperature', (value) => parseFloat(value)],
    ['DIKT_CONTEXT_BIAS', 'contextBias', asIs],
  ];

  for (const [variable, field, convert] of overrides) {
    const raw = process.env[variable];
    if (raw) cfg[field] = convert(raw);
  }
}
74
+
75
/**
 * Checks the fields the program relies on.
 *
 * @param {object} cfg - Config object to check.
 * @returns {{valid: boolean, errors: string[]}} `valid` is true when `errors` is empty.
 */
function validateConfig(cfg) {
  const isNonEmptyString = (value) => typeof value === 'string' && value !== '';
  const { temperature } = cfg;
  // Temperature is optional; when present it must be a real number in [0, 2].
  const temperatureOk =
    temperature == null ||
    (typeof temperature === 'number' && temperature >= 0 && temperature <= 2);

  const checks = [
    [isNonEmptyString(cfg.apiKey), 'apiKey: must be a non-empty string'],
    [isNonEmptyString(cfg.model), 'model: must be a non-empty string'],
    [temperatureOk, 'temperature: must be a number between 0 and 2'],
  ];

  const errors = checks.filter(([ok]) => !ok).map(([, message]) => message);
  return { valid: errors.length === 0, errors };
}
88
+
89
+ // ── Secret input ──────────────────────────────────────────────────────────────
90
+
91
/**
 * Prompts on stderr and reads a secret from stdin in raw mode, echoing one
 * `*` per accepted character.
 *
 * Enter or Ctrl+D submits; Ctrl+C exits the process with EXIT_CONFIG;
 * Backspace removes the last character. Input is handled character by
 * character, so a pasted value (which arrives as one multi-character chunk)
 * is masked correctly and a newline inside the paste submits the value.
 *
 * @param {string} prompt - Text written to stderr before reading.
 * @returns {Promise<string>} Resolves with the characters entered.
 */
function readSecret(prompt) {
  return new Promise((resolve) => {
    process.stderr.write(prompt);
    const { stdin } = process;
    stdin.setRawMode(true);
    stdin.resume();
    stdin.setEncoding('utf8');

    const chars = [];
    let finished = false;

    const cleanup = () => {
      finished = true;
      stdin.removeListener('data', onData);
      stdin.setRawMode(false);
      stdin.pause();
    };

    const onData = (chunk) => {
      // Chunks that start with ESC (e.g. arrow keys) are ignored as a whole
      // so their trailing bytes never end up in the secret.
      if (chunk.startsWith('\x1b')) return;

      for (const ch of chunk) {
        if (finished) return; // anything after the submitting key is dropped
        switch (ch) {
          case '\n':
          case '\r':
          case '\u0004': // Ctrl+D
            cleanup();
            process.stderr.write('\n');
            resolve(chars.join(''));
            break;
          case '\u0003': // Ctrl+C
            cleanup();
            process.stderr.write('\n');
            process.exit(EXIT_CONFIG);
            break;
          case '\u007F': // Backspace (macOS)
          case '\b': // Backspace
            if (chars.length > 0) {
              chars.pop();
              process.stderr.write('\b \b');
            }
            break;
          default:
            // Skip remaining control characters.
            if (ch.codePointAt(0) >= 32) {
              chars.push(ch);
              process.stderr.write('*');
            }
            break;
        }
      }
    };

    stdin.on('data', onData);
  });
}
140
+
141
+ // ── Setup wizard ──────────────────────────────────────────────────────────────
142
+
143
/**
 * Interactive setup: prompts (on stderr) for API key, model, language,
 * temperature and context bias, saves the result and returns it.
 *
 * Pressing Enter keeps the existing value. Typing "auto" for language stores
 * an empty language. Typing "default" for temperature clears it. A
 * temperature that is not a number between 0 and 2 is ignored with a warning
 * and the previous value is kept, so the saved config never contains a value
 * that validateConfig would reject.
 *
 * Exits the process with EXIT_CONFIG when no API key is available.
 *
 * @returns {Promise<object>} The saved config object.
 */
async function setupWizard() {
  const existing = loadConfig() || {};

  process.stderr.write(`\n${BOLD} dikt — setup${RESET}\n`);
  process.stderr.write(` ${DIM}Press Enter to keep the default shown in brackets.${RESET}\n\n`);

  // Only the last four characters of a stored key are shown.
  const apiKey = (await readSecret(` Mistral API key [${existing.apiKey ? '••••' + existing.apiKey.slice(-4) : ''}]: `)).trim()
    || existing.apiKey || '';
  if (!apiKey) {
    process.stderr.write(`\n ${RED}API key is required.${RESET}\n\n`);
    process.exit(EXIT_CONFIG);
  }

  const rl = readline.createInterface({ input: process.stdin, output: process.stderr });
  const ask = (q) => new Promise((res) => rl.question(q, res));

  const model = (await ask(` Model [${existing.model || 'voxtral-mini-latest'}]: `)).trim()
    || existing.model || 'voxtral-mini-latest';
  const language = (await ask(` Language [${existing.language || 'auto'}]: `)).trim()
    || existing.language || '';

  const tempStr = (await ask(` Temperature [${existing.temperature ?? 'default'}]: `)).trim();
  let temperature = existing.temperature ?? null;
  if (tempStr) {
    if (tempStr.toLowerCase() === 'default') {
      temperature = null;
    } else {
      const parsed = Number.parseFloat(tempStr);
      if (Number.isFinite(parsed) && parsed >= 0 && parsed <= 2) {
        temperature = parsed;
      } else {
        process.stderr.write(` ${YELLOW}Ignoring invalid temperature "${tempStr}" (expected a number between 0 and 2).${RESET}\n`);
      }
    }
  }

  const contextBias = (await ask(` Context bias [${existing.contextBias || ''}]: `)).trim()
    || existing.contextBias || '';

  rl.close();

  const cfg = { apiKey, model, language: language === 'auto' ? '' : language, temperature, contextBias, autoCopy: existing.autoCopy || false };
  saveConfig(cfg);
  process.stderr.write(`\n ${GREEN}✓${RESET} Saved to ${DIM}${CONFIG_FILE}${RESET}\n\n`);
  return cfg;
}
175
+
176
+ // ── Prerequisites ─────────────────────────────────────────────────────────────
177
+
178
/**
 * Verifies that `sox --version` can be executed. When it cannot, prints
 * platform-specific install hints to stderr and exits with EXIT_DEPENDENCY.
 */
function checkSox() {
  try {
    execFileSync('sox', ['--version'], { stdio: 'pipe' });
    return;
  } catch {
    // Fall through to the install hints below.
  }

  let hint;
  switch (process.platform) {
    case 'darwin':
      hint = ` ${BOLD}brew install sox${RESET}\n\n`;
      break;
    case 'win32':
      hint = ` ${BOLD}choco install sox${RESET} or ${BOLD}scoop install sox${RESET}\n\n`;
      break;
    default:
      hint =
        ` ${BOLD}sudo apt install sox${RESET} (Debian/Ubuntu)\n` +
        ` ${BOLD}sudo dnf install sox${RESET} (Fedora)\n` +
        ` ${BOLD}sudo pacman -S sox${RESET} (Arch)\n\n`;
      break;
  }

  process.stderr.write(`\n${RED}${BOLD} sox not found.${RESET}\n\n`);
  process.stderr.write(` dikt requires sox for audio recording. Install it:\n\n`);
  process.stderr.write(hint);
  process.exit(EXIT_DEPENDENCY);
}
196
+
197
/** Exits with EXIT_NO_TTY (after a message on stderr) unless stdin is a TTY. */
function checkTTY() {
  if (process.stdin.isTTY) return;

  process.stderr.write('dikt must run in a terminal (TTY).\n');
  process.exit(EXIT_NO_TTY);
}
203
+
204
+ // ── State ─────────────────────────────────────────────────────────────────────
205
+
206
// Mutable TUI state shared by the renderers and the key handler.
const state = {
  // Current screen mode: idle | recording | transcribing | ready | copied | help | error
  mode: 'idle',
  // Mode to return to when the help screen is dismissed
  prevMode: '',

  // Currently displayed transcript and its stats
  transcript: '',
  wordCount: 0,
  duration: 0, // recording duration in seconds
  latency: 0, // transcription API time in ms
  error: '', // message shown while mode is 'error'

  // Past results, newest first: [{transcript, wordCount, duration, latency}]
  history: [],
  historyIndex: -1, // -1 = current, 0..n = browsing history

  // Active recording
  recProc: null, // child process handle while recording
  recStart: 0, // Date.now() when recording started
  recFile: '', // path of the current/last WAV file

  // Timers and animation
  timerInterval: null,
  spinnerInterval: null,
  spinnerFrame: 0,
  copiedTimeout: null,

  // Timestamp of the last Ctrl+C pressed during transcription
  lastCtrlC: 0,
};

// ── TUI Rendering ─────────────────────────────────────────────────────────────

// Active configuration; replaced in main() and after the setup wizard runs.
let config = {};
229
+
230
/** @returns {number} Terminal width in columns, or 60 when stdout does not report one. */
function getTermWidth() {
  const { columns } = process.stdout;
  return columns || 60;
}
233
+
234
/**
 * Redraws the whole screen from `state` in a single write to stdout.
 *
 * Help mode: seven cleared lines followed by the help text.
 * Other modes: header, rule, key bar (row 4), status (row 6), then either the
 * getting-started hints or the wrapped transcript, then the meta line.
 * Everything below the last line is erased.
 */
function render() {
  const width = getTermWidth();
  const blankLine = CLEAR_LINE + '\n';
  const pieces = [moveTo(1)];

  if (state.mode === 'help') {
    pieces.push(blankLine.repeat(7), renderHelp());
  } else {
    const title = ` dikt`;
    const corner = `[?] [q]uit `;
    const gap = ' '.repeat(Math.max(0, width - title.length - corner.length));

    pieces.push(
      CLEAR_LINE + BOLD + title + gap + DIM + corner + RESET + '\n',
      CLEAR_LINE + ` ${'─'.repeat(Math.max(0, width - 2))}` + '\n',
      blankLine,
      CLEAR_LINE + renderKeybar() + '\n',
      blankLine,
      CLEAR_LINE + renderStatus() + '\n',
      blankLine,
    );

    if (state.mode === 'idle' && !state.transcript) {
      pieces.push(
        CLEAR_LINE + ` ${DIM}Press SPACE to start dictating.${RESET}` + '\n',
        CLEAR_LINE + ` ${DIM}Press ? for all keybindings.${RESET}` + '\n',
      );
    } else {
      for (const row of wrapTranscript(width)) {
        pieces.push(CLEAR_LINE + row + '\n');
      }
    }

    pieces.push(CLEAR_LINE + renderMeta());
  }

  pieces.push(CLEAR_DOWN);
  process.stdout.write(pieces.join(''));
}
278
+
279
/**
 * Builds the key-hint line for the current state.
 *
 * While recording only Stop/Cancel are offered. Otherwise the line lists
 * Record, Copy (when there is a transcript and auto-copy is off), the
 * auto-copy toggle, History (when non-empty) and Retry (when a recording
 * file is still held).
 *
 * @returns {string} The hint line without trailing whitespace.
 */
function renderKeybar() {
  if (state.mode === 'recording') {
    return ` ${DIM}[SPACE]${RESET} Stop ${DIM}[ESC]${RESET} Cancel`;
  }

  const segments = [` ${DIM}[SPACE]${RESET} Record `];
  if (state.transcript && !config.autoCopy) {
    segments.push(`${DIM}[c/↵]${RESET} Copy `);
  }
  segments.push(config.autoCopy ? `${DIM}[a]${RESET} Auto-copy ✓ ` : `${DIM}[a]${RESET} Auto-copy `);
  if (state.history.length) {
    segments.push(`${DIM}[h]${RESET} History `);
  }
  if (state.recFile) {
    segments.push(`${DIM}[r]${RESET} Retry `);
  }

  return segments.join('').trimEnd();
}
289
+
290
/**
 * Builds the status line for the current mode. Unknown modes are shown
 * verbatim in grey.
 *
 * @returns {string} The status line.
 */
function renderStatus() {
  const { mode } = state;

  const views = {
    idle: () => ` ${GREY}● Idle${RESET}`,
    recording: () => {
      const secs = state.duration.toFixed(1);
      return ` ${RED}${BOLD}● Recording${RESET} ${RED}${secs}s${RESET}`;
    },
    transcribing: () => {
      const frame = SPINNER[state.spinnerFrame % SPINNER.length];
      // For two seconds after a first Ctrl+C, remind the user a second press quits.
      const recentCtrlC = Date.now() - state.lastCtrlC < 2000;
      const hint = recentCtrlC ? ` ${DIM}Ctrl+C again to quit${RESET}` : '';
      return ` ${YELLOW}${frame} Transcribing...${RESET}${hint}`;
    },
    ready: () => ` ${GREEN}● Ready${RESET}`,
    copied: () => ` ${GREEN}${BOLD}● Copied!${RESET}`,
    help: () => ` ${GREY}? Help${RESET}`,
    error: () => ` ${RED}● ${state.error}${RESET}`,
  };

  if (Object.hasOwn(views, mode)) {
    return views[mode]();
  }
  return ` ${GREY}● ${mode}${RESET}`;
}
315
+
316
/**
 * Word-wraps the current transcript for display and wraps it in dim quotes.
 *
 * Lines break at whitespace only; a single word longer than the available
 * width is left unbroken. When the terminal is too narrow (fewer than 10
 * usable columns) the transcript is returned as one unwrapped, unquoted line.
 *
 * @param {number} termWidth - Terminal width in columns.
 * @returns {string[]} Display lines; empty when there is no transcript.
 */
function wrapTranscript(termWidth) {
  const text = state.transcript;
  if (!text) return [];

  const indent = ' ';
  const limit = termWidth - indent.length - 1; // leave 1 col margin
  if (limit < 10) return [`${indent}${text}`];

  // Splitting with a capture group keeps the whitespace runs as tokens.
  const rows = [];
  let current = '';
  for (const token of text.split(/(\s+)/)) {
    const overflows = current.length + token.length > limit;
    if (overflows && current.length > 0) {
      rows.push(current);
      current = token.trimStart(); // a new line never starts with whitespace
    } else {
      current += token;
    }
  }
  if (current) rows.push(current);

  const quote = `${DIM}"${RESET}`;
  const last = rows.length - 1;
  return rows.map((row, idx) => {
    const opening = idx === 0 ? quote : '';
    const closing = idx === last ? quote : '';
    return `${indent}${opening}${row}${closing}`;
  });
}
344
+
345
/**
 * Builds the dim statistics line shown under the transcript: word count,
 * recording length, API latency, estimated cost and, while browsing, the
 * history position.
 *
 * @returns {string} The meta line, or '' when there is no transcript.
 */
function renderMeta() {
  if (!state.transcript) return '';

  const cost = (state.duration / 60 * COST_PER_MIN).toFixed(4);
  const latencyText = state.latency ? `${(state.latency / 1000).toFixed(1)}s` : '—';
  const stats = [
    `${state.wordCount} words`,
    `${state.duration.toFixed(1)}s`,
    `latency ${latencyText}`,
    `$${cost}`,
  ];
  if (state.historyIndex >= 0) {
    stats.push(`history ${state.historyIndex + 1}/${state.history.length}`);
  }

  return ` ${DIM}${stats.join(' · ')}${RESET}`;
}
352
+
353
/**
 * Builds the help screen body: one line per keybinding, each prefixed with a
 * clear-line sequence and terminated by a newline.
 *
 * @returns {string} The help text.
 */
function renderHelp() {
  const key = (label) => `${BOLD}${label}${RESET}`;

  const rows = [
    ` ${key('Keybindings')}`,
    '',
    ` ${key('SPACE')} Start / stop recording`,
    ` ${key('c')} ${key('Enter')} Copy transcript to clipboard`,
    ` ${key('a')} Toggle auto-copy to clipboard`,
    ` ${key('h')} Cycle through history`,
    ` ${key('r')} Re-transcribe last recording`,
    ` ${key('Esc')} Cancel current recording`,
    ` ${key('s')} Re-run setup wizard`,
    ` ${key('?')} Show this help`,
    ` ${key('q')} Quit (also Ctrl+C)`,
    '',
    ` ${DIM}Press any key to return.${RESET}`,
  ];

  return rows.map((row) => CLEAR_LINE + row + '\n').join('');
}
370
+
371
/**
 * Redraws only the status line (row 6) and the key bar (row 4), leaving the
 * rest of the screen untouched. Used by the recording timer and the spinner.
 */
function renderStatusLine() {
  // The key bar is refreshed too because the available keys change with state.
  const targets = [
    [6, renderStatus],
    [4, renderKeybar],
  ];
  for (const [row, draw] of targets) {
    process.stdout.write(moveTo(row) + CLEAR_LINE + draw());
  }
}
376
+
377
/** Full-screen redraw; thin alias for render() used by state-changing code. */
function renderAll() {
  return render();
}
380
+
381
+ // ── Clipboard ─────────────────────────────────────────────────────────────────
382
+
383
// Result of the one-time clipboard tool lookup (null = none found).
let clipboardCmd = null;
let clipboardChecked = false;

/**
 * Finds a command that copies stdin to the system clipboard. The lookup runs
 * once; later calls return the cached result.
 *
 * Resolution order: `pbcopy` on macOS, `clip` on native Windows, `clip.exe`
 * under WSL, otherwise the first of `xclip`, `xsel`, `wl-copy` found via
 * `which` (with `wl-copy` tried first when WAYLAND_DISPLAY is set).
 *
 * @returns {string[]|null} Command and arguments, or null when none is available.
 */
function getClipboardCommand() {
  if (clipboardChecked) return clipboardCmd;
  clipboardChecked = true;

  if (process.platform === 'darwin') {
    clipboardCmd = ['pbcopy'];
    return clipboardCmd;
  }

  if (process.platform === 'win32') {
    clipboardCmd = ['clip'];
    return clipboardCmd;
  }

  // Check for WSL
  try {
    const procVersion = fs.readFileSync('/proc/version', 'utf8');
    if (/microsoft/i.test(procVersion)) {
      clipboardCmd = ['clip.exe'];
      return clipboardCmd;
    }
  } catch {
    // /proc/version is missing or unreadable: not WSL, continue probing.
  }

  // Linux/FreeBSD: prefer the tool matching the running display server.
  const x11Tools = [['xclip', '-selection', 'clipboard'], ['xsel', '--clipboard']];
  const waylandTool = ['wl-copy'];
  const candidates = process.env.WAYLAND_DISPLAY
    ? [waylandTool, ...x11Tools]
    : [...x11Tools, waylandTool];

  for (const cmd of candidates) {
    try {
      execFileSync('which', [cmd[0]], { stdio: 'pipe' });
      clipboardCmd = cmd;
      return clipboardCmd;
    } catch {
      // Tool not installed (or `which` unavailable): try the next candidate.
    }
  }

  return null;
}
415
+
416
/**
 * Copies `text` to the system clipboard by piping it into the detected
 * clipboard command, then shows "Copied!" for 1.5 seconds.
 *
 * Does nothing for empty text. Switches to error mode when no clipboard tool
 * is available, or when the tool cannot be started or written to.
 *
 * @param {string} text - Text to copy.
 */
function copy(text) {
  if (!text) return;

  const cmd = getClipboardCommand();
  if (!cmd) {
    state.mode = 'error';
    state.error = 'No clipboard tool found (install xclip)';
    renderAll();
    return;
  }

  const proc = spawn(cmd[0], cmd.slice(1), { stdio: ['pipe', 'ignore', 'ignore'] });

  // Without listeners, a spawn failure or a broken pipe would surface as an
  // uncaught exception and kill the TUI while the terminal is in raw mode.
  const onFailure = (err) => {
    // Only replace the "Copied!" banner; leave any newer state untouched.
    if (state.mode !== 'copied') return;
    clearTimeout(state.copiedTimeout);
    state.mode = 'error';
    state.error = `Clipboard copy failed: ${err.message}`;
    renderAll();
  };
  proc.on('error', onFailure);
  proc.stdin.on('error', onFailure);
  proc.stdin.end(text);

  state.mode = 'copied';
  renderAll();

  clearTimeout(state.copiedTimeout);
  state.copiedTimeout = setTimeout(() => {
    if (state.mode === 'copied') {
      state.mode = 'ready';
      renderAll();
    }
  }, 1500);
}
441
+
442
/** Flips the auto-copy setting, persists the config and redraws. */
function toggleAutoCopy() {
  const enabled = !config.autoCopy;
  config.autoCopy = enabled;
  saveConfig(config);
  renderAll();
}
447
+
448
+ // ── Recording ─────────────────────────────────────────────────────────────────
449
+
450
/**
 * Starts a new recording: removes the previous recording file, spawns `rec`
 * writing 16 kHz mono 16-bit audio to a fresh temp file, switches to
 * recording mode and starts the on-screen timer.
 *
 * The process listeners only act while their process is still the active
 * recording (`state.recProc`). Events from a process that was already
 * stopped or cancelled therefore cannot disturb a newer recording, and a
 * `rec` process that exits by itself moves the UI to error mode instead of
 * leaving it stuck in recording mode.
 */
function startRecording() {
  state.error = '';

  // Clean up previous recording file
  if (state.recFile) {
    try {
      fs.unlinkSync(state.recFile);
    } catch {
      // Best effort: the file may already be gone.
    }
  }

  // Make sure a leftover timer from an earlier recording cannot keep ticking.
  clearInterval(state.timerInterval);

  state.recFile = path.join(os.tmpdir(), `dikt-${Date.now()}.wav`);
  state.recStart = Date.now();
  state.duration = 0;
  state.mode = 'recording';
  state.historyIndex = -1;

  const proc = spawn('rec', ['-q', '-r', '16000', '-c', '1', '-b', '16', state.recFile], {
    stdio: ['ignore', 'ignore', 'pipe'],
  });
  state.recProc = proc;

  proc.stderr.on('data', () => {}); // suppress sox warnings

  proc.on('error', (err) => {
    if (state.recProc !== proc) return; // stale process
    state.recProc = null;
    clearInterval(state.timerInterval);
    state.mode = 'error';
    state.error = err.code === 'ENOENT' ? 'sox/rec not found' : err.message;
    renderAll();
  });

  proc.on('close', () => {
    // stopRecording/cancelRecording detach the process before killing it, so
    // reaching this point with the process still attached means rec exited
    // on its own.
    if (state.recProc !== proc) return;
    state.recProc = null;
    if (state.mode === 'recording') {
      clearInterval(state.timerInterval);
      state.mode = 'error';
      state.error = 'Recording stopped unexpectedly';
      renderAll();
    }
  });

  state.timerInterval = setInterval(() => {
    state.duration = (Date.now() - state.recStart) / 1000;
    renderStatusLine();
  }, 200);

  renderAll();
}
489
+
490
/**
 * Stops the active recording. Recordings shorter than MIN_RECORDING_MS are
 * discarded with an error; otherwise transcription starts once the `rec`
 * process has closed. No-op when nothing is recording.
 */
function stopRecording() {
  const proc = state.recProc;
  if (!proc) return;

  clearInterval(state.timerInterval);
  state.duration = (Date.now() - state.recStart) / 1000;
  state.recProc = null;

  const tooShort = state.duration * 1000 < MIN_RECORDING_MS;
  if (tooShort) {
    proc.kill('SIGTERM');
    state.mode = 'error';
    state.error = 'Recording too short';
    renderAll();
    return;
  }

  state.mode = 'transcribing';
  state.spinnerFrame = 0;
  renderAll();

  // Transcribe only after rec has closed, i.e. finished writing the WAV file.
  proc.on('close', () => {
    transcribe(state.recFile);
  });
  proc.kill('SIGTERM');
}
517
+
518
/**
 * Aborts the active recording without transcribing: kills `rec`, deletes the
 * temp file and returns to the previous transcript (or idle). No-op when
 * nothing is recording.
 *
 * Starting a recording resets the displayed duration, so the stats are
 * restored from the history entry that matches the transcript still on
 * screen. That is not necessarily the newest entry, because the user may
 * have been browsing history before recording.
 */
function cancelRecording() {
  if (!state.recProc) return;

  clearInterval(state.timerInterval);
  state.recProc.kill('SIGTERM');
  state.recProc = null;

  // Clean up temp file
  if (state.recFile) {
    try {
      fs.unlinkSync(state.recFile);
    } catch {
      // Best effort: rec may not have created the file yet.
    }
    state.recFile = '';
  }

  // Restore previous duration/latency from history if available
  if (state.history.length) {
    const shown = state.history.find((entry) => entry.transcript === state.transcript)
      ?? state.history[0];
    state.duration = shown.duration;
    state.latency = shown.latency;
  }

  state.mode = state.transcript ? 'ready' : 'idle';
  renderAll();
}
540
+
541
+ // ── Transcription ─────────────────────────────────────────────────────────────
542
+
543
/**
 * Turns a failed API response into a one-line message.
 *
 * Prefers the JSON body's `message`, then its `detail` (an array of
 * `{loc, msg}` items, a string, or any other value as JSON), then the raw
 * body, then `HTTP <status>`. A reconfigure hint is appended for 401.
 *
 * @param {string} raw - Response body text ('' when it could not be read).
 * @param {number} status - HTTP status code.
 * @returns {string} Message to show to the user.
 */
function describeTranscriptionError(raw, status) {
  let msg;
  try {
    const e = JSON.parse(raw);
    msg = e.message;
    if (!msg) {
      const { detail } = e;
      if (Array.isArray(detail)) {
        msg = detail.map((d) => [d.loc?.join('.'), d.msg].filter(Boolean).join(': ')).join('; ');
      } else if (detail) {
        msg = typeof detail === 'string' ? detail : JSON.stringify(detail);
      }
    }
    if (!msg) msg = raw;
  } catch {
    msg = raw || `HTTP ${status}`;
  }
  if (status === 401) msg += ' — press [s] to reconfigure';
  return msg;
}

/**
 * Uploads a WAV file to the Mistral transcription endpoint and updates the
 * TUI state with the result.
 *
 * On success the transcript becomes current, is pushed to the front of the
 * history (capped at MAX_HISTORY) and, with auto-copy on, is copied to the
 * clipboard. On failure (including a 30 s timeout or an empty transcript) the
 * UI switches to error mode and the recording file is kept for a retry.
 *
 * @param {string} wavPath - Path of the recording to transcribe.
 * @returns {Promise<void>}
 */
async function transcribe(wavPath) {
  state.mode = 'transcribing';
  state.spinnerFrame = 0;
  renderAll();

  state.spinnerInterval = setInterval(() => {
    state.spinnerFrame += 1;
    renderStatusLine();
  }, 80);

  try {
    const blob = await fs.openAsBlob(wavPath, { type: 'audio/wav' });
    const form = new FormData();
    form.append('file', new File([blob], 'recording.wav', { type: 'audio/wav' }));
    form.append('model', config.model);
    if (config.language) form.append('language', config.language);
    if (config.temperature != null) form.append('temperature', String(config.temperature));
    if (config.contextBias) form.append('context_bias', config.contextBias);

    const startedAt = Date.now();
    const resp = await fetch('https://api.mistral.ai/v1/audio/transcriptions', {
      method: 'POST',
      headers: { Authorization: `Bearer ${config.apiKey}` },
      body: form,
      signal: AbortSignal.timeout(30_000),
    });
    state.latency = Date.now() - startedAt;

    if (!resp.ok) {
      const raw = await resp.text().catch(() => '');
      throw new Error(describeTranscriptionError(raw, resp.status));
    }

    const data = await resp.json();
    const text = (data.text || '').trim();

    if (text) {
      state.transcript = text;
      state.wordCount = text.split(/\s+/).filter(Boolean).length;
      state.mode = 'ready';

      // Push to history
      state.history.unshift({ transcript: text, wordCount: state.wordCount, duration: state.duration, latency: state.latency });
      if (state.history.length > MAX_HISTORY) state.history.pop();
      state.historyIndex = -1;
    } else {
      state.mode = 'error';
      state.error = 'No speech detected';
    }
  } catch (err) {
    state.mode = 'error';
    state.error = err.name === 'TimeoutError' ? 'Transcription timed out' : err.message;
  } finally {
    clearInterval(state.spinnerInterval);
    cleanupRecFile();
    if (config.autoCopy && state.mode === 'ready') copy(state.transcript);
    renderAll();
  }
}
617
+
618
/**
 * Deletes the recording file after a successful transcription. In error mode
 * the file is kept so the user can press [r] to retry.
 */
function cleanupRecFile() {
  if (state.mode === 'error') return;
  if (!state.recFile) return;

  try {
    fs.unlinkSync(state.recFile);
  } catch {
    // Best effort: ignore a file that is already gone.
  }
  state.recFile = '';
}
626
+
627
/** Removes the current recording file, if any; called on exit. */
function cleanupTempFiles() {
  const { recFile } = state;
  if (!recFile) return;

  try {
    fs.unlinkSync(recFile);
  } catch {
    // Best effort: nothing to do if the file is already gone.
  }
}
632
+
633
+ // ── History ───────────────────────────────────────────────────────────────────
634
+
635
/**
 * Shows the next history entry (newest first), wrapping around after the
 * oldest. No-op when the history is empty.
 */
function cycleHistory() {
  const total = state.history.length;
  if (!total) return;

  const next = state.historyIndex + 1;
  state.historyIndex = next >= total ? 0 : next;

  const { transcript, wordCount, duration, latency } = state.history[state.historyIndex];
  state.transcript = transcript;
  state.wordCount = wordCount;
  state.duration = duration;
  state.latency = latency;
  state.mode = 'ready';
  renderAll();
}
651
+
652
+ // ── Keypress Handler ──────────────────────────────────────────────────────────
653
+
654
/**
 * Keypress handler for the TUI; dispatches on the current mode.
 *
 * @param {string|undefined} str - Character produced by the key, if any.
 * @param {{name?: string, ctrl?: boolean}|undefined} key - Key description from readline.
 */
function handleKey(str, key) {
  // Ctrl+C quits immediately, except during transcription where a second
  // press within two seconds is required.
  if (key && key.ctrl && key.name === 'c') {
    if (state.mode !== 'transcribing') {
      quit();
      return;
    }
    const now = Date.now();
    if (now - state.lastCtrlC < 2000) {
      quit();
      return;
    }
    state.lastCtrlC = now;
    renderStatusLine();
    return;
  }

  const ch = str || '';
  const keyName = key ? key.name : undefined;

  if (state.mode === 'help') {
    // Any key leaves the help screen.
    state.mode = state.prevMode || 'idle';
    state.prevMode = '';
    renderAll();
    return;
  }

  if (state.mode === 'recording') {
    if (ch === ' ') stopRecording();
    else if (keyName === 'escape') cancelRecording();
    else if (ch === 'q') quit();
    return;
  }

  if (state.mode === 'transcribing') {
    // Only quit allowed during transcription
    if (ch === 'q') quit();
    return;
  }

  // idle, ready, copied, error
  if (ch === '?') {
    clearTimeout(state.copiedTimeout);
    // "copied" is transient, so help returns to "ready" instead.
    state.prevMode = state.mode === 'copied' ? 'ready' : state.mode;
    state.mode = 'help';
    renderAll();
    return;
  }
  if (ch === ' ') {
    startRecording();
    return;
  }
  if (ch === 'c' || keyName === 'return') {
    copy(state.transcript);
    return;
  }
  if (ch === 'a') {
    toggleAutoCopy();
    return;
  }
  if (ch === 'h') {
    cycleHistory();
    return;
  }
  if (ch === 'r' && state.recFile) {
    retranscribe();
    return;
  }
  if (ch === 's') {
    runSetup();
    return;
  }
  if (ch === 'q') {
    quit();
  }
}
708
+
709
/**
 * Re-runs transcription on the kept recording file. Shows an error when the
 * file is no longer accessible; does nothing when no file is held.
 *
 * @returns {Promise<void>}
 */
async function retranscribe() {
  const file = state.recFile;
  if (!file) return;

  let accessible = true;
  try {
    fs.accessSync(file);
  } catch {
    accessible = false;
  }

  if (!accessible) {
    state.mode = 'error';
    state.error = 'Recording file no longer exists';
    renderAll();
    return;
  }

  transcribe(state.recFile);
}
721
+
722
/**
 * Runs the setup wizard from inside the TUI: leaves raw mode and detaches
 * the key handler for the duration of the wizard, then restores both and
 * redraws with the new config (environment overrides re-applied).
 *
 * @returns {Promise<void>}
 */
async function runSetup() {
  const { stdin, stdout } = process;

  // Hand the terminal over to the wizard.
  stdin.removeListener('keypress', handleKey);
  stdin.setRawMode(false);
  stdout.write(SHOW_CURSOR + CLEAR_SCREEN);

  config = await setupWizard();
  applyEnvOverrides(config);

  // Take the terminal back for the TUI.
  stdin.resume();
  stdin.setRawMode(true);
  stdin.on('keypress', handleKey);
  stdout.write(HIDE_CURSOR + CLEAR_SCREEN);
  renderAll();
}
737
+
738
+ // ── Single-shot mode ──────────────────────────────────────────────────────────
739
+
740
/**
 * Single-shot mode: records until two seconds of silence (or Ctrl+C),
 * transcribes the recording and prints the result to stdout.
 *
 * Compared with a bare spawn/fetch this handles three failure paths:
 * a `rec` process that cannot be started is reported instead of raising an
 * uncaught `error` event; the request is aborted after 30 seconds, matching
 * the interactive mode; and the SIGINT listener and timer are always removed
 * in `finally`.
 *
 * @param {{json: boolean}} flags - `json` selects JSON output
 *   (`{text, duration, latency, words}`) instead of the plain transcript.
 * @returns {Promise<number>} Process exit code.
 */
async function runOnce(flags) {
  const recFile = path.join(os.tmpdir(), `dikt-${Date.now()}.wav`);

  // At most one SIGINT listener is installed at a time; tracking it here lets
  // every exit path remove it.
  let sigintListener = null;
  const listenForSigint = (listener) => {
    if (sigintListener) process.removeListener('SIGINT', sigintListener);
    sigintListener = listener;
    if (listener) process.on('SIGINT', listener);
  };
  let timeoutTimer = null;

  try {
    // Record with silence detection via sox silence effect
    const recProc = spawn('rec', [
      '-q', '-r', '16000', '-c', '1', '-b', '16',
      recFile,
      'silence', '1', '0.1', '1%', '1', '2.0', '1%',
    ], {
      stdio: ['ignore', 'ignore', 'pipe'],
    });

    recProc.stderr.on('data', () => {});

    // Ctrl+C stops recording gracefully
    listenForSigint(() => recProc.kill('SIGTERM'));

    const recStart = Date.now();
    const recError = await new Promise((resolve) => {
      recProc.once('error', (err) => resolve(err));
      recProc.once('close', () => resolve(null));
    });
    listenForSigint(null);

    if (recError) {
      const missing = recError.code === 'ENOENT';
      process.stderr.write(`Error: ${missing ? 'sox/rec not found' : recError.message}\n`);
      return missing ? EXIT_DEPENDENCY : EXIT_TRANSCRIPTION;
    }

    const duration = (Date.now() - recStart) / 1000;
    if (duration < MIN_RECORDING_MS / 1000) {
      process.stderr.write('Recording too short\n');
      return EXIT_TRANSCRIPTION;
    }

    // Transcribe — Ctrl+C or the 30 s timeout aborts the request
    const ac = new AbortController();
    listenForSigint(() => ac.abort());
    timeoutTimer = setTimeout(() => {
      ac.abort(new DOMException('Transcription timed out', 'TimeoutError'));
    }, 30_000);

    const blob = await fs.openAsBlob(recFile, { type: 'audio/wav' });
    const file = new File([blob], 'recording.wav', { type: 'audio/wav' });
    const fd = new FormData();
    fd.append('file', file);
    fd.append('model', config.model);
    if (config.language) fd.append('language', config.language);
    if (config.temperature != null) fd.append('temperature', String(config.temperature));
    if (config.contextBias) fd.append('context_bias', config.contextBias);

    const t0 = Date.now();
    const resp = await fetch('https://api.mistral.ai/v1/audio/transcriptions', {
      method: 'POST',
      headers: { Authorization: `Bearer ${config.apiKey}` },
      body: fd,
      signal: ac.signal,
    });
    const latency = Date.now() - t0;

    if (!resp.ok) {
      const raw = await resp.text().catch(() => '');
      process.stderr.write(`Error: ${raw || `HTTP ${resp.status}`}\n`);
      return EXIT_TRANSCRIPTION;
    }

    const data = await resp.json();
    const text = (data.text || '').trim();

    if (!text) {
      process.stderr.write('No speech detected\n');
      return EXIT_TRANSCRIPTION;
    }

    const wordCount = text.split(/\s+/).filter(Boolean).length;

    if (flags.json) {
      process.stdout.write(JSON.stringify({ text, duration: parseFloat(duration.toFixed(1)), latency, words: wordCount }) + '\n');
    } else {
      process.stdout.write(text + '\n');
    }

    return EXIT_OK;
  } catch (err) {
    if (err.name === 'AbortError') {
      process.stderr.write('Aborted\n');
    } else if (err.name === 'TimeoutError') {
      process.stderr.write('Transcription timed out\n');
    } else {
      process.stderr.write(`Error: ${err.message}\n`);
    }
    return EXIT_TRANSCRIPTION;
  } finally {
    listenForSigint(null);
    clearTimeout(timeoutTimer);
    try {
      fs.unlinkSync(recFile);
    } catch {
      // Best effort: rec may never have created the file.
    }
  }
}
827
+
828
+ // ── Graceful Exit ─────────────────────────────────────────────────────────────
829
+
830
/**
 * Leaves the TUI: stops timers, kills an active recording, removes the temp
 * recording, restores the cursor and terminal mode, and exits with EXIT_OK.
 */
function quit() {
  for (const interval of [state.timerInterval, state.spinnerInterval]) {
    clearInterval(interval);
  }
  clearTimeout(state.copiedTimeout);

  const { recProc } = state;
  if (recProc) {
    recProc.kill('SIGTERM');
  }

  cleanupTempFiles();

  // Put the cursor on the last row so the shell prompt appears below the UI.
  const lastRow = process.stdout.rows || 24;
  process.stdout.write(SHOW_CURSOR + moveTo(lastRow) + '\n');
  process.stdin.setRawMode(false);
  process.exit(EXIT_OK);
}
846
+
847
+ // ── Main ──────────────────────────────────────────────────────────────────────
848
+
849
/**
 * Entry point: parses command-line flags, handles --version/--help, checks
 * for sox, loads (or interactively creates) the config, applies environment
 * overrides, validates, and then runs either single-shot mode (--json / -q)
 * or the interactive TUI.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = process.argv.slice(2);
  const has = (flag) => args.includes(flag);
  const flags = {
    json: has('--json'),
    quiet: has('--quiet') || has('-q'),
    noInput: has('--no-input'),
    setup: has('--setup') || args[0] === 'setup',
  };

  if (has('--version')) {
    console.log(`dikt v${VERSION}`);
    process.exit(EXIT_OK);
  }

  if (has('--help') || has('-h')) {
    const helpText = `dikt v${VERSION} — voice dictation for the terminal

Usage: dikt [options] [command]

Commands:
setup Reconfigure API key and model

Options:
--setup Run setup wizard
--json Record once, output JSON to stdout
-q, --quiet Record once, print transcript to stdout
--no-input Fail if config is missing (no wizard)
--no-color Disable colored output
--version Show version
-h, --help Show this help

Keys (interactive mode):
SPACE Start/stop recording c / Enter Copy to clipboard
a Toggle auto-copy h Cycle history
r Re-transcribe Esc Cancel recording
s Re-run setup ? Show keybindings
q Quit (also Ctrl+C)

Examples:
dikt Start interactive dictation
dikt setup Reconfigure API key and model
dikt -q Record once, print transcript to stdout
dikt --json Record once, output JSON to stdout
dikt -q | claude Dictate a prompt to Claude Code

Environment variables:
DIKT_API_KEY Override API key from config
DIKT_MODEL Override model (default: voxtral-mini-latest)
DIKT_LANGUAGE Override language (default: auto)
DIKT_TEMPERATURE Override temperature
DIKT_CONTEXT_BIAS Override context bias

Exit codes:
0 Success
1 Missing dependency (sox)
2 Not a terminal
3 Configuration error
4 Transcription error

Config: ${CONFIG_DIR}/config.json
Requires: sox (brew install sox)`;
    console.log(helpText);
    process.exit(EXIT_OK);
  }

  checkSox();

  // Load or setup config. An explicit setup request skips loading and always
  // runs the wizard; otherwise the wizard only runs when no config exists.
  config = flags.setup ? null : loadConfig();
  if (!config) {
    if (!flags.setup && flags.noInput) {
      process.stderr.write('No config found. Run `dikt setup` to configure.\n');
      process.exit(EXIT_CONFIG);
    }
    checkTTY();
    config = await setupWizard();
  }

  applyEnvOverrides(config);

  const { valid, errors } = validateConfig(config);
  if (!valid) {
    for (const problem of errors) {
      process.stderr.write(`Config error: ${problem}\n`);
    }
    process.exit(EXIT_CONFIG);
  }

  // Single-shot mode: record once, output, exit
  if (flags.json || flags.quiet) {
    process.exit(await runOnce(flags));
  }

  // Interactive TUI mode
  checkTTY();

  // Enter raw TUI mode
  const { stdin, stdout } = process;
  stdout.write(HIDE_CURSOR + CLEAR_SCREEN);

  readline.emitKeypressEvents(stdin);
  stdin.setRawMode(true);
  stdin.resume();
  stdin.on('keypress', handleKey);

  // Handle resize
  stdout.on('resize', () => renderAll());

  // Handle signals
  for (const signal of ['SIGINT', 'SIGTERM']) {
    process.on(signal, quit);
  }

  renderAll();
}
966
+
967
// Last-resort handler: make the cursor visible again, print the error and
// exit with EXIT_DEPENDENCY.
const onFatalError = (err) => {
  process.stdout.write(SHOW_CURSOR);
  console.error(err);
  process.exit(EXIT_DEPENDENCY);
};

main().catch(onFatalError);
package/package.json ADDED
@@ -0,0 +1,35 @@
1
+ {
2
+ "name": "dikt",
3
+ "version": "1.0.0",
4
+ "description": "Voice dictation for the terminal.",
5
+ "type": "module",
6
+ "bin": {
7
+ "dikt": "cli.mjs"
8
+ },
9
+ "files": [
10
+ "cli.mjs"
11
+ ],
12
+ "keywords": [
13
+ "dictation",
14
+ "voice",
15
+ "transcription",
16
+ "speech-to-text",
17
+ "cli",
18
+ "terminal",
19
+ "whisper",
20
+ "mistral"
21
+ ],
22
+ "author": "johxyz",
23
+ "repository": {
24
+ "type": "git",
25
+ "url": "git+https://github.com/johxyz/dikt.git"
26
+ },
27
+ "homepage": "https://github.com/johxyz/dikt",
28
+ "license": "MIT",
29
+ "engines": {
30
+ "node": ">=20.0.0"
31
+ },
32
+ "scripts": {
33
+ "postinstall": "node -e \"try{require('child_process').execFileSync('sox',['--version'],{stdio:'pipe'})}catch{console.log('\\n Note: dikt requires sox for audio recording.\\n Install it: brew install sox (macOS) / sudo apt install sox (Linux)\\n')}\""
34
+ }
35
+ }