dikt 1.1.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +79 -7
  2. package/cli.mjs +141 -25
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -10,7 +10,7 @@ Uses [Mistral's Voxtral](https://docs.mistral.ai/capabilities/audio/) for speech
10
10
  npm install -g dikt
11
11
  ```
12
12
 
13
- Requires [sox](https://sox.sourceforge.net/) for audio recording:
13
+ Requires [sox](https://sox.sourceforge.net/) for audio recording (not needed for `--file`):
14
14
 
15
15
  ```bash
16
16
  # macOS
@@ -55,12 +55,6 @@ This opens an interactive TUI where you can record, transcribe, and copy text.
55
55
  | `?` | Show keybindings |
56
56
  | `q` | Quit |
57
57
 
58
- ### Update
59
-
60
- ```
61
- dikt update
62
- ```
63
-
64
58
  ### Single-shot mode
65
59
 
66
60
  ```bash
@@ -72,6 +66,84 @@ dikt --json
72
66
 
73
67
  # Pipe to another tool
74
68
  dikt -q | claude
69
+
70
+ # Wait longer before auto-stopping
71
+ dikt -q --silence 5
72
+ ```
73
+
74
+ ### Stream mode
75
+
76
+ Continuously transcribe, emitting chunks on pauses:
77
+
78
+ ```bash
79
+ dikt --stream
80
+
81
+ # Stream as JSON Lines
82
+ dikt --stream --json
83
+
84
+ # Stream as continuous flowing text
85
+ dikt --stream -n
86
+
87
+ # Stream continuously until Ctrl+C
88
+ dikt --stream --silence 0
89
+ ```
90
+
91
+ ### File mode
92
+
93
+ Transcribe an existing audio file (wav, mp3, m4a, flac, ogg, webm — no sox needed):
94
+
95
+ ```bash
96
+ dikt --file meeting.wav
97
+
98
+ # Save to a file (.json auto-enables JSON output)
99
+ dikt --file meeting.wav -o transcript.json
100
+ dikt --file meeting.wav -o transcript.txt
101
+
102
+ # With JSON output
103
+ dikt --file recording.mp3 --json
104
+ ```
105
+
106
+ ### Speaker identification & timestamps
107
+
108
+ ```bash
109
+ # Speaker labels
110
+ dikt -q --diarize
111
+
112
+ # Timestamps
113
+ dikt -q --timestamps segment
114
+ dikt -q --timestamps word
115
+ dikt -q --timestamps segment,word
116
+
117
+ # Combined with JSON
118
+ dikt -q --json --diarize
119
+ ```
120
+
121
+ ### Options
122
+
123
+ | Flag | Description |
124
+ |---|---|
125
+ | `--file <path>` | Transcribe an audio file (no mic needed) |
126
+ | `-o`, `--output <path>` | Write output to file (`.json` auto-enables JSON) |
127
+ | `--stream` | Stream transcription chunks on pauses |
128
+ | `--json` | Output JSON (single-shot or stream) |
129
+ | `-q`, `--quiet` | Record once, print transcript to stdout |
130
+ | `--silence <seconds>` | Silence duration before auto-stop (default: 2.0) |
131
+ | `--pause <seconds>` | Pause duration to split stream chunks (default: 1.0) |
132
+ | `--language <code>` | Language code, e.g. en, de, fr (default: auto) |
133
+ | `--timestamps <granularity>` | Add timestamps: segment, word, or segment,word |
134
+ | `--diarize` | Enable speaker identification |
135
+ | `-n`, `--no-newline` | Join stream chunks without newlines |
136
+ | `--no-color` | Disable colored output |
137
+ | `--no-input` | Fail if config is missing (no wizard) |
138
+ | `--setup` | Run setup wizard |
139
+ | `--update` | Update to latest version |
140
+ | `--version` | Show version |
141
+ | `-h`, `--help` | Show help |
142
+
143
+ ### Update
144
+
145
+ ```
146
+ dikt update
75
147
  ```
76
148
 
77
149
  ## Environment variables
package/cli.mjs CHANGED
@@ -39,7 +39,7 @@ const moveTo = (row, col = 1) => `${ESC}${row};${col}H`;
39
39
 
40
40
  // ── Constants ─────────────────────────────────────────────────────────────────
41
41
 
42
- const VERSION = '1.1.2';
42
+ const VERSION = '1.2.0';
43
43
  const CONFIG_BASE = process.env.XDG_CONFIG_HOME || path.join(os.homedir(), '.config');
44
44
  const CONFIG_DIR = path.join(CONFIG_BASE, 'dikt');
45
45
  const CONFIG_FILE = path.join(CONFIG_DIR, 'config.json');
@@ -576,6 +576,7 @@ function copy(text) {
576
576
  }
577
577
 
578
578
  const proc = spawn(cmd[0], cmd.slice(1), { stdio: ['pipe', 'ignore', 'ignore'] });
579
+ proc.on('error', () => {}); // swallow — clipboard is best-effort
579
580
  proc.stdin.end(text);
580
581
 
581
582
  state.mode = 'copied';
@@ -869,6 +870,9 @@ async function runSetup() {
869
870
  config = await setupWizard();
870
871
  applyEnvOverrides(config);
871
872
 
873
+ state.mode = state.transcript ? 'ready' : 'idle';
874
+ state.error = '';
875
+
872
876
  process.stdin.resume();
873
877
  process.stdin.setRawMode(true);
874
878
  process.stdin.on('keypress', handleKey);
@@ -1070,7 +1074,12 @@ async function runFile(flags) {
1070
1074
  const mime = mimeTypes[ext] || 'audio/wav';
1071
1075
  const file = new File([blob], path.basename(flags.file), { type: mime });
1072
1076
 
1073
- const result = await callTranscribeAPI(file, { timestamps: flags.timestamps, diarize: flags.diarize });
1077
+ const ac = new AbortController();
1078
+ const abortHandler = () => ac.abort();
1079
+ process.on('SIGINT', abortHandler);
1080
+
1081
+ const result = await callTranscribeAPI(file, { signal: ac.signal, timestamps: flags.timestamps, diarize: flags.diarize });
1082
+ process.removeListener('SIGINT', abortHandler);
1074
1083
 
1075
1084
  if (!result.text) {
1076
1085
  process.stderr.write('No speech detected\n');
@@ -1079,20 +1088,32 @@ async function runFile(flags) {
1079
1088
 
1080
1089
  const wordCount = result.text.split(/\s+/).filter(Boolean).length;
1081
1090
 
1091
+ let output;
1082
1092
  if (flags.json) {
1083
1093
  const out = buildJsonOutput(
1084
1094
  { text: result.text, latency: result.latency, words: wordCount },
1085
1095
  { segments: result.segments, words: result.words, timestamps: flags.timestamps, diarize: flags.diarize },
1086
1096
  );
1087
- process.stdout.write(JSON.stringify(out) + '\n');
1097
+ output = JSON.stringify(out, null, flags.output ? 2 : 0) + '\n';
1088
1098
  } else if (flags.diarize && result.segments) {
1089
- process.stdout.write(formatDiarizedText(result.segments) + '\n');
1099
+ output = formatDiarizedText(result.segments) + '\n';
1100
+ } else {
1101
+ output = result.text + '\n';
1102
+ }
1103
+
1104
+ if (flags.output) {
1105
+ fs.writeFileSync(flags.output, output);
1106
+ process.stderr.write(`Saved to ${flags.output}\n`);
1090
1107
  } else {
1091
- process.stdout.write(result.text + '\n');
1108
+ process.stdout.write(output);
1092
1109
  }
1093
1110
 
1094
1111
  return EXIT_OK;
1095
1112
  } catch (err) {
1113
+ if (err.name === 'AbortError') {
1114
+ process.stderr.write('Aborted\n');
1115
+ return EXIT_TRANSCRIPTION;
1116
+ }
1096
1117
  process.stderr.write(`Error: ${err.message}\n`);
1097
1118
  return EXIT_TRANSCRIPTION;
1098
1119
  }
@@ -1158,16 +1179,24 @@ async function runOnce(flags) {
1158
1179
 
1159
1180
  const wordCount = result.text.split(/\s+/).filter(Boolean).length;
1160
1181
 
1182
+ let output;
1161
1183
  if (flags.json) {
1162
1184
  const out = buildJsonOutput(
1163
1185
  { text: result.text, duration: parseFloat(duration.toFixed(1)), latency: result.latency, words: wordCount },
1164
1186
  { segments: result.segments, words: result.words, timestamps: flags.timestamps, diarize: flags.diarize },
1165
1187
  );
1166
- process.stdout.write(JSON.stringify(out) + '\n');
1188
+ output = JSON.stringify(out, null, flags.output ? 2 : 0) + '\n';
1167
1189
  } else if (flags.diarize && result.segments) {
1168
- process.stdout.write(formatDiarizedText(result.segments) + '\n');
1190
+ output = formatDiarizedText(result.segments) + '\n';
1191
+ } else {
1192
+ output = result.text + '\n';
1193
+ }
1194
+
1195
+ if (flags.output) {
1196
+ fs.writeFileSync(flags.output, output);
1197
+ process.stderr.write(`Saved to ${flags.output}\n`);
1169
1198
  } else {
1170
- process.stdout.write(result.text + '\n');
1199
+ process.stdout.write(output);
1171
1200
  }
1172
1201
 
1173
1202
  return EXIT_OK;
@@ -1204,6 +1233,7 @@ async function runStream(flags) {
1204
1233
  let chunkStart = Date.now();
1205
1234
  let chunkIndex = 0;
1206
1235
  const pending = [];
1236
+ const outputParts = []; // collect output for --output
1207
1237
 
1208
1238
  recProc.stdout.on('data', (chunk) => {
1209
1239
  chunks.push(chunk);
@@ -1230,17 +1260,23 @@ async function runStream(flags) {
1230
1260
  .then((result) => {
1231
1261
  if (!result.text) return;
1232
1262
  const wordCount = result.text.split(/\s+/).filter(Boolean).length;
1263
+ let chunk_output;
1233
1264
  if (flags.json) {
1234
1265
  const out = buildJsonOutput(
1235
1266
  { text: result.text, chunk: idx, duration: parseFloat(duration.toFixed(1)), latency: result.latency, words: wordCount },
1236
1267
  { segments: result.segments, words: result.words, timestamps: flags.timestamps, diarize: flags.diarize },
1237
1268
  );
1238
- process.stdout.write(JSON.stringify(out) + '\n');
1269
+ chunk_output = JSON.stringify(out, null, flags.output ? 2 : 0) + '\n';
1239
1270
  } else if (flags.diarize && result.segments) {
1240
1271
  const sep = flags.noNewline ? ' ' : '\n';
1241
- process.stdout.write(formatDiarizedText(result.segments) + sep);
1272
+ chunk_output = formatDiarizedText(result.segments) + sep;
1242
1273
  } else {
1243
- process.stdout.write(result.text + (flags.noNewline ? ' ' : '\n'));
1274
+ chunk_output = result.text + (flags.noNewline ? ' ' : '\n');
1275
+ }
1276
+ if (flags.output) {
1277
+ outputParts[idx] = chunk_output;
1278
+ } else {
1279
+ process.stdout.write(chunk_output);
1244
1280
  }
1245
1281
  })
1246
1282
  .catch((err) => {
@@ -1267,17 +1303,23 @@ async function runStream(flags) {
1267
1303
  const result = await transcribeBuffer(chunks, { timestamps: flags.timestamps, diarize: flags.diarize });
1268
1304
  if (result.text) {
1269
1305
  const wordCount = result.text.split(/\s+/).filter(Boolean).length;
1306
+ let chunk_output;
1270
1307
  if (flags.json) {
1271
1308
  const out = buildJsonOutput(
1272
1309
  { text: result.text, chunk: idx, duration: parseFloat(duration.toFixed(1)), latency: result.latency, words: wordCount },
1273
1310
  { segments: result.segments, words: result.words, timestamps: flags.timestamps, diarize: flags.diarize },
1274
1311
  );
1275
- process.stdout.write(JSON.stringify(out) + '\n');
1312
+ chunk_output = JSON.stringify(out, null, flags.output ? 2 : 0) + '\n';
1276
1313
  } else if (flags.diarize && result.segments) {
1277
1314
  const sep = flags.noNewline ? ' ' : '\n';
1278
- process.stdout.write(formatDiarizedText(result.segments) + sep);
1315
+ chunk_output = formatDiarizedText(result.segments) + sep;
1279
1316
  } else {
1280
- process.stdout.write(result.text + (flags.noNewline ? ' ' : '\n'));
1317
+ chunk_output = result.text + (flags.noNewline ? ' ' : '\n');
1318
+ }
1319
+ if (flags.output) {
1320
+ outputParts[idx] = chunk_output;
1321
+ } else {
1322
+ process.stdout.write(chunk_output);
1281
1323
  }
1282
1324
  }
1283
1325
  } catch (err) {
@@ -1288,8 +1330,13 @@ async function runStream(flags) {
1288
1330
  // Wait for any in-flight transcriptions to finish
1289
1331
  await Promise.allSettled(pending);
1290
1332
 
1333
+ if (flags.output && outputParts.length) {
1334
+ fs.writeFileSync(flags.output, outputParts.filter(Boolean).join(''));
1335
+ process.stderr.write(`Saved to ${flags.output}\n`);
1336
+ }
1337
+
1291
1338
  // Final newline for --no-newline so shell prompt starts on a new line
1292
- if (flags.noNewline && !flags.json) process.stdout.write('\n');
1339
+ if (!flags.output && flags.noNewline && !flags.json) process.stdout.write('\n');
1293
1340
 
1294
1341
  return EXIT_OK;
1295
1342
  } catch (err) {
@@ -1318,6 +1365,26 @@ function quit() {
1318
1365
 
1319
1366
  // ── Main ──────────────────────────────────────────────────────────────────────
1320
1367
 
1368
+ function flagVal(args, name, hint, { valid, numeric } = {}) {
1369
+ const i = args.indexOf(name);
1370
+ if (i === -1) return '';
1371
+ const v = args[i + 1];
1372
+ if (!v || v.startsWith('-')) {
1373
+ const h = hint ? ` (${hint})` : '';
1374
+ process.stderr.write(`Error: ${name} requires a value${h}\n`);
1375
+ process.exit(EXIT_CONFIG);
1376
+ }
1377
+ if (valid && !valid.includes(v)) {
1378
+ process.stderr.write(`Error: invalid value for ${name}: '${v}' (${hint})\n`);
1379
+ process.exit(EXIT_CONFIG);
1380
+ }
1381
+ if (numeric && !Number.isFinite(parseFloat(v))) {
1382
+ process.stderr.write(`Error: ${name} must be a number\n`);
1383
+ process.exit(EXIT_CONFIG);
1384
+ }
1385
+ return v;
1386
+ }
1387
+
1321
1388
  async function main() {
1322
1389
  const args = process.argv.slice(2);
1323
1390
  const flags = {
@@ -1326,15 +1393,42 @@ async function main() {
1326
1393
  noInput: args.includes('--no-input'),
1327
1394
  setup: args.includes('--setup') || args[0] === 'setup',
1328
1395
  stream: args.includes('--stream'),
1329
- silence: args.includes('--silence') ? (Number.isFinite(parseFloat(args[args.indexOf('--silence') + 1])) ? parseFloat(args[args.indexOf('--silence') + 1]) : 2.0) : 2.0,
1330
- pause: args.includes('--pause') ? parseFloat(args[args.indexOf('--pause') + 1]) || 1.0 : 1.0,
1331
- language: args.includes('--language') ? args[args.indexOf('--language') + 1] || '' : '',
1332
- file: args.includes('--file') ? args[args.indexOf('--file') + 1] || '' : '',
1396
+ silence: args.includes('--silence') ? parseFloat(flagVal(args, '--silence', 'seconds', { numeric: true })) : 2.0,
1397
+ pause: args.includes('--pause') ? parseFloat(flagVal(args, '--pause', 'seconds', { numeric: true })) : 1.0,
1398
+ language: flagVal(args, '--language', 'e.g. en, de, fr'),
1399
+ file: flagVal(args, '--file', 'path to audio file'),
1333
1400
  noNewline: args.includes('--no-newline') || args.includes('-n'),
1334
- timestamps: args.includes('--timestamps') ? args[args.indexOf('--timestamps') + 1] || '' : '',
1401
+ timestamps: flagVal(args, '--timestamps', 'segment, word, or segment,word', { valid: ['segment', 'word', 'segment,word'] }),
1335
1402
  diarize: args.includes('--diarize'),
1403
+ output: flagVal(args, '--output', 'path') || flagVal(args, '-o', 'path'),
1336
1404
  };
1337
1405
 
1406
+ // Reject unknown flags and arguments
1407
+ const knownFlags = new Set([
1408
+ '--json', '--quiet', '-q', '--no-input', '--setup', '--stream',
1409
+ '--no-newline', '-n', '--diarize', '--version', '--update',
1410
+ '--help', '-h', '--no-color',
1411
+ '--silence', '--pause', '--language', '--file', '--timestamps',
1412
+ '--output', '-o',
1413
+ ]);
1414
+ const knownCommands = new Set(['setup', 'update']);
1415
+ const valueTakers = new Set(['--silence', '--pause', '--language', '--file', '--timestamps', '--output', '-o']);
1416
+ for (let i = 0; i < args.length; i++) {
1417
+ const a = args[i];
1418
+ if (a.startsWith('-')) {
1419
+ if (!knownFlags.has(a)) {
1420
+ process.stderr.write(`Unknown flag: ${a}\nRun dikt --help for usage.\n`);
1421
+ process.exit(EXIT_CONFIG);
1422
+ }
1423
+ if (valueTakers.has(a)) i++; // skip value
1424
+ } else if (knownCommands.has(a)) {
1425
+ // ok — subcommand
1426
+ } else {
1427
+ process.stderr.write(`Unexpected argument: ${a}\nRun dikt --help for usage.\n`);
1428
+ process.exit(EXIT_CONFIG);
1429
+ }
1430
+ }
1431
+
1338
1432
  if (args.includes('--version')) {
1339
1433
  console.log(`dikt v${VERSION}`);
1340
1434
  process.exit(EXIT_OK);
@@ -1375,6 +1469,7 @@ Options:
1375
1469
  -q, --quiet Record once, print transcript to stdout
1376
1470
  --stream Stream transcription chunks on pauses
1377
1471
  --file <path> Transcribe an audio file (no mic needed)
1472
+ -o, --output <path> Write output to file (.json auto-enables JSON)
1378
1473
  --silence <seconds> Silence duration before auto-stop (default: 2.0)
1379
1474
  --pause <seconds> Pause duration to split chunks (default: 1.0)
1380
1475
  --language <code> Language code, e.g. en, de, fr (default: auto)
@@ -1404,6 +1499,8 @@ Examples:
1404
1499
  dikt -q | claude Dictate a prompt to Claude Code
1405
1500
  dikt update Update to the latest version
1406
1501
  dikt --file meeting.wav Transcribe an existing audio file
1502
+ dikt --file a.wav -o a.json Transcribe to a JSON file
1503
+ dikt --file a.wav -o a.txt Transcribe to a text file
1407
1504
  dikt --stream --silence 0 Stream continuously until Ctrl+C
1408
1505
  dikt --stream -n Stream as continuous flowing text
1409
1506
  dikt -q --json --diarize Transcribe with speaker labels
@@ -1447,6 +1544,7 @@ Requires: sox (brew install sox)`);
1447
1544
  if (flags.language) config.language = flags.language;
1448
1545
  if (!flags.timestamps && config.timestamps) flags.timestamps = config.timestamps;
1449
1546
  if (!flags.diarize && config.diarize) flags.diarize = true;
1547
+ if (flags.output && flags.output.endsWith('.json')) flags.json = true;
1450
1548
 
1451
1549
  const validation = validateConfig(config);
1452
1550
  if (!validation.valid) {
@@ -1457,14 +1555,27 @@ Requires: sox (brew install sox)`);
1457
1555
  }
1458
1556
 
1459
1557
  // Validate incompatible flag combinations
1558
+ // Only error when both sides are CLI-passed. When one comes from config,
1559
+ // let the explicit CLI flag win and silently drop the config value.
1560
+ const cliLanguage = args.includes('--language');
1561
+ const cliTimestamps = args.includes('--timestamps');
1562
+ const cliDiarize = args.includes('--diarize');
1460
1563
  const lang = config.language;
1461
1564
  if (lang && flags.timestamps) {
1462
- process.stderr.write('Error: --timestamps and --language cannot be used together\n');
1463
- process.exit(EXIT_CONFIG);
1565
+ if (cliLanguage && cliTimestamps) {
1566
+ process.stderr.write('Error: --timestamps and --language cannot be used together\n');
1567
+ process.exit(EXIT_CONFIG);
1568
+ }
1569
+ if (cliLanguage) flags.timestamps = '';
1570
+ else config.language = '';
1464
1571
  }
1465
1572
  if (lang && flags.diarize) {
1466
- process.stderr.write('Error: --diarize and --language cannot be used together\n');
1467
- process.exit(EXIT_CONFIG);
1573
+ if (cliLanguage && cliDiarize) {
1574
+ process.stderr.write('Error: --diarize and --language cannot be used together\n');
1575
+ process.exit(EXIT_CONFIG);
1576
+ }
1577
+ if (cliLanguage) flags.diarize = false;
1578
+ else config.language = '';
1468
1579
  }
1469
1580
  if (flags.diarize && flags.stream) {
1470
1581
  process.stderr.write('Error: --diarize is not compatible with --stream, use -q --diarize instead\n');
@@ -1488,6 +1599,11 @@ Requires: sox (brew install sox)`);
1488
1599
  process.exit(await runOnce(flags));
1489
1600
  }
1490
1601
 
1602
+ // Warn about flags that don't apply to interactive mode
1603
+ if (flags.output) {
1604
+ process.stderr.write(`Warning: --output is ignored in interactive mode. Use with --file, -q, or --stream.\n`);
1605
+ }
1606
+
1491
1607
  // Interactive TUI mode
1492
1608
  checkTTY();
1493
1609
 
@@ -1514,7 +1630,7 @@ Requires: sox (brew install sox)`);
1514
1630
  }
1515
1631
 
1516
1632
  main().catch((err) => {
1517
- process.stdout.write(SHOW_CURSOR + ALT_SCREEN_OFF);
1633
+ if (process.stdout.isTTY) process.stdout.write(SHOW_CURSOR + ALT_SCREEN_OFF);
1518
1634
  console.error(err);
1519
1635
  process.exit(EXIT_DEPENDENCY);
1520
1636
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dikt",
3
- "version": "1.1.2",
3
+ "version": "1.2.0",
4
4
  "description": "Voice dictation for the terminal.",
5
5
  "type": "module",
6
6
  "bin": {