dikt 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -4
- package/cli.mjs +582 -30
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -23,6 +23,11 @@ sudo apt install sox
|
|
|
23
23
|
sudo pacman -S sox
|
|
24
24
|
```
|
|
25
25
|
|
|
26
|
+
Optional dependencies for `--file` mode:
|
|
27
|
+
|
|
28
|
+
- [ffmpeg](https://ffmpeg.org/) — enables compression, chunked transcription of long files, and broader format support
|
|
29
|
+
- [yt-dlp](https://github.com/yt-dlp/yt-dlp) — enables transcribing audio from URLs (YouTube, podcasts, etc.)
|
|
30
|
+
|
|
26
31
|
## Setup
|
|
27
32
|
|
|
28
33
|
On first run, dikt will prompt you for your Mistral API key and model preferences:
|
|
@@ -90,7 +95,7 @@ dikt --stream --silence 0
|
|
|
90
95
|
|
|
91
96
|
### File mode
|
|
92
97
|
|
|
93
|
-
Transcribe an existing audio file (wav, mp3, m4a, flac, ogg, webm
|
|
98
|
+
Transcribe an existing audio file (wav, mp3, m4a, flac, ogg, webm, aac, wma, and more):
|
|
94
99
|
|
|
95
100
|
```bash
|
|
96
101
|
dikt --file meeting.wav
|
|
@@ -101,6 +106,10 @@ dikt --file meeting.wav -o transcript.txt
|
|
|
101
106
|
|
|
102
107
|
# With JSON output
|
|
103
108
|
dikt --file recording.mp3 --json
|
|
109
|
+
|
|
110
|
+
# Transcribe from a URL (requires yt-dlp)
|
|
111
|
+
dikt --file https://youtube.com/watch?v=VIDEO_ID
|
|
112
|
+
dikt --file https://youtube.com/watch?v=VIDEO_ID -o transcript.txt
|
|
104
113
|
```
|
|
105
114
|
|
|
106
115
|
### Speaker identification & timestamps
|
|
@@ -112,7 +121,7 @@ dikt -q --diarize
|
|
|
112
121
|
# Timestamps
|
|
113
122
|
dikt -q --timestamps segment
|
|
114
123
|
dikt -q --timestamps word
|
|
115
|
-
dikt
|
|
124
|
+
dikt --file lecture.mp3 --timestamps segment
|
|
116
125
|
|
|
117
126
|
# Combined with JSON
|
|
118
127
|
dikt -q --json --diarize
|
|
@@ -122,7 +131,7 @@ dikt -q --json --diarize
|
|
|
122
131
|
|
|
123
132
|
| Flag | Description |
|
|
124
133
|
|---|---|
|
|
125
|
-
| `--file <path>` | Transcribe
|
|
134
|
+
| `--file <path\|url>` | Transcribe audio file or URL (via yt-dlp) |
|
|
126
135
|
| `-o`, `--output <path>` | Write output to file (`.json` auto-enables JSON) |
|
|
127
136
|
| `--stream` | Stream transcription chunks on pauses |
|
|
128
137
|
| `--json` | Output JSON (single-shot or stream) |
|
|
@@ -130,7 +139,7 @@ dikt -q --json --diarize
|
|
|
130
139
|
| `--silence <seconds>` | Silence duration before auto-stop (default: 2.0) |
|
|
131
140
|
| `--pause <seconds>` | Pause duration to split stream chunks (default: 1.0) |
|
|
132
141
|
| `--language <code>` | Language code, e.g. en, de, fr (default: auto) |
|
|
133
|
-
| `--timestamps <granularity>` | Add timestamps: segment
|
|
142
|
+
| `--timestamps <granularity>` | Add timestamps: segment or word |
|
|
134
143
|
| `--diarize` | Enable speaker identification |
|
|
135
144
|
| `-n`, `--no-newline` | Join stream chunks without newlines |
|
|
136
145
|
| `--no-color` | Disable colored output |
|
package/cli.mjs
CHANGED
|
@@ -6,7 +6,10 @@ import fs from 'node:fs';
|
|
|
6
6
|
import path from 'node:path';
|
|
7
7
|
import os from 'node:os';
|
|
8
8
|
import readline from 'node:readline';
|
|
9
|
-
import { spawn, execFileSync } from 'node:child_process';
|
|
9
|
+
import { spawn, execFileSync, execFile as execFileCb } from 'node:child_process';
|
|
10
|
+
import { promisify } from 'node:util';
|
|
11
|
+
const execFileAsync = promisify(execFileCb);
|
|
12
|
+
import https from 'node:https';
|
|
10
13
|
|
|
11
14
|
// ── ANSI helpers ──────────────────────────────────────────────────────────────
|
|
12
15
|
|
|
@@ -37,9 +40,16 @@ if (process.env.NO_COLOR != null || process.env.TERM === 'dumb' || process.argv.
|
|
|
37
40
|
|
|
38
41
|
const moveTo = (row, col = 1) => `${ESC}${row};${col}H`;
|
|
39
42
|
|
|
43
|
+
function formatFileSize(bytes) {
|
|
44
|
+
if (bytes < 1024) return `${bytes} B`;
|
|
45
|
+
if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
|
|
46
|
+
if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
|
|
47
|
+
return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
|
|
48
|
+
}
|
|
49
|
+
|
|
40
50
|
// ── Constants ─────────────────────────────────────────────────────────────────
|
|
41
51
|
|
|
42
|
-
const VERSION = '1.
|
|
52
|
+
const VERSION = '1.3.0';
|
|
43
53
|
const CONFIG_BASE = process.env.XDG_CONFIG_HOME || path.join(os.homedir(), '.config');
|
|
44
54
|
const CONFIG_DIR = path.join(CONFIG_BASE, 'dikt');
|
|
45
55
|
const CONFIG_FILE = path.join(CONFIG_DIR, 'config.json');
|
|
@@ -47,6 +57,56 @@ const MAX_HISTORY = 10;
|
|
|
47
57
|
const MIN_RECORDING_MS = 500;
|
|
48
58
|
const COST_PER_MIN = 0.003;
|
|
49
59
|
const SPINNER = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
|
|
60
|
+
const TARGET_CHUNK_SEC = 270; // ~4.5 min target chunk size
|
|
61
|
+
const CHUNK_MIN_SEC = 360; // only chunk files longer than 6 minutes
|
|
62
|
+
const SPLIT_SEARCH_SEC = 30; // search ±30s around target for silence split point
|
|
63
|
+
const MIN_CHUNK_SEC = 30; // merge chunks shorter than this into neighbor
|
|
64
|
+
const MAX_PARALLEL = 4; // max concurrent API requests
|
|
65
|
+
const MIME_TYPES = { wav: 'audio/wav', mp3: 'audio/mpeg', ogg: 'audio/ogg', flac: 'audio/flac', opus: 'audio/ogg', webm: 'audio/webm', m4a: 'audio/mp4', aac: 'audio/aac', wma: 'audio/x-ms-wma', aif: 'audio/aiff', aiff: 'audio/aiff', mp4: 'audio/mp4', oga: 'audio/ogg', amr: 'audio/amr', caf: 'audio/x-caf' };
|
|
66
|
+
const COMPRESSIBLE = new Set(['wav', 'flac', 'aiff', 'aif', 'raw', 'caf']); // lossless formats worth re-encoding
|
|
67
|
+
|
|
68
|
+
function createStderrSpinner() {
|
|
69
|
+
let frame = 0;
|
|
70
|
+
let interval = null;
|
|
71
|
+
let currentMsg = '';
|
|
72
|
+
const isTTY = process.stderr.isTTY;
|
|
73
|
+
const render = () => {
|
|
74
|
+
const sp = SPINNER[frame++ % SPINNER.length];
|
|
75
|
+
process.stderr.write(`\r${CLEAR_LINE}${YELLOW}${sp}${RESET} ${currentMsg}`);
|
|
76
|
+
};
|
|
77
|
+
|
|
78
|
+
return {
|
|
79
|
+
start(msg) {
|
|
80
|
+
currentMsg = msg;
|
|
81
|
+
if (isTTY) {
|
|
82
|
+
render();
|
|
83
|
+
interval = setInterval(render, 80);
|
|
84
|
+
} else {
|
|
85
|
+
process.stderr.write(`${currentMsg}\n`);
|
|
86
|
+
}
|
|
87
|
+
},
|
|
88
|
+
update(msg) {
|
|
89
|
+
currentMsg = msg;
|
|
90
|
+
if (isTTY) {
|
|
91
|
+
// Restart interval — prevents queued callbacks from firing after sync calls
|
|
92
|
+
if (interval) { clearInterval(interval); }
|
|
93
|
+
render();
|
|
94
|
+
interval = setInterval(render, 80);
|
|
95
|
+
} else {
|
|
96
|
+
process.stderr.write(`${msg}\n`);
|
|
97
|
+
}
|
|
98
|
+
},
|
|
99
|
+
stop(finalMsg) {
|
|
100
|
+
if (interval) { clearInterval(interval); interval = null; }
|
|
101
|
+
if (isTTY) {
|
|
102
|
+
process.stderr.write(`\r${CLEAR_LINE}`);
|
|
103
|
+
if (finalMsg) process.stderr.write(`${finalMsg}\n`);
|
|
104
|
+
} else if (finalMsg) {
|
|
105
|
+
process.stderr.write(`${finalMsg}\n`);
|
|
106
|
+
}
|
|
107
|
+
},
|
|
108
|
+
};
|
|
109
|
+
}
|
|
50
110
|
|
|
51
111
|
const EXIT_OK = 0;
|
|
52
112
|
const EXIT_DEPENDENCY = 1;
|
|
@@ -93,8 +153,8 @@ function validateConfig(cfg) {
|
|
|
93
153
|
|
|
94
154
|
// ── Setup wizard (form-based) ─────────────────────────────────────────────────
|
|
95
155
|
|
|
96
|
-
const TIMESTAMPS_DISPLAY = { '': 'off', 'segment': 'segment', 'word': 'word'
|
|
97
|
-
const TIMESTAMPS_VALUE = { 'off': '', 'segment': 'segment', 'word': 'word'
|
|
156
|
+
const TIMESTAMPS_DISPLAY = { '': 'off', 'segment': 'segment', 'word': 'word' };
|
|
157
|
+
const TIMESTAMPS_VALUE = { 'off': '', 'segment': 'segment', 'word': 'word' };
|
|
98
158
|
|
|
99
159
|
async function setupWizard() {
|
|
100
160
|
const existing = loadConfig() || {};
|
|
@@ -105,7 +165,7 @@ async function setupWizard() {
|
|
|
105
165
|
{ key: 'language', label: 'Language', type: 'text', value: '', display: existing.language || 'auto', fallback: existing.language || '' },
|
|
106
166
|
{ key: 'temperature', label: 'Temperature', type: 'text', value: '', display: existing.temperature != null ? String(existing.temperature) : 'default', fallback: existing.temperature != null ? String(existing.temperature) : '' },
|
|
107
167
|
{ key: 'contextBias', label: 'Context bias', type: 'text', value: '', display: existing.contextBias || '', fallback: existing.contextBias || '' },
|
|
108
|
-
{ key: 'timestamps', label: 'Timestamps', type: 'select', options: ['off', 'segment', 'word'
|
|
168
|
+
{ key: 'timestamps', label: 'Timestamps', type: 'select', options: ['off', 'segment', 'word'], idx: ['off', 'segment', 'word'].indexOf(TIMESTAMPS_DISPLAY[existing.timestamps || ''] || 'off') },
|
|
109
169
|
{ key: 'diarize', label: 'Diarize', type: 'select', options: ['off', 'on'], idx: existing.diarize ? 1 : 0 },
|
|
110
170
|
];
|
|
111
171
|
|
|
@@ -953,7 +1013,7 @@ function trimSilence(rawData) {
|
|
|
953
1013
|
return Buffer.concat(output);
|
|
954
1014
|
}
|
|
955
1015
|
|
|
956
|
-
async function callTranscribeAPI(file, { signal, timestamps, diarize } = {}) {
|
|
1016
|
+
async function callTranscribeAPI(file, { signal, timestamps, diarize, onProgress } = {}) {
|
|
957
1017
|
const fd = new FormData();
|
|
958
1018
|
fd.append('file', file);
|
|
959
1019
|
fd.append('model', config.model);
|
|
@@ -961,7 +1021,7 @@ async function callTranscribeAPI(file, { signal, timestamps, diarize } = {}) {
|
|
|
961
1021
|
if (config.temperature != null) fd.append('temperature', String(config.temperature));
|
|
962
1022
|
if (config.contextBias) fd.append('context_bias', config.contextBias);
|
|
963
1023
|
if (timestamps) {
|
|
964
|
-
|
|
1024
|
+
fd.append('timestamp_granularities[]', timestamps);
|
|
965
1025
|
}
|
|
966
1026
|
if (diarize) {
|
|
967
1027
|
fd.append('diarize', 'true');
|
|
@@ -969,17 +1029,68 @@ async function callTranscribeAPI(file, { signal, timestamps, diarize } = {}) {
|
|
|
969
1029
|
if (!timestamps) fd.append('timestamp_granularities[]', 'segment');
|
|
970
1030
|
}
|
|
971
1031
|
|
|
972
|
-
|
|
973
|
-
|
|
1032
|
+
// Use Request to serialize FormData into multipart body,
|
|
1033
|
+
// then send via node:https which has no hardcoded headersTimeout
|
|
1034
|
+
// (Node's built-in fetch/undici has a 300s headersTimeout that
|
|
1035
|
+
// cannot be configured without importing undici as a dependency).
|
|
1036
|
+
const req = new Request('https://api.mistral.ai/v1/audio/transcriptions', {
|
|
974
1037
|
method: 'POST',
|
|
975
1038
|
headers: { Authorization: `Bearer ${config.apiKey}` },
|
|
976
1039
|
body: fd,
|
|
977
|
-
|
|
1040
|
+
});
|
|
1041
|
+
const contentType = req.headers.get('content-type');
|
|
1042
|
+
const body = Buffer.from(await req.arrayBuffer());
|
|
1043
|
+
|
|
1044
|
+
const t0 = Date.now();
|
|
1045
|
+
const { status, raw } = await new Promise((resolve, reject) => {
|
|
1046
|
+
const hreq = https.request('https://api.mistral.ai/v1/audio/transcriptions', {
|
|
1047
|
+
method: 'POST',
|
|
1048
|
+
headers: {
|
|
1049
|
+
'Authorization': `Bearer ${config.apiKey}`,
|
|
1050
|
+
'Content-Type': contentType,
|
|
1051
|
+
'Content-Length': body.length,
|
|
1052
|
+
},
|
|
1053
|
+
}, (res) => {
|
|
1054
|
+
const chunks = [];
|
|
1055
|
+
res.on('data', (c) => chunks.push(c));
|
|
1056
|
+
res.on('end', () => resolve({ status: res.statusCode, raw: Buffer.concat(chunks).toString() }));
|
|
1057
|
+
res.on('error', reject);
|
|
1058
|
+
});
|
|
1059
|
+
|
|
1060
|
+
hreq.on('error', (err) => {
|
|
1061
|
+
const ne = new Error(`Network error: ${err.message}`);
|
|
1062
|
+
ne.networkError = true;
|
|
1063
|
+
reject(ne);
|
|
1064
|
+
});
|
|
1065
|
+
|
|
1066
|
+
const abortSig = signal || AbortSignal.timeout(30_000);
|
|
1067
|
+
if (abortSig.aborted) { hreq.destroy(); reject(new DOMException('The operation was aborted', 'AbortError')); return; }
|
|
1068
|
+
abortSig.addEventListener('abort', () => {
|
|
1069
|
+
hreq.destroy();
|
|
1070
|
+
reject(abortSig.reason instanceof DOMException ? abortSig.reason
|
|
1071
|
+
: new DOMException('The operation was aborted', 'AbortError'));
|
|
1072
|
+
}, { once: true });
|
|
1073
|
+
|
|
1074
|
+
// Write body in chunks to enable upload progress tracking
|
|
1075
|
+
const CHUNK_SIZE = 256 * 1024;
|
|
1076
|
+
let written = 0;
|
|
1077
|
+
const total = body.length;
|
|
1078
|
+
const writeChunks = () => {
|
|
1079
|
+
while (written < total) {
|
|
1080
|
+
const end = Math.min(written + CHUNK_SIZE, total);
|
|
1081
|
+
const ok = hreq.write(body.subarray(written, end));
|
|
1082
|
+
written = end;
|
|
1083
|
+
if (onProgress) onProgress(written, total);
|
|
1084
|
+
if (!ok) { hreq.once('drain', writeChunks); return; }
|
|
1085
|
+
}
|
|
1086
|
+
if (onProgress) onProgress(-1, total); // upload done, server processing
|
|
1087
|
+
hreq.end();
|
|
1088
|
+
};
|
|
1089
|
+
writeChunks();
|
|
978
1090
|
});
|
|
979
1091
|
const latency = Date.now() - t0;
|
|
980
1092
|
|
|
981
|
-
if (
|
|
982
|
-
const raw = await resp.text().catch(() => '');
|
|
1093
|
+
if (status < 200 || status >= 300) {
|
|
983
1094
|
let msg;
|
|
984
1095
|
try {
|
|
985
1096
|
const e = JSON.parse(raw);
|
|
@@ -992,14 +1103,14 @@ async function callTranscribeAPI(file, { signal, timestamps, diarize } = {}) {
|
|
|
992
1103
|
}
|
|
993
1104
|
if (!msg) msg = raw;
|
|
994
1105
|
} catch {
|
|
995
|
-
msg = raw || `HTTP ${
|
|
1106
|
+
msg = raw || `HTTP ${status}`;
|
|
996
1107
|
}
|
|
997
1108
|
const err = new Error(msg);
|
|
998
|
-
err.status =
|
|
1109
|
+
err.status = status;
|
|
999
1110
|
throw err;
|
|
1000
1111
|
}
|
|
1001
1112
|
|
|
1002
|
-
const data =
|
|
1113
|
+
const data = JSON.parse(raw);
|
|
1003
1114
|
const text = (data.text || '').trim();
|
|
1004
1115
|
return { text, latency, segments: data.segments, words: data.words };
|
|
1005
1116
|
}
|
|
@@ -1059,28 +1170,297 @@ function buildJsonOutput(base, { segments, words, timestamps, diarize } = {}) {
|
|
|
1059
1170
|
return out;
|
|
1060
1171
|
}
|
|
1061
1172
|
|
|
1173
|
+
// ── File optimization helpers ────────────────────────────────────────────────
|
|
1174
|
+
|
|
1175
|
+
let _ffmpegAvail;
|
|
1176
|
+
function ffmpegAvailable() {
|
|
1177
|
+
if (_ffmpegAvail !== undefined) return _ffmpegAvail;
|
|
1178
|
+
try {
|
|
1179
|
+
execFileSync('ffmpeg', ['-version'], { stdio: 'pipe' });
|
|
1180
|
+
execFileSync('ffprobe', ['-version'], { stdio: 'pipe' });
|
|
1181
|
+
_ffmpegAvail = true;
|
|
1182
|
+
} catch { _ffmpegAvail = false; }
|
|
1183
|
+
return _ffmpegAvail;
|
|
1184
|
+
}
|
|
1185
|
+
|
|
1186
|
+
let _ytdlpAvail;
|
|
1187
|
+
function ytdlpAvailable() {
|
|
1188
|
+
if (_ytdlpAvail !== undefined) return _ytdlpAvail;
|
|
1189
|
+
try { execFileSync('yt-dlp', ['--version'], { stdio: 'pipe' }); _ytdlpAvail = true; }
|
|
1190
|
+
catch { _ytdlpAvail = false; }
|
|
1191
|
+
return _ytdlpAvail;
|
|
1192
|
+
}
|
|
1193
|
+
|
|
1194
|
+
function downloadWithYtdlp(url, spinner) {
|
|
1195
|
+
const tmpBase = path.join(os.tmpdir(), `dikt-ytdlp-${process.pid}-${Date.now()}`);
|
|
1196
|
+
const outTemplate = `${tmpBase}.%(ext)s`;
|
|
1197
|
+
|
|
1198
|
+
return new Promise((resolve, reject) => {
|
|
1199
|
+
const proc = spawn('yt-dlp', [
|
|
1200
|
+
'-x', '--audio-format', 'opus', '--audio-quality', '48K',
|
|
1201
|
+
'-o', outTemplate, '--no-playlist', '--newline', url,
|
|
1202
|
+
], { stdio: ['ignore', 'pipe', 'pipe'] });
|
|
1203
|
+
|
|
1204
|
+
const cleanupPartial = () => {
|
|
1205
|
+
const dir = path.dirname(tmpBase);
|
|
1206
|
+
const prefix = path.basename(tmpBase);
|
|
1207
|
+
try {
|
|
1208
|
+
for (const f of fs.readdirSync(dir)) {
|
|
1209
|
+
if (f.startsWith(prefix) && f.length > prefix.length) try { fs.unlinkSync(path.join(dir, f)); } catch {}
|
|
1210
|
+
}
|
|
1211
|
+
} catch {}
|
|
1212
|
+
};
|
|
1213
|
+
|
|
1214
|
+
let aborted = false;
|
|
1215
|
+
const onSigint = () => { aborted = true; proc.kill(); };
|
|
1216
|
+
process.on('SIGINT', onSigint);
|
|
1217
|
+
|
|
1218
|
+
let lastErr = '';
|
|
1219
|
+
const parseOutput = (chunk) => {
|
|
1220
|
+
const lines = chunk.toString().split('\n');
|
|
1221
|
+
for (const line of lines) {
|
|
1222
|
+
if (!line.trim()) continue;
|
|
1223
|
+
const dl = line.match(/\[download\]\s+([\d.]+)%/);
|
|
1224
|
+
if (dl) { spinner.update(`Downloading... ${Math.round(parseFloat(dl[1]))}%`); continue; }
|
|
1225
|
+
if (/\[ExtractAudio\]/.test(line)) { spinner.update('Converting audio...'); continue; }
|
|
1226
|
+
if (/\[download\]\s+Destination:/.test(line)) { spinner.update('Downloading...'); continue; }
|
|
1227
|
+
}
|
|
1228
|
+
};
|
|
1229
|
+
proc.stdout.on('data', parseOutput);
|
|
1230
|
+
proc.stderr.on('data', (chunk) => {
|
|
1231
|
+
lastErr = chunk.toString().trim().split('\n').pop();
|
|
1232
|
+
parseOutput(chunk);
|
|
1233
|
+
});
|
|
1234
|
+
|
|
1235
|
+
proc.on('close', (code) => {
|
|
1236
|
+
process.removeListener('SIGINT', onSigint);
|
|
1237
|
+
if (aborted) { cleanupPartial(); return reject(new Error('Download aborted')); }
|
|
1238
|
+
if (code !== 0) { cleanupPartial(); return reject(new Error(lastErr || `yt-dlp exited with code ${code}`)); }
|
|
1239
|
+
// yt-dlp may produce a different extension than requested; find the actual file
|
|
1240
|
+
const dir = path.dirname(tmpBase);
|
|
1241
|
+
const prefix = path.basename(tmpBase);
|
|
1242
|
+
try {
|
|
1243
|
+
const match = fs.readdirSync(dir).find(f => f.startsWith(prefix) && f.length > prefix.length);
|
|
1244
|
+
if (!match) return reject(new Error('yt-dlp produced no output file'));
|
|
1245
|
+
resolve(path.join(dir, match));
|
|
1246
|
+
} catch (err) { reject(err); }
|
|
1247
|
+
});
|
|
1248
|
+
});
|
|
1249
|
+
}
|
|
1250
|
+
|
|
1251
|
+
function getAudioDuration(filePath) {
|
|
1252
|
+
try {
|
|
1253
|
+
const out = execFileSync('ffprobe', ['-v', 'quiet', '-show_entries', 'format=duration', '-of', 'csv=p=0', filePath], { stdio: 'pipe', encoding: 'utf8' });
|
|
1254
|
+
return parseFloat(out.trim()) || 0;
|
|
1255
|
+
} catch { return 0; }
|
|
1256
|
+
}
|
|
1257
|
+
|
|
1258
|
+
async function compressAudio(inputPath) {
|
|
1259
|
+
const base = path.join(os.tmpdir(), `dikt-${process.pid}-${Date.now()}-${path.basename(inputPath, path.extname(inputPath))}`);
|
|
1260
|
+
for (const codec of ['libopus', 'libvorbis']) {
|
|
1261
|
+
const outPath = `${base}.ogg`;
|
|
1262
|
+
try {
|
|
1263
|
+
await execFileAsync('ffmpeg', ['-i', inputPath, '-c:a', codec, '-b:a', '48k', '-y', '-v', 'quiet', outPath], { stdio: 'pipe' });
|
|
1264
|
+
if (fs.statSync(outPath).size > 0) return outPath;
|
|
1265
|
+
try { fs.unlinkSync(outPath); } catch {}
|
|
1266
|
+
} catch { try { fs.unlinkSync(outPath); } catch {} }
|
|
1267
|
+
}
|
|
1268
|
+
return null;
|
|
1269
|
+
}
|
|
1270
|
+
|
|
1271
|
+
async function findSilenceSplitPoint(filePath, targetSec) {
|
|
1272
|
+
const startSec = Math.max(0, targetSec - SPLIT_SEARCH_SEC);
|
|
1273
|
+
const durSec = SPLIT_SEARCH_SEC * 2;
|
|
1274
|
+
|
|
1275
|
+
try {
|
|
1276
|
+
// Extract a small window of raw PCM around the target for silence analysis
|
|
1277
|
+
const { stdout: raw } = await execFileAsync('ffmpeg', [
|
|
1278
|
+
'-ss', String(startSec), '-t', String(durSec), '-i', filePath,
|
|
1279
|
+
'-f', 's16le', '-ar', '16000', '-ac', '1', '-v', 'quiet', '-',
|
|
1280
|
+
], { encoding: 'buffer', maxBuffer: 16000 * 2 * durSec + 4096 });
|
|
1281
|
+
|
|
1282
|
+
// Scan for silence in 50ms windows
|
|
1283
|
+
const WINDOW_BYTES = Math.round(16000 * 0.05) * 2; // 50ms at 16kHz 16-bit mono
|
|
1284
|
+
let bestOffset = -1, bestLen = 0;
|
|
1285
|
+
let runStart = -1, runLen = 0;
|
|
1286
|
+
|
|
1287
|
+
for (let offset = 0; offset + WINDOW_BYTES <= raw.length; offset += WINDOW_BYTES) {
|
|
1288
|
+
const peak = peakAmplitude(raw.subarray(offset, offset + WINDOW_BYTES));
|
|
1289
|
+
if (peak < SILENCE_THRESHOLD) {
|
|
1290
|
+
if (runStart === -1) runStart = offset;
|
|
1291
|
+
runLen++;
|
|
1292
|
+
} else {
|
|
1293
|
+
if (runLen > bestLen) { bestOffset = runStart; bestLen = runLen; }
|
|
1294
|
+
runStart = -1; runLen = 0;
|
|
1295
|
+
}
|
|
1296
|
+
}
|
|
1297
|
+
if (runLen > bestLen) { bestOffset = runStart; bestLen = runLen; }
|
|
1298
|
+
|
|
1299
|
+
if (bestLen >= 10) { // at least 500ms of silence (avoids mid-word splits)
|
|
1300
|
+
const centerBytes = bestOffset + Math.floor(bestLen / 2) * WINDOW_BYTES;
|
|
1301
|
+
return startSec + centerBytes / (16000 * 2);
|
|
1302
|
+
}
|
|
1303
|
+
} catch {}
|
|
1304
|
+
|
|
1305
|
+
return targetSec; // fallback: no silence found, split at target
|
|
1306
|
+
}
|
|
1307
|
+
|
|
1308
|
+
function cleanChunkText(t) {
|
|
1309
|
+
if (!t) return '';
|
|
1310
|
+
// Strip [PRINT_WORDLEVEL_TIME] markup the API sometimes spontaneously returns
|
|
1311
|
+
if (t.includes('[PRINT_WORDLEVEL_TIME]')) {
|
|
1312
|
+
t = t.replace(/\[PRINT_WORDLEVEL_TIME\]/g, '');
|
|
1313
|
+
t = t.replace(/<\/?\d{2}:\d{2}\.\d+>/g, '');
|
|
1314
|
+
t = t.replace(/\s+/g, ' ');
|
|
1315
|
+
}
|
|
1316
|
+
return t.trim();
|
|
1317
|
+
}
|
|
1318
|
+
|
|
1319
|
+
function mergeChunkResults(results, splitPoints) {
|
|
1320
|
+
// No overlap — just concatenate text, offset timestamps
|
|
1321
|
+
let text = results.map(r => cleanChunkText(r.text)).filter(Boolean).join(' ');
|
|
1322
|
+
// Fix missing spaces after punctuation (API omits leading spaces on some segments)
|
|
1323
|
+
text = text.replace(/([.!?,])([A-Za-z])/g, '$1 $2');
|
|
1324
|
+
let maxLatency = 0;
|
|
1325
|
+
const allSegments = [];
|
|
1326
|
+
const allWords = [];
|
|
1327
|
+
|
|
1328
|
+
const round1 = (n) => Math.round(n * 10) / 10;
|
|
1329
|
+
for (let i = 0; i < results.length; i++) {
|
|
1330
|
+
const r = results[i];
|
|
1331
|
+
const offset = splitPoints[i];
|
|
1332
|
+
if (r.latency > maxLatency) maxLatency = r.latency;
|
|
1333
|
+
|
|
1334
|
+
if (r.segments) {
|
|
1335
|
+
for (const seg of r.segments) {
|
|
1336
|
+
allSegments.push({ ...seg, start: round1(seg.start + offset), end: round1(seg.end + offset) });
|
|
1337
|
+
}
|
|
1338
|
+
}
|
|
1339
|
+
if (r.words) {
|
|
1340
|
+
for (const w of r.words) {
|
|
1341
|
+
allWords.push({ ...w, start: round1(w.start + offset), end: round1(w.end + offset) });
|
|
1342
|
+
}
|
|
1343
|
+
}
|
|
1344
|
+
}
|
|
1345
|
+
|
|
1346
|
+
return {
|
|
1347
|
+
text,
|
|
1348
|
+
latency: maxLatency,
|
|
1349
|
+
segments: allSegments.length ? allSegments : undefined,
|
|
1350
|
+
words: allWords.length ? allWords : undefined,
|
|
1351
|
+
};
|
|
1352
|
+
}
|
|
1353
|
+
|
|
1354
|
+
async function parallelMap(items, fn, concurrency) {
|
|
1355
|
+
const results = new Array(items.length);
|
|
1356
|
+
let next = 0;
|
|
1357
|
+
const worker = async () => { while (next < items.length) { const i = next++; results[i] = await fn(items[i], i); } };
|
|
1358
|
+
await Promise.all(Array.from({ length: Math.min(concurrency, items.length) }, () => worker()));
|
|
1359
|
+
return results;
|
|
1360
|
+
}
|
|
1361
|
+
|
|
1062
1362
|
// ── File mode ────────────────────────────────────────────────────────────────
|
|
1063
1363
|
|
|
1064
1364
|
async function runFile(flags) {
|
|
1365
|
+
const spinner = createStderrSpinner();
|
|
1366
|
+
let fileSize = 0;
|
|
1367
|
+
let transcribeTimer = null;
|
|
1368
|
+
const tempFiles = [];
|
|
1369
|
+
|
|
1065
1370
|
try {
|
|
1066
|
-
|
|
1371
|
+
const isURL = /^https?:\/\//i.test(flags.file);
|
|
1372
|
+
|
|
1373
|
+
if (isURL) {
|
|
1374
|
+
if (!ytdlpAvailable()) {
|
|
1375
|
+
process.stderr.write(`\n${RED}${BOLD} yt-dlp not found.${RESET}\n\n`);
|
|
1376
|
+
process.stderr.write(` yt-dlp is required to download audio from URLs. Install it:\n\n`);
|
|
1377
|
+
if (process.platform === 'darwin') {
|
|
1378
|
+
process.stderr.write(` ${BOLD}brew install yt-dlp${RESET}\n\n`);
|
|
1379
|
+
} else if (process.platform === 'win32') {
|
|
1380
|
+
process.stderr.write(` ${BOLD}choco install yt-dlp${RESET} or ${BOLD}scoop install yt-dlp${RESET}\n\n`);
|
|
1381
|
+
} else {
|
|
1382
|
+
process.stderr.write(` ${BOLD}sudo apt install yt-dlp${RESET} (Debian/Ubuntu)\n`);
|
|
1383
|
+
process.stderr.write(` ${BOLD}pip install yt-dlp${RESET} (any platform)\n\n`);
|
|
1384
|
+
}
|
|
1385
|
+
return EXIT_DEPENDENCY;
|
|
1386
|
+
}
|
|
1387
|
+
spinner.start('Downloading audio...');
|
|
1388
|
+
try {
|
|
1389
|
+
const downloaded = await downloadWithYtdlp(flags.file, spinner);
|
|
1390
|
+
tempFiles.push(downloaded);
|
|
1391
|
+
flags = { ...flags, file: downloaded };
|
|
1392
|
+
} catch (err) {
|
|
1393
|
+
spinner.stop();
|
|
1394
|
+
process.stderr.write(`Error downloading: ${err.message}\n`);
|
|
1395
|
+
return EXIT_TRANSCRIPTION;
|
|
1396
|
+
}
|
|
1397
|
+
spinner.update('Processing audio...');
|
|
1398
|
+
} else if (!flags.file || !fs.existsSync(flags.file)) {
|
|
1067
1399
|
process.stderr.write(`Error: file not found: ${flags.file}\n`);
|
|
1068
1400
|
return EXIT_TRANSCRIPTION;
|
|
1401
|
+
} else {
|
|
1402
|
+
spinner.start('Reading file...');
|
|
1403
|
+
}
|
|
1404
|
+
fileSize = fs.statSync(flags.file).size;
|
|
1405
|
+
const ext = path.extname(flags.file).slice(1).toLowerCase() || 'wav';
|
|
1406
|
+
|
|
1407
|
+
// Check if ffmpeg is available for chunking / compression optimizations
|
|
1408
|
+
const hasFFmpeg = ffmpegAvailable();
|
|
1409
|
+
const duration = hasFFmpeg ? getAudioDuration(flags.file) : 0;
|
|
1410
|
+
const canChunk = hasFFmpeg && !flags.diarize && duration > CHUNK_MIN_SEC;
|
|
1411
|
+
|
|
1412
|
+
if (canChunk) {
|
|
1413
|
+
spinner.stop();
|
|
1414
|
+
return await runFileChunked(flags, { fileSize, duration });
|
|
1415
|
+
}
|
|
1416
|
+
|
|
1417
|
+
// Compress uncompressed formats (wav/flac → ogg) for faster upload
|
|
1418
|
+
let uploadPath = flags.file;
|
|
1419
|
+
let uploadExt = ext;
|
|
1420
|
+
if (hasFFmpeg && COMPRESSIBLE.has(ext)) {
|
|
1421
|
+
spinner.update('Compressing...');
|
|
1422
|
+
const compressed = await compressAudio(flags.file);
|
|
1423
|
+
if (compressed) {
|
|
1424
|
+
const newSize = fs.statSync(compressed).size;
|
|
1425
|
+
if (newSize < fileSize) {
|
|
1426
|
+
tempFiles.push(compressed);
|
|
1427
|
+
uploadPath = compressed;
|
|
1428
|
+
uploadExt = path.extname(compressed).slice(1);
|
|
1429
|
+
spinner.update(`Compressed ${formatFileSize(fileSize)} → ${formatFileSize(newSize)}`);
|
|
1430
|
+
} else {
|
|
1431
|
+
try { fs.unlinkSync(compressed); } catch {}
|
|
1432
|
+
}
|
|
1433
|
+
}
|
|
1069
1434
|
}
|
|
1070
1435
|
|
|
1071
|
-
const blob = await fs.openAsBlob(
|
|
1072
|
-
const
|
|
1073
|
-
const
|
|
1074
|
-
|
|
1075
|
-
|
|
1436
|
+
const blob = await fs.openAsBlob(uploadPath);
|
|
1437
|
+
const mime = MIME_TYPES[uploadExt] || 'application/octet-stream';
|
|
1438
|
+
const file = new File([blob], path.basename(uploadPath), { type: mime });
|
|
1439
|
+
|
|
1440
|
+
spinner.update(`Uploading to API... (${formatFileSize(blob.size)})`);
|
|
1076
1441
|
|
|
1077
1442
|
const ac = new AbortController();
|
|
1078
|
-
const abortHandler = () => ac.abort();
|
|
1443
|
+
const abortHandler = () => { spinner.stop('Aborting...'); ac.abort(); };
|
|
1079
1444
|
process.on('SIGINT', abortHandler);
|
|
1080
1445
|
|
|
1081
|
-
const
|
|
1446
|
+
const onProgress = (sent, total) => {
|
|
1447
|
+
if (sent === -1) {
|
|
1448
|
+
const t0 = Date.now();
|
|
1449
|
+
const elapsed = () => { const s = Math.floor((Date.now() - t0) / 1000); return `${Math.floor(s / 60)}:${String(s % 60).padStart(2, '0')}`; };
|
|
1450
|
+
spinner.update(`Transcribing... ${DIM}(${elapsed()})${RESET}`);
|
|
1451
|
+
transcribeTimer = setInterval(() => spinner.update(`Transcribing... ${DIM}(${elapsed()})${RESET}`), 1000);
|
|
1452
|
+
} else {
|
|
1453
|
+
const pct = Math.round((sent / total) * 100);
|
|
1454
|
+
spinner.update(`Uploading ${pct}% (${formatFileSize(sent)} / ${formatFileSize(total)})`);
|
|
1455
|
+
}
|
|
1456
|
+
};
|
|
1457
|
+
|
|
1458
|
+
const result = await callTranscribeAPI(file, { signal: ac.signal, timestamps: flags.timestamps, diarize: flags.diarize, onProgress });
|
|
1459
|
+
if (transcribeTimer) clearInterval(transcribeTimer);
|
|
1082
1460
|
process.removeListener('SIGINT', abortHandler);
|
|
1083
1461
|
|
|
1462
|
+
spinner.stop(`${GREEN}Done${RESET} (${(result.latency / 1000).toFixed(1)}s)`);
|
|
1463
|
+
|
|
1084
1464
|
if (!result.text) {
|
|
1085
1465
|
process.stderr.write('No speech detected\n');
|
|
1086
1466
|
return EXIT_TRANSCRIPTION;
|
|
@@ -1110,12 +1490,180 @@ async function runFile(flags) {
|
|
|
1110
1490
|
|
|
1111
1491
|
return EXIT_OK;
|
|
1112
1492
|
} catch (err) {
|
|
1493
|
+
if (transcribeTimer) clearInterval(transcribeTimer);
|
|
1494
|
+
spinner.stop();
|
|
1495
|
+
|
|
1113
1496
|
if (err.name === 'AbortError') {
|
|
1114
1497
|
process.stderr.write('Aborted\n');
|
|
1115
1498
|
return EXIT_TRANSCRIPTION;
|
|
1116
1499
|
}
|
|
1117
|
-
|
|
1500
|
+
|
|
1501
|
+
const parts = [`Error: ${err.message}`];
|
|
1502
|
+
if (fileSize) parts.push(` File: ${flags.file} (${formatFileSize(fileSize)})`);
|
|
1503
|
+
|
|
1504
|
+
if (err.networkError) {
|
|
1505
|
+
parts.push(' Hint: check your network connection and try again');
|
|
1506
|
+
} else if (err.status === 401) {
|
|
1507
|
+
parts.push(' Hint: invalid API key — run `dikt setup` to reconfigure');
|
|
1508
|
+
} else if (err.status === 413) {
|
|
1509
|
+
parts.push(' Hint: file is too large for the API — try a shorter recording');
|
|
1510
|
+
} else if (err.status === 429) {
|
|
1511
|
+
parts.push(' Hint: rate limited — wait a moment and try again');
|
|
1512
|
+
} else if (err.status >= 500) {
|
|
1513
|
+
parts.push(' Hint: Mistral API server error — try again later');
|
|
1514
|
+
}
|
|
1515
|
+
|
|
1516
|
+
process.stderr.write(parts.join('\n') + '\n');
|
|
1517
|
+
return EXIT_TRANSCRIPTION;
|
|
1518
|
+
} finally {
|
|
1519
|
+
for (const f of tempFiles) { try { fs.unlinkSync(f); } catch {} }
|
|
1520
|
+
}
|
|
1521
|
+
}
|
|
1522
|
+
|
|
1523
|
+
/**
 * Transcribe a long audio file by splitting it into chunks at silence
 * boundaries, compressing each chunk with ffmpeg, transcribing the chunks in
 * parallel against the Mistral API, and merging the results back together.
 *
 * @param {object} flags - Parsed CLI flags; reads `file`, `timestamps`,
 *   `json`, and `output`.
 * @param {object} info
 * @param {number} info.fileSize - Size of the input file in bytes (used only
 *   for the error report).
 * @param {number} info.duration - Total audio duration in seconds; drives the
 *   chunking math.
 * @returns {Promise<number>} An exit code: EXIT_OK on success,
 *   EXIT_TRANSCRIPTION on abort, empty transcript, or any error.
 */
async function runFileChunked(flags, { fileSize, duration }) {
  const spinner = createStderrSpinner();
  const tempFiles = [];            // every chunk file we create; deleted in `finally`
  const t0 = Date.now();           // start time for elapsed/latency reporting
  let progressTimer = null;        // interval updating the spinner once per second
  let abortHandler = null;         // SIGINT listener; tracked so `finally` can detach it

  try {
    // Find optimal split points at silence boundaries: aim for chunks of
    // roughly TARGET_CHUNK_SEC, but let findSilenceSplitPoint nudge each
    // boundary to a nearby silence so we don't cut words in half.
    const numTargetChunks = Math.ceil(duration / TARGET_CHUNK_SEC);
    spinner.start('Analyzing audio for split points...');

    const splitPoints = [0];
    for (let i = 1; i < numTargetChunks; i++) {
      spinner.update(`Finding split point ${i}/${numTargetChunks - 1}...`);
      splitPoints.push(await findSilenceSplitPoint(flags.file, i * TARGET_CHUNK_SEC));
    }
    splitPoints.push(duration);

    // Merge tiny trailing chunks (< MIN_CHUNK_SEC) into the previous one.
    // Iterating backwards keeps indices valid while splicing.
    for (let i = splitPoints.length - 2; i > 0; i--) {
      if (splitPoints[i + 1] - splitPoints[i] < MIN_CHUNK_SEC) {
        splitPoints.splice(i, 1);
      }
    }

    const numChunks = splitPoints.length - 1;

    // Split audio and compress each chunk. Opus at 48k is the preferred
    // upload format; if that encode fails (or yields an empty file) we fall
    // back to an uncompressed WAV extract of the same time range.
    const chunkBase = path.join(os.tmpdir(), `dikt-${process.pid}-${Date.now()}`);
    const uploadPaths = [];

    for (let i = 0; i < numChunks; i++) {
      spinner.update(`Preparing chunk ${i + 1}/${numChunks}...`);
      const start = splitPoints[i];
      const dur = splitPoints[i + 1] - start;
      const oggPath = `${chunkBase}-${i}.ogg`;
      try {
        await execFileAsync('ffmpeg', ['-ss', String(start), '-t', String(dur), '-i', flags.file, '-c:a', 'libopus', '-b:a', '48k', '-y', '-v', 'quiet', oggPath], { stdio: 'pipe' });
        if (fs.statSync(oggPath).size > 0) {
          tempFiles.push(oggPath);
          uploadPaths.push(oggPath);
        } else { throw new Error('empty output'); }  // treat a zero-byte encode as failure → WAV fallback
      } catch {
        // Opus encode failed; clean up the partial .ogg and retry as WAV.
        try { fs.unlinkSync(oggPath); } catch {}
        const wavPath = `${chunkBase}-${i}.wav`;
        await execFileAsync('ffmpeg', ['-ss', String(start), '-t', String(dur), '-i', flags.file, '-y', '-v', 'quiet', wavPath], { stdio: 'pipe' });
        if (!fs.statSync(wavPath).size) throw new Error(`ffmpeg produced empty chunk ${i}`);
        tempFiles.push(wavPath);
        uploadPaths.push(wavPath);
      }
    }

    const totalUploadSize = uploadPaths.reduce((sum, p) => sum + fs.statSync(p).size, 0);
    spinner.update(`Compressed → ${formatFileSize(totalUploadSize)} total`);

    // Abort handling: Ctrl+C cancels all in-flight uploads via the shared
    // AbortController signal. Note the handler is only attached for the
    // transcription phase; `finally` detaches it if we bail early.
    const ac = new AbortController();
    abortHandler = () => { spinner.stop('Aborting...'); ac.abort(); };
    process.on('SIGINT', abortHandler);

    // Transcribe chunks in parallel (bounded by MAX_PARALLEL), with a 1 s
    // progress ticker showing completed count and wall-clock elapsed (m:ss).
    let completed = 0;
    const elapsed = () => {
      const s = Math.floor((Date.now() - t0) / 1000);
      return `${Math.floor(s / 60)}:${String(s % 60).padStart(2, '0')}`;
    };
    spinner.update(`Transcribing ${numChunks} chunks... ${DIM}(${elapsed()})${RESET}`);
    progressTimer = setInterval(() => {
      spinner.update(`Transcribing... ${completed}/${numChunks} done ${DIM}(${elapsed()})${RESET}`);
    }, 1000);

    const chunkIndices = Array.from({ length: numChunks }, (_, i) => i);
    const results = await parallelMap(chunkIndices, async (i) => {
      const uploadPath = uploadPaths[i];
      const ext = path.extname(uploadPath).slice(1);
      const blob = await fs.openAsBlob(uploadPath);
      const file = new File([blob], `chunk-${i}.${ext}`, { type: MIME_TYPES[ext] || 'audio/wav' });
      const result = await callTranscribeAPI(file, { signal: ac.signal, timestamps: flags.timestamps });
      completed++;  // progress counter read by the ticker above
      return result;
    }, MAX_PARALLEL);

    // Transcription finished: stop the ticker and detach the SIGINT handler
    // now (nulling both so `finally` doesn't repeat the teardown).
    clearInterval(progressTimer); progressTimer = null;
    process.removeListener('SIGINT', abortHandler); abortHandler = null;

    // Merge results — no overlap, just concatenate text and offset timestamps
    // by each chunk's start time (splitPoints).
    const merged = mergeChunkResults(results, splitPoints);
    const totalLatency = Date.now() - t0;
    spinner.stop(`${GREEN}Done${RESET} (${(totalLatency / 1000).toFixed(1)}s, ${numChunks} chunks)`);

    if (!merged.text) {
      process.stderr.write('No speech detected\n');
      return EXIT_TRANSCRIPTION;
    }

    const wordCount = merged.text.split(/\s+/).filter(Boolean).length;

    // Format output: JSON (pretty-printed only when writing to a file) or
    // plain text, then route to --output file or stdout.
    let output;
    if (flags.json) {
      const out = buildJsonOutput(
        { text: merged.text, latency: totalLatency, words: wordCount },
        { segments: merged.segments, words: merged.words, timestamps: flags.timestamps, diarize: false },
      );
      output = JSON.stringify(out, null, flags.output ? 2 : 0) + '\n';
    } else {
      output = merged.text + '\n';
    }

    if (flags.output) {
      fs.writeFileSync(flags.output, output);
      process.stderr.write(`Saved to ${flags.output}\n`);
    } else {
      process.stdout.write(output);
    }

    return EXIT_OK;
  } catch (err) {
    spinner.stop();

    // A SIGINT-triggered abort surfaces as AbortError; report tersely.
    if (err.name === 'AbortError') {
      process.stderr.write('Aborted\n');
      return EXIT_TRANSCRIPTION;
    }

    // Build a multi-line error report with an actionable hint keyed off the
    // failure class (network, auth, rate limit, server error).
    const parts = [`Error: ${err.message}`];
    if (fileSize) parts.push(`  File: ${flags.file} (${formatFileSize(fileSize)})`);

    if (err.networkError) {
      parts.push('  Hint: check your network connection and try again');
    } else if (err.status === 401) {
      parts.push('  Hint: invalid API key — run `dikt setup` to reconfigure');
    } else if (err.status === 429) {
      parts.push('  Hint: rate limited — wait a moment and try again');
    } else if (err.status >= 500) {
      parts.push('  Hint: Mistral API server error — try again later');
    }

    process.stderr.write(parts.join('\n') + '\n');
    return EXIT_TRANSCRIPTION;
  } finally {
    // Teardown for early-exit paths (timer/listener may already be nulled on
    // the success path); always delete every temp chunk, best-effort.
    if (progressTimer) clearInterval(progressTimer);
    if (abortHandler) process.removeListener('SIGINT', abortHandler);
    for (const f of tempFiles) { try { fs.unlinkSync(f); } catch {} }
  }
}
|
|
1121
1669
|
|
|
@@ -1398,7 +1946,7 @@ async function main() {
|
|
|
1398
1946
|
language: flagVal(args, '--language', 'e.g. en, de, fr'),
|
|
1399
1947
|
file: flagVal(args, '--file', 'path to audio file'),
|
|
1400
1948
|
noNewline: args.includes('--no-newline') || args.includes('-n'),
|
|
1401
|
-
timestamps: flagVal(args, '--timestamps', 'segment
|
|
1949
|
+
timestamps: flagVal(args, '--timestamps', 'segment or word', { valid: ['segment', 'word'] }),
|
|
1402
1950
|
diarize: args.includes('--diarize'),
|
|
1403
1951
|
output: flagVal(args, '--output', 'path') || flagVal(args, '-o', 'path'),
|
|
1404
1952
|
};
|
|
@@ -1468,13 +2016,13 @@ Options:
|
|
|
1468
2016
|
--json Record once, output JSON to stdout
|
|
1469
2017
|
-q, --quiet Record once, print transcript to stdout
|
|
1470
2018
|
--stream Stream transcription chunks on pauses
|
|
1471
|
-
--file <path>
|
|
2019
|
+
--file <path|url> Transcribe audio file or URL (via yt-dlp)
|
|
1472
2020
|
-o, --output <path> Write output to file (.json auto-enables JSON)
|
|
1473
2021
|
--silence <seconds> Silence duration before auto-stop (default: 2.0)
|
|
1474
2022
|
--pause <seconds> Pause duration to split chunks (default: 1.0)
|
|
1475
2023
|
--language <code> Language code, e.g. en, de, fr (default: auto)
|
|
1476
2024
|
-n, --no-newline Join stream chunks without newlines
|
|
1477
|
-
--timestamps <granularity> Add timestamps: segment
|
|
2025
|
+
--timestamps <granularity> Add timestamps: segment or word
|
|
1478
2026
|
--diarize Enable speaker identification
|
|
1479
2027
|
--no-input Fail if config is missing (no wizard)
|
|
1480
2028
|
--no-color Disable colored output
|
|
@@ -1501,6 +2049,7 @@ Examples:
|
|
|
1501
2049
|
dikt --file meeting.wav Transcribe an existing audio file
|
|
1502
2050
|
dikt --file a.wav -o a.json Transcribe to a JSON file
|
|
1503
2051
|
dikt --file a.wav -o a.txt Transcribe to a text file
|
|
2052
|
+
dikt --file https://youtube.com/watch?v=ID Transcribe from URL
|
|
1504
2053
|
dikt --stream --silence 0 Stream continuously until Ctrl+C
|
|
1505
2054
|
dikt --stream -n Stream as continuous flowing text
|
|
1506
2055
|
dikt -q --json --diarize Transcribe with speaker labels
|
|
@@ -1514,13 +2063,13 @@ Environment variables:
|
|
|
1514
2063
|
|
|
1515
2064
|
Exit codes:
|
|
1516
2065
|
0 Success
|
|
1517
|
-
1 Missing dependency (sox)
|
|
2066
|
+
1 Missing dependency (sox/ffmpeg)
|
|
1518
2067
|
2 Not a terminal
|
|
1519
2068
|
3 Configuration error
|
|
1520
2069
|
4 Transcription error
|
|
1521
2070
|
|
|
1522
2071
|
Config: ${CONFIG_DIR}/config.json
|
|
1523
|
-
Requires: sox (
|
|
2072
|
+
Requires: sox (recording), ffmpeg (--file optimization), yt-dlp (URLs, optional)`);
|
|
1524
2073
|
process.exit(EXIT_OK);
|
|
1525
2074
|
}
|
|
1526
2075
|
|
|
@@ -1542,7 +2091,10 @@ Requires: sox (brew install sox)`);
|
|
|
1542
2091
|
|
|
1543
2092
|
applyEnvOverrides(config);
|
|
1544
2093
|
if (flags.language) config.language = flags.language;
|
|
1545
|
-
if (!flags.timestamps && config.timestamps)
|
|
2094
|
+
if (!flags.timestamps && config.timestamps) {
|
|
2095
|
+
// Migrate legacy 'segment,word' → 'word' (combined option removed)
|
|
2096
|
+
flags.timestamps = config.timestamps === 'segment,word' ? 'word' : config.timestamps;
|
|
2097
|
+
}
|
|
1546
2098
|
if (!flags.diarize && config.diarize) flags.diarize = true;
|
|
1547
2099
|
if (flags.output && flags.output.endsWith('.json')) flags.json = true;
|
|
1548
2100
|
|