dikt 1.3.0 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.mjs +101 -21
- package/package.json +1 -1
package/cli.mjs
CHANGED
|
@@ -49,7 +49,7 @@ function formatFileSize(bytes) {
|
|
|
49
49
|
|
|
50
50
|
// ── Constants ─────────────────────────────────────────────────────────────────
|
|
51
51
|
|
|
52
|
-
const VERSION = '1.
|
|
52
|
+
const VERSION = '1.4.1';
|
|
53
53
|
const CONFIG_BASE = process.env.XDG_CONFIG_HOME || path.join(os.homedir(), '.config');
|
|
54
54
|
const CONFIG_DIR = path.join(CONFIG_BASE, 'dikt');
|
|
55
55
|
const CONFIG_FILE = path.join(CONFIG_DIR, 'config.json');
|
|
@@ -458,13 +458,22 @@ function renderKeybar() {
|
|
|
458
458
|
return ` ${DIM}[SPACE]${RESET} Record ${copyKey}${autoCopyKey}${histKey}${retryKey}`.trimEnd();
|
|
459
459
|
}
|
|
460
460
|
|
|
461
|
+
/**
 * Format a duration in seconds as a compact human-readable string.
 *
 * Output shapes: `12.3s` below one minute, `1m 05.3s` below one hour,
 * and `1h 02m 05.3s` from one hour up.
 *
 * @param {number} seconds - Duration in seconds.
 * @returns {string} The duration with tenths-of-a-second precision.
 */
function formatDuration(seconds) {
  // Round to whole tenths of a second BEFORE splitting into units. Rounding
  // each unit on its own lets a value just under a boundary overflow its
  // field (59.96 -> "60.0s", 119.96 -> "1m 60.0s").
  const tenths = Math.round(seconds * 10);
  if (tenths < 600) return `${(tenths / 10).toFixed(1)}s`;
  const m = Math.floor(tenths / 600);
  // Zero-pad the seconds field to "SS.t" so the width stays stable.
  const s = ((tenths % 600) / 10).toFixed(1).padStart(4, '0');
  if (m < 60) return `${m}m ${s}s`;
  const h = Math.floor(m / 60);
  const rm = String(m % 60).padStart(2, '0');
  return `${h}h ${rm}m ${s}s`;
}
|
|
470
|
+
|
|
461
471
|
function renderStatus() {
|
|
462
472
|
switch (state.mode) {
|
|
463
473
|
case 'idle':
|
|
464
474
|
return ` ${GREY}● Idle${RESET}`;
|
|
465
475
|
case 'recording': {
|
|
466
|
-
|
|
467
|
-
return ` ${RED}${BOLD}● Recording${RESET} ${RED}${secs}s${RESET}`;
|
|
476
|
+
return ` ${RED}${BOLD}● Recording${RESET} ${RED}${formatDuration(state.duration)}${RESET}`;
|
|
468
477
|
}
|
|
469
478
|
case 'transcribing': {
|
|
470
479
|
const sp = SPINNER[state.spinnerFrame % SPINNER.length];
|
|
@@ -558,7 +567,7 @@ function renderMeta() {
|
|
|
558
567
|
const cost = (state.duration / 60 * COST_PER_MIN).toFixed(4);
|
|
559
568
|
const latencyStr = state.latency ? `${(state.latency / 1000).toFixed(1)}s` : '—';
|
|
560
569
|
const histLabel = state.historyIndex >= 0 ? ` · history ${state.historyIndex + 1}/${state.history.length}` : '';
|
|
561
|
-
return ` ${DIM}${state.wordCount} words · ${state.duration
|
|
570
|
+
return ` ${DIM}${state.wordCount} words · ${formatDuration(state.duration)} · latency ${latencyStr} · $${cost}${histLabel}${RESET}`;
|
|
562
571
|
}
|
|
563
572
|
|
|
564
573
|
function renderHelp() {
|
|
@@ -777,22 +786,32 @@ async function transcribe(wavPath) {
|
|
|
777
786
|
const file = new File([blob], 'recording.wav', { type: 'audio/wav' });
|
|
778
787
|
|
|
779
788
|
const t0 = Date.now();
|
|
780
|
-
const result = await callTranscribeAPI(file, {
|
|
781
|
-
signal: AbortSignal.timeout(30_000),
|
|
782
|
-
timestamps: config.timestamps || '',
|
|
783
|
-
diarize: config.diarize || false,
|
|
784
|
-
});
|
|
785
|
-
state.latency = Date.now() - t0;
|
|
786
789
|
|
|
787
|
-
|
|
790
|
+
// For long recordings, chunk and transcribe in parallel (if ffmpeg available)
|
|
791
|
+
const canChunk = ffmpegAvailable() && !config.diarize && state.duration > CHUNK_MIN_SEC;
|
|
792
|
+
let text, segments;
|
|
793
|
+
|
|
794
|
+
if (canChunk) {
|
|
795
|
+
const chunkResult = await transcribeChunkedWav(wavPath, state.duration);
|
|
796
|
+
text = chunkResult.text;
|
|
797
|
+
segments = chunkResult.segments;
|
|
798
|
+
} else {
|
|
799
|
+
const result = await callTranscribeAPI(file, {
|
|
800
|
+
timestamps: config.timestamps || '',
|
|
801
|
+
diarize: config.diarize || false,
|
|
802
|
+
});
|
|
803
|
+
text = result.text;
|
|
804
|
+
segments = result.segments;
|
|
805
|
+
}
|
|
806
|
+
state.latency = Date.now() - t0;
|
|
788
807
|
|
|
789
808
|
if (!text) {
|
|
790
809
|
state.mode = 'error';
|
|
791
810
|
state.error = 'No speech detected';
|
|
792
811
|
} else {
|
|
793
812
|
// Format with speaker labels if diarization is active
|
|
794
|
-
if (config.diarize &&
|
|
795
|
-
state.transcript = formatDiarizedText(
|
|
813
|
+
if (config.diarize && segments) {
|
|
814
|
+
state.transcript = formatDiarizedText(segments, { color: true });
|
|
796
815
|
} else {
|
|
797
816
|
state.transcript = text;
|
|
798
817
|
}
|
|
@@ -806,7 +825,7 @@ async function transcribe(wavPath) {
|
|
|
806
825
|
}
|
|
807
826
|
} catch (err) {
|
|
808
827
|
state.mode = 'error';
|
|
809
|
-
let msg = err.
|
|
828
|
+
let msg = err.message;
|
|
810
829
|
if (err.status === 401) msg += ' — press [s] to reconfigure';
|
|
811
830
|
state.error = msg;
|
|
812
831
|
} finally {
|
|
@@ -817,6 +836,66 @@ async function transcribe(wavPath) {
|
|
|
817
836
|
}
|
|
818
837
|
}
|
|
819
838
|
|
|
839
|
+
/**
 * Transcribe a long recording by cutting it into chunks with ffmpeg and
 * sending the chunks to the transcription API through parallelMap.
 *
 * Steps:
 *   1. Ask findSilenceSplitPoint for a split point near each multiple of
 *      TARGET_CHUNK_SEC.
 *   2. Drop split points that would leave a chunk shorter than MIN_CHUNK_SEC,
 *      which merges that chunk into the one before it.
 *   3. Encode each chunk as Opus/OGG; if that fails or yields an empty file,
 *      fall back to a plain WAV slice.
 *   4. Transcribe the chunks via parallelMap (MAX_PARALLEL is passed through,
 *      presumably as the concurrency limit) and merge the per-chunk results.
 *
 * All chunk files created here are deleted before returning, on success or
 * on failure.
 *
 * @param {string} wavPath - Path of the source recording handed to ffmpeg.
 * @param {number} durationSec - Total recording length in seconds.
 * @returns {Promise<*>} Whatever mergeChunkResults returns; the caller reads
 *   `.text` and `.segments` from it.
 * @throws {Error} If the WAV fallback also fails or produces an empty chunk,
 *   or if any chunk transcription rejects.
 */
async function transcribeChunkedWav(wavPath, durationSec) {
  const tempFiles = [];
  try {
    const numTargetChunks = Math.ceil(durationSec / TARGET_CHUNK_SEC);
    const splitPoints = [0];
    for (let i = 1; i < numTargetChunks; i++) {
      splitPoints.push(await findSilenceSplitPoint(wavPath, i * TARGET_CHUNK_SEC));
    }
    splitPoints.push(durationSec);

    // Merge tiny chunks into the previous one. Walk backwards so splicing
    // does not shift the indices still to be visited; index 0 is never removed.
    for (let i = splitPoints.length - 2; i > 0; i--) {
      if (splitPoints[i + 1] - splitPoints[i] < MIN_CHUNK_SEC) {
        splitPoints.splice(i, 1);
      }
    }

    const numChunks = splitPoints.length - 1;
    // pid + timestamp keeps chunk names distinct across runs and processes.
    const chunkBase = path.join(os.tmpdir(), `dikt-${process.pid}-${Date.now()}`);
    const uploadPaths = [];

    for (let i = 0; i < numChunks; i++) {
      const start = splitPoints[i];
      const dur = splitPoints[i + 1] - start;
      const oggPath = `${chunkBase}-${i}.ogg`;
      try {
        // Preferred: Opus in OGG at 48 kbit/s.
        await execFileAsync('ffmpeg', ['-ss', String(start), '-t', String(dur), '-i', wavPath, '-c:a', 'libopus', '-b:a', '48k', '-y', '-v', 'quiet', oggPath], { stdio: 'pipe' });
        if (fs.statSync(oggPath).size > 0) {
          tempFiles.push(oggPath);
          uploadPaths.push(oggPath);
        } else { throw new Error('empty output'); }
      } catch {
        // Best-effort fallback: any Opus failure falls through to a WAV slice.
        try { fs.unlinkSync(oggPath); } catch {}
        const chunkWav = `${chunkBase}-${i}.wav`;
        // Register the path BEFORE running ffmpeg so that a partial or empty
        // file left behind by a failed run is still removed in `finally`.
        tempFiles.push(chunkWav);
        await execFileAsync('ffmpeg', ['-ss', String(start), '-t', String(dur), '-i', wavPath, '-y', '-v', 'quiet', chunkWav], { stdio: 'pipe' });
        if (!fs.statSync(chunkWav).size) throw new Error(`ffmpeg produced empty chunk ${i}`);
        uploadPaths.push(chunkWav);
      }
    }

    // Transcribe chunks in parallel
    const chunkIndices = Array.from({ length: numChunks }, (_, i) => i);
    const results = await parallelMap(chunkIndices, async (i) => {
      const uploadPath = uploadPaths[i];
      const ext = path.extname(uploadPath).slice(1);
      const blob = await fs.openAsBlob(uploadPath);
      // Fall back to audio/wav when the extension has no MIME_TYPES entry.
      const file = new File([blob], `chunk-${i}.${ext}`, { type: MIME_TYPES[ext] || 'audio/wav' });
      return callTranscribeAPI(file, { timestamps: config.timestamps || '' });
    }, MAX_PARALLEL);

    return mergeChunkResults(results, splitPoints);
  } finally {
    // Cleanup is best-effort: a file that is already gone is not an error.
    for (const f of tempFiles) { try { fs.unlinkSync(f); } catch {} }
  }
}
|
|
898
|
+
|
|
820
899
|
function cleanupRecFile() {
|
|
821
900
|
// On success: delete the file (user got their transcript)
|
|
822
901
|
// On error: keep the file so user can press [r] to retry
|
|
@@ -1063,13 +1142,14 @@ async function callTranscribeAPI(file, { signal, timestamps, diarize, onProgress
|
|
|
1063
1142
|
reject(ne);
|
|
1064
1143
|
});
|
|
1065
1144
|
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1145
|
+
if (signal) {
|
|
1146
|
+
if (signal.aborted) { hreq.destroy(); reject(new DOMException('The operation was aborted', 'AbortError')); return; }
|
|
1147
|
+
signal.addEventListener('abort', () => {
|
|
1148
|
+
hreq.destroy();
|
|
1149
|
+
reject(signal.reason instanceof DOMException ? signal.reason
|
|
1150
|
+
: new DOMException('The operation was aborted', 'AbortError'));
|
|
1151
|
+
}, { once: true });
|
|
1152
|
+
}
|
|
1073
1153
|
|
|
1074
1154
|
// Write body in chunks to enable upload progress tracking
|
|
1075
1155
|
const CHUNK_SIZE = 256 * 1024;
|