dikt 1.3.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/cli.mjs +101 -21
  2. package/package.json +1 -1
package/cli.mjs CHANGED
@@ -49,7 +49,7 @@ function formatFileSize(bytes) {
49
49
 
50
50
  // ── Constants ─────────────────────────────────────────────────────────────────
51
51
 
52
- const VERSION = '1.3.0';
52
+ const VERSION = '1.4.1';
53
53
  const CONFIG_BASE = process.env.XDG_CONFIG_HOME || path.join(os.homedir(), '.config');
54
54
  const CONFIG_DIR = path.join(CONFIG_BASE, 'dikt');
55
55
  const CONFIG_FILE = path.join(CONFIG_DIR, 'config.json');
@@ -458,13 +458,22 @@ function renderKeybar() {
458
458
  return ` ${DIM}[SPACE]${RESET} Record ${copyKey}${autoCopyKey}${histKey}${retryKey}`.trimEnd();
459
459
  }
460
460
 
461
/**
 * Format a duration given in seconds as a compact human-readable string.
 *
 * Output shapes:
 *   - under one minute:  `S.Ss`            (e.g. `5.5s`)
 *   - under one hour:    `Mm SS.Ss`        (e.g. `1m 05.3s`)
 *   - one hour or more:  `Hh MMm SS.Ss`    (e.g. `1h 02m 05.0s`)
 *
 * The value is rounded to whole tenths of a second BEFORE it is split into
 * units. Rounding each unit independently would let a value such as 59.96
 * render as `60.0s`, or 119.96 as `1m 60.0s`, because `toFixed(1)` rounds up
 * after the minute boundary has already been decided.
 *
 * @param {number} seconds - Duration in seconds (may be fractional).
 * @returns {string} The formatted duration.
 */
function formatDuration(seconds) {
  const TENTHS_PER_MINUTE = 600;

  // Single rounding step; everything below is integer arithmetic on tenths.
  const totalTenths = Math.round(seconds * 10);
  if (totalTenths < TENTHS_PER_MINUTE) return `${(totalTenths / 10).toFixed(1)}s`;

  const totalMinutes = Math.floor(totalTenths / TENTHS_PER_MINUTE);
  // Zero-pad to width 4 so the seconds field is always `SS.S`.
  const s = ((totalTenths % TENTHS_PER_MINUTE) / 10).toFixed(1).padStart(4, '0');
  if (totalMinutes < 60) return `${totalMinutes}m ${s}s`;

  const h = Math.floor(totalMinutes / 60);
  const rm = String(totalMinutes % 60).padStart(2, '0');
  return `${h}h ${rm}m ${s}s`;
}
470
+
461
471
  function renderStatus() {
462
472
  switch (state.mode) {
463
473
  case 'idle':
464
474
  return ` ${GREY}● Idle${RESET}`;
465
475
  case 'recording': {
466
- const secs = state.duration.toFixed(1);
467
- return ` ${RED}${BOLD}● Recording${RESET} ${RED}${secs}s${RESET}`;
476
+ return ` ${RED}${BOLD}● Recording${RESET} ${RED}${formatDuration(state.duration)}${RESET}`;
468
477
  }
469
478
  case 'transcribing': {
470
479
  const sp = SPINNER[state.spinnerFrame % SPINNER.length];
@@ -558,7 +567,7 @@ function renderMeta() {
558
567
  const cost = (state.duration / 60 * COST_PER_MIN).toFixed(4);
559
568
  const latencyStr = state.latency ? `${(state.latency / 1000).toFixed(1)}s` : '—';
560
569
  const histLabel = state.historyIndex >= 0 ? ` · history ${state.historyIndex + 1}/${state.history.length}` : '';
561
- return ` ${DIM}${state.wordCount} words · ${state.duration.toFixed(1)}s · latency ${latencyStr} · $${cost}${histLabel}${RESET}`;
570
+ return ` ${DIM}${state.wordCount} words · ${formatDuration(state.duration)} · latency ${latencyStr} · $${cost}${histLabel}${RESET}`;
562
571
  }
563
572
 
564
573
  function renderHelp() {
@@ -777,22 +786,32 @@ async function transcribe(wavPath) {
777
786
  const file = new File([blob], 'recording.wav', { type: 'audio/wav' });
778
787
 
779
788
  const t0 = Date.now();
780
- const result = await callTranscribeAPI(file, {
781
- signal: AbortSignal.timeout(30_000),
782
- timestamps: config.timestamps || '',
783
- diarize: config.diarize || false,
784
- });
785
- state.latency = Date.now() - t0;
786
789
 
787
- const text = result.text;
790
+ // For long recordings, chunk and transcribe in parallel (if ffmpeg available)
791
+ const canChunk = ffmpegAvailable() && !config.diarize && state.duration > CHUNK_MIN_SEC;
792
+ let text, segments;
793
+
794
+ if (canChunk) {
795
+ const chunkResult = await transcribeChunkedWav(wavPath, state.duration);
796
+ text = chunkResult.text;
797
+ segments = chunkResult.segments;
798
+ } else {
799
+ const result = await callTranscribeAPI(file, {
800
+ timestamps: config.timestamps || '',
801
+ diarize: config.diarize || false,
802
+ });
803
+ text = result.text;
804
+ segments = result.segments;
805
+ }
806
+ state.latency = Date.now() - t0;
788
807
 
789
808
  if (!text) {
790
809
  state.mode = 'error';
791
810
  state.error = 'No speech detected';
792
811
  } else {
793
812
  // Format with speaker labels if diarization is active
794
- if (config.diarize && result.segments) {
795
- state.transcript = formatDiarizedText(result.segments, { color: true });
813
+ if (config.diarize && segments) {
814
+ state.transcript = formatDiarizedText(segments, { color: true });
796
815
  } else {
797
816
  state.transcript = text;
798
817
  }
@@ -806,7 +825,7 @@ async function transcribe(wavPath) {
806
825
  }
807
826
  } catch (err) {
808
827
  state.mode = 'error';
809
- let msg = err.name === 'TimeoutError' ? 'Transcription timed out' : err.message;
828
+ let msg = err.message;
810
829
  if (err.status === 401) msg += ' — press [s] to reconfigure';
811
830
  state.error = msg;
812
831
  } finally {
@@ -817,6 +836,66 @@ async function transcribe(wavPath) {
817
836
  }
818
837
  }
819
838
 
839
/**
 * Transcribe a recording by splitting it into chunks with ffmpeg, uploading
 * the chunks through `parallelMap`, and merging the per-chunk results.
 *
 * Steps:
 *   1. Pick split points near every multiple of TARGET_CHUNK_SEC via
 *      `findSilenceSplitPoint` (presumably snaps to a nearby silence — its
 *      implementation is not visible here; confirm).
 *   2. Drop any split point that would leave a chunk shorter than
 *      MIN_CHUNK_SEC, merging that chunk into the one before it.
 *   3. Extract each chunk as Opus-in-OGG; if that fails or yields an empty
 *      file, fall back to extracting the chunk as WAV.
 *   4. Transcribe every chunk with `callTranscribeAPI` and hand the results
 *      to `mergeChunkResults` together with the split points.
 *
 * All temporary chunk files are removed before returning, whether or not the
 * transcription succeeded.
 *
 * @param {string} wavPath - Path of the source WAV recording.
 * @param {number} durationSec - Length of the recording in seconds.
 * @returns {Promise<object>} Whatever `mergeChunkResults` returns; the caller
 *   reads `.text` and `.segments` from it.
 * @throws If the WAV fallback extraction fails or produces an empty file, or
 *   if any chunk transcription rejects.
 */
async function transcribeChunkedWav(wavPath, durationSec) {
  const tempFiles = [];
  try {
    const numTargetChunks = Math.ceil(durationSec / TARGET_CHUNK_SEC);
    const splitPoints = [0];
    for (let i = 1; i < numTargetChunks; i++) {
      splitPoints.push(await findSilenceSplitPoint(wavPath, i * TARGET_CHUNK_SEC));
    }
    splitPoints.push(durationSec);

    // Merge tiny chunks into the previous one. Walk backwards so that
    // removing a boundary does not shift the indices still to be visited.
    for (let i = splitPoints.length - 2; i > 0; i--) {
      if (splitPoints[i + 1] - splitPoints[i] < MIN_CHUNK_SEC) {
        splitPoints.splice(i, 1);
      }
    }

    const numChunks = splitPoints.length - 1;
    const chunkBase = path.join(os.tmpdir(), `dikt-${process.pid}-${Date.now()}`);
    const uploadPaths = [];

    for (let i = 0; i < numChunks; i++) {
      const start = splitPoints[i];
      const dur = splitPoints[i + 1] - start;
      const rangeArgs = ['-ss', String(start), '-t', String(dur), '-i', wavPath];
      const oggPath = `${chunkBase}-${i}.ogg`;
      try {
        // Register the path BEFORE ffmpeg writes to it so the `finally`
        // cleanup removes it even when extraction fails part-way through.
        tempFiles.push(oggPath);
        await execFileAsync('ffmpeg', [...rangeArgs, '-c:a', 'libopus', '-b:a', '48k', '-y', '-v', 'quiet', oggPath], { stdio: 'pipe' });
        if (fs.statSync(oggPath).size > 0) {
          uploadPaths.push(oggPath);
        } else { throw new Error('empty output'); }
      } catch {
        // Opus encoding is best-effort (e.g. ffmpeg built without libopus):
        // discard whatever was written and fall back to a plain WAV chunk.
        try { fs.unlinkSync(oggPath); } catch { /* nothing was written */ }
        const chunkWav = `${chunkBase}-${i}.wav`;
        // Same early registration: a failed or empty WAV extraction must not
        // leave a stray file behind in the temp directory.
        tempFiles.push(chunkWav);
        await execFileAsync('ffmpeg', [...rangeArgs, '-y', '-v', 'quiet', chunkWav], { stdio: 'pipe' });
        if (!fs.statSync(chunkWav).size) throw new Error(`ffmpeg produced empty chunk ${i}`);
        uploadPaths.push(chunkWav);
      }
    }

    // Transcribe chunks in parallel
    const chunkIndices = Array.from({ length: numChunks }, (_, i) => i);
    const results = await parallelMap(chunkIndices, async (i) => {
      const uploadPath = uploadPaths[i];
      const ext = path.extname(uploadPath).slice(1);
      const blob = await fs.openAsBlob(uploadPath);
      const file = new File([blob], `chunk-${i}.${ext}`, { type: MIME_TYPES[ext] || 'audio/wav' });
      return callTranscribeAPI(file, { timestamps: config.timestamps || '' });
    }, MAX_PARALLEL);

    return mergeChunkResults(results, splitPoints);
  } finally {
    // Best-effort cleanup: a path may already be gone (failed OGG attempt) or
    // never have been created, so unlink errors are deliberately ignored.
    for (const f of tempFiles) { try { fs.unlinkSync(f); } catch { /* already removed */ } }
  }
}
898
+
820
899
  function cleanupRecFile() {
821
900
  // On success: delete the file (user got their transcript)
822
901
  // On error: keep the file so user can press [r] to retry
@@ -1063,13 +1142,14 @@ async function callTranscribeAPI(file, { signal, timestamps, diarize, onProgress
1063
1142
  reject(ne);
1064
1143
  });
1065
1144
 
1066
- const abortSig = signal || AbortSignal.timeout(30_000);
1067
- if (abortSig.aborted) { hreq.destroy(); reject(new DOMException('The operation was aborted', 'AbortError')); return; }
1068
- abortSig.addEventListener('abort', () => {
1069
- hreq.destroy();
1070
- reject(abortSig.reason instanceof DOMException ? abortSig.reason
1071
- : new DOMException('The operation was aborted', 'AbortError'));
1072
- }, { once: true });
1145
+ if (signal) {
1146
+ if (signal.aborted) { hreq.destroy(); reject(new DOMException('The operation was aborted', 'AbortError')); return; }
1147
+ signal.addEventListener('abort', () => {
1148
+ hreq.destroy();
1149
+ reject(signal.reason instanceof DOMException ? signal.reason
1150
+ : new DOMException('The operation was aborted', 'AbortError'));
1151
+ }, { once: true });
1152
+ }
1073
1153
 
1074
1154
  // Write body in chunks to enable upload progress tracking
1075
1155
  const CHUNK_SIZE = 256 * 1024;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dikt",
3
- "version": "1.3.0",
3
+ "version": "1.4.1",
4
4
  "description": "Voice dictation for the terminal.",
5
5
  "type": "module",
6
6
  "bin": {