dikt 1.4.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/cli.mjs +89 -18
  2. package/package.json +1 -1
package/cli.mjs CHANGED
@@ -49,7 +49,7 @@ function formatFileSize(bytes) {
49
49
 
50
50
  // ── Constants ─────────────────────────────────────────────────────────────────
51
51
 
52
- const VERSION = '1.4.0';
52
+ const VERSION = '1.4.1';
53
53
  const CONFIG_BASE = process.env.XDG_CONFIG_HOME || path.join(os.homedir(), '.config');
54
54
  const CONFIG_DIR = path.join(CONFIG_BASE, 'dikt');
55
55
  const CONFIG_FILE = path.join(CONFIG_DIR, 'config.json');
@@ -786,22 +786,32 @@ async function transcribe(wavPath) {
786
786
  const file = new File([blob], 'recording.wav', { type: 'audio/wav' });
787
787
 
788
788
  const t0 = Date.now();
789
- const result = await callTranscribeAPI(file, {
790
- signal: AbortSignal.timeout(30_000),
791
- timestamps: config.timestamps || '',
792
- diarize: config.diarize || false,
793
- });
794
- state.latency = Date.now() - t0;
795
789
 
796
- const text = result.text;
790
+ // For long recordings, chunk and transcribe in parallel (if ffmpeg available)
791
+ const canChunk = ffmpegAvailable() && !config.diarize && state.duration > CHUNK_MIN_SEC;
792
+ let text, segments;
793
+
794
+ if (canChunk) {
795
+ const chunkResult = await transcribeChunkedWav(wavPath, state.duration);
796
+ text = chunkResult.text;
797
+ segments = chunkResult.segments;
798
+ } else {
799
+ const result = await callTranscribeAPI(file, {
800
+ timestamps: config.timestamps || '',
801
+ diarize: config.diarize || false,
802
+ });
803
+ text = result.text;
804
+ segments = result.segments;
805
+ }
806
+ state.latency = Date.now() - t0;
797
807
 
798
808
  if (!text) {
799
809
  state.mode = 'error';
800
810
  state.error = 'No speech detected';
801
811
  } else {
802
812
  // Format with speaker labels if diarization is active
803
- if (config.diarize && result.segments) {
804
- state.transcript = formatDiarizedText(result.segments, { color: true });
813
+ if (config.diarize && segments) {
814
+ state.transcript = formatDiarizedText(segments, { color: true });
805
815
  } else {
806
816
  state.transcript = text;
807
817
  }
@@ -815,7 +825,7 @@ async function transcribe(wavPath) {
815
825
  }
816
826
  } catch (err) {
817
827
  state.mode = 'error';
818
- let msg = err.name === 'TimeoutError' ? 'Transcription timed out' : err.message;
828
+ let msg = err.message;
819
829
  if (err.status === 401) msg += ' — press [s] to reconfigure';
820
830
  state.error = msg;
821
831
  } finally {
@@ -826,6 +836,66 @@ async function transcribe(wavPath) {
826
836
  }
827
837
  }
828
838
 
839
+ async function transcribeChunkedWav(wavPath, durationSec) {
840
+ const tempFiles = [];
841
+ try {
842
+ const numTargetChunks = Math.ceil(durationSec / TARGET_CHUNK_SEC);
843
+ const splitPoints = [0];
844
+ for (let i = 1; i < numTargetChunks; i++) {
845
+ splitPoints.push(await findSilenceSplitPoint(wavPath, i * TARGET_CHUNK_SEC));
846
+ }
847
+ splitPoints.push(durationSec);
848
+
849
+ // Merge tiny trailing chunks into the previous one
850
+ for (let i = splitPoints.length - 2; i > 0; i--) {
851
+ if (splitPoints[i + 1] - splitPoints[i] < MIN_CHUNK_SEC) {
852
+ splitPoints.splice(i, 1);
853
+ }
854
+ }
855
+
856
+ const numChunks = splitPoints.length - 1;
857
+ const chunkBase = path.join(os.tmpdir(), `dikt-${process.pid}-${Date.now()}`);
858
+ const uploadPaths = [];
859
+
860
+ for (let i = 0; i < numChunks; i++) {
861
+ const start = splitPoints[i];
862
+ const dur = splitPoints[i + 1] - start;
863
+ const oggPath = `${chunkBase}-${i}.ogg`;
864
+ try {
865
+ await execFileAsync('ffmpeg', ['-ss', String(start), '-t', String(dur), '-i', wavPath, '-c:a', 'libopus', '-b:a', '48k', '-y', '-v', 'quiet', oggPath], { stdio: 'pipe' });
866
+ if (fs.statSync(oggPath).size > 0) {
867
+ tempFiles.push(oggPath);
868
+ uploadPaths.push(oggPath);
869
+ } else { throw new Error('empty output'); }
870
+ } catch {
871
+ try { fs.unlinkSync(oggPath); } catch {}
872
+ const chunkWav = `${chunkBase}-${i}.wav`;
873
+ await execFileAsync('ffmpeg', ['-ss', String(start), '-t', String(dur), '-i', wavPath, '-y', '-v', 'quiet', chunkWav], { stdio: 'pipe' });
874
+ if (!fs.statSync(chunkWav).size) throw new Error(`ffmpeg produced empty chunk ${i}`);
875
+ tempFiles.push(chunkWav);
876
+ uploadPaths.push(chunkWav);
877
+ }
878
+ }
879
+
880
+ // Transcribe chunks in parallel
881
+ let completed = 0;
882
+ const chunkIndices = Array.from({ length: numChunks }, (_, i) => i);
883
+ const results = await parallelMap(chunkIndices, async (i) => {
884
+ const uploadPath = uploadPaths[i];
885
+ const ext = path.extname(uploadPath).slice(1);
886
+ const blob = await fs.openAsBlob(uploadPath);
887
+ const file = new File([blob], `chunk-${i}.${ext}`, { type: MIME_TYPES[ext] || 'audio/wav' });
888
+ const result = await callTranscribeAPI(file, { timestamps: config.timestamps || '' });
889
+ completed++;
890
+ return result;
891
+ }, MAX_PARALLEL);
892
+
893
+ return mergeChunkResults(results, splitPoints);
894
+ } finally {
895
+ for (const f of tempFiles) { try { fs.unlinkSync(f); } catch {} }
896
+ }
897
+ }
898
+
829
899
  function cleanupRecFile() {
830
900
  // On success: delete the file (user got their transcript)
831
901
  // On error: keep the file so user can press [r] to retry
@@ -1072,13 +1142,14 @@ async function callTranscribeAPI(file, { signal, timestamps, diarize, onProgress
1072
1142
  reject(ne);
1073
1143
  });
1074
1144
 
1075
- const abortSig = signal || AbortSignal.timeout(30_000);
1076
- if (abortSig.aborted) { hreq.destroy(); reject(new DOMException('The operation was aborted', 'AbortError')); return; }
1077
- abortSig.addEventListener('abort', () => {
1078
- hreq.destroy();
1079
- reject(abortSig.reason instanceof DOMException ? abortSig.reason
1080
- : new DOMException('The operation was aborted', 'AbortError'));
1081
- }, { once: true });
1145
+ if (signal) {
1146
+ if (signal.aborted) { hreq.destroy(); reject(new DOMException('The operation was aborted', 'AbortError')); return; }
1147
+ signal.addEventListener('abort', () => {
1148
+ hreq.destroy();
1149
+ reject(signal.reason instanceof DOMException ? signal.reason
1150
+ : new DOMException('The operation was aborted', 'AbortError'));
1151
+ }, { once: true });
1152
+ }
1082
1153
 
1083
1154
  // Write body in chunks to enable upload progress tracking
1084
1155
  const CHUNK_SIZE = 256 * 1024;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dikt",
3
- "version": "1.4.0",
3
+ "version": "1.4.1",
4
4
  "description": "Voice dictation for the terminal.",
5
5
  "type": "module",
6
6
  "bin": {