dikt 1.4.0 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.mjs +89 -18
- package/package.json +1 -1
package/cli.mjs
CHANGED
|
@@ -49,7 +49,7 @@ function formatFileSize(bytes) {
|
|
|
49
49
|
|
|
50
50
|
// ── Constants ─────────────────────────────────────────────────────────────────
|
|
51
51
|
|
|
52
|
-
const VERSION = '1.4.0';
|
|
52
|
+
const VERSION = '1.4.1';
|
|
53
53
|
const CONFIG_BASE = process.env.XDG_CONFIG_HOME || path.join(os.homedir(), '.config');
|
|
54
54
|
const CONFIG_DIR = path.join(CONFIG_BASE, 'dikt');
|
|
55
55
|
const CONFIG_FILE = path.join(CONFIG_DIR, 'config.json');
|
|
@@ -786,22 +786,32 @@ async function transcribe(wavPath) {
|
|
|
786
786
|
const file = new File([blob], 'recording.wav', { type: 'audio/wav' });
|
|
787
787
|
|
|
788
788
|
const t0 = Date.now();
|
|
789
|
-
const result = await callTranscribeAPI(file, {
|
|
790
|
-
signal: AbortSignal.timeout(30_000),
|
|
791
|
-
timestamps: config.timestamps || '',
|
|
792
|
-
diarize: config.diarize || false,
|
|
793
|
-
});
|
|
794
|
-
state.latency = Date.now() - t0;
|
|
795
789
|
|
|
796
|
-
|
|
790
|
+
// For long recordings, chunk and transcribe in parallel (if ffmpeg available)
|
|
791
|
+
const canChunk = ffmpegAvailable() && !config.diarize && state.duration > CHUNK_MIN_SEC;
|
|
792
|
+
let text, segments;
|
|
793
|
+
|
|
794
|
+
if (canChunk) {
|
|
795
|
+
const chunkResult = await transcribeChunkedWav(wavPath, state.duration);
|
|
796
|
+
text = chunkResult.text;
|
|
797
|
+
segments = chunkResult.segments;
|
|
798
|
+
} else {
|
|
799
|
+
const result = await callTranscribeAPI(file, {
|
|
800
|
+
timestamps: config.timestamps || '',
|
|
801
|
+
diarize: config.diarize || false,
|
|
802
|
+
});
|
|
803
|
+
text = result.text;
|
|
804
|
+
segments = result.segments;
|
|
805
|
+
}
|
|
806
|
+
state.latency = Date.now() - t0;
|
|
797
807
|
|
|
798
808
|
if (!text) {
|
|
799
809
|
state.mode = 'error';
|
|
800
810
|
state.error = 'No speech detected';
|
|
801
811
|
} else {
|
|
802
812
|
// Format with speaker labels if diarization is active
|
|
803
|
-
if (config.diarize &&
|
|
804
|
-
state.transcript = formatDiarizedText(
|
|
813
|
+
if (config.diarize && segments) {
|
|
814
|
+
state.transcript = formatDiarizedText(segments, { color: true });
|
|
805
815
|
} else {
|
|
806
816
|
state.transcript = text;
|
|
807
817
|
}
|
|
@@ -815,7 +825,7 @@ async function transcribe(wavPath) {
|
|
|
815
825
|
}
|
|
816
826
|
} catch (err) {
|
|
817
827
|
state.mode = 'error';
|
|
818
|
-
let msg = err.
|
|
828
|
+
let msg = err.message;
|
|
819
829
|
if (err.status === 401) msg += ' — press [s] to reconfigure';
|
|
820
830
|
state.error = msg;
|
|
821
831
|
} finally {
|
|
@@ -826,6 +836,66 @@ async function transcribe(wavPath) {
|
|
|
826
836
|
}
|
|
827
837
|
}
|
|
828
838
|
|
|
839
|
+
/**
 * Split a recording into chunks with ffmpeg, transcribe the chunks in
 * parallel, and merge the per-chunk results.
 *
 * One split point is requested from `findSilenceSplitPoint` for every multiple
 * of TARGET_CHUNK_SEC inside the recording. Each chunk is encoded to Opus/OGG
 * first; if that fails or yields an empty file, the chunk is re-extracted
 * with ffmpeg's default encoding into a `.wav` file instead.
 *
 * @param {string} wavPath - Path of the recording passed to ffmpeg as input.
 * @param {number} durationSec - Total length of the recording in seconds.
 * @returns {Promise<*>} The value produced by `mergeChunkResults`; the caller
 *   reads its `text` and `segments` properties.
 * @throws If a chunk cannot be extracted even as WAV, or if a transcription
 *   request rejects. Temporary chunk files are removed in either case.
 */
async function transcribeChunkedWav(wavPath, durationSec) {
  // Every path that may exist on disk is registered here *before* it is
  // created, so the `finally` below also removes partially written files.
  const tempFiles = [];
  try {
    const numTargetChunks = Math.ceil(durationSec / TARGET_CHUNK_SEC);
    const splitPoints = [0];
    for (let i = 1; i < numTargetChunks; i++) {
      splitPoints.push(await findSilenceSplitPoint(wavPath, i * TARGET_CHUNK_SEC));
    }
    splitPoints.push(durationSec);

    // Merge tiny chunks into the previous one. Walking backwards keeps the
    // indices still to be visited valid after a splice. Index 0 (the start of
    // the recording) is never removed.
    for (let i = splitPoints.length - 2; i > 0; i--) {
      if (splitPoints[i + 1] - splitPoints[i] < MIN_CHUNK_SEC) {
        splitPoints.splice(i, 1);
      }
    }

    const numChunks = splitPoints.length - 1;
    const chunkBase = path.join(os.tmpdir(), `dikt-${process.pid}-${Date.now()}`);
    const uploadPaths = [];

    for (let i = 0; i < numChunks; i++) {
      const start = splitPoints[i];
      const dur = splitPoints[i + 1] - start;
      const oggPath = `${chunkBase}-${i}.ogg`;
      try {
        await execFileAsync('ffmpeg', ['-ss', String(start), '-t', String(dur), '-i', wavPath, '-c:a', 'libopus', '-b:a', '48k', '-y', '-v', 'quiet', oggPath], { stdio: 'pipe' });
        if (fs.statSync(oggPath).size > 0) {
          tempFiles.push(oggPath);
          uploadPaths.push(oggPath);
        } else { throw new Error('empty output'); }
      } catch {
        // Opus encoding failed or produced nothing: drop the leftover file
        // (best effort) and fall back to a plain ffmpeg extraction.
        try { fs.unlinkSync(oggPath); } catch {}
        const chunkWav = `${chunkBase}-${i}.wav`;
        // Register before running ffmpeg: if ffmpeg fails midway or writes an
        // empty file, the leftover is still cleaned up in `finally`.
        tempFiles.push(chunkWav);
        await execFileAsync('ffmpeg', ['-ss', String(start), '-t', String(dur), '-i', wavPath, '-y', '-v', 'quiet', chunkWav], { stdio: 'pipe' });
        if (!fs.statSync(chunkWav).size) throw new Error(`ffmpeg produced empty chunk ${i}`);
        uploadPaths.push(chunkWav);
      }
    }

    // Transcribe chunks in parallel
    const chunkIndices = Array.from({ length: numChunks }, (_, i) => i);
    const results = await parallelMap(chunkIndices, async (i) => {
      const uploadPath = uploadPaths[i];
      const ext = path.extname(uploadPath).slice(1);
      const blob = await fs.openAsBlob(uploadPath);
      const file = new File([blob], `chunk-${i}.${ext}`, { type: MIME_TYPES[ext] || 'audio/wav' });
      return callTranscribeAPI(file, { timestamps: config.timestamps || '' });
    }, MAX_PARALLEL);

    return mergeChunkResults(results, splitPoints);
  } finally {
    // Best-effort cleanup: a file that is already gone (or was never created)
    // must not mask the real result or error.
    for (const f of tempFiles) { try { fs.unlinkSync(f); } catch {} }
  }
}
|
|
898
|
+
|
|
829
899
|
function cleanupRecFile() {
|
|
830
900
|
// On success: delete the file (user got their transcript)
|
|
831
901
|
// On error: keep the file so user can press [r] to retry
|
|
@@ -1072,13 +1142,14 @@ async function callTranscribeAPI(file, { signal, timestamps, diarize, onProgress
|
|
|
1072
1142
|
reject(ne);
|
|
1073
1143
|
});
|
|
1074
1144
|
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1145
|
+
if (signal) {
|
|
1146
|
+
if (signal.aborted) { hreq.destroy(); reject(new DOMException('The operation was aborted', 'AbortError')); return; }
|
|
1147
|
+
signal.addEventListener('abort', () => {
|
|
1148
|
+
hreq.destroy();
|
|
1149
|
+
reject(signal.reason instanceof DOMException ? signal.reason
|
|
1150
|
+
: new DOMException('The operation was aborted', 'AbortError'));
|
|
1151
|
+
}, { once: true });
|
|
1152
|
+
}
|
|
1082
1153
|
|
|
1083
1154
|
// Write body in chunks to enable upload progress tracking
|
|
1084
1155
|
const CHUNK_SIZE = 256 * 1024;
|