npm - @steipete/summarize-core - Versions diffs - 0.11.0 → 0.12.0 - Mend

@steipete/summarize-core 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (117)

package/dist/esm/transcription/whisper/core.js (changed)

@@ -2,459 +2,385 @@ import { randomUUID } from "node:crypto";
 import { promises as fs } from "node:fs";
 import { tmpdir } from "node:os";
 import { basename, join } from "node:path";
-import { resolvePreferredOnnxModel, transcribeWithOnnxCli, transcribeWithOnnxCliFile, } from "../onnx-cli.js";
+import { transcribeWithOnnxCli, transcribeWithOnnxCliFile } from "../onnx-cli.js";
+import { transcribeChunkedFile } from "./chunking.js";
 import { DEFAULT_SEGMENT_SECONDS, MAX_OPENAI_UPLOAD_BYTES } from "./constants.js";
-import { transcribeWithFal } from "./fal.js";
-import { isFfmpegAvailable, runFfmpegSegment, transcodeBytesToMp3 } from "./ffmpeg.js";
+import { isFfmpegAvailable, transcodeBytesToMp3 } from "./ffmpeg.js";
 import { shouldRetryGroqViaFfmpeg, transcribeWithGroq } from "./groq.js";
-import { shouldRetryOpenAiViaFfmpeg, transcribeWithOpenAi } from "./openai.js";
-import { ensureWhisperFilenameExtension, formatBytes, readFirstBytes, wrapError } from "./utils.js";
+import { resolveOnnxModelPreference } from "./preferences.js";
+import { transcribeBytesWithRemoteFallbacks, transcribeFileWithRemoteFallbacks, transcribeOversizedBytesViaTempFile, } from "./remote.js";
+import { ensureWhisperFilenameExtension, formatBytes, wrapError } from "./utils.js";
 import { isWhisperCppReady, transcribeWithWhisperCppFile } from "./whisper-cpp.js";
-function resolveTranscriberPreference(env) {
-    const raw = env.SUMMARIZE_TRANSCRIBER?.trim().toLowerCase();
-    if (raw === "auto" || raw === "whisper" || raw === "parakeet" || raw === "canary")
-        return raw;
-    return "auto";
-}
-function resolveOnnxModelPreference(env) {
-    const preference = resolveTranscriberPreference(env);
-    if (preference === "parakeet" || preference === "canary")
-        return preference;
-    if (preference === "auto")
-        return resolvePreferredOnnxModel(env);
-    return null;
-}
-export async function transcribeMediaWithWhisper({ bytes, mediaType, filename, groqApiKey, skipGroq = false, openaiApiKey, falApiKey, totalDurationSeconds = null, onProgress, env = process.env, }) {
+export async function transcribeMediaWithWhisper({ bytes, mediaType, filename, groqApiKey, skipGroq = false, assemblyaiApiKey = null, geminiApiKey = null, openaiApiKey, falApiKey, totalDurationSeconds = null, onProgress, env = process.env, }) {
     const notes = [];
-    // 1. Groq (cloud, free, fastest)
     let groqError = null;
     if (groqApiKey && !skipGroq) {
-        try {
-            const text = await transcribeWithGroq(bytes, mediaType, filename, groqApiKey);
-            if (text) {
-                return { text, provider: "groq", error: null, notes };
-            }
-            groqError = new Error("Groq transcription returned empty text");
-        }
-        catch (error) {
-            groqError = wrapError("Groq transcription failed", error);
-        }
-    }
-    if (!skipGroq && groqApiKey && groqError && shouldRetryGroqViaFfmpeg(groqError)) {
-        const canTranscode = await isFfmpegAvailable();
-        if (canTranscode) {
-            try {
-                notes.push("Groq could not decode media; transcoding via ffmpeg and retrying");
-                const mp3Bytes = await transcodeBytesToMp3(bytes);
-                const retried = await transcribeWithGroq(mp3Bytes, "audio/mpeg", "audio.mp3", groqApiKey);
-                if (retried) {
-                    return { text: retried, provider: "groq", error: null, notes };
-                }
-                groqError = new Error("Groq transcription returned empty text after ffmpeg transcode");
-                bytes = mp3Bytes;
-                mediaType = "audio/mpeg";
-                filename = "audio.mp3";
-            }
-            catch (error) {
-                notes.push(`ffmpeg transcode failed; cannot retry Groq decode error: ${error instanceof Error ? error.message : String(error)}`);
-            }
-        }
-        else {
-            notes.push("Groq could not decode media; install ffmpeg to enable transcoding retry");
+        const groqResult = await transcribeWithGroqFirst({
+            bytes,
+            mediaType,
+            filename,
+            groqApiKey,
+            notes,
+        });
+        bytes = groqResult.bytes;
+        mediaType = groqResult.mediaType;
+        filename = groqResult.filename;
+        if (groqResult.text) {
+            return { text: groqResult.text, provider: "groq", error: null, notes };
         }
+        groqError = groqResult.error;
     }
     if (groqError) {
-        notes.push(`Groq transcription failed; falling back to local/OpenAI: ${groqError.message}`);
+        notes.push(`Groq transcription failed; falling back to local/AssemblyAI/Gemini/OpenAI: ${groqError.message}`);
     }
-    // 2. ONNX (local)
-    const onnxPreference = resolveOnnxModelPreference(env);
-    if (onnxPreference) {
-        const onnx = await transcribeWithOnnxCli({
-            model: onnxPreference,
+    const onnx = await transcribeWithLocalOnnx({
+        bytes,
+        mediaType,
+        filename,
+        totalDurationSeconds,
+        onProgress,
+        env,
+        notes,
+    });
+    if (onnx)
+        return onnx;
+    const local = await transcribeWithLocalWhisperBytes({
+        bytes,
+        mediaType,
+        filename,
+        totalDurationSeconds,
+        onProgress,
+        notes,
+    });
+    if (local)
+        return local;
+    return await transcribeBytesWithRemoteFallbacks({
+        bytes,
+        mediaType,
+        filename,
+        notes,
+        groqApiKey,
+        groqError,
+        assemblyaiApiKey,
+        geminiApiKey,
+        openaiApiKey,
+        falApiKey,
+        env,
+        onProgress,
+        transcribeOversizedBytesWithChunking: ({ bytes, mediaType, filename, onProgress }) => transcribeOversizedBytesViaTempFile({
             bytes,
             mediaType,
             filename,
+            onProgress,
+            transcribeFile: ({ filePath, mediaType, filename, onProgress }) => transcribeMediaFileWithWhisper({
+                filePath,
+                mediaType,
+                filename,
+                groqApiKey,
+                assemblyaiApiKey,
+                geminiApiKey,
+                openaiApiKey,
+                falApiKey,
+                segmentSeconds: DEFAULT_SEGMENT_SECONDS,
+                onProgress,
+                env,
+            }),
+        }),
+    });
+}
+export async function transcribeMediaFileWithWhisper({ filePath, mediaType, filename, groqApiKey, assemblyaiApiKey = null, geminiApiKey = null, openaiApiKey, falApiKey, segmentSeconds = DEFAULT_SEGMENT_SECONDS, totalDurationSeconds = null, onProgress = null, env = process.env, }) {
+    const notes = [];
+    let skipGroqInNestedCalls = false;
+    let groqError = null;
+    if (groqApiKey) {
+        skipGroqInNestedCalls = true;
+        const groqResult = await transcribeGroqFileFirst({
+            filePath,
+            mediaType,
+            filename,
+            groqApiKey,
+            assemblyaiApiKey,
+            geminiApiKey,
+            openaiApiKey,
+            falApiKey,
+            segmentSeconds,
             totalDurationSeconds,
             onProgress,
             env,
-        });
-        if (onnx.text) {
-            if (onnx.notes.length > 0)
-                notes.push(...onnx.notes);
-            return { ...onnx, notes };
-        }
-        if (onnx.notes.length > 0)
-            notes.push(...onnx.notes);
-        if (onnx.error) {
-            notes.push(`${onnx.provider ?? "onnx"} failed; falling back to Whisper: ${onnx.error.message}`);
-        }
-    }
-    // 3. whisper.cpp (local)
-    const localReady = await isWhisperCppReady();
-    let local = null;
-    if (localReady) {
-        const nameHint = filename?.trim() ? basename(filename.trim()) : "media";
-        const tempFile = join(tmpdir(), `summarize-whisper-local-${randomUUID()}-${ensureWhisperFilenameExtension(nameHint, mediaType)}`);
-        try {
-            await fs.writeFile(tempFile, bytes);
-            try {
-                local = await transcribeWithWhisperCppFile({
-                    filePath: tempFile,
-                    mediaType,
-                    totalDurationSeconds,
-                    onProgress,
-                });
-            }
-            catch (error) {
-                local = {
-                    text: null,
-                    provider: "whisper.cpp",
-                    error: wrapError("whisper.cpp failed", error),
-                    notes: [],
-                };
-            }
-            if (local.text) {
-                if (local.notes.length > 0)
-                    notes.push(...local.notes);
-                return { ...local, notes };
-            }
-            if (local.notes.length > 0)
-                notes.push(...local.notes);
-            if (local.error) {
-                notes.push(`whisper.cpp failed; falling back to remote Whisper: ${local.error.message}`);
-            }
-        }
-        finally {
-            await fs.unlink(tempFile).catch(() => { });
-        }
-    }
-    // 4. OpenAI / FAL (cloud fallbacks)
-    if (!groqApiKey && !openaiApiKey && !falApiKey) {
-        return {
-            text: null,
-            provider: null,
-            error: new Error("No transcription providers available (install whisper-cpp or set GROQ_API_KEY, OPENAI_API_KEY, or FAL_KEY)"),
             notes,
-        };
+        });
+        if (groqResult.text)
+            return groqResult;
+        groqError = groqResult.error;
     }
-    if (openaiApiKey && bytes.byteLength > MAX_OPENAI_UPLOAD_BYTES) {
-        const canChunk = await isFfmpegAvailable();
-        if (canChunk) {
-            const tempFile = join(tmpdir(), `summarize-whisper-${randomUUID()}`);
-            try {
-                await fs.writeFile(tempFile, bytes);
-                const chunked = await transcribeMediaFileWithWhisper({
-                    filePath: tempFile,
-                    mediaType,
-                    filename,
-                    groqApiKey,
-                    openaiApiKey,
-                    falApiKey,
-                    segmentSeconds: DEFAULT_SEGMENT_SECONDS,
-                    onProgress,
-                    env,
-                });
-                return chunked;
-            }
-            finally {
-                await fs.unlink(tempFile).catch(() => { });
-            }
-        }
-        notes.push(`Media too large for Whisper upload (${formatBytes(bytes.byteLength)}); transcribing first ${formatBytes(MAX_OPENAI_UPLOAD_BYTES)} only (install ffmpeg for full transcription)`);
-        bytes = bytes.slice(0, MAX_OPENAI_UPLOAD_BYTES);
+    const onnx = await transcribeWithLocalOnnxFile({
+        filePath,
+        mediaType,
+        totalDurationSeconds,
+        onProgress,
+        env,
+        notes,
+    });
+    if (onnx)
+        return onnx;
+    const local = await transcribeWithLocalWhisperFile({
+        filePath,
+        mediaType,
+        totalDurationSeconds,
+        onProgress,
+        notes,
+    });
+    if (local)
+        return local;
+    return await transcribeFileWithRemoteFallbacks({
+        filePath,
+        mediaType,
+        filename,
+        notes,
+        groqApiKey,
+        groqError,
+        assemblyaiApiKey,
+        geminiApiKey,
+        openaiApiKey,
+        falApiKey,
+        env,
+        totalDurationSeconds,
+        onProgress,
+        transcribeChunkedFile: ({ filePath, segmentSeconds, totalDurationSeconds, onProgress }) => transcribeChunkedFile({
+            filePath,
+            segmentSeconds,
+            totalDurationSeconds,
+            onProgress,
+            transcribeSegment: ({ bytes, filename }) => transcribeMediaWithWhisper({
+                bytes,
+                mediaType: "audio/mpeg",
+                filename,
+                groqApiKey,
+                skipGroq: skipGroqInNestedCalls,
+                assemblyaiApiKey,
+                geminiApiKey,
+                openaiApiKey,
+                falApiKey,
+                env,
+            }),
+        }),
+    });
+}
+async function transcribeWithGroqFirst({ bytes, mediaType, filename, groqApiKey, notes, }) {
+    let groqError = null;
+    try {
+        const text = await transcribeWithGroq(bytes, mediaType, filename, groqApiKey);
+        if (text)
+            return { text, error: null, bytes, mediaType, filename };
+        groqError = new Error("Groq transcription returned empty text");
     }
-    let openaiError = null;
-    if (openaiApiKey) {
-        try {
-            const text = await transcribeWithOpenAi(bytes, mediaType, filename, openaiApiKey, { env });
-            if (text) {
-                return { text, provider: "openai", error: null, notes };
-            }
-            openaiError = new Error("OpenAI transcription returned empty text");
-        }
-        catch (error) {
-            openaiError = wrapError("OpenAI transcription failed", error);
-        }
+    catch (error) {
+        groqError = wrapError("Groq transcription failed", error);
     }
-    if (openaiApiKey && openaiError && shouldRetryOpenAiViaFfmpeg(openaiError)) {
+    if (groqError && shouldRetryGroqViaFfmpeg(groqError)) {
         const canTranscode = await isFfmpegAvailable();
         if (canTranscode) {
             try {
-                // Some providers hand out containers/codecs Whisper rejects. Transcoding to a small mono MP3
-                // is the most reliable cross-format fallback (and also reduces upload size).
-                notes.push("OpenAI could not decode media; transcoding via ffmpeg and retrying");
+                notes.push("Groq could not decode media; transcoding via ffmpeg and retrying");
                 const mp3Bytes = await transcodeBytesToMp3(bytes);
-                const retried = await transcribeWithOpenAi(mp3Bytes, "audio/mpeg", "audio.mp3", openaiApiKey, { env });
+                const retried = await transcribeWithGroq(mp3Bytes, "audio/mpeg", "audio.mp3", groqApiKey);
                 if (retried) {
-                    return { text: retried, provider: "openai", error: null, notes };
+                    return {
+                        text: retried,
+                        error: null,
+                        bytes: mp3Bytes,
+                        mediaType: "audio/mpeg",
+                        filename: "audio.mp3",
+                    };
                 }
-                openaiError = new Error("OpenAI transcription returned empty text after ffmpeg transcode");
+                groqError = new Error("Groq transcription returned empty text after ffmpeg transcode");
                 bytes = mp3Bytes;
                 mediaType = "audio/mpeg";
                 filename = "audio.mp3";
             }
             catch (error) {
-                notes.push(`ffmpeg transcode failed; cannot retry OpenAI decode error: ${error instanceof Error ? error.message : String(error)}`);
+                notes.push(`ffmpeg transcode failed; cannot retry Groq decode error: ${error instanceof Error ? error.message : String(error)}`);
             }
         }
         else {
-            notes.push("OpenAI could not decode media; install ffmpeg to enable transcoding retry");
+            notes.push("Groq could not decode media; install ffmpeg to enable transcoding retry");
         }
     }
-    const canUseFal = Boolean(falApiKey) && mediaType.toLowerCase().startsWith("audio/");
-    if (openaiError && canUseFal) {
-        notes.push(`OpenAI transcription failed; falling back to FAL: ${openaiError.message}`);
-    }
-    if (falApiKey && !canUseFal) {
-        notes.push(`Skipping FAL transcription: unsupported mediaType ${mediaType}`);
-    }
-    if (falApiKey && canUseFal) {
+    return { text: null, error: groqError, bytes, mediaType, filename };
+}
+async function transcribeGroqFileFirst({ filePath, mediaType, filename, groqApiKey, assemblyaiApiKey, geminiApiKey, openaiApiKey, falApiKey, segmentSeconds, totalDurationSeconds, onProgress, env, notes, }) {
+    const stat = await fs.stat(filePath);
+    if (stat.size <= MAX_OPENAI_UPLOAD_BYTES) {
+        const fileBytes = new Uint8Array(await fs.readFile(filePath));
         try {
-            const text = await transcribeWithFal(bytes, mediaType, falApiKey);
-            if (text) {
-                return { text, provider: "fal", error: null, notes };
-            }
-            return {
-                text: null,
-                provider: "fal",
-                error: new Error("FAL transcription returned empty text"),
-                notes,
-            };
+            const text = await transcribeWithGroq(fileBytes, mediaType, filename, groqApiKey);
+            if (text)
+                return { text, provider: "groq", error: null, notes };
+            const error = new Error("Groq transcription returned empty text");
+            notes.push("Groq transcription returned empty text; falling back to local/AssemblyAI/Gemini/OpenAI");
+            return { text: null, provider: "groq", error, notes };
         }
         catch (error) {
-            return {
-                text: null,
-                provider: "fal",
-                error: wrapError("FAL transcription failed", error),
-                notes,
-            };
+            const wrapped = wrapError("Groq transcription failed", error);
+            notes.push(`Groq transcription failed; falling back to local/AssemblyAI/Gemini/OpenAI: ${error instanceof Error ? error.message : String(error)}`);
+            return { text: null, provider: "groq", error: wrapped, notes };
         }
     }
-    const terminalError = openaiError ?? groqError ?? new Error("No transcription providers available");
-    const terminalProvider = openaiError
-        ? "openai"
-        : groqError
-            ? "groq"
-            : openaiApiKey
-                ? "openai"
-                : null;
-    return { text: null, provider: terminalProvider, error: terminalError, notes };
+    const canChunk = await isFfmpegAvailable();
+    if (!canChunk) {
+        const error = new Error(`File too large for Groq upload (${formatBytes(stat.size)}); trying local providers`);
+        notes.push(error.message);
+        return { text: null, provider: "groq", error, notes };
+    }
+    const chunked = await transcribeChunkedFile({
+        filePath,
+        segmentSeconds,
+        totalDurationSeconds,
+        onProgress,
+        transcribeSegment: ({ bytes, filename }) => transcribeMediaWithWhisper({
+            bytes,
+            mediaType: "audio/mpeg",
+            filename,
+            groqApiKey,
+            assemblyaiApiKey,
+            geminiApiKey,
+            openaiApiKey,
+            falApiKey,
+            env,
+        }),
+    });
+    if (chunked.notes.length > 0)
+        notes.push(...chunked.notes);
+    if (chunked.text)
+        return { ...chunked, notes };
+    const error = chunked.error ?? new Error("Groq chunked transcription failed");
+    notes.push(`Groq chunked transcription failed; falling back to local/AssemblyAI/Gemini/OpenAI: ${error.message}`);
+    return { text: null, provider: "groq", error, notes };
 }
-export async function transcribeMediaFileWithWhisper({ filePath, mediaType, filename, groqApiKey, openaiApiKey, falApiKey, segmentSeconds = DEFAULT_SEGMENT_SECONDS, totalDurationSeconds = null, onProgress = null, env = process.env, }) {
-    const notes = [];
-    const stat = await fs.stat(filePath);
-    let skipGroqInNestedCalls = false;
-    let groqError = null;
-    // 1. Groq (cloud, free, fastest) — try first even for file-based transcription
-    if (groqApiKey) {
-        skipGroqInNestedCalls = true;
-        if (stat.size <= MAX_OPENAI_UPLOAD_BYTES) {
-            const fileBytes = new Uint8Array(await fs.readFile(filePath));
-            try {
-                const text = await transcribeWithGroq(fileBytes, mediaType, filename, groqApiKey);
-                if (text) {
-                    return { text, provider: "groq", error: null, notes };
-                }
-                groqError = new Error("Groq transcription returned empty text");
-                notes.push("Groq transcription returned empty text; falling back to local/OpenAI");
-            }
-            catch (error) {
-                groqError = wrapError("Groq transcription failed", error);
-                notes.push(`Groq transcription failed; falling back to local/OpenAI: ${error instanceof Error ? error.message : String(error)}`);
-            }
-        }
-        else {
-            groqError = new Error(`File too large for Groq upload (${formatBytes(stat.size)}); trying local providers`);
-            notes.push(groqError.message);
-        }
+async function transcribeWithLocalOnnx({ bytes, mediaType, filename, totalDurationSeconds, onProgress, env, notes, }) {
+    const onnxPreference = resolveOnnxModelPreference(env);
+    if (!onnxPreference)
+        return null;
+    const onnx = await transcribeWithOnnxCli({
+        model: onnxPreference,
+        bytes,
+        mediaType,
+        filename,
+        totalDurationSeconds,
+        onProgress,
+        env,
+    });
+    if (onnx.text) {
+        if (onnx.notes.length > 0)
+            notes.push(...onnx.notes);
+        return { ...onnx, notes };
     }
-    // 2. ONNX (local)
+    if (onnx.notes.length > 0)
+        notes.push(...onnx.notes);
+    if (onnx.error) {
+        notes.push(`${onnx.provider ?? "onnx"} failed; falling back to Whisper: ${onnx.error.message}`);
+    }
+    return null;
+}
+async function transcribeWithLocalOnnxFile({ filePath, mediaType, totalDurationSeconds, onProgress, env, notes, }) {
     const onnxPreference = resolveOnnxModelPreference(env);
-    if (onnxPreference) {
-        onProgress?.({
-            partIndex: null,
-            parts: null,
-            processedDurationSeconds: null,
-            totalDurationSeconds,
-        });
-        const onnx = await transcribeWithOnnxCliFile({
-            model: onnxPreference,
-            filePath,
-            mediaType,
-            totalDurationSeconds,
-            onProgress,
-            env,
-        });
-        if (onnx.text) {
-            if (onnx.notes.length > 0)
-                notes.push(...onnx.notes);
-            return { ...onnx, notes };
-        }
+    if (!onnxPreference)
+        return null;
+    onProgress?.({
+        partIndex: null,
+        parts: null,
+        processedDurationSeconds: null,
+        totalDurationSeconds,
+    });
+    const onnx = await transcribeWithOnnxCliFile({
+        model: onnxPreference,
+        filePath,
+        mediaType,
+        totalDurationSeconds,
+        onProgress,
+        env,
+    });
+    if (onnx.text) {
         if (onnx.notes.length > 0)
             notes.push(...onnx.notes);
-        if (onnx.error) {
-            notes.push(`${onnx.provider ?? "onnx"} failed; falling back to Whisper: ${onnx.error.message}`);
-        }
+        return { ...onnx, notes };
     }
-    // 3. whisper.cpp (local)
+    if (onnx.notes.length > 0)
+        notes.push(...onnx.notes);
+    if (onnx.error) {
+        notes.push(`${onnx.provider ?? "onnx"} failed; falling back to Whisper: ${onnx.error.message}`);
+    }
+    return null;
+}
+async function transcribeWithLocalWhisperBytes({ bytes, mediaType, filename, totalDurationSeconds, onProgress, notes, }) {
     const localReady = await isWhisperCppReady();
-    let local = null;
-    if (localReady) {
-        onProgress?.({
-            partIndex: null,
-            parts: null,
-            processedDurationSeconds: null,
+    if (!localReady)
+        return null;
+    const nameHint = filename?.trim() ? basename(filename.trim()) : "media";
+    const tempFile = join(tmpdir(), `summarize-whisper-local-${randomUUID()}-${ensureWhisperFilenameExtension(nameHint, mediaType)}`);
+    try {
+        await fs.writeFile(tempFile, bytes);
+        const result = await safeTranscribeWithWhisperCppFile({
+            filePath: tempFile,
+            mediaType,
             totalDurationSeconds,
+            onProgress,
         });
-        try {
-            local = await transcribeWithWhisperCppFile({
-                filePath,
-                mediaType,
-                totalDurationSeconds,
-                onProgress,
-            });
+        if (result.text) {
+            if (result.notes.length > 0)
+                notes.push(...result.notes);
+            return { ...result, notes };
         }
-        catch (error) {
-            local = {
-                text: null,
-                provider: "whisper.cpp",
-                error: wrapError("whisper.cpp failed", error),
-                notes: [],
-            };
-        }
-        if (local.text) {
-            if (local.notes.length > 0)
-                notes.push(...local.notes);
-            return { ...local, notes };
-        }
-        if (local.notes.length > 0)
-            notes.push(...local.notes);
-        if (local.error) {
-            notes.push(`whisper.cpp failed; falling back to remote Whisper: ${local.error.message}`);
+        if (result.notes.length > 0)
+            notes.push(...result.notes);
+        if (result.error) {
+            notes.push(`whisper.cpp failed; falling back to remote Whisper: ${result.error.message}`);
         }
+        return null;
     }
-    // 4. OpenAI / FAL (cloud fallbacks)
-    if (!openaiApiKey && !falApiKey) {
-        if (groqError) {
-            return {
-                text: null,
-                provider: "groq",
-                error: groqError,
-                notes,
-            };
-        }
-        return {
-            text: null,
-            provider: null,
-            error: new Error("No transcription providers available (install whisper-cpp or set GROQ_API_KEY, OPENAI_API_KEY, or FAL_KEY)"),
-            notes,
-        };
-    }
-    if (openaiApiKey && stat.size > MAX_OPENAI_UPLOAD_BYTES) {
-        const canChunk = await isFfmpegAvailable();
-        if (!canChunk) {
-            notes.push(`Media too large for Whisper upload (${formatBytes(stat.size)}); install ffmpeg to enable chunked transcription`);
-            const head = await readFirstBytes(filePath, MAX_OPENAI_UPLOAD_BYTES);
-            const partial = await transcribeMediaWithWhisper({
-                bytes: head,
-                mediaType,
-                filename,
-                groqApiKey,
-                skipGroq: skipGroqInNestedCalls,
-                openaiApiKey,
-                falApiKey,
-                env,
-            });
-            if (partial.notes.length > 0)
-                notes.push(...partial.notes);
-            return { ...partial, notes };
-        }
-        const dir = await fs.mkdtemp(join(tmpdir(), "summarize-whisper-segments-"));
-        try {
-            const pattern = join(dir, "part-%03d.mp3");
-            await runFfmpegSegment({
-                inputPath: filePath,
-                outputPattern: pattern,
-                segmentSeconds,
-            });
-            const files = (await fs.readdir(dir))
-                .filter((name) => name.startsWith("part-") && name.endsWith(".mp3"))
-                .sort((a, b) => a.localeCompare(b));
-            if (files.length === 0) {
-                return {
-                    text: null,
-                    provider: null,
-                    error: new Error("ffmpeg produced no audio segments"),
-                    notes,
-                };
-            }
-            notes.push(`ffmpeg chunked media into ${files.length} parts (${segmentSeconds}s each)`);
-            onProgress?.({
-                partIndex: null,
-                parts: files.length,
-                processedDurationSeconds: null,
-                totalDurationSeconds,
-            });
-            const parts = [];
-            let usedProvider = null;
-            for (const [index, name] of files.entries()) {
-                const segmentPath = join(dir, name);
-                const segmentBytes = new Uint8Array(await fs.readFile(segmentPath));
-                const result = await transcribeMediaWithWhisper({
-                    bytes: segmentBytes,
-                    mediaType: "audio/mpeg",
-                    filename: name,
-                    groqApiKey,
-                    skipGroq: skipGroqInNestedCalls,
-                    openaiApiKey,
-                    falApiKey,
-                    onProgress: null,
-                    env,
-                });
-                if (!usedProvider && result.provider)
-                    usedProvider = result.provider;
-                if (result.error && !result.text) {
-                    return { text: null, provider: usedProvider, error: result.error, notes };
-                }
-                if (result.text)
-                    parts.push(result.text);
-                // Coarse but useful: update based on part boundaries. Duration is best-effort (RSS hints or
-                // ffprobe); the per-part time is stable enough to make the spinner feel alive.
-                const processedSeconds = Math.max(0, (index + 1) * segmentSeconds);
-                onProgress?.({
-                    partIndex: index + 1,
-                    parts: files.length,
-                    processedDurationSeconds: typeof totalDurationSeconds === "number" && totalDurationSeconds > 0
-                        ? Math.min(processedSeconds, totalDurationSeconds)
-                        : null,
-                    totalDurationSeconds,
-                });
-            }
-            return { text: parts.join("\n\n"), provider: usedProvider, error: null, notes };
-        }
-        finally {
-            await fs.rm(dir, { recursive: true, force: true }).catch(() => { });
-        }
+    finally {
+        await fs.unlink(tempFile).catch(() => { });
     }
-    const bytes = new Uint8Array(await fs.readFile(filePath));
+}
+async function transcribeWithLocalWhisperFile({ filePath, mediaType, totalDurationSeconds, onProgress, notes, }) {
+    const localReady = await isWhisperCppReady();
+    if (!localReady)
+        return null;
     onProgress?.({
         partIndex: null,
         parts: null,
         processedDurationSeconds: null,
         totalDurationSeconds,
     });
-    const result = await transcribeMediaWithWhisper({
-        bytes,
+    const result = await safeTranscribeWithWhisperCppFile({
+        filePath,
         mediaType,
-        filename,
-        groqApiKey,
-        skipGroq: skipGroqInNestedCalls,
-        openaiApiKey,
-        falApiKey,
-        env,
+        totalDurationSeconds,
+        onProgress,
     });
+    if (result.text) {
+        if (result.notes.length > 0)
+            notes.push(...result.notes);
+        return { ...result, notes };
+    }
     if (result.notes.length > 0)
         notes.push(...result.notes);
-    return { ...result, notes };
+    if (result.error) {
+        notes.push(`whisper.cpp failed; falling back to remote Whisper: ${result.error.message}`);
+    }
+    return null;
+}
+async function safeTranscribeWithWhisperCppFile(args) {
+    try {
+        return await transcribeWithWhisperCppFile(args);
+    }
+    catch (error) {
+        return {
+            text: null,
+            provider: "whisper.cpp",
+            error: wrapError("whisper.cpp failed", error),
+            notes: [],
+        };
+    }
 }
 //# sourceMappingURL=core.js.map