@steipete/summarize-core 0.11.1 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/esm/content/bun.js +21 -0
- package/dist/esm/content/bun.js.map +1 -0
- package/dist/esm/content/index.js +1 -0
- package/dist/esm/content/index.js.map +1 -1
- package/dist/esm/content/link-preview/client.js +6 -0
- package/dist/esm/content/link-preview/client.js.map +1 -1
- package/dist/esm/content/link-preview/content/fetcher.js +19 -2
- package/dist/esm/content/link-preview/content/fetcher.js.map +1 -1
- package/dist/esm/content/link-preview/content/firecrawl.js.map +1 -1
- package/dist/esm/content/link-preview/content/html.js.map +1 -1
- package/dist/esm/content/link-preview/content/index.js +29 -12
- package/dist/esm/content/link-preview/content/index.js.map +1 -1
- package/dist/esm/content/link-preview/content/utils.js.map +1 -1
- package/dist/esm/content/transcript/index.js +2 -0
- package/dist/esm/content/transcript/index.js.map +1 -1
- package/dist/esm/content/transcript/providers/generic.js +10 -11
- package/dist/esm/content/transcript/providers/generic.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/apple-flow.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/media.js +9 -1
- package/dist/esm/content/transcript/providers/podcast/media.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/provider-flow.js +157 -0
- package/dist/esm/content/transcript/providers/podcast/provider-flow.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/rss-feed.js +123 -0
- package/dist/esm/content/transcript/providers/podcast/rss-feed.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/rss-transcript.js +113 -0
- package/dist/esm/content/transcript/providers/podcast/rss-transcript.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/rss.js +2 -226
- package/dist/esm/content/transcript/providers/podcast/rss.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/spotify-flow.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast.js +26 -155
- package/dist/esm/content/transcript/providers/podcast.js.map +1 -1
- package/dist/esm/content/transcript/providers/transcription-capability.js +22 -0
- package/dist/esm/content/transcript/providers/transcription-capability.js.map +1 -0
- package/dist/esm/content/transcript/providers/transcription-start.js +40 -30
- package/dist/esm/content/transcript/providers/transcription-start.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube/api.js +3 -2
- package/dist/esm/content/transcript/providers/youtube/api.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube/captions-player.js +173 -0
- package/dist/esm/content/transcript/providers/youtube/captions-player.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/captions-shared.js +8 -0
- package/dist/esm/content/transcript/providers/youtube/captions-shared.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/captions-transcript.js +361 -0
- package/dist/esm/content/transcript/providers/youtube/captions-transcript.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/captions.js +2 -557
- package/dist/esm/content/transcript/providers/youtube/captions.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube/provider-flow.js +189 -0
- package/dist/esm/content/transcript/providers/youtube/provider-flow.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/yt-dlp.js +7 -2
- package/dist/esm/content/transcript/providers/youtube/yt-dlp.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube.js +42 -194
- package/dist/esm/content/transcript/providers/youtube.js.map +1 -1
- package/dist/esm/content/transcript/transcription-config.js +24 -4
- package/dist/esm/content/transcript/transcription-config.js.map +1 -1
- package/dist/esm/content/url.js +3 -3
- package/dist/esm/content/url.js.map +1 -1
- package/dist/esm/processes.js.map +1 -1
- package/dist/esm/prompts/format.js +6 -0
- package/dist/esm/prompts/format.js.map +1 -1
- package/dist/esm/prompts/link-summary.js +27 -3
- package/dist/esm/prompts/link-summary.js.map +1 -1
- package/dist/esm/transcription/onnx-cli.js.map +1 -1
- package/dist/esm/transcription/whisper/assemblyai.js +132 -0
- package/dist/esm/transcription/whisper/assemblyai.js.map +1 -0
- package/dist/esm/transcription/whisper/chunking.js +64 -0
- package/dist/esm/transcription/whisper/chunking.js.map +1 -0
- package/dist/esm/transcription/whisper/cloud-providers.js +69 -0
- package/dist/esm/transcription/whisper/cloud-providers.js.map +1 -0
- package/dist/esm/transcription/whisper/core.js +316 -390
- package/dist/esm/transcription/whisper/core.js.map +1 -1
- package/dist/esm/transcription/whisper/gemini.js +324 -0
- package/dist/esm/transcription/whisper/gemini.js.map +1 -0
- package/dist/esm/transcription/whisper/preferences.js +16 -0
- package/dist/esm/transcription/whisper/preferences.js.map +1 -0
- package/dist/esm/transcription/whisper/provider-setup.js +62 -0
- package/dist/esm/transcription/whisper/provider-setup.js.map +1 -0
- package/dist/esm/transcription/whisper/remote-provider-attempts.js +189 -0
- package/dist/esm/transcription/whisper/remote-provider-attempts.js.map +1 -0
- package/dist/esm/transcription/whisper/remote.js +220 -0
- package/dist/esm/transcription/whisper/remote.js.map +1 -0
- package/dist/esm/transcription/whisper/whisper-cpp.js.map +1 -1
- package/dist/types/content/bun.d.ts +6 -0
- package/dist/types/content/index.d.ts +1 -0
- package/dist/types/content/link-preview/client.d.ts +3 -1
- package/dist/types/content/link-preview/content/fetcher.d.ts +1 -1
- package/dist/types/content/link-preview/content/html.d.ts +1 -1
- package/dist/types/content/link-preview/deps.d.ts +8 -2
- package/dist/types/content/link-preview/types.d.ts +1 -1
- package/dist/types/content/transcript/providers/podcast/flow-context.d.ts +3 -0
- package/dist/types/content/transcript/providers/podcast/media.d.ts +4 -2
- package/dist/types/content/transcript/providers/podcast/provider-flow.d.ts +7 -0
- package/dist/types/content/transcript/providers/podcast/rss-feed.d.ts +15 -0
- package/dist/types/content/transcript/providers/podcast/rss-transcript.d.ts +12 -0
- package/dist/types/content/transcript/providers/podcast/rss.d.ts +2 -24
- package/dist/types/content/transcript/providers/transcription-capability.d.ts +18 -0
- package/dist/types/content/transcript/providers/transcription-start.d.ts +10 -3
- package/dist/types/content/transcript/providers/youtube/captions-player.d.ts +12 -0
- package/dist/types/content/transcript/providers/youtube/captions-shared.d.ts +42 -0
- package/dist/types/content/transcript/providers/youtube/captions-transcript.d.ts +4 -0
- package/dist/types/content/transcript/providers/youtube/captions.d.ts +2 -19
- package/dist/types/content/transcript/providers/youtube/provider-flow.d.ts +34 -0
- package/dist/types/content/transcript/providers/youtube/yt-dlp.d.ts +4 -2
- package/dist/types/content/transcript/transcription-config.d.ts +6 -0
- package/dist/types/content/transcript/types.d.ts +1 -0
- package/dist/types/prompts/format.d.ts +1 -0
- package/dist/types/prompts/link-summary.d.ts +2 -1
- package/dist/types/transcription/whisper/assemblyai.d.ts +17 -0
- package/dist/types/transcription/whisper/chunking.d.ts +11 -0
- package/dist/types/transcription/whisper/cloud-providers.d.ts +22 -0
- package/dist/types/transcription/whisper/core.d.ts +12 -14
- package/dist/types/transcription/whisper/gemini.d.ts +14 -0
- package/dist/types/transcription/whisper/preferences.d.ts +4 -0
- package/dist/types/transcription/whisper/provider-setup.d.ts +30 -0
- package/dist/types/transcription/whisper/remote-provider-attempts.d.ts +51 -0
- package/dist/types/transcription/whisper/remote.d.ts +51 -0
- package/dist/types/transcription/whisper/types.d.ts +1 -1
- package/package.json +9 -9
|
@@ -2,459 +2,385 @@ import { randomUUID } from "node:crypto";
|
|
|
2
2
|
import { promises as fs } from "node:fs";
|
|
3
3
|
import { tmpdir } from "node:os";
|
|
4
4
|
import { basename, join } from "node:path";
|
|
5
|
-
import {
|
|
5
|
+
import { transcribeWithOnnxCli, transcribeWithOnnxCliFile } from "../onnx-cli.js";
|
|
6
|
+
import { transcribeChunkedFile } from "./chunking.js";
|
|
6
7
|
import { DEFAULT_SEGMENT_SECONDS, MAX_OPENAI_UPLOAD_BYTES } from "./constants.js";
|
|
7
|
-
import {
|
|
8
|
-
import { isFfmpegAvailable, runFfmpegSegment, transcodeBytesToMp3 } from "./ffmpeg.js";
|
|
8
|
+
import { isFfmpegAvailable, transcodeBytesToMp3 } from "./ffmpeg.js";
|
|
9
9
|
import { shouldRetryGroqViaFfmpeg, transcribeWithGroq } from "./groq.js";
|
|
10
|
-
import {
|
|
11
|
-
import {
|
|
10
|
+
import { resolveOnnxModelPreference } from "./preferences.js";
|
|
11
|
+
import { transcribeBytesWithRemoteFallbacks, transcribeFileWithRemoteFallbacks, transcribeOversizedBytesViaTempFile, } from "./remote.js";
|
|
12
|
+
import { ensureWhisperFilenameExtension, formatBytes, wrapError } from "./utils.js";
|
|
12
13
|
import { isWhisperCppReady, transcribeWithWhisperCppFile } from "./whisper-cpp.js";
|
|
13
|
-
function
|
|
14
|
-
const raw = env.SUMMARIZE_TRANSCRIBER?.trim().toLowerCase();
|
|
15
|
-
if (raw === "auto" || raw === "whisper" || raw === "parakeet" || raw === "canary")
|
|
16
|
-
return raw;
|
|
17
|
-
return "auto";
|
|
18
|
-
}
|
|
19
|
-
function resolveOnnxModelPreference(env) {
|
|
20
|
-
const preference = resolveTranscriberPreference(env);
|
|
21
|
-
if (preference === "parakeet" || preference === "canary")
|
|
22
|
-
return preference;
|
|
23
|
-
if (preference === "auto")
|
|
24
|
-
return resolvePreferredOnnxModel(env);
|
|
25
|
-
return null;
|
|
26
|
-
}
|
|
27
|
-
export async function transcribeMediaWithWhisper({ bytes, mediaType, filename, groqApiKey, skipGroq = false, openaiApiKey, falApiKey, totalDurationSeconds = null, onProgress, env = process.env, }) {
|
|
14
|
+
export async function transcribeMediaWithWhisper({ bytes, mediaType, filename, groqApiKey, skipGroq = false, assemblyaiApiKey = null, geminiApiKey = null, openaiApiKey, falApiKey, totalDurationSeconds = null, onProgress, env = process.env, }) {
|
|
28
15
|
const notes = [];
|
|
29
|
-
// 1. Groq (cloud, free, fastest)
|
|
30
16
|
let groqError = null;
|
|
31
17
|
if (groqApiKey && !skipGroq) {
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
const canTranscode = await isFfmpegAvailable();
|
|
45
|
-
if (canTranscode) {
|
|
46
|
-
try {
|
|
47
|
-
notes.push("Groq could not decode media; transcoding via ffmpeg and retrying");
|
|
48
|
-
const mp3Bytes = await transcodeBytesToMp3(bytes);
|
|
49
|
-
const retried = await transcribeWithGroq(mp3Bytes, "audio/mpeg", "audio.mp3", groqApiKey);
|
|
50
|
-
if (retried) {
|
|
51
|
-
return { text: retried, provider: "groq", error: null, notes };
|
|
52
|
-
}
|
|
53
|
-
groqError = new Error("Groq transcription returned empty text after ffmpeg transcode");
|
|
54
|
-
bytes = mp3Bytes;
|
|
55
|
-
mediaType = "audio/mpeg";
|
|
56
|
-
filename = "audio.mp3";
|
|
57
|
-
}
|
|
58
|
-
catch (error) {
|
|
59
|
-
notes.push(`ffmpeg transcode failed; cannot retry Groq decode error: ${error instanceof Error ? error.message : String(error)}`);
|
|
60
|
-
}
|
|
61
|
-
}
|
|
62
|
-
else {
|
|
63
|
-
notes.push("Groq could not decode media; install ffmpeg to enable transcoding retry");
|
|
18
|
+
const groqResult = await transcribeWithGroqFirst({
|
|
19
|
+
bytes,
|
|
20
|
+
mediaType,
|
|
21
|
+
filename,
|
|
22
|
+
groqApiKey,
|
|
23
|
+
notes,
|
|
24
|
+
});
|
|
25
|
+
bytes = groqResult.bytes;
|
|
26
|
+
mediaType = groqResult.mediaType;
|
|
27
|
+
filename = groqResult.filename;
|
|
28
|
+
if (groqResult.text) {
|
|
29
|
+
return { text: groqResult.text, provider: "groq", error: null, notes };
|
|
64
30
|
}
|
|
31
|
+
groqError = groqResult.error;
|
|
65
32
|
}
|
|
66
33
|
if (groqError) {
|
|
67
|
-
notes.push(`Groq transcription failed; falling back to local/OpenAI: ${groqError.message}`);
|
|
34
|
+
notes.push(`Groq transcription failed; falling back to local/AssemblyAI/Gemini/OpenAI: ${groqError.message}`);
|
|
68
35
|
}
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
36
|
+
const onnx = await transcribeWithLocalOnnx({
|
|
37
|
+
bytes,
|
|
38
|
+
mediaType,
|
|
39
|
+
filename,
|
|
40
|
+
totalDurationSeconds,
|
|
41
|
+
onProgress,
|
|
42
|
+
env,
|
|
43
|
+
notes,
|
|
44
|
+
});
|
|
45
|
+
if (onnx)
|
|
46
|
+
return onnx;
|
|
47
|
+
const local = await transcribeWithLocalWhisperBytes({
|
|
48
|
+
bytes,
|
|
49
|
+
mediaType,
|
|
50
|
+
filename,
|
|
51
|
+
totalDurationSeconds,
|
|
52
|
+
onProgress,
|
|
53
|
+
notes,
|
|
54
|
+
});
|
|
55
|
+
if (local)
|
|
56
|
+
return local;
|
|
57
|
+
return await transcribeBytesWithRemoteFallbacks({
|
|
58
|
+
bytes,
|
|
59
|
+
mediaType,
|
|
60
|
+
filename,
|
|
61
|
+
notes,
|
|
62
|
+
groqApiKey,
|
|
63
|
+
groqError,
|
|
64
|
+
assemblyaiApiKey,
|
|
65
|
+
geminiApiKey,
|
|
66
|
+
openaiApiKey,
|
|
67
|
+
falApiKey,
|
|
68
|
+
env,
|
|
69
|
+
onProgress,
|
|
70
|
+
transcribeOversizedBytesWithChunking: ({ bytes, mediaType, filename, onProgress }) => transcribeOversizedBytesViaTempFile({
|
|
74
71
|
bytes,
|
|
75
72
|
mediaType,
|
|
76
73
|
filename,
|
|
74
|
+
onProgress,
|
|
75
|
+
transcribeFile: ({ filePath, mediaType, filename, onProgress }) => transcribeMediaFileWithWhisper({
|
|
76
|
+
filePath,
|
|
77
|
+
mediaType,
|
|
78
|
+
filename,
|
|
79
|
+
groqApiKey,
|
|
80
|
+
assemblyaiApiKey,
|
|
81
|
+
geminiApiKey,
|
|
82
|
+
openaiApiKey,
|
|
83
|
+
falApiKey,
|
|
84
|
+
segmentSeconds: DEFAULT_SEGMENT_SECONDS,
|
|
85
|
+
onProgress,
|
|
86
|
+
env,
|
|
87
|
+
}),
|
|
88
|
+
}),
|
|
89
|
+
});
|
|
90
|
+
}
|
|
91
|
+
export async function transcribeMediaFileWithWhisper({ filePath, mediaType, filename, groqApiKey, assemblyaiApiKey = null, geminiApiKey = null, openaiApiKey, falApiKey, segmentSeconds = DEFAULT_SEGMENT_SECONDS, totalDurationSeconds = null, onProgress = null, env = process.env, }) {
|
|
92
|
+
const notes = [];
|
|
93
|
+
let skipGroqInNestedCalls = false;
|
|
94
|
+
let groqError = null;
|
|
95
|
+
if (groqApiKey) {
|
|
96
|
+
skipGroqInNestedCalls = true;
|
|
97
|
+
const groqResult = await transcribeGroqFileFirst({
|
|
98
|
+
filePath,
|
|
99
|
+
mediaType,
|
|
100
|
+
filename,
|
|
101
|
+
groqApiKey,
|
|
102
|
+
assemblyaiApiKey,
|
|
103
|
+
geminiApiKey,
|
|
104
|
+
openaiApiKey,
|
|
105
|
+
falApiKey,
|
|
106
|
+
segmentSeconds,
|
|
77
107
|
totalDurationSeconds,
|
|
78
108
|
onProgress,
|
|
79
109
|
env,
|
|
80
|
-
});
|
|
81
|
-
if (onnx.text) {
|
|
82
|
-
if (onnx.notes.length > 0)
|
|
83
|
-
notes.push(...onnx.notes);
|
|
84
|
-
return { ...onnx, notes };
|
|
85
|
-
}
|
|
86
|
-
if (onnx.notes.length > 0)
|
|
87
|
-
notes.push(...onnx.notes);
|
|
88
|
-
if (onnx.error) {
|
|
89
|
-
notes.push(`${onnx.provider ?? "onnx"} failed; falling back to Whisper: ${onnx.error.message}`);
|
|
90
|
-
}
|
|
91
|
-
}
|
|
92
|
-
// 3. whisper.cpp (local)
|
|
93
|
-
const localReady = await isWhisperCppReady();
|
|
94
|
-
let local = null;
|
|
95
|
-
if (localReady) {
|
|
96
|
-
const nameHint = filename?.trim() ? basename(filename.trim()) : "media";
|
|
97
|
-
const tempFile = join(tmpdir(), `summarize-whisper-local-${randomUUID()}-${ensureWhisperFilenameExtension(nameHint, mediaType)}`);
|
|
98
|
-
try {
|
|
99
|
-
await fs.writeFile(tempFile, bytes);
|
|
100
|
-
try {
|
|
101
|
-
local = await transcribeWithWhisperCppFile({
|
|
102
|
-
filePath: tempFile,
|
|
103
|
-
mediaType,
|
|
104
|
-
totalDurationSeconds,
|
|
105
|
-
onProgress,
|
|
106
|
-
});
|
|
107
|
-
}
|
|
108
|
-
catch (error) {
|
|
109
|
-
local = {
|
|
110
|
-
text: null,
|
|
111
|
-
provider: "whisper.cpp",
|
|
112
|
-
error: wrapError("whisper.cpp failed", error),
|
|
113
|
-
notes: [],
|
|
114
|
-
};
|
|
115
|
-
}
|
|
116
|
-
if (local.text) {
|
|
117
|
-
if (local.notes.length > 0)
|
|
118
|
-
notes.push(...local.notes);
|
|
119
|
-
return { ...local, notes };
|
|
120
|
-
}
|
|
121
|
-
if (local.notes.length > 0)
|
|
122
|
-
notes.push(...local.notes);
|
|
123
|
-
if (local.error) {
|
|
124
|
-
notes.push(`whisper.cpp failed; falling back to remote Whisper: ${local.error.message}`);
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
finally {
|
|
128
|
-
await fs.unlink(tempFile).catch(() => { });
|
|
129
|
-
}
|
|
130
|
-
}
|
|
131
|
-
// 4. OpenAI / FAL (cloud fallbacks)
|
|
132
|
-
if (!groqApiKey && !openaiApiKey && !falApiKey) {
|
|
133
|
-
return {
|
|
134
|
-
text: null,
|
|
135
|
-
provider: null,
|
|
136
|
-
error: new Error("No transcription providers available (install whisper-cpp or set GROQ_API_KEY, OPENAI_API_KEY, or FAL_KEY)"),
|
|
137
110
|
notes,
|
|
138
|
-
};
|
|
111
|
+
});
|
|
112
|
+
if (groqResult.text)
|
|
113
|
+
return groqResult;
|
|
114
|
+
groqError = groqResult.error;
|
|
139
115
|
}
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
notes
|
|
164
|
-
|
|
116
|
+
const onnx = await transcribeWithLocalOnnxFile({
|
|
117
|
+
filePath,
|
|
118
|
+
mediaType,
|
|
119
|
+
totalDurationSeconds,
|
|
120
|
+
onProgress,
|
|
121
|
+
env,
|
|
122
|
+
notes,
|
|
123
|
+
});
|
|
124
|
+
if (onnx)
|
|
125
|
+
return onnx;
|
|
126
|
+
const local = await transcribeWithLocalWhisperFile({
|
|
127
|
+
filePath,
|
|
128
|
+
mediaType,
|
|
129
|
+
totalDurationSeconds,
|
|
130
|
+
onProgress,
|
|
131
|
+
notes,
|
|
132
|
+
});
|
|
133
|
+
if (local)
|
|
134
|
+
return local;
|
|
135
|
+
return await transcribeFileWithRemoteFallbacks({
|
|
136
|
+
filePath,
|
|
137
|
+
mediaType,
|
|
138
|
+
filename,
|
|
139
|
+
notes,
|
|
140
|
+
groqApiKey,
|
|
141
|
+
groqError,
|
|
142
|
+
assemblyaiApiKey,
|
|
143
|
+
geminiApiKey,
|
|
144
|
+
openaiApiKey,
|
|
145
|
+
falApiKey,
|
|
146
|
+
env,
|
|
147
|
+
totalDurationSeconds,
|
|
148
|
+
onProgress,
|
|
149
|
+
transcribeChunkedFile: ({ filePath, segmentSeconds, totalDurationSeconds, onProgress }) => transcribeChunkedFile({
|
|
150
|
+
filePath,
|
|
151
|
+
segmentSeconds,
|
|
152
|
+
totalDurationSeconds,
|
|
153
|
+
onProgress,
|
|
154
|
+
transcribeSegment: ({ bytes, filename }) => transcribeMediaWithWhisper({
|
|
155
|
+
bytes,
|
|
156
|
+
mediaType: "audio/mpeg",
|
|
157
|
+
filename,
|
|
158
|
+
groqApiKey,
|
|
159
|
+
skipGroq: skipGroqInNestedCalls,
|
|
160
|
+
assemblyaiApiKey,
|
|
161
|
+
geminiApiKey,
|
|
162
|
+
openaiApiKey,
|
|
163
|
+
falApiKey,
|
|
164
|
+
env,
|
|
165
|
+
}),
|
|
166
|
+
}),
|
|
167
|
+
});
|
|
168
|
+
}
|
|
169
|
+
async function transcribeWithGroqFirst({ bytes, mediaType, filename, groqApiKey, notes, }) {
|
|
170
|
+
let groqError = null;
|
|
171
|
+
try {
|
|
172
|
+
const text = await transcribeWithGroq(bytes, mediaType, filename, groqApiKey);
|
|
173
|
+
if (text)
|
|
174
|
+
return { text, error: null, bytes, mediaType, filename };
|
|
175
|
+
groqError = new Error("Groq transcription returned empty text");
|
|
165
176
|
}
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
try {
|
|
169
|
-
const text = await transcribeWithOpenAi(bytes, mediaType, filename, openaiApiKey, { env });
|
|
170
|
-
if (text) {
|
|
171
|
-
return { text, provider: "openai", error: null, notes };
|
|
172
|
-
}
|
|
173
|
-
openaiError = new Error("OpenAI transcription returned empty text");
|
|
174
|
-
}
|
|
175
|
-
catch (error) {
|
|
176
|
-
openaiError = wrapError("OpenAI transcription failed", error);
|
|
177
|
-
}
|
|
177
|
+
catch (error) {
|
|
178
|
+
groqError = wrapError("Groq transcription failed", error);
|
|
178
179
|
}
|
|
179
|
-
if (
|
|
180
|
+
if (groqError && shouldRetryGroqViaFfmpeg(groqError)) {
|
|
180
181
|
const canTranscode = await isFfmpegAvailable();
|
|
181
182
|
if (canTranscode) {
|
|
182
183
|
try {
|
|
183
|
-
|
|
184
|
-
// is the most reliable cross-format fallback (and also reduces upload size).
|
|
185
|
-
notes.push("OpenAI could not decode media; transcoding via ffmpeg and retrying");
|
|
184
|
+
notes.push("Groq could not decode media; transcoding via ffmpeg and retrying");
|
|
186
185
|
const mp3Bytes = await transcodeBytesToMp3(bytes);
|
|
187
|
-
const retried = await
|
|
186
|
+
const retried = await transcribeWithGroq(mp3Bytes, "audio/mpeg", "audio.mp3", groqApiKey);
|
|
188
187
|
if (retried) {
|
|
189
|
-
return {
|
|
188
|
+
return {
|
|
189
|
+
text: retried,
|
|
190
|
+
error: null,
|
|
191
|
+
bytes: mp3Bytes,
|
|
192
|
+
mediaType: "audio/mpeg",
|
|
193
|
+
filename: "audio.mp3",
|
|
194
|
+
};
|
|
190
195
|
}
|
|
191
|
-
|
|
196
|
+
groqError = new Error("Groq transcription returned empty text after ffmpeg transcode");
|
|
192
197
|
bytes = mp3Bytes;
|
|
193
198
|
mediaType = "audio/mpeg";
|
|
194
199
|
filename = "audio.mp3";
|
|
195
200
|
}
|
|
196
201
|
catch (error) {
|
|
197
|
-
notes.push(`ffmpeg transcode failed; cannot retry
|
|
202
|
+
notes.push(`ffmpeg transcode failed; cannot retry Groq decode error: ${error instanceof Error ? error.message : String(error)}`);
|
|
198
203
|
}
|
|
199
204
|
}
|
|
200
205
|
else {
|
|
201
|
-
notes.push("
|
|
206
|
+
notes.push("Groq could not decode media; install ffmpeg to enable transcoding retry");
|
|
202
207
|
}
|
|
203
208
|
}
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
if (
|
|
209
|
-
|
|
210
|
-
}
|
|
211
|
-
if (falApiKey && canUseFal) {
|
|
209
|
+
return { text: null, error: groqError, bytes, mediaType, filename };
|
|
210
|
+
}
|
|
211
|
+
async function transcribeGroqFileFirst({ filePath, mediaType, filename, groqApiKey, assemblyaiApiKey, geminiApiKey, openaiApiKey, falApiKey, segmentSeconds, totalDurationSeconds, onProgress, env, notes, }) {
|
|
212
|
+
const stat = await fs.stat(filePath);
|
|
213
|
+
if (stat.size <= MAX_OPENAI_UPLOAD_BYTES) {
|
|
214
|
+
const fileBytes = new Uint8Array(await fs.readFile(filePath));
|
|
212
215
|
try {
|
|
213
|
-
const text = await
|
|
214
|
-
if (text)
|
|
215
|
-
return { text, provider: "
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
provider: "fal",
|
|
220
|
-
error: new Error("FAL transcription returned empty text"),
|
|
221
|
-
notes,
|
|
222
|
-
};
|
|
216
|
+
const text = await transcribeWithGroq(fileBytes, mediaType, filename, groqApiKey);
|
|
217
|
+
if (text)
|
|
218
|
+
return { text, provider: "groq", error: null, notes };
|
|
219
|
+
const error = new Error("Groq transcription returned empty text");
|
|
220
|
+
notes.push("Groq transcription returned empty text; falling back to local/AssemblyAI/Gemini/OpenAI");
|
|
221
|
+
return { text: null, provider: "groq", error, notes };
|
|
223
222
|
}
|
|
224
223
|
catch (error) {
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
error: wrapError("FAL transcription failed", error),
|
|
229
|
-
notes,
|
|
230
|
-
};
|
|
224
|
+
const wrapped = wrapError("Groq transcription failed", error);
|
|
225
|
+
notes.push(`Groq transcription failed; falling back to local/AssemblyAI/Gemini/OpenAI: ${error instanceof Error ? error.message : String(error)}`);
|
|
226
|
+
return { text: null, provider: "groq", error: wrapped, notes };
|
|
231
227
|
}
|
|
232
228
|
}
|
|
233
|
-
const
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
229
|
+
const canChunk = await isFfmpegAvailable();
|
|
230
|
+
if (!canChunk) {
|
|
231
|
+
const error = new Error(`File too large for Groq upload (${formatBytes(stat.size)}); trying local providers`);
|
|
232
|
+
notes.push(error.message);
|
|
233
|
+
return { text: null, provider: "groq", error, notes };
|
|
234
|
+
}
|
|
235
|
+
const chunked = await transcribeChunkedFile({
|
|
236
|
+
filePath,
|
|
237
|
+
segmentSeconds,
|
|
238
|
+
totalDurationSeconds,
|
|
239
|
+
onProgress,
|
|
240
|
+
transcribeSegment: ({ bytes, filename }) => transcribeMediaWithWhisper({
|
|
241
|
+
bytes,
|
|
242
|
+
mediaType: "audio/mpeg",
|
|
243
|
+
filename,
|
|
244
|
+
groqApiKey,
|
|
245
|
+
assemblyaiApiKey,
|
|
246
|
+
geminiApiKey,
|
|
247
|
+
openaiApiKey,
|
|
248
|
+
falApiKey,
|
|
249
|
+
env,
|
|
250
|
+
}),
|
|
251
|
+
});
|
|
252
|
+
if (chunked.notes.length > 0)
|
|
253
|
+
notes.push(...chunked.notes);
|
|
254
|
+
if (chunked.text)
|
|
255
|
+
return { ...chunked, notes };
|
|
256
|
+
const error = chunked.error ?? new Error("Groq chunked transcription failed");
|
|
257
|
+
notes.push(`Groq chunked transcription failed; falling back to local/AssemblyAI/Gemini/OpenAI: ${error.message}`);
|
|
258
|
+
return { text: null, provider: "groq", error, notes };
|
|
242
259
|
}
|
|
243
|
-
|
|
244
|
-
const
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
}
|
|
261
|
-
catch (error) {
|
|
262
|
-
groqError = wrapError("Groq transcription failed", error);
|
|
263
|
-
notes.push(`Groq transcription failed; falling back to local/OpenAI: ${error instanceof Error ? error.message : String(error)}`);
|
|
264
|
-
}
|
|
265
|
-
}
|
|
266
|
-
else {
|
|
267
|
-
groqError = new Error(`File too large for Groq upload (${formatBytes(stat.size)}); trying local providers`);
|
|
268
|
-
notes.push(groqError.message);
|
|
269
|
-
}
|
|
260
|
+
async function transcribeWithLocalOnnx({ bytes, mediaType, filename, totalDurationSeconds, onProgress, env, notes, }) {
|
|
261
|
+
const onnxPreference = resolveOnnxModelPreference(env);
|
|
262
|
+
if (!onnxPreference)
|
|
263
|
+
return null;
|
|
264
|
+
const onnx = await transcribeWithOnnxCli({
|
|
265
|
+
model: onnxPreference,
|
|
266
|
+
bytes,
|
|
267
|
+
mediaType,
|
|
268
|
+
filename,
|
|
269
|
+
totalDurationSeconds,
|
|
270
|
+
onProgress,
|
|
271
|
+
env,
|
|
272
|
+
});
|
|
273
|
+
if (onnx.text) {
|
|
274
|
+
if (onnx.notes.length > 0)
|
|
275
|
+
notes.push(...onnx.notes);
|
|
276
|
+
return { ...onnx, notes };
|
|
270
277
|
}
|
|
271
|
-
|
|
278
|
+
if (onnx.notes.length > 0)
|
|
279
|
+
notes.push(...onnx.notes);
|
|
280
|
+
if (onnx.error) {
|
|
281
|
+
notes.push(`${onnx.provider ?? "onnx"} failed; falling back to Whisper: ${onnx.error.message}`);
|
|
282
|
+
}
|
|
283
|
+
return null;
|
|
284
|
+
}
|
|
285
|
+
async function transcribeWithLocalOnnxFile({ filePath, mediaType, totalDurationSeconds, onProgress, env, notes, }) {
|
|
272
286
|
const onnxPreference = resolveOnnxModelPreference(env);
|
|
273
|
-
if (onnxPreference)
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
notes.push(...onnx.notes);
|
|
291
|
-
return { ...onnx, notes };
|
|
292
|
-
}
|
|
287
|
+
if (!onnxPreference)
|
|
288
|
+
return null;
|
|
289
|
+
onProgress?.({
|
|
290
|
+
partIndex: null,
|
|
291
|
+
parts: null,
|
|
292
|
+
processedDurationSeconds: null,
|
|
293
|
+
totalDurationSeconds,
|
|
294
|
+
});
|
|
295
|
+
const onnx = await transcribeWithOnnxCliFile({
|
|
296
|
+
model: onnxPreference,
|
|
297
|
+
filePath,
|
|
298
|
+
mediaType,
|
|
299
|
+
totalDurationSeconds,
|
|
300
|
+
onProgress,
|
|
301
|
+
env,
|
|
302
|
+
});
|
|
303
|
+
if (onnx.text) {
|
|
293
304
|
if (onnx.notes.length > 0)
|
|
294
305
|
notes.push(...onnx.notes);
|
|
295
|
-
|
|
296
|
-
notes.push(`${onnx.provider ?? "onnx"} failed; falling back to Whisper: ${onnx.error.message}`);
|
|
297
|
-
}
|
|
306
|
+
return { ...onnx, notes };
|
|
298
307
|
}
|
|
299
|
-
|
|
308
|
+
if (onnx.notes.length > 0)
|
|
309
|
+
notes.push(...onnx.notes);
|
|
310
|
+
if (onnx.error) {
|
|
311
|
+
notes.push(`${onnx.provider ?? "onnx"} failed; falling back to Whisper: ${onnx.error.message}`);
|
|
312
|
+
}
|
|
313
|
+
return null;
|
|
314
|
+
}
|
|
315
|
+
async function transcribeWithLocalWhisperBytes({ bytes, mediaType, filename, totalDurationSeconds, onProgress, notes, }) {
|
|
300
316
|
const localReady = await isWhisperCppReady();
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
317
|
+
if (!localReady)
|
|
318
|
+
return null;
|
|
319
|
+
const nameHint = filename?.trim() ? basename(filename.trim()) : "media";
|
|
320
|
+
const tempFile = join(tmpdir(), `summarize-whisper-local-${randomUUID()}-${ensureWhisperFilenameExtension(nameHint, mediaType)}`);
|
|
321
|
+
try {
|
|
322
|
+
await fs.writeFile(tempFile, bytes);
|
|
323
|
+
const result = await safeTranscribeWithWhisperCppFile({
|
|
324
|
+
filePath: tempFile,
|
|
325
|
+
mediaType,
|
|
307
326
|
totalDurationSeconds,
|
|
327
|
+
onProgress,
|
|
308
328
|
});
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
totalDurationSeconds,
|
|
314
|
-
onProgress,
|
|
315
|
-
});
|
|
329
|
+
if (result.text) {
|
|
330
|
+
if (result.notes.length > 0)
|
|
331
|
+
notes.push(...result.notes);
|
|
332
|
+
return { ...result, notes };
|
|
316
333
|
}
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
error: wrapError("whisper.cpp failed", error),
|
|
322
|
-
notes: [],
|
|
323
|
-
};
|
|
324
|
-
}
|
|
325
|
-
if (local.text) {
|
|
326
|
-
if (local.notes.length > 0)
|
|
327
|
-
notes.push(...local.notes);
|
|
328
|
-
return { ...local, notes };
|
|
329
|
-
}
|
|
330
|
-
if (local.notes.length > 0)
|
|
331
|
-
notes.push(...local.notes);
|
|
332
|
-
if (local.error) {
|
|
333
|
-
notes.push(`whisper.cpp failed; falling back to remote Whisper: ${local.error.message}`);
|
|
334
|
+
if (result.notes.length > 0)
|
|
335
|
+
notes.push(...result.notes);
|
|
336
|
+
if (result.error) {
|
|
337
|
+
notes.push(`whisper.cpp failed; falling back to remote Whisper: ${result.error.message}`);
|
|
334
338
|
}
|
|
339
|
+
return null;
|
|
335
340
|
}
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
if (groqError) {
|
|
339
|
-
return {
|
|
340
|
-
text: null,
|
|
341
|
-
provider: "groq",
|
|
342
|
-
error: groqError,
|
|
343
|
-
notes,
|
|
344
|
-
};
|
|
345
|
-
}
|
|
346
|
-
return {
|
|
347
|
-
text: null,
|
|
348
|
-
provider: null,
|
|
349
|
-
error: new Error("No transcription providers available (install whisper-cpp or set GROQ_API_KEY, OPENAI_API_KEY, or FAL_KEY)"),
|
|
350
|
-
notes,
|
|
351
|
-
};
|
|
352
|
-
}
|
|
353
|
-
if (openaiApiKey && stat.size > MAX_OPENAI_UPLOAD_BYTES) {
|
|
354
|
-
const canChunk = await isFfmpegAvailable();
|
|
355
|
-
if (!canChunk) {
|
|
356
|
-
notes.push(`Media too large for Whisper upload (${formatBytes(stat.size)}); install ffmpeg to enable chunked transcription`);
|
|
357
|
-
const head = await readFirstBytes(filePath, MAX_OPENAI_UPLOAD_BYTES);
|
|
358
|
-
const partial = await transcribeMediaWithWhisper({
|
|
359
|
-
bytes: head,
|
|
360
|
-
mediaType,
|
|
361
|
-
filename,
|
|
362
|
-
groqApiKey,
|
|
363
|
-
skipGroq: skipGroqInNestedCalls,
|
|
364
|
-
openaiApiKey,
|
|
365
|
-
falApiKey,
|
|
366
|
-
env,
|
|
367
|
-
});
|
|
368
|
-
if (partial.notes.length > 0)
|
|
369
|
-
notes.push(...partial.notes);
|
|
370
|
-
return { ...partial, notes };
|
|
371
|
-
}
|
|
372
|
-
const dir = await fs.mkdtemp(join(tmpdir(), "summarize-whisper-segments-"));
|
|
373
|
-
try {
|
|
374
|
-
const pattern = join(dir, "part-%03d.mp3");
|
|
375
|
-
await runFfmpegSegment({
|
|
376
|
-
inputPath: filePath,
|
|
377
|
-
outputPattern: pattern,
|
|
378
|
-
segmentSeconds,
|
|
379
|
-
});
|
|
380
|
-
const files = (await fs.readdir(dir))
|
|
381
|
-
.filter((name) => name.startsWith("part-") && name.endsWith(".mp3"))
|
|
382
|
-
.sort((a, b) => a.localeCompare(b));
|
|
383
|
-
if (files.length === 0) {
|
|
384
|
-
return {
|
|
385
|
-
text: null,
|
|
386
|
-
provider: null,
|
|
387
|
-
error: new Error("ffmpeg produced no audio segments"),
|
|
388
|
-
notes,
|
|
389
|
-
};
|
|
390
|
-
}
|
|
391
|
-
notes.push(`ffmpeg chunked media into ${files.length} parts (${segmentSeconds}s each)`);
|
|
392
|
-
onProgress?.({
|
|
393
|
-
partIndex: null,
|
|
394
|
-
parts: files.length,
|
|
395
|
-
processedDurationSeconds: null,
|
|
396
|
-
totalDurationSeconds,
|
|
397
|
-
});
|
|
398
|
-
const parts = [];
|
|
399
|
-
let usedProvider = null;
|
|
400
|
-
for (const [index, name] of files.entries()) {
|
|
401
|
-
const segmentPath = join(dir, name);
|
|
402
|
-
const segmentBytes = new Uint8Array(await fs.readFile(segmentPath));
|
|
403
|
-
const result = await transcribeMediaWithWhisper({
|
|
404
|
-
bytes: segmentBytes,
|
|
405
|
-
mediaType: "audio/mpeg",
|
|
406
|
-
filename: name,
|
|
407
|
-
groqApiKey,
|
|
408
|
-
skipGroq: skipGroqInNestedCalls,
|
|
409
|
-
openaiApiKey,
|
|
410
|
-
falApiKey,
|
|
411
|
-
onProgress: null,
|
|
412
|
-
env,
|
|
413
|
-
});
|
|
414
|
-
if (!usedProvider && result.provider)
|
|
415
|
-
usedProvider = result.provider;
|
|
416
|
-
if (result.error && !result.text) {
|
|
417
|
-
return { text: null, provider: usedProvider, error: result.error, notes };
|
|
418
|
-
}
|
|
419
|
-
if (result.text)
|
|
420
|
-
parts.push(result.text);
|
|
421
|
-
// Coarse but useful: update based on part boundaries. Duration is best-effort (RSS hints or
|
|
422
|
-
// ffprobe); the per-part time is stable enough to make the spinner feel alive.
|
|
423
|
-
const processedSeconds = Math.max(0, (index + 1) * segmentSeconds);
|
|
424
|
-
onProgress?.({
|
|
425
|
-
partIndex: index + 1,
|
|
426
|
-
parts: files.length,
|
|
427
|
-
processedDurationSeconds: typeof totalDurationSeconds === "number" && totalDurationSeconds > 0
|
|
428
|
-
? Math.min(processedSeconds, totalDurationSeconds)
|
|
429
|
-
: null,
|
|
430
|
-
totalDurationSeconds,
|
|
431
|
-
});
|
|
432
|
-
}
|
|
433
|
-
return { text: parts.join("\n\n"), provider: usedProvider, error: null, notes };
|
|
434
|
-
}
|
|
435
|
-
finally {
|
|
436
|
-
await fs.rm(dir, { recursive: true, force: true }).catch(() => { });
|
|
437
|
-
}
|
|
341
|
+
finally {
|
|
342
|
+
await fs.unlink(tempFile).catch(() => { });
|
|
438
343
|
}
|
|
439
|
-
|
|
344
|
+
}
|
|
345
|
+
async function transcribeWithLocalWhisperFile({ filePath, mediaType, totalDurationSeconds, onProgress, notes, }) {
|
|
346
|
+
const localReady = await isWhisperCppReady();
|
|
347
|
+
if (!localReady)
|
|
348
|
+
return null;
|
|
440
349
|
onProgress?.({
|
|
441
350
|
partIndex: null,
|
|
442
351
|
parts: null,
|
|
443
352
|
processedDurationSeconds: null,
|
|
444
353
|
totalDurationSeconds,
|
|
445
354
|
});
|
|
446
|
-
const result = await
|
|
447
|
-
|
|
355
|
+
const result = await safeTranscribeWithWhisperCppFile({
|
|
356
|
+
filePath,
|
|
448
357
|
mediaType,
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
skipGroq: skipGroqInNestedCalls,
|
|
452
|
-
openaiApiKey,
|
|
453
|
-
falApiKey,
|
|
454
|
-
env,
|
|
358
|
+
totalDurationSeconds,
|
|
359
|
+
onProgress,
|
|
455
360
|
});
|
|
361
|
+
if (result.text) {
|
|
362
|
+
if (result.notes.length > 0)
|
|
363
|
+
notes.push(...result.notes);
|
|
364
|
+
return { ...result, notes };
|
|
365
|
+
}
|
|
456
366
|
if (result.notes.length > 0)
|
|
457
367
|
notes.push(...result.notes);
|
|
458
|
-
|
|
368
|
+
if (result.error) {
|
|
369
|
+
notes.push(`whisper.cpp failed; falling back to remote Whisper: ${result.error.message}`);
|
|
370
|
+
}
|
|
371
|
+
return null;
|
|
372
|
+
}
|
|
373
|
+
async function safeTranscribeWithWhisperCppFile(args) {
|
|
374
|
+
try {
|
|
375
|
+
return await transcribeWithWhisperCppFile(args);
|
|
376
|
+
}
|
|
377
|
+
catch (error) {
|
|
378
|
+
return {
|
|
379
|
+
text: null,
|
|
380
|
+
provider: "whisper.cpp",
|
|
381
|
+
error: wrapError("whisper.cpp failed", error),
|
|
382
|
+
notes: [],
|
|
383
|
+
};
|
|
384
|
+
}
|
|
459
385
|
}
|
|
460
386
|
//# sourceMappingURL=core.js.map
|