@steipete/summarize-core 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. package/LICENSE +21 -0
  2. package/dist/esm/content/bun.js +21 -0
  3. package/dist/esm/content/bun.js.map +1 -0
  4. package/dist/esm/content/index.js +1 -0
  5. package/dist/esm/content/index.js.map +1 -1
  6. package/dist/esm/content/link-preview/client.js +6 -0
  7. package/dist/esm/content/link-preview/client.js.map +1 -1
  8. package/dist/esm/content/link-preview/content/fetcher.js +19 -2
  9. package/dist/esm/content/link-preview/content/fetcher.js.map +1 -1
  10. package/dist/esm/content/link-preview/content/firecrawl.js.map +1 -1
  11. package/dist/esm/content/link-preview/content/html.js.map +1 -1
  12. package/dist/esm/content/link-preview/content/index.js +29 -12
  13. package/dist/esm/content/link-preview/content/index.js.map +1 -1
  14. package/dist/esm/content/link-preview/content/utils.js.map +1 -1
  15. package/dist/esm/content/transcript/index.js +2 -0
  16. package/dist/esm/content/transcript/index.js.map +1 -1
  17. package/dist/esm/content/transcript/providers/generic.js +10 -11
  18. package/dist/esm/content/transcript/providers/generic.js.map +1 -1
  19. package/dist/esm/content/transcript/providers/podcast/apple-flow.js.map +1 -1
  20. package/dist/esm/content/transcript/providers/podcast/media.js +9 -1
  21. package/dist/esm/content/transcript/providers/podcast/media.js.map +1 -1
  22. package/dist/esm/content/transcript/providers/podcast/provider-flow.js +157 -0
  23. package/dist/esm/content/transcript/providers/podcast/provider-flow.js.map +1 -0
  24. package/dist/esm/content/transcript/providers/podcast/rss-feed.js +123 -0
  25. package/dist/esm/content/transcript/providers/podcast/rss-feed.js.map +1 -0
  26. package/dist/esm/content/transcript/providers/podcast/rss-transcript.js +113 -0
  27. package/dist/esm/content/transcript/providers/podcast/rss-transcript.js.map +1 -0
  28. package/dist/esm/content/transcript/providers/podcast/rss.js +2 -226
  29. package/dist/esm/content/transcript/providers/podcast/rss.js.map +1 -1
  30. package/dist/esm/content/transcript/providers/podcast/spotify-flow.js.map +1 -1
  31. package/dist/esm/content/transcript/providers/podcast.js +26 -155
  32. package/dist/esm/content/transcript/providers/podcast.js.map +1 -1
  33. package/dist/esm/content/transcript/providers/transcription-capability.js +22 -0
  34. package/dist/esm/content/transcript/providers/transcription-capability.js.map +1 -0
  35. package/dist/esm/content/transcript/providers/transcription-start.js +40 -30
  36. package/dist/esm/content/transcript/providers/transcription-start.js.map +1 -1
  37. package/dist/esm/content/transcript/providers/youtube/api.js +3 -2
  38. package/dist/esm/content/transcript/providers/youtube/api.js.map +1 -1
  39. package/dist/esm/content/transcript/providers/youtube/captions-player.js +173 -0
  40. package/dist/esm/content/transcript/providers/youtube/captions-player.js.map +1 -0
  41. package/dist/esm/content/transcript/providers/youtube/captions-shared.js +8 -0
  42. package/dist/esm/content/transcript/providers/youtube/captions-shared.js.map +1 -0
  43. package/dist/esm/content/transcript/providers/youtube/captions-transcript.js +361 -0
  44. package/dist/esm/content/transcript/providers/youtube/captions-transcript.js.map +1 -0
  45. package/dist/esm/content/transcript/providers/youtube/captions.js +2 -557
  46. package/dist/esm/content/transcript/providers/youtube/captions.js.map +1 -1
  47. package/dist/esm/content/transcript/providers/youtube/provider-flow.js +189 -0
  48. package/dist/esm/content/transcript/providers/youtube/provider-flow.js.map +1 -0
  49. package/dist/esm/content/transcript/providers/youtube/yt-dlp.js +7 -2
  50. package/dist/esm/content/transcript/providers/youtube/yt-dlp.js.map +1 -1
  51. package/dist/esm/content/transcript/providers/youtube.js +42 -194
  52. package/dist/esm/content/transcript/providers/youtube.js.map +1 -1
  53. package/dist/esm/content/transcript/transcription-config.js +24 -4
  54. package/dist/esm/content/transcript/transcription-config.js.map +1 -1
  55. package/dist/esm/content/url.js +3 -3
  56. package/dist/esm/content/url.js.map +1 -1
  57. package/dist/esm/processes.js.map +1 -1
  58. package/dist/esm/prompts/format.js +6 -0
  59. package/dist/esm/prompts/format.js.map +1 -1
  60. package/dist/esm/prompts/link-summary.js +27 -3
  61. package/dist/esm/prompts/link-summary.js.map +1 -1
  62. package/dist/esm/transcription/onnx-cli.js.map +1 -1
  63. package/dist/esm/transcription/whisper/assemblyai.js +132 -0
  64. package/dist/esm/transcription/whisper/assemblyai.js.map +1 -0
  65. package/dist/esm/transcription/whisper/chunking.js +64 -0
  66. package/dist/esm/transcription/whisper/chunking.js.map +1 -0
  67. package/dist/esm/transcription/whisper/cloud-providers.js +69 -0
  68. package/dist/esm/transcription/whisper/cloud-providers.js.map +1 -0
  69. package/dist/esm/transcription/whisper/core.js +316 -390
  70. package/dist/esm/transcription/whisper/core.js.map +1 -1
  71. package/dist/esm/transcription/whisper/gemini.js +324 -0
  72. package/dist/esm/transcription/whisper/gemini.js.map +1 -0
  73. package/dist/esm/transcription/whisper/preferences.js +16 -0
  74. package/dist/esm/transcription/whisper/preferences.js.map +1 -0
  75. package/dist/esm/transcription/whisper/provider-setup.js +62 -0
  76. package/dist/esm/transcription/whisper/provider-setup.js.map +1 -0
  77. package/dist/esm/transcription/whisper/remote-provider-attempts.js +189 -0
  78. package/dist/esm/transcription/whisper/remote-provider-attempts.js.map +1 -0
  79. package/dist/esm/transcription/whisper/remote.js +220 -0
  80. package/dist/esm/transcription/whisper/remote.js.map +1 -0
  81. package/dist/esm/transcription/whisper/whisper-cpp.js.map +1 -1
  82. package/dist/types/content/bun.d.ts +6 -0
  83. package/dist/types/content/index.d.ts +1 -0
  84. package/dist/types/content/link-preview/client.d.ts +3 -1
  85. package/dist/types/content/link-preview/content/fetcher.d.ts +1 -1
  86. package/dist/types/content/link-preview/content/html.d.ts +1 -1
  87. package/dist/types/content/link-preview/deps.d.ts +8 -2
  88. package/dist/types/content/link-preview/types.d.ts +1 -1
  89. package/dist/types/content/transcript/providers/podcast/flow-context.d.ts +3 -0
  90. package/dist/types/content/transcript/providers/podcast/media.d.ts +4 -2
  91. package/dist/types/content/transcript/providers/podcast/provider-flow.d.ts +7 -0
  92. package/dist/types/content/transcript/providers/podcast/rss-feed.d.ts +15 -0
  93. package/dist/types/content/transcript/providers/podcast/rss-transcript.d.ts +12 -0
  94. package/dist/types/content/transcript/providers/podcast/rss.d.ts +2 -24
  95. package/dist/types/content/transcript/providers/transcription-capability.d.ts +18 -0
  96. package/dist/types/content/transcript/providers/transcription-start.d.ts +10 -3
  97. package/dist/types/content/transcript/providers/youtube/captions-player.d.ts +12 -0
  98. package/dist/types/content/transcript/providers/youtube/captions-shared.d.ts +42 -0
  99. package/dist/types/content/transcript/providers/youtube/captions-transcript.d.ts +4 -0
  100. package/dist/types/content/transcript/providers/youtube/captions.d.ts +2 -19
  101. package/dist/types/content/transcript/providers/youtube/provider-flow.d.ts +34 -0
  102. package/dist/types/content/transcript/providers/youtube/yt-dlp.d.ts +4 -2
  103. package/dist/types/content/transcript/transcription-config.d.ts +6 -0
  104. package/dist/types/content/transcript/types.d.ts +1 -0
  105. package/dist/types/prompts/format.d.ts +1 -0
  106. package/dist/types/prompts/link-summary.d.ts +2 -1
  107. package/dist/types/transcription/whisper/assemblyai.d.ts +17 -0
  108. package/dist/types/transcription/whisper/chunking.d.ts +11 -0
  109. package/dist/types/transcription/whisper/cloud-providers.d.ts +22 -0
  110. package/dist/types/transcription/whisper/core.d.ts +12 -14
  111. package/dist/types/transcription/whisper/gemini.d.ts +14 -0
  112. package/dist/types/transcription/whisper/preferences.d.ts +4 -0
  113. package/dist/types/transcription/whisper/provider-setup.d.ts +30 -0
  114. package/dist/types/transcription/whisper/remote-provider-attempts.d.ts +51 -0
  115. package/dist/types/transcription/whisper/remote.d.ts +51 -0
  116. package/dist/types/transcription/whisper/types.d.ts +1 -1
  117. package/package.json +15 -16
@@ -2,459 +2,385 @@ import { randomUUID } from "node:crypto";
2
2
  import { promises as fs } from "node:fs";
3
3
  import { tmpdir } from "node:os";
4
4
  import { basename, join } from "node:path";
5
- import { resolvePreferredOnnxModel, transcribeWithOnnxCli, transcribeWithOnnxCliFile, } from "../onnx-cli.js";
5
+ import { transcribeWithOnnxCli, transcribeWithOnnxCliFile } from "../onnx-cli.js";
6
+ import { transcribeChunkedFile } from "./chunking.js";
6
7
  import { DEFAULT_SEGMENT_SECONDS, MAX_OPENAI_UPLOAD_BYTES } from "./constants.js";
7
- import { transcribeWithFal } from "./fal.js";
8
- import { isFfmpegAvailable, runFfmpegSegment, transcodeBytesToMp3 } from "./ffmpeg.js";
8
+ import { isFfmpegAvailable, transcodeBytesToMp3 } from "./ffmpeg.js";
9
9
  import { shouldRetryGroqViaFfmpeg, transcribeWithGroq } from "./groq.js";
10
- import { shouldRetryOpenAiViaFfmpeg, transcribeWithOpenAi } from "./openai.js";
11
- import { ensureWhisperFilenameExtension, formatBytes, readFirstBytes, wrapError } from "./utils.js";
10
+ import { resolveOnnxModelPreference } from "./preferences.js";
11
+ import { transcribeBytesWithRemoteFallbacks, transcribeFileWithRemoteFallbacks, transcribeOversizedBytesViaTempFile, } from "./remote.js";
12
+ import { ensureWhisperFilenameExtension, formatBytes, wrapError } from "./utils.js";
12
13
  import { isWhisperCppReady, transcribeWithWhisperCppFile } from "./whisper-cpp.js";
13
- function resolveTranscriberPreference(env) {
14
- const raw = env.SUMMARIZE_TRANSCRIBER?.trim().toLowerCase();
15
- if (raw === "auto" || raw === "whisper" || raw === "parakeet" || raw === "canary")
16
- return raw;
17
- return "auto";
18
- }
19
- function resolveOnnxModelPreference(env) {
20
- const preference = resolveTranscriberPreference(env);
21
- if (preference === "parakeet" || preference === "canary")
22
- return preference;
23
- if (preference === "auto")
24
- return resolvePreferredOnnxModel(env);
25
- return null;
26
- }
27
- export async function transcribeMediaWithWhisper({ bytes, mediaType, filename, groqApiKey, skipGroq = false, openaiApiKey, falApiKey, totalDurationSeconds = null, onProgress, env = process.env, }) {
14
+ export async function transcribeMediaWithWhisper({ bytes, mediaType, filename, groqApiKey, skipGroq = false, assemblyaiApiKey = null, geminiApiKey = null, openaiApiKey, falApiKey, totalDurationSeconds = null, onProgress, env = process.env, }) {
28
15
  const notes = [];
29
- // 1. Groq (cloud, free, fastest)
30
16
  let groqError = null;
31
17
  if (groqApiKey && !skipGroq) {
32
- try {
33
- const text = await transcribeWithGroq(bytes, mediaType, filename, groqApiKey);
34
- if (text) {
35
- return { text, provider: "groq", error: null, notes };
36
- }
37
- groqError = new Error("Groq transcription returned empty text");
38
- }
39
- catch (error) {
40
- groqError = wrapError("Groq transcription failed", error);
41
- }
42
- }
43
- if (!skipGroq && groqApiKey && groqError && shouldRetryGroqViaFfmpeg(groqError)) {
44
- const canTranscode = await isFfmpegAvailable();
45
- if (canTranscode) {
46
- try {
47
- notes.push("Groq could not decode media; transcoding via ffmpeg and retrying");
48
- const mp3Bytes = await transcodeBytesToMp3(bytes);
49
- const retried = await transcribeWithGroq(mp3Bytes, "audio/mpeg", "audio.mp3", groqApiKey);
50
- if (retried) {
51
- return { text: retried, provider: "groq", error: null, notes };
52
- }
53
- groqError = new Error("Groq transcription returned empty text after ffmpeg transcode");
54
- bytes = mp3Bytes;
55
- mediaType = "audio/mpeg";
56
- filename = "audio.mp3";
57
- }
58
- catch (error) {
59
- notes.push(`ffmpeg transcode failed; cannot retry Groq decode error: ${error instanceof Error ? error.message : String(error)}`);
60
- }
61
- }
62
- else {
63
- notes.push("Groq could not decode media; install ffmpeg to enable transcoding retry");
18
+ const groqResult = await transcribeWithGroqFirst({
19
+ bytes,
20
+ mediaType,
21
+ filename,
22
+ groqApiKey,
23
+ notes,
24
+ });
25
+ bytes = groqResult.bytes;
26
+ mediaType = groqResult.mediaType;
27
+ filename = groqResult.filename;
28
+ if (groqResult.text) {
29
+ return { text: groqResult.text, provider: "groq", error: null, notes };
64
30
  }
31
+ groqError = groqResult.error;
65
32
  }
66
33
  if (groqError) {
67
- notes.push(`Groq transcription failed; falling back to local/OpenAI: ${groqError.message}`);
34
+ notes.push(`Groq transcription failed; falling back to local/AssemblyAI/Gemini/OpenAI: ${groqError.message}`);
68
35
  }
69
- // 2. ONNX (local)
70
- const onnxPreference = resolveOnnxModelPreference(env);
71
- if (onnxPreference) {
72
- const onnx = await transcribeWithOnnxCli({
73
- model: onnxPreference,
36
+ const onnx = await transcribeWithLocalOnnx({
37
+ bytes,
38
+ mediaType,
39
+ filename,
40
+ totalDurationSeconds,
41
+ onProgress,
42
+ env,
43
+ notes,
44
+ });
45
+ if (onnx)
46
+ return onnx;
47
+ const local = await transcribeWithLocalWhisperBytes({
48
+ bytes,
49
+ mediaType,
50
+ filename,
51
+ totalDurationSeconds,
52
+ onProgress,
53
+ notes,
54
+ });
55
+ if (local)
56
+ return local;
57
+ return await transcribeBytesWithRemoteFallbacks({
58
+ bytes,
59
+ mediaType,
60
+ filename,
61
+ notes,
62
+ groqApiKey,
63
+ groqError,
64
+ assemblyaiApiKey,
65
+ geminiApiKey,
66
+ openaiApiKey,
67
+ falApiKey,
68
+ env,
69
+ onProgress,
70
+ transcribeOversizedBytesWithChunking: ({ bytes, mediaType, filename, onProgress }) => transcribeOversizedBytesViaTempFile({
74
71
  bytes,
75
72
  mediaType,
76
73
  filename,
74
+ onProgress,
75
+ transcribeFile: ({ filePath, mediaType, filename, onProgress }) => transcribeMediaFileWithWhisper({
76
+ filePath,
77
+ mediaType,
78
+ filename,
79
+ groqApiKey,
80
+ assemblyaiApiKey,
81
+ geminiApiKey,
82
+ openaiApiKey,
83
+ falApiKey,
84
+ segmentSeconds: DEFAULT_SEGMENT_SECONDS,
85
+ onProgress,
86
+ env,
87
+ }),
88
+ }),
89
+ });
90
+ }
91
+ export async function transcribeMediaFileWithWhisper({ filePath, mediaType, filename, groqApiKey, assemblyaiApiKey = null, geminiApiKey = null, openaiApiKey, falApiKey, segmentSeconds = DEFAULT_SEGMENT_SECONDS, totalDurationSeconds = null, onProgress = null, env = process.env, }) {
92
+ const notes = [];
93
+ let skipGroqInNestedCalls = false;
94
+ let groqError = null;
95
+ if (groqApiKey) {
96
+ skipGroqInNestedCalls = true;
97
+ const groqResult = await transcribeGroqFileFirst({
98
+ filePath,
99
+ mediaType,
100
+ filename,
101
+ groqApiKey,
102
+ assemblyaiApiKey,
103
+ geminiApiKey,
104
+ openaiApiKey,
105
+ falApiKey,
106
+ segmentSeconds,
77
107
  totalDurationSeconds,
78
108
  onProgress,
79
109
  env,
80
- });
81
- if (onnx.text) {
82
- if (onnx.notes.length > 0)
83
- notes.push(...onnx.notes);
84
- return { ...onnx, notes };
85
- }
86
- if (onnx.notes.length > 0)
87
- notes.push(...onnx.notes);
88
- if (onnx.error) {
89
- notes.push(`${onnx.provider ?? "onnx"} failed; falling back to Whisper: ${onnx.error.message}`);
90
- }
91
- }
92
- // 3. whisper.cpp (local)
93
- const localReady = await isWhisperCppReady();
94
- let local = null;
95
- if (localReady) {
96
- const nameHint = filename?.trim() ? basename(filename.trim()) : "media";
97
- const tempFile = join(tmpdir(), `summarize-whisper-local-${randomUUID()}-${ensureWhisperFilenameExtension(nameHint, mediaType)}`);
98
- try {
99
- await fs.writeFile(tempFile, bytes);
100
- try {
101
- local = await transcribeWithWhisperCppFile({
102
- filePath: tempFile,
103
- mediaType,
104
- totalDurationSeconds,
105
- onProgress,
106
- });
107
- }
108
- catch (error) {
109
- local = {
110
- text: null,
111
- provider: "whisper.cpp",
112
- error: wrapError("whisper.cpp failed", error),
113
- notes: [],
114
- };
115
- }
116
- if (local.text) {
117
- if (local.notes.length > 0)
118
- notes.push(...local.notes);
119
- return { ...local, notes };
120
- }
121
- if (local.notes.length > 0)
122
- notes.push(...local.notes);
123
- if (local.error) {
124
- notes.push(`whisper.cpp failed; falling back to remote Whisper: ${local.error.message}`);
125
- }
126
- }
127
- finally {
128
- await fs.unlink(tempFile).catch(() => { });
129
- }
130
- }
131
- // 4. OpenAI / FAL (cloud fallbacks)
132
- if (!groqApiKey && !openaiApiKey && !falApiKey) {
133
- return {
134
- text: null,
135
- provider: null,
136
- error: new Error("No transcription providers available (install whisper-cpp or set GROQ_API_KEY, OPENAI_API_KEY, or FAL_KEY)"),
137
110
  notes,
138
- };
111
+ });
112
+ if (groqResult.text)
113
+ return groqResult;
114
+ groqError = groqResult.error;
139
115
  }
140
- if (openaiApiKey && bytes.byteLength > MAX_OPENAI_UPLOAD_BYTES) {
141
- const canChunk = await isFfmpegAvailable();
142
- if (canChunk) {
143
- const tempFile = join(tmpdir(), `summarize-whisper-${randomUUID()}`);
144
- try {
145
- await fs.writeFile(tempFile, bytes);
146
- const chunked = await transcribeMediaFileWithWhisper({
147
- filePath: tempFile,
148
- mediaType,
149
- filename,
150
- groqApiKey,
151
- openaiApiKey,
152
- falApiKey,
153
- segmentSeconds: DEFAULT_SEGMENT_SECONDS,
154
- onProgress,
155
- env,
156
- });
157
- return chunked;
158
- }
159
- finally {
160
- await fs.unlink(tempFile).catch(() => { });
161
- }
162
- }
163
- notes.push(`Media too large for Whisper upload (${formatBytes(bytes.byteLength)}); transcribing first ${formatBytes(MAX_OPENAI_UPLOAD_BYTES)} only (install ffmpeg for full transcription)`);
164
- bytes = bytes.slice(0, MAX_OPENAI_UPLOAD_BYTES);
116
+ const onnx = await transcribeWithLocalOnnxFile({
117
+ filePath,
118
+ mediaType,
119
+ totalDurationSeconds,
120
+ onProgress,
121
+ env,
122
+ notes,
123
+ });
124
+ if (onnx)
125
+ return onnx;
126
+ const local = await transcribeWithLocalWhisperFile({
127
+ filePath,
128
+ mediaType,
129
+ totalDurationSeconds,
130
+ onProgress,
131
+ notes,
132
+ });
133
+ if (local)
134
+ return local;
135
+ return await transcribeFileWithRemoteFallbacks({
136
+ filePath,
137
+ mediaType,
138
+ filename,
139
+ notes,
140
+ groqApiKey,
141
+ groqError,
142
+ assemblyaiApiKey,
143
+ geminiApiKey,
144
+ openaiApiKey,
145
+ falApiKey,
146
+ env,
147
+ totalDurationSeconds,
148
+ onProgress,
149
+ transcribeChunkedFile: ({ filePath, segmentSeconds, totalDurationSeconds, onProgress }) => transcribeChunkedFile({
150
+ filePath,
151
+ segmentSeconds,
152
+ totalDurationSeconds,
153
+ onProgress,
154
+ transcribeSegment: ({ bytes, filename }) => transcribeMediaWithWhisper({
155
+ bytes,
156
+ mediaType: "audio/mpeg",
157
+ filename,
158
+ groqApiKey,
159
+ skipGroq: skipGroqInNestedCalls,
160
+ assemblyaiApiKey,
161
+ geminiApiKey,
162
+ openaiApiKey,
163
+ falApiKey,
164
+ env,
165
+ }),
166
+ }),
167
+ });
168
+ }
169
+ async function transcribeWithGroqFirst({ bytes, mediaType, filename, groqApiKey, notes, }) {
170
+ let groqError = null;
171
+ try {
172
+ const text = await transcribeWithGroq(bytes, mediaType, filename, groqApiKey);
173
+ if (text)
174
+ return { text, error: null, bytes, mediaType, filename };
175
+ groqError = new Error("Groq transcription returned empty text");
165
176
  }
166
- let openaiError = null;
167
- if (openaiApiKey) {
168
- try {
169
- const text = await transcribeWithOpenAi(bytes, mediaType, filename, openaiApiKey, { env });
170
- if (text) {
171
- return { text, provider: "openai", error: null, notes };
172
- }
173
- openaiError = new Error("OpenAI transcription returned empty text");
174
- }
175
- catch (error) {
176
- openaiError = wrapError("OpenAI transcription failed", error);
177
- }
177
+ catch (error) {
178
+ groqError = wrapError("Groq transcription failed", error);
178
179
  }
179
- if (openaiApiKey && openaiError && shouldRetryOpenAiViaFfmpeg(openaiError)) {
180
+ if (groqError && shouldRetryGroqViaFfmpeg(groqError)) {
180
181
  const canTranscode = await isFfmpegAvailable();
181
182
  if (canTranscode) {
182
183
  try {
183
- // Some providers hand out containers/codecs Whisper rejects. Transcoding to a small mono MP3
184
- // is the most reliable cross-format fallback (and also reduces upload size).
185
- notes.push("OpenAI could not decode media; transcoding via ffmpeg and retrying");
184
+ notes.push("Groq could not decode media; transcoding via ffmpeg and retrying");
186
185
  const mp3Bytes = await transcodeBytesToMp3(bytes);
187
- const retried = await transcribeWithOpenAi(mp3Bytes, "audio/mpeg", "audio.mp3", openaiApiKey, { env });
186
+ const retried = await transcribeWithGroq(mp3Bytes, "audio/mpeg", "audio.mp3", groqApiKey);
188
187
  if (retried) {
189
- return { text: retried, provider: "openai", error: null, notes };
188
+ return {
189
+ text: retried,
190
+ error: null,
191
+ bytes: mp3Bytes,
192
+ mediaType: "audio/mpeg",
193
+ filename: "audio.mp3",
194
+ };
190
195
  }
191
- openaiError = new Error("OpenAI transcription returned empty text after ffmpeg transcode");
196
+ groqError = new Error("Groq transcription returned empty text after ffmpeg transcode");
192
197
  bytes = mp3Bytes;
193
198
  mediaType = "audio/mpeg";
194
199
  filename = "audio.mp3";
195
200
  }
196
201
  catch (error) {
197
- notes.push(`ffmpeg transcode failed; cannot retry OpenAI decode error: ${error instanceof Error ? error.message : String(error)}`);
202
+ notes.push(`ffmpeg transcode failed; cannot retry Groq decode error: ${error instanceof Error ? error.message : String(error)}`);
198
203
  }
199
204
  }
200
205
  else {
201
- notes.push("OpenAI could not decode media; install ffmpeg to enable transcoding retry");
206
+ notes.push("Groq could not decode media; install ffmpeg to enable transcoding retry");
202
207
  }
203
208
  }
204
- const canUseFal = Boolean(falApiKey) && mediaType.toLowerCase().startsWith("audio/");
205
- if (openaiError && canUseFal) {
206
- notes.push(`OpenAI transcription failed; falling back to FAL: ${openaiError.message}`);
207
- }
208
- if (falApiKey && !canUseFal) {
209
- notes.push(`Skipping FAL transcription: unsupported mediaType ${mediaType}`);
210
- }
211
- if (falApiKey && canUseFal) {
209
+ return { text: null, error: groqError, bytes, mediaType, filename };
210
+ }
211
+ async function transcribeGroqFileFirst({ filePath, mediaType, filename, groqApiKey, assemblyaiApiKey, geminiApiKey, openaiApiKey, falApiKey, segmentSeconds, totalDurationSeconds, onProgress, env, notes, }) {
212
+ const stat = await fs.stat(filePath);
213
+ if (stat.size <= MAX_OPENAI_UPLOAD_BYTES) {
214
+ const fileBytes = new Uint8Array(await fs.readFile(filePath));
212
215
  try {
213
- const text = await transcribeWithFal(bytes, mediaType, falApiKey);
214
- if (text) {
215
- return { text, provider: "fal", error: null, notes };
216
- }
217
- return {
218
- text: null,
219
- provider: "fal",
220
- error: new Error("FAL transcription returned empty text"),
221
- notes,
222
- };
216
+ const text = await transcribeWithGroq(fileBytes, mediaType, filename, groqApiKey);
217
+ if (text)
218
+ return { text, provider: "groq", error: null, notes };
219
+ const error = new Error("Groq transcription returned empty text");
220
+ notes.push("Groq transcription returned empty text; falling back to local/AssemblyAI/Gemini/OpenAI");
221
+ return { text: null, provider: "groq", error, notes };
223
222
  }
224
223
  catch (error) {
225
- return {
226
- text: null,
227
- provider: "fal",
228
- error: wrapError("FAL transcription failed", error),
229
- notes,
230
- };
224
+ const wrapped = wrapError("Groq transcription failed", error);
225
+ notes.push(`Groq transcription failed; falling back to local/AssemblyAI/Gemini/OpenAI: ${error instanceof Error ? error.message : String(error)}`);
226
+ return { text: null, provider: "groq", error: wrapped, notes };
231
227
  }
232
228
  }
233
- const terminalError = openaiError ?? groqError ?? new Error("No transcription providers available");
234
- const terminalProvider = openaiError
235
- ? "openai"
236
- : groqError
237
- ? "groq"
238
- : openaiApiKey
239
- ? "openai"
240
- : null;
241
- return { text: null, provider: terminalProvider, error: terminalError, notes };
229
+ const canChunk = await isFfmpegAvailable();
230
+ if (!canChunk) {
231
+ const error = new Error(`File too large for Groq upload (${formatBytes(stat.size)}); trying local providers`);
232
+ notes.push(error.message);
233
+ return { text: null, provider: "groq", error, notes };
234
+ }
235
+ const chunked = await transcribeChunkedFile({
236
+ filePath,
237
+ segmentSeconds,
238
+ totalDurationSeconds,
239
+ onProgress,
240
+ transcribeSegment: ({ bytes, filename }) => transcribeMediaWithWhisper({
241
+ bytes,
242
+ mediaType: "audio/mpeg",
243
+ filename,
244
+ groqApiKey,
245
+ assemblyaiApiKey,
246
+ geminiApiKey,
247
+ openaiApiKey,
248
+ falApiKey,
249
+ env,
250
+ }),
251
+ });
252
+ if (chunked.notes.length > 0)
253
+ notes.push(...chunked.notes);
254
+ if (chunked.text)
255
+ return { ...chunked, notes };
256
+ const error = chunked.error ?? new Error("Groq chunked transcription failed");
257
+ notes.push(`Groq chunked transcription failed; falling back to local/AssemblyAI/Gemini/OpenAI: ${error.message}`);
258
+ return { text: null, provider: "groq", error, notes };
242
259
  }
243
- export async function transcribeMediaFileWithWhisper({ filePath, mediaType, filename, groqApiKey, openaiApiKey, falApiKey, segmentSeconds = DEFAULT_SEGMENT_SECONDS, totalDurationSeconds = null, onProgress = null, env = process.env, }) {
244
- const notes = [];
245
- const stat = await fs.stat(filePath);
246
- let skipGroqInNestedCalls = false;
247
- let groqError = null;
248
- // 1. Groq (cloud, free, fastest) — try first even for file-based transcription
249
- if (groqApiKey) {
250
- skipGroqInNestedCalls = true;
251
- if (stat.size <= MAX_OPENAI_UPLOAD_BYTES) {
252
- const fileBytes = new Uint8Array(await fs.readFile(filePath));
253
- try {
254
- const text = await transcribeWithGroq(fileBytes, mediaType, filename, groqApiKey);
255
- if (text) {
256
- return { text, provider: "groq", error: null, notes };
257
- }
258
- groqError = new Error("Groq transcription returned empty text");
259
- notes.push("Groq transcription returned empty text; falling back to local/OpenAI");
260
- }
261
- catch (error) {
262
- groqError = wrapError("Groq transcription failed", error);
263
- notes.push(`Groq transcription failed; falling back to local/OpenAI: ${error instanceof Error ? error.message : String(error)}`);
264
- }
265
- }
266
- else {
267
- groqError = new Error(`File too large for Groq upload (${formatBytes(stat.size)}); trying local providers`);
268
- notes.push(groqError.message);
269
- }
260
+ async function transcribeWithLocalOnnx({ bytes, mediaType, filename, totalDurationSeconds, onProgress, env, notes, }) {
261
+ const onnxPreference = resolveOnnxModelPreference(env);
262
+ if (!onnxPreference)
263
+ return null;
264
+ const onnx = await transcribeWithOnnxCli({
265
+ model: onnxPreference,
266
+ bytes,
267
+ mediaType,
268
+ filename,
269
+ totalDurationSeconds,
270
+ onProgress,
271
+ env,
272
+ });
273
+ if (onnx.text) {
274
+ if (onnx.notes.length > 0)
275
+ notes.push(...onnx.notes);
276
+ return { ...onnx, notes };
270
277
  }
271
- // 2. ONNX (local)
278
+ if (onnx.notes.length > 0)
279
+ notes.push(...onnx.notes);
280
+ if (onnx.error) {
281
+ notes.push(`${onnx.provider ?? "onnx"} failed; falling back to Whisper: ${onnx.error.message}`);
282
+ }
283
+ return null;
284
+ }
285
+ async function transcribeWithLocalOnnxFile({ filePath, mediaType, totalDurationSeconds, onProgress, env, notes, }) {
272
286
  const onnxPreference = resolveOnnxModelPreference(env);
273
- if (onnxPreference) {
274
- onProgress?.({
275
- partIndex: null,
276
- parts: null,
277
- processedDurationSeconds: null,
278
- totalDurationSeconds,
279
- });
280
- const onnx = await transcribeWithOnnxCliFile({
281
- model: onnxPreference,
282
- filePath,
283
- mediaType,
284
- totalDurationSeconds,
285
- onProgress,
286
- env,
287
- });
288
- if (onnx.text) {
289
- if (onnx.notes.length > 0)
290
- notes.push(...onnx.notes);
291
- return { ...onnx, notes };
292
- }
287
+ if (!onnxPreference)
288
+ return null;
289
+ onProgress?.({
290
+ partIndex: null,
291
+ parts: null,
292
+ processedDurationSeconds: null,
293
+ totalDurationSeconds,
294
+ });
295
+ const onnx = await transcribeWithOnnxCliFile({
296
+ model: onnxPreference,
297
+ filePath,
298
+ mediaType,
299
+ totalDurationSeconds,
300
+ onProgress,
301
+ env,
302
+ });
303
+ if (onnx.text) {
293
304
  if (onnx.notes.length > 0)
294
305
  notes.push(...onnx.notes);
295
- if (onnx.error) {
296
- notes.push(`${onnx.provider ?? "onnx"} failed; falling back to Whisper: ${onnx.error.message}`);
297
- }
306
+ return { ...onnx, notes };
298
307
  }
299
- // 3. whisper.cpp (local)
308
+ if (onnx.notes.length > 0)
309
+ notes.push(...onnx.notes);
310
+ if (onnx.error) {
311
+ notes.push(`${onnx.provider ?? "onnx"} failed; falling back to Whisper: ${onnx.error.message}`);
312
+ }
313
+ return null;
314
+ }
315
+ async function transcribeWithLocalWhisperBytes({ bytes, mediaType, filename, totalDurationSeconds, onProgress, notes, }) {
300
316
  const localReady = await isWhisperCppReady();
301
- let local = null;
302
- if (localReady) {
303
- onProgress?.({
304
- partIndex: null,
305
- parts: null,
306
- processedDurationSeconds: null,
317
+ if (!localReady)
318
+ return null;
319
+ const nameHint = filename?.trim() ? basename(filename.trim()) : "media";
320
+ const tempFile = join(tmpdir(), `summarize-whisper-local-${randomUUID()}-${ensureWhisperFilenameExtension(nameHint, mediaType)}`);
321
+ try {
322
+ await fs.writeFile(tempFile, bytes);
323
+ const result = await safeTranscribeWithWhisperCppFile({
324
+ filePath: tempFile,
325
+ mediaType,
307
326
  totalDurationSeconds,
327
+ onProgress,
308
328
  });
309
- try {
310
- local = await transcribeWithWhisperCppFile({
311
- filePath,
312
- mediaType,
313
- totalDurationSeconds,
314
- onProgress,
315
- });
329
+ if (result.text) {
330
+ if (result.notes.length > 0)
331
+ notes.push(...result.notes);
332
+ return { ...result, notes };
316
333
  }
317
- catch (error) {
318
- local = {
319
- text: null,
320
- provider: "whisper.cpp",
321
- error: wrapError("whisper.cpp failed", error),
322
- notes: [],
323
- };
324
- }
325
- if (local.text) {
326
- if (local.notes.length > 0)
327
- notes.push(...local.notes);
328
- return { ...local, notes };
329
- }
330
- if (local.notes.length > 0)
331
- notes.push(...local.notes);
332
- if (local.error) {
333
- notes.push(`whisper.cpp failed; falling back to remote Whisper: ${local.error.message}`);
334
+ if (result.notes.length > 0)
335
+ notes.push(...result.notes);
336
+ if (result.error) {
337
+ notes.push(`whisper.cpp failed; falling back to remote Whisper: ${result.error.message}`);
334
338
  }
339
+ return null;
335
340
  }
336
- // 4. OpenAI / FAL (cloud fallbacks)
337
- if (!openaiApiKey && !falApiKey) {
338
- if (groqError) {
339
- return {
340
- text: null,
341
- provider: "groq",
342
- error: groqError,
343
- notes,
344
- };
345
- }
346
- return {
347
- text: null,
348
- provider: null,
349
- error: new Error("No transcription providers available (install whisper-cpp or set GROQ_API_KEY, OPENAI_API_KEY, or FAL_KEY)"),
350
- notes,
351
- };
352
- }
353
- if (openaiApiKey && stat.size > MAX_OPENAI_UPLOAD_BYTES) {
354
- const canChunk = await isFfmpegAvailable();
355
- if (!canChunk) {
356
- notes.push(`Media too large for Whisper upload (${formatBytes(stat.size)}); install ffmpeg to enable chunked transcription`);
357
- const head = await readFirstBytes(filePath, MAX_OPENAI_UPLOAD_BYTES);
358
- const partial = await transcribeMediaWithWhisper({
359
- bytes: head,
360
- mediaType,
361
- filename,
362
- groqApiKey,
363
- skipGroq: skipGroqInNestedCalls,
364
- openaiApiKey,
365
- falApiKey,
366
- env,
367
- });
368
- if (partial.notes.length > 0)
369
- notes.push(...partial.notes);
370
- return { ...partial, notes };
371
- }
372
- const dir = await fs.mkdtemp(join(tmpdir(), "summarize-whisper-segments-"));
373
- try {
374
- const pattern = join(dir, "part-%03d.mp3");
375
- await runFfmpegSegment({
376
- inputPath: filePath,
377
- outputPattern: pattern,
378
- segmentSeconds,
379
- });
380
- const files = (await fs.readdir(dir))
381
- .filter((name) => name.startsWith("part-") && name.endsWith(".mp3"))
382
- .sort((a, b) => a.localeCompare(b));
383
- if (files.length === 0) {
384
- return {
385
- text: null,
386
- provider: null,
387
- error: new Error("ffmpeg produced no audio segments"),
388
- notes,
389
- };
390
- }
391
- notes.push(`ffmpeg chunked media into ${files.length} parts (${segmentSeconds}s each)`);
392
- onProgress?.({
393
- partIndex: null,
394
- parts: files.length,
395
- processedDurationSeconds: null,
396
- totalDurationSeconds,
397
- });
398
- const parts = [];
399
- let usedProvider = null;
400
- for (const [index, name] of files.entries()) {
401
- const segmentPath = join(dir, name);
402
- const segmentBytes = new Uint8Array(await fs.readFile(segmentPath));
403
- const result = await transcribeMediaWithWhisper({
404
- bytes: segmentBytes,
405
- mediaType: "audio/mpeg",
406
- filename: name,
407
- groqApiKey,
408
- skipGroq: skipGroqInNestedCalls,
409
- openaiApiKey,
410
- falApiKey,
411
- onProgress: null,
412
- env,
413
- });
414
- if (!usedProvider && result.provider)
415
- usedProvider = result.provider;
416
- if (result.error && !result.text) {
417
- return { text: null, provider: usedProvider, error: result.error, notes };
418
- }
419
- if (result.text)
420
- parts.push(result.text);
421
- // Coarse but useful: update based on part boundaries. Duration is best-effort (RSS hints or
422
- // ffprobe); the per-part time is stable enough to make the spinner feel alive.
423
- const processedSeconds = Math.max(0, (index + 1) * segmentSeconds);
424
- onProgress?.({
425
- partIndex: index + 1,
426
- parts: files.length,
427
- processedDurationSeconds: typeof totalDurationSeconds === "number" && totalDurationSeconds > 0
428
- ? Math.min(processedSeconds, totalDurationSeconds)
429
- : null,
430
- totalDurationSeconds,
431
- });
432
- }
433
- return { text: parts.join("\n\n"), provider: usedProvider, error: null, notes };
434
- }
435
- finally {
436
- await fs.rm(dir, { recursive: true, force: true }).catch(() => { });
437
- }
341
+ finally {
342
+ await fs.unlink(tempFile).catch(() => { });
438
343
  }
439
- const bytes = new Uint8Array(await fs.readFile(filePath));
344
+ }
345
/**
 * Try to transcribe a media file on disk with the local whisper.cpp helper.
 *
 * The caller-supplied `notes` array is mutated in place: any notes reported by
 * the helper are appended, and a fallback message is appended when the helper
 * reports an error without producing text.
 *
 * @param {object} options
 * @param {*} options.filePath - Forwarded unchanged to the whisper.cpp helper.
 * @param {*} options.mediaType - Forwarded unchanged to the whisper.cpp helper.
 * @param {*} options.totalDurationSeconds - Echoed in the initial progress event and forwarded to the helper.
 * @param {Function} [options.onProgress] - Optional progress callback; called once up front, then handed to the helper.
 * @param {Array} options.notes - Accumulator that receives helper notes and the fallback message.
 * @returns {Promise<object|null>} The helper result (with `notes` replaced by the
 *   accumulator) when it contains text; `null` when whisper.cpp is not ready or
 *   produced no text.
 */
async function transcribeWithLocalWhisperFile({ filePath, mediaType, totalDurationSeconds, onProgress, notes, }) {
    if (!(await isWhisperCppReady())) {
        return null;
    }
    // Initial progress event: nothing processed yet, part information unknown.
    onProgress?.({
        partIndex: null,
        parts: null,
        processedDurationSeconds: null,
        totalDurationSeconds,
    });
    const outcome = await safeTranscribeWithWhisperCppFile({
        filePath,
        mediaType,
        totalDurationSeconds,
        onProgress,
    });
    // Helper notes are merged into the accumulator on success and failure alike.
    if (outcome.notes.length > 0) {
        notes.push(...outcome.notes);
    }
    if (outcome.text) {
        return { ...outcome, notes };
    }
    if (outcome.error) {
        notes.push(`whisper.cpp failed; falling back to remote Whisper: ${outcome.error.message}`);
    }
    // No text: signal the caller to try another provider.
    return null;
}
373
/**
 * Non-throwing wrapper around `transcribeWithWhisperCppFile`.
 *
 * Anything thrown (or rejected) by the underlying call is converted into a
 * failure-shaped result object instead of propagating to the caller.
 *
 * @param {object} args - Passed through unchanged to `transcribeWithWhisperCppFile`.
 * @returns {Promise<object>} The underlying result, or
 *   `{ text: null, provider: "whisper.cpp", error, notes: [] }` where `error`
 *   is produced by `wrapError("whisper.cpp failed", ...)`.
 */
async function safeTranscribeWithWhisperCppFile(args) {
    let outcome;
    try {
        outcome = await transcribeWithWhisperCppFile(args);
    }
    catch (cause) {
        // Report the failure as data so callers can fall back to other providers.
        const failure = {
            text: null,
            provider: "whisper.cpp",
            error: wrapError("whisper.cpp failed", cause),
            notes: [],
        };
        outcome = failure;
    }
    return outcome;
}
460
386
  //# sourceMappingURL=core.js.map