@steipete/summarize-core 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -0
- package/dist/esm/content/index.js +5 -0
- package/dist/esm/content/index.js.map +1 -0
- package/dist/esm/content/link-preview/client.js +28 -0
- package/dist/esm/content/link-preview/client.js.map +1 -0
- package/dist/esm/content/link-preview/content/article.js +155 -0
- package/dist/esm/content/link-preview/content/article.js.map +1 -0
- package/dist/esm/content/link-preview/content/cleaner.js +55 -0
- package/dist/esm/content/link-preview/content/cleaner.js.map +1 -0
- package/dist/esm/content/link-preview/content/constants.js +7 -0
- package/dist/esm/content/link-preview/content/constants.js.map +1 -0
- package/dist/esm/content/link-preview/content/fetcher.js +124 -0
- package/dist/esm/content/link-preview/content/fetcher.js.map +1 -0
- package/dist/esm/content/link-preview/content/firecrawl.js +86 -0
- package/dist/esm/content/link-preview/content/firecrawl.js.map +1 -0
- package/dist/esm/content/link-preview/content/html.js +162 -0
- package/dist/esm/content/link-preview/content/html.js.map +1 -0
- package/dist/esm/content/link-preview/content/index.js +345 -0
- package/dist/esm/content/link-preview/content/index.js.map +1 -0
- package/dist/esm/content/link-preview/content/jsonld.js +77 -0
- package/dist/esm/content/link-preview/content/jsonld.js.map +1 -0
- package/dist/esm/content/link-preview/content/parsers.js +77 -0
- package/dist/esm/content/link-preview/content/parsers.js.map +1 -0
- package/dist/esm/content/link-preview/content/podcast-utils.js +79 -0
- package/dist/esm/content/link-preview/content/podcast-utils.js.map +1 -0
- package/dist/esm/content/link-preview/content/readability.js +53 -0
- package/dist/esm/content/link-preview/content/readability.js.map +1 -0
- package/dist/esm/content/link-preview/content/twitter-utils.js +68 -0
- package/dist/esm/content/link-preview/content/twitter-utils.js.map +1 -0
- package/dist/esm/content/link-preview/content/types.js +4 -0
- package/dist/esm/content/link-preview/content/types.js.map +1 -0
- package/dist/esm/content/link-preview/content/utils.js +164 -0
- package/dist/esm/content/link-preview/content/utils.js.map +1 -0
- package/dist/esm/content/link-preview/content/video.js +96 -0
- package/dist/esm/content/link-preview/content/video.js.map +1 -0
- package/dist/esm/content/link-preview/content/youtube.js +82 -0
- package/dist/esm/content/link-preview/content/youtube.js.map +1 -0
- package/dist/esm/content/link-preview/deps.js +20 -0
- package/dist/esm/content/link-preview/deps.js.map +1 -0
- package/dist/esm/content/link-preview/fetch-with-timeout.js +35 -0
- package/dist/esm/content/link-preview/fetch-with-timeout.js.map +1 -0
- package/dist/esm/content/link-preview/types.js +2 -0
- package/dist/esm/content/link-preview/types.js.map +1 -0
- package/dist/esm/content/transcript/cache.js +79 -0
- package/dist/esm/content/transcript/cache.js.map +1 -0
- package/dist/esm/content/transcript/index.js +130 -0
- package/dist/esm/content/transcript/index.js.map +1 -0
- package/dist/esm/content/transcript/normalize.js +43 -0
- package/dist/esm/content/transcript/normalize.js.map +1 -0
- package/dist/esm/content/transcript/providers/generic.js +11 -0
- package/dist/esm/content/transcript/providers/generic.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/apple-flow.js +222 -0
- package/dist/esm/content/transcript/providers/podcast/apple-flow.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/apple.js +38 -0
- package/dist/esm/content/transcript/providers/podcast/apple.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/constants.js +8 -0
- package/dist/esm/content/transcript/providers/podcast/constants.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/flow-context.js +2 -0
- package/dist/esm/content/transcript/providers/podcast/flow-context.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/itunes.js +134 -0
- package/dist/esm/content/transcript/providers/podcast/itunes.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/json.js +34 -0
- package/dist/esm/content/transcript/providers/podcast/json.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/media.js +345 -0
- package/dist/esm/content/transcript/providers/podcast/media.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/results.js +28 -0
- package/dist/esm/content/transcript/providers/podcast/results.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/rss.js +253 -0
- package/dist/esm/content/transcript/providers/podcast/rss.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/spotify-flow.js +218 -0
- package/dist/esm/content/transcript/providers/podcast/spotify-flow.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/spotify.js +113 -0
- package/dist/esm/content/transcript/providers/podcast/spotify.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast.js +222 -0
- package/dist/esm/content/transcript/providers/podcast.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/api.js +257 -0
- package/dist/esm/content/transcript/providers/youtube/api.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/apify.js +55 -0
- package/dist/esm/content/transcript/providers/youtube/apify.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/captions.js +409 -0
- package/dist/esm/content/transcript/providers/youtube/captions.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/yt-dlp.js +166 -0
- package/dist/esm/content/transcript/providers/youtube/yt-dlp.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube.js +173 -0
- package/dist/esm/content/transcript/providers/youtube.js.map +1 -0
- package/dist/esm/content/transcript/types.js +2 -0
- package/dist/esm/content/transcript/types.js.map +1 -0
- package/dist/esm/content/transcript/utils.js +259 -0
- package/dist/esm/content/transcript/utils.js.map +1 -0
- package/dist/esm/index.js +4 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/esm/language.js +126 -0
- package/dist/esm/language.js.map +1 -0
- package/dist/esm/prompts/cli.js +20 -0
- package/dist/esm/prompts/cli.js.map +1 -0
- package/dist/esm/prompts/file.js +48 -0
- package/dist/esm/prompts/file.js.map +1 -0
- package/dist/esm/prompts/index.js +4 -0
- package/dist/esm/prompts/index.js.map +1 -0
- package/dist/esm/prompts/link-summary.js +116 -0
- package/dist/esm/prompts/link-summary.js.map +1 -0
- package/dist/esm/shared/contracts.js +2 -0
- package/dist/esm/shared/contracts.js.map +1 -0
- package/dist/esm/transcription/whisper/constants.js +8 -0
- package/dist/esm/transcription/whisper/constants.js.map +1 -0
- package/dist/esm/transcription/whisper/core.js +303 -0
- package/dist/esm/transcription/whisper/core.js.map +1 -0
- package/dist/esm/transcription/whisper/fal.js +41 -0
- package/dist/esm/transcription/whisper/fal.js.map +1 -0
- package/dist/esm/transcription/whisper/ffmpeg.js +179 -0
- package/dist/esm/transcription/whisper/ffmpeg.js.map +1 -0
- package/dist/esm/transcription/whisper/openai.js +47 -0
- package/dist/esm/transcription/whisper/openai.js.map +1 -0
- package/dist/esm/transcription/whisper/types.js +2 -0
- package/dist/esm/transcription/whisper/types.js.map +1 -0
- package/dist/esm/transcription/whisper/utils.js +63 -0
- package/dist/esm/transcription/whisper/utils.js.map +1 -0
- package/dist/esm/transcription/whisper/whisper-cpp.js +227 -0
- package/dist/esm/transcription/whisper/whisper-cpp.js.map +1 -0
- package/dist/esm/transcription/whisper.js +5 -0
- package/dist/esm/transcription/whisper.js.map +1 -0
- package/dist/types/content/index.d.ts +5 -0
- package/dist/types/content/link-preview/client.d.ts +18 -0
- package/dist/types/content/link-preview/content/article.d.ts +4 -0
- package/dist/types/content/link-preview/content/cleaner.d.ts +12 -0
- package/dist/types/content/link-preview/content/constants.d.ts +6 -0
- package/dist/types/content/link-preview/content/fetcher.d.ts +16 -0
- package/dist/types/content/link-preview/content/firecrawl.d.ts +14 -0
- package/dist/types/content/link-preview/content/html.d.ts +17 -0
- package/dist/types/content/link-preview/content/index.d.ts +4 -0
- package/dist/types/content/link-preview/content/jsonld.d.ts +6 -0
- package/dist/types/content/link-preview/content/parsers.d.ts +7 -0
- package/dist/types/content/link-preview/content/podcast-utils.d.ts +7 -0
- package/dist/types/content/link-preview/content/readability.d.ts +8 -0
- package/dist/types/content/link-preview/content/twitter-utils.d.ts +4 -0
- package/dist/types/content/link-preview/content/types.d.ts +61 -0
- package/dist/types/content/link-preview/content/utils.d.ts +17 -0
- package/dist/types/content/link-preview/content/video.d.ts +5 -0
- package/dist/types/content/link-preview/content/youtube.d.ts +1 -0
- package/dist/types/content/link-preview/deps.d.ts +167 -0
- package/dist/types/content/link-preview/fetch-with-timeout.d.ts +4 -0
- package/dist/types/content/link-preview/types.d.ts +37 -0
- package/dist/types/content/transcript/cache.d.ts +29 -0
- package/dist/types/content/transcript/index.d.ts +9 -0
- package/dist/types/content/transcript/normalize.d.ts +3 -0
- package/dist/types/content/transcript/providers/generic.d.ts +3 -0
- package/dist/types/content/transcript/providers/podcast/apple-flow.d.ts +4 -0
- package/dist/types/content/transcript/providers/podcast/apple.d.ts +6 -0
- package/dist/types/content/transcript/providers/podcast/constants.d.ts +7 -0
- package/dist/types/content/transcript/providers/podcast/flow-context.d.ts +11 -0
- package/dist/types/content/transcript/providers/podcast/itunes.d.ts +17 -0
- package/dist/types/content/transcript/providers/podcast/json.d.ts +8 -0
- package/dist/types/content/transcript/providers/podcast/media.d.ts +42 -0
- package/dist/types/content/transcript/providers/podcast/results.d.ts +10 -0
- package/dist/types/content/transcript/providers/podcast/rss.d.ts +22 -0
- package/dist/types/content/transcript/providers/podcast/spotify-flow.d.ts +3 -0
- package/dist/types/content/transcript/providers/podcast/spotify.d.ts +24 -0
- package/dist/types/content/transcript/providers/podcast.d.ts +20 -0
- package/dist/types/content/transcript/providers/youtube/api.d.ts +26 -0
- package/dist/types/content/transcript/providers/youtube/apify.d.ts +1 -0
- package/dist/types/content/transcript/providers/youtube/captions.d.ts +7 -0
- package/dist/types/content/transcript/providers/youtube/yt-dlp.d.ts +17 -0
- package/dist/types/content/transcript/providers/youtube.d.ts +3 -0
- package/dist/types/content/transcript/types.d.ts +30 -0
- package/dist/types/content/transcript/utils.d.ts +8 -0
- package/dist/types/index.d.ts +4 -0
- package/dist/types/language.d.ts +25 -0
- package/dist/types/prompts/cli.d.ts +10 -0
- package/dist/types/prompts/file.d.ts +17 -0
- package/dist/types/prompts/index.d.ts +4 -0
- package/dist/types/prompts/link-summary.d.ts +29 -0
- package/dist/types/shared/contracts.d.ts +2 -0
- package/dist/types/transcription/whisper/constants.d.ts +7 -0
- package/dist/types/transcription/whisper/core.d.ts +20 -0
- package/dist/types/transcription/whisper/fal.d.ts +1 -0
- package/dist/types/transcription/whisper/ffmpeg.d.ts +16 -0
- package/dist/types/transcription/whisper/openai.d.ts +2 -0
- package/dist/types/transcription/whisper/types.d.ts +17 -0
- package/dist/types/transcription/whisper/utils.d.ts +5 -0
- package/dist/types/transcription/whisper/whisper-cpp.d.ts +9 -0
- package/dist/types/transcription/whisper.d.ts +5 -0
- package/package.json +54 -0
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
import { spawn } from 'node:child_process';
|
|
2
|
+
import { randomUUID } from 'node:crypto';
|
|
3
|
+
import { promises as fs } from 'node:fs';
|
|
4
|
+
import { tmpdir } from 'node:os';
|
|
5
|
+
import { join } from 'node:path';
|
|
6
|
+
/**
 * Reports whether an `ffmpeg` executable can be launched from this process.
 *
 * Runs `ffmpeg -version` with all stdio discarded.
 *
 * @returns A promise resolving to `true` when the process exits with code 0,
 *   and to `false` when it cannot be spawned or exits with any other code.
 *   The promise never rejects.
 */
export async function isFfmpegAvailable() {
    return new Promise((done) => {
        const child = spawn('ffmpeg', ['-version'], { stdio: ['ignore', 'ignore', 'ignore'] });
        // A spawn failure (e.g. binary not found) surfaces as an 'error' event.
        child.on('error', () => {
            done(false);
        });
        child.on('close', (exitCode) => {
            done(exitCode === 0);
        });
    });
}
|
|
13
|
+
/**
 * Best-effort lookup of a media file's duration using `ffprobe`.
 *
 * ffprobe ships with the ffmpeg suite. The probe is deliberately optional so
 * that environments without ffmpeg keep working; the result only feeds nicer
 * progress output.
 *
 * @param filePath Path of the media file handed to ffprobe.
 * @returns A promise resolving to the duration in seconds when ffprobe exits
 *   with code 0 and prints a finite, positive number; otherwise `null`
 *   (ffprobe missing, non-zero exit, or unparsable output). Never rejects.
 */
export async function probeMediaDurationSecondsWithFfprobe(filePath) {
    // Ask ffprobe to print nothing but the bare container duration value.
    const probeArgs = ['-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', filePath];
    return new Promise((settle) => {
        const child = spawn('ffprobe', probeArgs, { stdio: ['ignore', 'pipe', 'ignore'] });
        let captured = '';
        child.stdout?.setEncoding('utf8');
        child.stdout?.on('data', (chunk) => {
            // Stop accumulating once the buffer has grown past 2048 characters.
            if (captured.length <= 2048) {
                captured += chunk;
            }
        });
        child.on('error', () => {
            settle(null);
        });
        child.on('close', (exitCode) => {
            const seconds = exitCode === 0 ? Number(captured.trim()) : Number.NaN;
            settle(Number.isFinite(seconds) && seconds > 0 ? seconds : null);
        });
    });
}
|
|
46
|
+
/**
 * Splits a media file into fixed-length audio segments with ffmpeg.
 *
 * The video stream is dropped (`-vn`) and audio is re-encoded as mono
 * (`-ac 1`) at 16000 Hz (`-ar 16000`) with a 32k audio bitrate before being
 * written through ffmpeg's `segment` muxer.
 *
 * @param options.inputPath Path of the source media file.
 * @param options.outputPattern Output filename pattern passed to ffmpeg as-is.
 * @param options.segmentSeconds Segment length, passed to `-segment_time`.
 * @returns A promise that resolves once ffmpeg exits with code 0.
 * @throws The spawn error if ffmpeg cannot be started, or an `Error` carrying
 *   the exit code and captured stderr when ffmpeg exits unsuccessfully.
 */
export async function runFfmpegSegment({ inputPath, outputPattern, segmentSeconds, }) {
    const quietFlags = ['-hide_banner', '-loglevel', 'error'];
    const audioFlags = ['-vn', '-ac', '1', '-ar', '16000', '-b:a', '32k'];
    const segmentFlags = ['-f', 'segment', '-segment_time', String(segmentSeconds), '-reset_timestamps', '1'];
    const ffmpegArgs = [...quietFlags, '-i', inputPath, ...audioFlags, ...segmentFlags, outputPattern];
    await new Promise((succeed, fail) => {
        const child = spawn('ffmpeg', ffmpegArgs, { stdio: ['ignore', 'ignore', 'pipe'] });
        let errorOutput = '';
        child.stderr?.setEncoding('utf8');
        child.stderr?.on('data', (chunk) => {
            // Keep the diagnostic bounded: stop appending once past 8192 characters.
            if (errorOutput.length <= 8192) {
                errorOutput += chunk;
            }
        });
        child.on('error', (spawnError) => {
            fail(spawnError);
        });
        child.on('close', (exitCode) => {
            if (exitCode !== 0) {
                const detail = errorOutput.trim();
                fail(new Error(`ffmpeg failed (${exitCode ?? 'unknown'}): ${detail || 'unknown error'}`));
                return;
            }
            succeed();
        });
    });
}
|
|
88
|
+
/**
 * Transcodes a media file to mono, 16000 Hz, 64k-bitrate audio using the
 * "strict" ffmpeg invocation (no error-tolerance flags).
 *
 * Delegates process handling to `runFfmpegTranscode`, labelling the run as
 * `'strict'` so failures are identifiable in the error message.
 *
 * @param options.inputPath Path of the source media file.
 * @param options.outputPath Destination path; ffmpeg picks the container from it.
 * @returns A promise that settles with the outcome of `runFfmpegTranscode`.
 */
export async function runFfmpegTranscodeToMp3({ inputPath, outputPath, }) {
    const strictArgs = [
        '-hide_banner', '-loglevel', 'error',
        '-i', inputPath,
        '-vn',
        '-ac', '1',
        '-ar', '16000',
        '-b:a', '64k',
        outputPath,
    ];
    await runFfmpegTranscode({ inputPath, outputPath, mode: 'strict', args: strictArgs });
}
|
|
110
|
+
/**
 * Transcodes a media file to mono, 16000 Hz, 64k-bitrate audio using a more
 * forgiving ffmpeg invocation than the strict variant.
 *
 * Compared with the strict argument list this one additionally passes
 * `-err_detect ignore_err` and `-fflags +genpts` before the input, drops
 * subtitle and data streams (`-sn`, `-dn`), and maps only the first audio
 * stream, optionally (`-map 0:a:0?`).
 *
 * @param options.inputPath Path of the source media file.
 * @param options.outputPath Destination path; ffmpeg picks the container from it.
 * @returns A promise that settles with the outcome of `runFfmpegTranscode`.
 */
export async function runFfmpegTranscodeToMp3Lenient({ inputPath, outputPath, }) {
    const lenientArgs = [
        '-hide_banner', '-loglevel', 'error',
        '-err_detect', 'ignore_err',
        '-fflags', '+genpts',
        '-i', inputPath,
        '-vn', '-sn', '-dn',
        '-map', '0:a:0?',
        '-ac', '1',
        '-ar', '16000',
        '-b:a', '64k',
        outputPath,
    ];
    await runFfmpegTranscode({ inputPath, outputPath, mode: 'lenient', args: lenientArgs });
}
|
|
140
|
+
/**
 * Transcodes an in-memory media payload to MP3 by round-tripping through
 * temporary files and ffmpeg.
 *
 * The bytes are written to a uniquely named file in the OS temp directory,
 * transcoded with the strict ffmpeg settings, and, if that fails, retried with
 * the lenient settings. Both temporary files are removed afterwards whether
 * or not the transcode succeeded.
 *
 * @param bytes Raw media bytes to transcode.
 * @returns A promise resolving to the transcoded MP3 bytes as a `Uint8Array`.
 * @throws Whatever the lenient transcode (or the file I/O) throws when both
 *   attempts fail; the strict attempt's error is intentionally discarded.
 */
export async function transcodeBytesToMp3(bytes) {
    const inputPath = join(tmpdir(), `summarize-whisper-input-${randomUUID()}.bin`);
    const outputPath = join(tmpdir(), `summarize-whisper-output-${randomUUID()}.mp3`);
    try {
        await fs.writeFile(inputPath, bytes);
        try {
            await runFfmpegTranscodeToMp3({ inputPath, outputPath });
        }
        catch (_error) {
            // A failed strict pass can leave a partial output file behind. The retry
            // is invoked without `-y` and with stdin ignored, so ffmpeg would refuse
            // to overwrite it and the fallback could never succeed. Clear it first.
            await fs.unlink(outputPath).catch(() => { });
            await runFfmpegTranscodeToMp3Lenient({ inputPath, outputPath });
        }
        return new Uint8Array(await fs.readFile(outputPath));
    }
    finally {
        // Best-effort cleanup: a missing file is not an error here.
        await fs.unlink(inputPath).catch(() => { });
        await fs.unlink(outputPath).catch(() => { });
    }
}
|
|
158
|
+
/**
 * Runs ffmpeg with a caller-supplied argument list and waits for it to finish.
 *
 * `inputPath`, `outputPath` and `mode` are used only to build the failure
 * message; the actual ffmpeg behaviour is determined entirely by `args`.
 *
 * @param options.inputPath Source path, reported in the failure message.
 * @param options.outputPath Destination path, reported in the failure message.
 * @param options.mode Label for this invocation, reported in the failure message.
 * @param options.args Complete argument vector passed to ffmpeg.
 * @returns A promise that resolves once ffmpeg exits with code 0.
 * @throws The spawn error if ffmpeg cannot be started, or an `Error` carrying
 *   the mode, exit code, paths and captured stderr on an unsuccessful exit.
 */
async function runFfmpegTranscode({ inputPath, outputPath, mode, args, }) {
    await new Promise((succeed, fail) => {
        const child = spawn('ffmpeg', args, { stdio: ['ignore', 'ignore', 'pipe'] });
        let errorOutput = '';
        child.stderr?.setEncoding('utf8');
        child.stderr?.on('data', (chunk) => {
            // Keep the diagnostic bounded: stop appending once past 8192 characters.
            if (errorOutput.length <= 8192) {
                errorOutput += chunk;
            }
        });
        child.on('error', (spawnError) => {
            fail(spawnError);
        });
        child.on('close', (exitCode) => {
            if (exitCode !== 0) {
                const detail = errorOutput.trim();
                fail(new Error(`ffmpeg ${mode} transcode failed (${exitCode ?? 'unknown'}) for ${inputPath} -> ${outputPath}: ${detail || 'unknown error'}`));
                return;
            }
            succeed();
        });
    });
}
|
|
179
|
+
//# sourceMappingURL=ffmpeg.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ffmpeg.js","sourceRoot":"","sources":["../../../../src/transcription/whisper/ffmpeg.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAA;AAC1C,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,SAAS,CAAA;AACxC,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAA;AAChC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAEhC,MAAM,CAAC,KAAK,UAAU,iBAAiB;IACrC,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC7B,MAAM,IAAI,GAAG,KAAK,CAAC,QAAQ,EAAE,CAAC,UAAU,CAAC,EAAE,EAAE,KAAK,EAAE,CAAC,QAAQ,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAA;QACrF,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAA;QACtC,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAA;IACjD,CAAC,CAAC,CAAA;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,oCAAoC,CACxD,QAAgB;IAEhB,2FAA2F;IAC3F,mEAAmE;IACnE,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC7B,MAAM,IAAI,GAAG;YACX,IAAI;YACJ,OAAO;YACP,eAAe;YACf,iBAAiB;YACjB,KAAK;YACL,oCAAoC;YACpC,QAAQ;SACT,CAAA;QACD,MAAM,IAAI,GAAG,KAAK,CAAC,SAAS,EAAE,IAAI,EAAE,EAAE,KAAK,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAA;QAC5E,IAAI,MAAM,GAAG,EAAE,CAAA;QACf,IAAI,CAAC,MAAM,EAAE,WAAW,CAAC,MAAM,CAAC,CAAA;QAChC,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE;YACxC,IAAI,MAAM,CAAC,MAAM,GAAG,IAAI;gBAAE,OAAM;YAChC,MAAM,IAAI,KAAK,CAAA;QACjB,CAAC,CAAC,CAAA;QACF,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAA;QACrC,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;YACxB,IAAI,IAAI,KAAK,CAAC,EAAE,CAAC;gBACf,OAAO,CAAC,IAAI,CAAC,CAAA;gBACb,OAAM;YACR,CAAC;YACD,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI,EAAE,CAAA;YAC7B,MAAM,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,CAAA;YAC9B,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;QAChE,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAAC,EACrC,SAAS,EACT,aAAa,EACb,cAAc,GAKf;IACC,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QAC1C,MAAM,IAAI,GAAG;YACX,cAAc;YACd,WAAW;YAC
X,OAAO;YACP,IAAI;YACJ,SAAS;YACT,KAAK;YACL,KAAK;YACL,GAAG;YACH,KAAK;YACL,OAAO;YACP,MAAM;YACN,KAAK;YACL,IAAI;YACJ,SAAS;YACT,eAAe;YACf,MAAM,CAAC,cAAc,CAAC;YACtB,mBAAmB;YACnB,GAAG;YACH,aAAa;SACd,CAAA;QACD,MAAM,IAAI,GAAG,KAAK,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE,KAAK,EAAE,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,CAAC,EAAE,CAAC,CAAA;QAC3E,IAAI,MAAM,GAAG,EAAE,CAAA;QACf,IAAI,CAAC,MAAM,EAAE,WAAW,CAAC,MAAM,CAAC,CAAA;QAChC,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE;YACxC,IAAI,MAAM,CAAC,MAAM,GAAG,IAAI;gBAAE,OAAM;YAChC,MAAM,IAAI,KAAK,CAAA;QACjB,CAAC,CAAC,CAAA;QACF,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAA;QAC1C,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;YACxB,IAAI,IAAI,KAAK,CAAC,EAAE,CAAC;gBACf,OAAO,EAAE,CAAA;gBACT,OAAM;YACR,CAAC;YACD,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,EAAE,CAAA;YAC5B,MAAM,CAAC,IAAI,KAAK,CAAC,kBAAkB,IAAI,IAAI,SAAS,MAAM,MAAM,IAAI,eAAe,EAAE,CAAC,CAAC,CAAA;QACzF,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAAC,EAC5C,SAAS,EACT,UAAU,GAIX;IACC,MAAM,kBAAkB,CAAC;QACvB,SAAS;QACT,UAAU;QACV,IAAI,EAAE,QAAQ;QACd,IAAI,EAAE;YACJ,cAAc;YACd,WAAW;YACX,OAAO;YACP,IAAI;YACJ,SAAS;YACT,KAAK;YACL,KAAK;YACL,GAAG;YACH,KAAK;YACL,OAAO;YACP,MAAM;YACN,KAAK;YACL,UAAU;SACX;KACF,CAAC,CAAA;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,8BAA8B,CAAC,EACnD,SAAS,EACT,UAAU,GAIX;IACC,MAAM,kBAAkB,CAAC;QACvB,SAAS;QACT,UAAU;QACV,IAAI,EAAE,SAAS;QACf,IAAI,EAAE;YACJ,cAAc;YACd,WAAW;YACX,OAAO;YACP,aAAa;YACb,YAAY;YACZ,SAAS;YACT,SAAS;YACT,IAAI;YACJ,SAAS;YACT,KAAK;YACL,KAAK;YACL,KAAK;YACL,MAAM;YACN,QAAQ;YACR,KAAK;YACL,GAAG;YACH,KAAK;YACL,OAAO;YACP,MAAM;YACN,KAAK;YACL,UAAU;SACX;KACF,CAAC,CAAA;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,KAAiB;IACzD,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,EAAE,EAAE,2BAA2B,UAAU,EAAE,MAAM,CAAC,CAAA;IAC/E,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,EAAE,EAAE,4BAA4B,UAAU,EAAE,MAAM,CAAC,CAAA;IACjF,IAAI,CAAC;QACH,MAAM,EAAE,CAAC,SAAS,CAAC,SAAS,EAAE,KAAK,CAAC,CAAA;QACpC,IAAI,CAAC;YACH,MAAM,uBAAuB,CAAC,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC,CAAA;QAC1D,CAAC
;QAAC,OAAO,MAAM,EAAE,CAAC;YAChB,MAAM,8BAA8B,CAAC,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC,CAAA;QACjE,CAAC;QACD,OAAO,IAAI,UAAU,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAA;IACtD,CAAC;YAAS,CAAC;QACT,MAAM,EAAE,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAA;QAC1C,MAAM,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAA;IAC7C,CAAC;AACH,CAAC;AAED,KAAK,UAAU,kBAAkB,CAAC,EAChC,SAAS,EACT,UAAU,EACV,IAAI,EACJ,IAAI,GAML;IACC,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QAC1C,MAAM,IAAI,GAAG,KAAK,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE,KAAK,EAAE,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,CAAC,EAAE,CAAC,CAAA;QAC3E,IAAI,MAAM,GAAG,EAAE,CAAA;QACf,IAAI,CAAC,MAAM,EAAE,WAAW,CAAC,MAAM,CAAC,CAAA;QAChC,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE;YACxC,IAAI,MAAM,CAAC,MAAM,GAAG,IAAI;gBAAE,OAAM;YAChC,MAAM,IAAI,KAAK,CAAA;QACjB,CAAC,CAAC,CAAA;QACF,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAA;QAC1C,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;YACxB,IAAI,IAAI,KAAK,CAAC,EAAE,CAAC;gBACf,OAAO,EAAE,CAAA;gBACT,OAAM;YACR,CAAC;YACD,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,EAAE,CAAA;YAC5B,MAAM,CACJ,IAAI,KAAK,CACP,UAAU,IAAI,sBAAsB,IAAI,IAAI,SAAS,SAAS,SAAS,OAAO,UAAU,KACtF,MAAM,IAAI,eACZ,EAAE,CACH,CACF,CAAA;QACH,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;AACJ,CAAC"}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { MAX_ERROR_DETAIL_CHARS, TRANSCRIPTION_TIMEOUT_MS } from './constants.js';
|
|
2
|
+
import { ensureWhisperFilenameExtension, toArrayBuffer } from './utils.js';
|
|
3
|
+
/**
 * Sends audio bytes to the OpenAI transcription endpoint using the
 * `whisper-1` model and returns the transcribed text.
 *
 * @param bytes Media payload to upload.
 * @param mediaType MIME type used for the uploaded blob and for choosing a
 *   filename extension.
 * @param filename Optional original filename; blank or missing values fall
 *   back to `'media'`.
 * @param apiKey OpenAI API key sent as a bearer token.
 * @returns The trimmed transcript, or `null` when the response has no string
 *   `text` field or the text is empty after trimming.
 * @throws An `Error` containing the HTTP status (and response detail, when
 *   readable) for non-OK responses; fetch/abort errors propagate unchanged.
 */
export async function transcribeWithOpenAi(bytes, mediaType, filename, apiKey) {
    const body = new FormData();
    const cleanedName = filename?.trim();
    // Whisper sometimes uses the filename extension to detect the format, so
    // make sure the uploaded name carries a sensible one.
    const uploadName = ensureWhisperFilenameExtension(cleanedName ? cleanedName : 'media', mediaType);
    const audio = new Blob([toArrayBuffer(bytes)], { type: mediaType });
    body.append('file', audio, uploadName);
    body.append('model', 'whisper-1');
    const response = await globalThis.fetch('https://api.openai.com/v1/audio/transcriptions', {
        method: 'POST',
        headers: { Authorization: `Bearer ${apiKey}` },
        body,
        signal: AbortSignal.timeout(TRANSCRIPTION_TIMEOUT_MS),
    });
    if (!response.ok) {
        const detail = await readErrorDetail(response);
        throw new Error(`OpenAI transcription failed (${response.status})${detail ? `: ${detail}` : ''}`);
    }
    const payload = await response.json();
    const transcript = typeof payload?.text === 'string' ? payload.text.trim() : '';
    return transcript.length > 0 ? transcript : null;
}
|
|
27
|
+
/**
 * Decides whether a failed OpenAI transcription looks like a media-format
 * problem, based on known phrases in the error message.
 *
 * @param error Error whose `message` is inspected (case-insensitively).
 * @returns `true` when the message contains any of the known format-failure
 *   phrases, otherwise `false`.
 */
export function shouldRetryOpenAiViaFfmpeg(error) {
    const haystack = error.message.toLowerCase();
    const formatFailureMarkers = [
        'unrecognized file format',
        'could not be decoded',
        'format is not supported',
    ];
    return formatFailureMarkers.some((marker) => haystack.includes(marker));
}
|
|
33
|
+
/**
 * Extracts a short, human-readable detail string from a failed response.
 *
 * @param response Response whose body text is read.
 * @returns The trimmed body, cut to `MAX_ERROR_DETAIL_CHARS` characters with
 *   a trailing ellipsis when longer; `null` when the body is empty or cannot
 *   be read.
 */
async function readErrorDetail(response) {
    try {
        const body = (await response.text()).trim();
        if (!body) {
            return null;
        }
        if (body.length <= MAX_ERROR_DETAIL_CHARS) {
            return body;
        }
        return `${body.slice(0, MAX_ERROR_DETAIL_CHARS)}…`;
    }
    catch {
        // Reading the body is best-effort; the caller still reports the status.
        return null;
    }
}
|
|
47
|
+
//# sourceMappingURL=openai.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"openai.js","sourceRoot":"","sources":["../../../../src/transcription/whisper/openai.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,sBAAsB,EAAE,wBAAwB,EAAE,MAAM,gBAAgB,CAAA;AACjF,OAAO,EAAE,8BAA8B,EAAE,aAAa,EAAE,MAAM,YAAY,CAAA;AAE1E,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,KAAiB,EACjB,SAAiB,EACjB,QAAuB,EACvB,MAAc;IAEd,MAAM,IAAI,GAAG,IAAI,QAAQ,EAAE,CAAA;IAC3B,MAAM,YAAY,GAAG,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,OAAO,CAAA;IACjE,oGAAoG;IACpG,MAAM,QAAQ,GAAG,8BAA8B,CAAC,YAAY,EAAE,SAAS,CAAC,CAAA;IACxE,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,IAAI,IAAI,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC,EAAE,QAAQ,CAAC,CAAA;IACpF,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,WAAW,CAAC,CAAA;IAEjC,MAAM,QAAQ,GAAG,MAAM,UAAU,CAAC,KAAK,CAAC,gDAAgD,EAAE;QACxF,MAAM,EAAE,MAAM;QACd,OAAO,EAAE,EAAE,aAAa,EAAE,UAAU,MAAM,EAAE,EAAE;QAC9C,IAAI,EAAE,IAAI;QACV,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,wBAAwB,CAAC;KACtD,CAAC,CAAA;IAEF,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,QAAQ,CAAC,CAAA;QAC9C,MAAM,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,KAAK,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAA;QAC1C,MAAM,IAAI,KAAK,CAAC,gCAAgC,QAAQ,CAAC,MAAM,IAAI,MAAM,EAAE,CAAC,CAAA;IAC9E,CAAC;IAED,MAAM,OAAO,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAuB,CAAA;IAC7D,IAAI,OAAO,OAAO,EAAE,IAAI,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAA;IAClD,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,CAAA;IACnC,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAA;AAC5C,CAAC;AAED,MAAM,UAAU,0BAA0B,CAAC,KAAY;IACrD,MAAM,GAAG,GAAG,KAAK,CAAC,OAAO,CAAC,WAAW,EAAE,CAAA;IACvC,OAAO,CACL,GAAG,CAAC,QAAQ,CAAC,0BAA0B,CAAC;QACxC,GAAG,CAAC,QAAQ,CAAC,sBAAsB,CAAC;QACpC,GAAG,CAAC,QAAQ,CAAC,yBAAyB,CAAC,CACxC,CAAA;AACH,CAAC;AAED,KAAK,UAAU,eAAe,CAAC,QAAkB;IAC/C,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAA;QAClC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAA;QAC3B,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAA;QACzB,OAAO,OAAO,CAAC,MAAM,GAAG,sBAAsB;YAC5C,CAAC,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,sBAAsB,CAAC,GAAG;YAChD,CAAC,CAAC,O
AAO,CAAA;IACb,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../../src/transcription/whisper/types.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import { promises as fs } from 'node:fs';
|
|
2
|
+
/**
 * Builds a new `Error` whose message is prefixed with context.
 *
 * @param prefix Context text placed before the original message.
 * @param error The caught value; may be an `Error` or anything else.
 * @returns A new `Error` with message `"<prefix>: <detail>"`. When `error`
 *   is an `Error` its message is the detail and it is attached as `cause`;
 *   otherwise the detail is `String(error)` and no cause is set.
 */
export function wrapError(prefix, error) {
    const isError = error instanceof Error;
    const detail = isError ? error.message : String(error);
    const message = `${prefix}: ${detail}`;
    return isError ? new Error(message, { cause: error }) : new Error(message);
}
|
|
8
|
+
/**
 * Copies exactly the bytes covered by a typed-array view into a new buffer.
 *
 * Slicing by the view's offset and length matters because a view may cover
 * only part of its backing buffer.
 *
 * @param view View whose `buffer`, `byteOffset` and `byteLength` are read.
 * @returns The result of slicing the backing buffer to the view's byte range.
 */
export function toArrayBuffer(view) {
    const { buffer, byteOffset, byteLength } = view;
    return buffer.slice(byteOffset, byteOffset + byteLength);
}
|
|
12
|
+
/** Maps lower-cased media types to the filename extension used for uploads. */
const WHISPER_EXTENSION_BY_MEDIA_TYPE = new Map([
    ['audio/mpeg', 'mp3'],
    ['audio/mp3', 'mp3'],
    ['audio/mpga', 'mp3'],
    ['video/mp4', 'mp4'],
    ['audio/mp4', 'mp4'],
    ['application/mp4', 'mp4'],
    ['audio/x-m4a', 'm4a'],
    ['audio/m4a', 'm4a'],
    ['audio/x-wav', 'wav'],
    ['audio/wav', 'wav'],
    ['audio/flac', 'flac'],
    ['audio/webm', 'webm'],
    ['video/webm', 'webm'],
    ['audio/ogg', 'ogg'],
    ['audio/oga', 'ogg'],
    ['application/ogg', 'ogg'],
]);
/**
 * Ensures a filename carries an extension, deriving one from the media type
 * when it does not.
 *
 * A name counts as already having an extension when its last dot is neither
 * the first nor the last character; such names are returned unchanged (after
 * trimming). Otherwise an extension is chosen from the media type, ignoring
 * case and any `;parameters`, and defaulting to `mp3` for unknown types.
 *
 * @param name Candidate filename; blank values fall back to `'media'`.
 * @param mediaType MIME type used to pick the extension.
 * @returns The filename, guaranteed to end in `.<extension>`.
 */
export function ensureWhisperFilenameExtension(name, mediaType) {
    const trimmed = name.trim();
    const base = trimmed.length > 0 ? trimmed : 'media';
    const dot = base.lastIndexOf('.');
    if (dot > 0 && dot < base.length - 1) {
        return base;
    }
    const type = mediaType.toLowerCase().split(';')[0]?.trim() ?? '';
    const ext = WHISPER_EXTENSION_BY_MEDIA_TYPE.get(type) ?? 'mp3';
    // Drop trailing dots so "clip." becomes "clip.wav" rather than "clip..wav";
    // a name made only of dots falls back to the default stem.
    const stem = base.replace(/\.+$/, '') || 'media';
    return `${stem}.${ext}`;
}
|
|
41
|
+
/**
 * Reads up to `maxBytes` bytes from the start of a file.
 *
 * @param filePath Path of the file to open for reading.
 * @param maxBytes Upper bound on the number of bytes to read.
 * @returns A `Uint8Array` holding the bytes actually read from offset 0
 *   (fewer than `maxBytes` when the read returns less).
 * @throws Errors from opening or reading the file; the handle is closed in
 *   either case and close failures are ignored.
 */
export async function readFirstBytes(filePath, maxBytes) {
    const handle = await fs.open(filePath, 'r');
    try {
        const scratch = Buffer.allocUnsafe(maxBytes);
        const { bytesRead } = await handle.read(scratch, 0, maxBytes, 0);
        // Copy only the filled prefix; the rest of the scratch buffer is uninitialized.
        return new Uint8Array(scratch.subarray(0, bytesRead));
    }
    finally {
        await handle.close().catch(() => { });
    }
}
|
|
52
|
+
/**
 * Formats a byte count as a compact string using 1024-based units.
 *
 * The value is scaled down through B, KB, MB and GB (GB is the largest unit,
 * so bigger values stay in GB). Plain bytes and scaled values of 10 or more
 * are printed without decimals; smaller scaled values get one decimal place.
 *
 * @param bytes Number of bytes.
 * @returns The formatted value immediately followed by its unit, e.g. `1.5KB`.
 */
export function formatBytes(bytes) {
    const units = ['B', 'KB', 'MB', 'GB'];
    const lastUnit = units.length - 1;
    let unitIndex = 0;
    let scaled = bytes;
    for (; unitIndex < lastUnit && scaled >= 1024; unitIndex++) {
        scaled = scaled / 1024;
    }
    const wholeNumber = unitIndex === 0 || scaled >= 10;
    return scaled.toFixed(wholeNumber ? 0 : 1) + units[unitIndex];
}
|
|
63
|
+
//# sourceMappingURL=utils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"utils.js","sourceRoot":"","sources":["../../../../src/transcription/whisper/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,SAAS,CAAA;AAExC,MAAM,UAAU,SAAS,CAAC,MAAc,EAAE,KAAc;IACtD,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;QAC3B,OAAO,IAAI,KAAK,CAAC,GAAG,MAAM,KAAK,KAAK,CAAC,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAA;IACnE,CAAC;IACD,OAAO,IAAI,KAAK,CAAC,GAAG,MAAM,KAAK,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAA;AACjD,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,IAAgB;IAC5C,MAAM,MAAM,GAAG,IAAI,CAAC,MAAqB,CAAA;IACzC,OAAO,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,UAAU,CAAC,CAAA;AACzE,CAAC;AAED,MAAM,UAAU,8BAA8B,CAAC,IAAY,EAAE,SAAiB;IAC5E,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAA;IAC3B,MAAM,IAAI,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAA;IACnD,MAAM,YAAY,GAAG,CAAC,GAAG,EAAE;QACzB,MAAM,GAAG,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAA;QACjC,IAAI,GAAG,IAAI,CAAC;YAAE,OAAO,KAAK,CAAA;QAC1B,IAAI,GAAG,KAAK,IAAI,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,KAAK,CAAA;QACzC,OAAO,IAAI,CAAA;IACb,CAAC,CAAC,EAAE,CAAA;IACJ,IAAI,YAAY;QAAE,OAAO,IAAI,CAAA;IAE7B,MAAM,IAAI,GAAG,SAAS,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;IAChE,MAAM,GAAG,GACP,IAAI,KAAK,YAAY,IAAI,IAAI,KAAK,WAAW,IAAI,IAAI,KAAK,YAAY;QACpE,CAAC,CAAC,KAAK;QACP,CAAC,CAAC,IAAI,KAAK,WAAW,IAAI,IAAI,KAAK,WAAW,IAAI,IAAI,KAAK,iBAAiB;YAC1E,CAAC,CAAC,KAAK;YACP,CAAC,CAAC,IAAI,KAAK,aAAa,IAAI,IAAI,KAAK,WAAW;gBAC9C,CAAC,CAAC,KAAK;gBACP,CAAC,CAAC,IAAI,KAAK,YAAY;oBACrB,CAAC,CAAC,MAAM;oBACR,CAAC,CAAC,IAAI,KAAK,YAAY,IAAI,IAAI,KAAK,YAAY;wBAC9C,CAAC,CAAC,MAAM;wBACR,CAAC,CAAC,IAAI,KAAK,WAAW,IAAI,IAAI,KAAK,WAAW,IAAI,IAAI,KAAK,iBAAiB;4BAC1E,CAAC,CAAC,KAAK;4BACP,CAAC,CAAC,KAAK,CAAA;IAErB,OAAO,GAAG,IAAI,IAAI,GAAG,EAAE,CAAA;AACzB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,QAAgB,EAAE,QAAgB;IACrE,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAA;IAC3C,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAA;QAC3C,MAAM,IAAI,GAAG,MAAM,MAAM,CA
AC,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAA;QACtD,OAAO,IAAI,UAAU,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAA;IACxD,CAAC;YAAS,CAAC;QACT,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAA;IACtC,CAAC;AACH,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,KAAa;IACvC,MAAM,KAAK,GAAG,CAAC,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAA;IACrC,IAAI,KAAK,GAAG,KAAK,CAAA;IACjB,IAAI,GAAG,GAAG,CAAC,CAAA;IACX,OAAO,KAAK,IAAI,IAAI,IAAI,GAAG,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC/C,KAAK,IAAI,IAAI,CAAA;QACb,GAAG,IAAI,CAAC,CAAA;IACV,CAAC;IACD,MAAM,QAAQ,GAAG,KAAK,IAAI,EAAE,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;IACjD,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,KAAK,CAAC,GAAG,CAAC,EAAE,CAAA;AAClD,CAAC"}
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
import { spawn } from 'node:child_process';
|
|
2
|
+
import { randomUUID } from 'node:crypto';
|
|
3
|
+
import { promises as fs } from 'node:fs';
|
|
4
|
+
import { tmpdir } from 'node:os';
|
|
5
|
+
import { join } from 'node:path';
|
|
6
|
+
import { DISABLE_LOCAL_WHISPER_CPP_ENV, WHISPER_CPP_BINARY_ENV, WHISPER_CPP_MODEL_PATH_ENV, } from './constants.js';
|
|
7
|
+
import { isFfmpegAvailable, runFfmpegTranscodeToMp3, runFfmpegTranscodeToMp3Lenient, } from './ffmpeg.js';
|
|
8
|
+
import { wrapError } from './utils.js';
|
|
9
|
+
/**
 * Reports whether local whisper.cpp transcription can be used right now.
 *
 * The checks run in order and stop at the first failure: the feature must not be
 * disabled, the whisper CLI probe must succeed, and a model path must resolve.
 *
 * @returns A promise resolving to `true` only when all three checks pass.
 */
export async function isWhisperCppReady() {
    // Short-circuit so the CLI is never spawned when the feature is switched off.
    const cliUsable = isWhisperCppEnabled() && (await isWhisperCliAvailable());
    if (!cliUsable) {
        return false;
    }
    const resolvedModel = await resolveWhisperCppModelPath();
    return !!resolvedModel;
}
|
|
17
|
+
/**
 * Resolves a short, human-readable label for the whisper.cpp model in use.
 *
 * @returns A promise resolving to the label derived from the resolved model path, or
 * `null` when no model path can be resolved.
 */
export async function resolveWhisperCppModelNameForDisplay() {
    const resolvedPath = await resolveWhisperCppModelPath();
    if (!resolvedPath) {
        return null;
    }
    return resolveWhisperCppModelLabelFromPath(resolvedPath);
}
|
|
21
|
+
/**
 * Transcribes a local media file by running the whisper.cpp CLI.
 *
 * When the media type is not one the CLI is fed directly, the file is first transcoded to
 * a temporary MP3 with ffmpeg (strict transcode first, lenient transcode as a fallback).
 * The transcript is read from the `.txt` file the CLI is asked to write under the OS temp
 * directory. Both temporary files are removed before this function settles, whether the
 * run succeeded or failed.
 *
 * @param options.filePath Path of the media file to transcribe.
 * @param options.mediaType MIME type of the file; decides whether transcoding is needed.
 * @param options.totalDurationSeconds Total media duration. Used to convert the CLI's
 *   percentage progress into seconds; when it is not a positive number, progress events
 *   carry `processedDurationSeconds: null`.
 * @param options.onProgress Optional callback invoked once after a transcode and once per
 *   distinct progress percentage parsed from the CLI's stderr.
 * @returns `{ text, provider, error, notes }`. On failure `text` is `null` and `error` is
 *   set; `provider` is `null` only when no model could be resolved, otherwise
 *   `'whisper.cpp'`.
 *
 * Note: an error raised by the lenient ffmpeg transcode is not caught here and propagates
 * to the caller (after temp-file cleanup).
 */
export async function transcribeWithWhisperCppFile({ filePath, mediaType, totalDurationSeconds, onProgress, }) {
    const notes = [];
    const modelPath = await resolveWhisperCppModelPath();
    if (!modelPath) {
        return {
            text: null,
            provider: null,
            error: new Error('whisper.cpp model not found (set SUMMARIZE_WHISPER_CPP_MODEL_PATH)'),
            notes,
        };
    }
    const canUseDirectly = isWhisperCppSupportedMediaType(mediaType);
    // Only probe for ffmpeg when a transcode is actually required.
    const canTranscode = !canUseDirectly && (await isFfmpegAvailable());
    if (!canUseDirectly && !canTranscode) {
        return {
            text: null,
            provider: 'whisper.cpp',
            error: new Error(`whisper.cpp supports only flac/mp3/ogg/wav (mediaType=${mediaType}); install ffmpeg to transcode`),
            notes,
        };
    }
    // Past the guard above, "not directly usable" implies ffmpeg is available, so a
    // temporary MP3 is needed exactly when the input cannot be used directly.
    const transcodedPath = canUseDirectly
        ? null
        : join(tmpdir(), `summarize-whisper-cpp-${randomUUID()}.mp3`);
    const inputPath = transcodedPath ?? filePath;
    const outputBase = join(tmpdir(), `summarize-whisper-cpp-out-${randomUUID()}`);
    const outputTxt = `${outputBase}.txt`;
    try {
        if (transcodedPath !== null) {
            // whisper-cli supports only a few audio formats. We transcode via ffmpeg when possible to
            // keep “any media file” working locally too.
            try {
                await runFfmpegTranscodeToMp3({ inputPath: filePath, outputPath: transcodedPath });
                notes.push('whisper.cpp: transcoded media to MP3 via ffmpeg');
            }
            catch (error) {
                // Strict transcode failed: retry leniently and record why the strict pass failed.
                await runFfmpegTranscodeToMp3Lenient({
                    inputPath: filePath,
                    outputPath: transcodedPath,
                });
                notes.push('whisper.cpp: transcoded media to MP3 via ffmpeg (lenient)');
                notes.push(`whisper.cpp: strict transcode failed: ${wrapError('ffmpeg', error).message}`);
            }
            onProgress?.({
                partIndex: null,
                parts: null,
                processedDurationSeconds: null,
                totalDurationSeconds,
            });
        }
        const args = [
            '--model',
            modelPath,
            '--language',
            'auto',
            '--no-timestamps',
            '--no-prints',
            '--print-progress',
            '--output-txt',
            '--output-file',
            outputBase,
            inputPath,
        ];
        try {
            await new Promise((resolve, reject) => {
                const proc = spawn(resolveWhisperCppBinary(), args, { stdio: ['ignore', 'ignore', 'pipe'] });
                let stderr = '';
                proc.stderr?.setEncoding('utf8');
                let lastProgressPercent = -1;
                proc.stderr?.on('data', (chunk) => {
                    // Keep only the first part of stderr for the error message; stop appending
                    // once the buffer has grown past 8 KiB.
                    if (stderr.length <= 8192) {
                        stderr += chunk;
                    }
                    // Progress output from `whisper-cli --print-progress` arrives on stderr. We parse it
                    // best-effort and map to seconds when we know the total duration.
                    const lines = chunk.split(/\r?\n/);
                    for (const line of lines) {
                        const match = line.match(/progress\s*=\s*(\d{1,3})%/i);
                        if (!match)
                            continue;
                        const raw = Number(match[1]);
                        if (!Number.isFinite(raw))
                            continue;
                        const pct = Math.max(0, Math.min(100, Math.round(raw)));
                        // Report each percentage at most once in a row.
                        if (pct === lastProgressPercent)
                            continue;
                        lastProgressPercent = pct;
                        const processed = typeof totalDurationSeconds === 'number' && totalDurationSeconds > 0
                            ? (totalDurationSeconds * pct) / 100
                            : null;
                        onProgress?.({
                            partIndex: null,
                            parts: null,
                            processedDurationSeconds: processed,
                            totalDurationSeconds,
                        });
                    }
                });
                proc.on('error', reject);
                proc.on('close', (code) => {
                    if (code === 0) {
                        resolve();
                        return;
                    }
                    // `code` is null when no exit code is available; report it as 'unknown'.
                    reject(new Error(`whisper.cpp failed (${code ?? 'unknown'}): ${stderr.trim()}`));
                });
            });
        }
        catch (error) {
            return {
                text: null,
                provider: 'whisper.cpp',
                error: wrapError('whisper.cpp failed', error),
                notes,
            };
        }
        // A missing or unreadable transcript is treated the same as an empty one.
        const raw = await fs.readFile(outputTxt, 'utf8').catch(() => '');
        const text = raw.trim();
        if (!text) {
            return {
                text: null,
                provider: 'whisper.cpp',
                error: new Error('whisper.cpp returned empty text'),
                notes,
            };
        }
        notes.push(`whisper.cpp: model=${resolveWhisperCppModelLabelFromPath(modelPath)}`);
        return { text, provider: 'whisper.cpp', error: null, notes };
    }
    finally {
        // Best-effort cleanup on every exit path, including a failed CLI run that may have
        // left a partial transcript behind. Missing files are ignored.
        await fs.unlink(outputTxt).catch(() => { });
        if (transcodedPath !== null) {
            await fs.unlink(transcodedPath).catch(() => { });
        }
    }
}
|
|
164
|
+
/**
 * Tells whether local whisper.cpp usage is allowed.
 *
 * It is allowed unless the disable environment variable, after trimming whitespace, is
 * exactly `'1'`. An unset variable counts as enabled.
 */
function isWhisperCppEnabled() {
    const flag = process.env[DISABLE_LOCAL_WHISPER_CPP_ENV];
    if (flag == null) {
        return true;
    }
    return flag.trim() !== '1';
}
|
|
167
|
+
/**
 * Probes for a working whisper CLI by running the resolved binary with `--help`.
 *
 * @returns A promise resolving to `true` when the process closes with exit code 0, and to
 * `false` when it cannot be spawned or closes with any other code. Never rejects.
 */
async function isWhisperCliAvailable() {
    const executable = resolveWhisperCppBinary();
    return new Promise((settle) => {
        // All stdio is discarded; only the spawn outcome and exit code matter.
        const child = spawn(executable, ['--help'], { stdio: ['ignore', 'ignore', 'ignore'] });
        child.on('error', () => {
            settle(false);
        });
        child.on('close', (exitCode) => {
            settle(exitCode === 0);
        });
    });
}
|
|
175
|
+
/**
 * Picks the whisper CLI executable to run.
 *
 * @returns The trimmed value of the binary-override environment variable when it is
 * non-empty, otherwise the default command name `'whisper-cli'`.
 */
function resolveWhisperCppBinary() {
    const configured = process.env[WHISPER_CPP_BINARY_ENV];
    const trimmed = configured == null ? '' : configured.trim();
    if (trimmed.length === 0) {
        return 'whisper-cli';
    }
    return trimmed;
}
|
|
179
|
+
/**
 * Locates the whisper.cpp model file to use.
 *
 * Resolution order:
 * 1. The model-path override environment variable (trimmed). When set, it is the only
 *    candidate: the result is that path if `stat` reports a file, otherwise `null`.
 * 2. `<home>/.summarize/cache/whisper-cpp/models/ggml-base.bin`, where `<home>` comes
 *    from `HOME`, falling back to `USERPROFILE`.
 *
 * @returns A promise resolving to the model path, or `null` when no candidate is a file.
 */
async function resolveWhisperCppModelPath() {
    // True only when `stat` succeeds and reports a file; any stat error counts as "no".
    const isExistingFile = async (candidate) => {
        try {
            return (await fs.stat(candidate)).isFile();
        }
        catch {
            return false;
        }
    };
    const explicitPath = (process.env[WHISPER_CPP_MODEL_PATH_ENV] ?? '').trim();
    if (explicitPath.length > 0) {
        // An explicit override is authoritative: there is no fallback to the cache location.
        return (await isExistingFile(explicitPath)) ? explicitPath : null;
    }
    const homeDir = (process.env.HOME ?? process.env.USERPROFILE ?? '').trim();
    if (homeDir.length === 0) {
        return null;
    }
    const cachedModel = join(homeDir, '.summarize', 'cache', 'whisper-cpp', 'models', 'ggml-base.bin');
    return (await isExistingFile(cachedModel)) ? cachedModel : null;
}
|
|
206
|
+
/**
 * Derives a short display label from a whisper.cpp model file path.
 *
 * Takes the final path segment and strips a leading `ggml-`, then a trailing `.bin`, then
 * a trailing `.en` (suffixes are matched case-insensitively), e.g.
 * `/models/ggml-small.en.bin` -> `small`.
 *
 * @param modelPath Path to the model file; `/` and `\` are both treated as separators.
 * @returns The stripped label, or the unmodified final path segment when stripping would
 * leave an empty string.
 */
function resolveWhisperCppModelLabelFromPath(modelPath) {
    // Split on both separator styles: the model path may be built from USERPROFILE on
    // Windows, where splitting on '/' alone would leave the whole path as the "file name".
    const base = modelPath.split(/[\\/]/).pop() ?? modelPath;
    let name = base
        .replace(/^ggml-/, '')
        .replace(/\.bin$/i, '')
        .replace(/\.en$/i, '');
    name = name.trim();
    return name.length > 0 ? name : base;
}
|
|
215
|
+
/**
 * Tells whether a media type can be handed to whisper-cli without transcoding.
 *
 * The comparison ignores case, surrounding whitespace and any parameters after `;`
 * (e.g. `Audio/WAV; codecs=1` is treated as `audio/wav`). Accepted types are the
 * MP3, Ogg, FLAC and WAV media types, including their common aliases.
 *
 * @param mediaType MIME type string of the input file.
 * @returns `true` for an accepted type; `false` otherwise, including for non-string input.
 */
function isWhisperCppSupportedMediaType(mediaType) {
    // Defensive: treat a missing or non-string type as unsupported instead of throwing.
    if (typeof mediaType !== 'string') {
        return false;
    }
    const type = mediaType.toLowerCase().split(';')[0]?.trim() ?? '';
    switch (type) {
        // MP3
        case 'audio/mpeg':
        case 'audio/mp3':
        case 'audio/mpga':
        // Ogg
        case 'audio/ogg':
        case 'audio/oga':
        case 'application/ogg':
        // FLAC (`audio/x-flac` is the widely used pre-registration alias)
        case 'audio/flac':
        case 'audio/x-flac':
        // WAV and its aliases
        case 'audio/wav':
        case 'audio/x-wav':
        case 'audio/wave':
        case 'audio/vnd.wave':
        case 'audio/x-pn-wav':
            return true;
        default:
            return false;
    }
}
|
|
227
|
+
//# sourceMappingURL=whisper-cpp.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"whisper-cpp.js","sourceRoot":"","sources":["../../../../src/transcription/whisper/whisper-cpp.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAA;AAC1C,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,SAAS,CAAA;AACxC,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAA;AAChC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAChC,OAAO,EACL,6BAA6B,EAC7B,sBAAsB,EACtB,0BAA0B,GAC3B,MAAM,gBAAgB,CAAA;AACvB,OAAO,EACL,iBAAiB,EACjB,uBAAuB,EACvB,8BAA8B,GAC/B,MAAM,aAAa,CAAA;AAEpB,OAAO,EAAE,SAAS,EAAE,MAAM,YAAY,CAAA;AAEtC,MAAM,CAAC,KAAK,UAAU,iBAAiB;IACrC,IAAI,CAAC,mBAAmB,EAAE;QAAE,OAAO,KAAK,CAAA;IACxC,IAAI,CAAC,CAAC,MAAM,qBAAqB,EAAE,CAAC;QAAE,OAAO,KAAK,CAAA;IAClD,MAAM,KAAK,GAAG,MAAM,0BAA0B,EAAE,CAAA;IAChD,OAAO,OAAO,CAAC,KAAK,CAAC,CAAA;AACvB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,oCAAoC;IACxD,MAAM,SAAS,GAAG,MAAM,0BAA0B,EAAE,CAAA;IACpD,OAAO,SAAS,CAAC,CAAC,CAAC,mCAAmC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;AAC1E,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,4BAA4B,CAAC,EACjD,QAAQ,EACR,SAAS,EACT,oBAAoB,EACpB,UAAU,GAMX;IACC,MAAM,KAAK,GAAa,EAAE,CAAA;IAC1B,MAAM,SAAS,GAAG,MAAM,0BAA0B,EAAE,CAAA;IACpD,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,OAAO;YACL,IAAI,EAAE,IAAI;YACV,QAAQ,EAAE,IAAI;YACd,KAAK,EAAE,IAAI,KAAK,CAAC,oEAAoE,CAAC;YACtF,KAAK;SACN,CAAA;IACH,CAAC;IAED,MAAM,cAAc,GAAG,8BAA8B,CAAC,SAAS,CAAC,CAAA;IAChE,MAAM,YAAY,GAAG,CAAC,cAAc,IAAI,CAAC,MAAM,iBAAiB,EAAE,CAAC,CAAA;IACnE,IAAI,CAAC,cAAc,IAAI,CAAC,YAAY,EAAE,CAAC;QACrC,OAAO;YACL,IAAI,EAAE,IAAI;YACV,QAAQ,EAAE,aAAa;YACvB,KAAK,EAAE,IAAI,KAAK,CACd,yDAAyD,SAAS,gCAAgC,CACnG;YACD,KAAK;SACN,CAAA;IACH,CAAC;IACD,MAAM,aAAa,GAAG,CAAC,GAAG,EAAE;QAC1B,IAAI,cAAc;YAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAoC,EAAE,CAAA;QAC5F,IAAI,CAAC,YAAY;YAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAoC,EAAE,CAAA;QAC3F,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,EAAE,EAAE,yBAAyB,UAAU,EAAE,MAAM,CAAC,CAAA;QAC3E,OAAO;YACL,IAAI,EAAE,OAAO;YACb,OAAO,EAAE,KAAK,IAAI,EAAE;gBAClB,MAAM,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAA;YAC1C,CAAC;SACF,CAAA;IACH,CAA
C,CAAC,EAAE,CAAA;IAEJ,IAAI,CAAC;QACH,IAAI,CAAC,cAAc,IAAI,YAAY,EAAE,CAAC;YACpC,0FAA0F;YAC1F,6CAA6C;YAC7C,IAAI,CAAC;gBACH,MAAM,uBAAuB,CAAC,EAAE,SAAS,EAAE,QAAQ,EAAE,UAAU,EAAE,aAAa,CAAC,IAAI,EAAE,CAAC,CAAA;gBACtF,KAAK,CAAC,IAAI,CAAC,iDAAiD,CAAC,CAAA;YAC/D,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,MAAM,8BAA8B,CAAC;oBACnC,SAAS,EAAE,QAAQ;oBACnB,UAAU,EAAE,aAAa,CAAC,IAAI;iBAC/B,CAAC,CAAA;gBACF,KAAK,CAAC,IAAI,CAAC,2DAA2D,CAAC,CAAA;gBACvE,KAAK,CAAC,IAAI,CAAC,yCAAyC,SAAS,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC,CAAA;YAC3F,CAAC;YACD,UAAU,EAAE,CAAC;gBACX,SAAS,EAAE,IAAI;gBACf,KAAK,EAAE,IAAI;gBACX,wBAAwB,EAAE,IAAI;gBAC9B,oBAAoB;aACrB,CAAC,CAAA;QACJ,CAAC;QAED,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,EAAE,EAAE,6BAA6B,UAAU,EAAE,EAAE,CAAC,CAAA;QAC9E,MAAM,SAAS,GAAG,GAAG,UAAU,MAAM,CAAA;QAErC,MAAM,IAAI,GAAG;YACX,SAAS;YACT,SAAS;YACT,YAAY;YACZ,MAAM;YACN,iBAAiB;YACjB,aAAa;YACb,kBAAkB;YAClB,cAAc;YACd,eAAe;YACf,UAAU;YACV,aAAa,CAAC,IAAI;SACnB,CAAA;QAED,IAAI,CAAC;YACH,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;gBAC1C,MAAM,IAAI,GAAG,KAAK,CAAC,uBAAuB,EAAE,EAAE,IAAI,EAAE,EAAE,KAAK,EAAE,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,CAAC,EAAE,CAAC,CAAA;gBAC5F,IAAI,MAAM,GAAG,EAAE,CAAA;gBACf,IAAI,CAAC,MAAM,EAAE,WAAW,CAAC,MAAM,CAAC,CAAA;gBAChC,IAAI,mBAAmB,GAAG,CAAC,CAAC,CAAA;gBAC5B,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE;oBACxC,IAAI,MAAM,CAAC,MAAM,IAAI,IAAI,EAAE,CAAC;wBAC1B,MAAM,IAAI,KAAK,CAAA;oBACjB,CAAC;oBAED,qFAAqF;oBACrF,kEAAkE;oBAClE,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,CAAA;oBAClC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;wBACzB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAA;wBACtD,IAAI,CAAC,KAAK;4BAAE,SAAQ;wBACpB,MAAM,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAA;wBAC5B,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC;4BAAE,SAAQ;wBACnC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;wBACvD,IAAI,GAAG,KAAK,mBAAmB;4BAAE,SAAQ;wBACzC,mBAAmB,GAAG,GAAG,CAAA;wBACzB,MAAM,SAAS,GACb,OAAO,oBAAoB,KAAK,QAAQ,IAAI,oBAAoB,GAAG,CAAC;4BAClE,CAAC
,CAAC,CAAC,oBAAoB,GAAG,GAAG,CAAC,GAAG,GAAG;4BACpC,CAAC,CAAC,IAAI,CAAA;wBACV,UAAU,EAAE,CAAC;4BACX,SAAS,EAAE,IAAI;4BACf,KAAK,EAAE,IAAI;4BACX,wBAAwB,EAAE,SAAS;4BACnC,oBAAoB;yBACrB,CAAC,CAAA;oBACJ,CAAC;gBACH,CAAC,CAAC,CAAA;gBACF,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAA;gBACxB,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;oBACxB,IAAI,IAAI,KAAK,CAAC,EAAE,CAAC;wBACf,OAAO,EAAE,CAAA;wBACT,OAAM;oBACR,CAAC;oBACD,MAAM,CAAC,IAAI,KAAK,CAAC,uBAAuB,IAAI,IAAI,SAAS,MAAM,MAAM,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAA;gBAClF,CAAC,CAAC,CAAA;YACJ,CAAC,CAAC,CAAA;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,IAAI,EAAE,IAAI;gBACV,QAAQ,EAAE,aAAa;gBACvB,KAAK,EAAE,SAAS,CAAC,oBAAoB,EAAE,KAAK,CAAC;gBAC7C,KAAK;aACN,CAAA;QACH,CAAC;QAED,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAA;QAChE,MAAM,EAAE,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAA;QAC1C,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAA;QACvB,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,OAAO;gBACL,IAAI,EAAE,IAAI;gBACV,QAAQ,EAAE,aAAa;gBACvB,KAAK,EAAE,IAAI,KAAK,CAAC,iCAAiC,CAAC;gBACnD,KAAK;aACN,CAAA;QACH,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,sBAAsB,mCAAmC,CAAC,SAAS,CAAC,EAAE,CAAC,CAAA;QAClF,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,aAAa,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,CAAA;IAC9D,CAAC;YAAS,CAAC;QACT,MAAM,aAAa,CAAC,OAAO,EAAE,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAA;IACjD,CAAC;AACH,CAAC;AAED,SAAS,mBAAmB;IAC1B,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,6BAA6B,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,KAAK,GAAG,CAAA;AAC1E,CAAC;AAED,KAAK,UAAU,qBAAqB;IAClC,MAAM,GAAG,GAAG,uBAAuB,EAAE,CAAA;IACrC,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC7B,MAAM,IAAI,GAAG,KAAK,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,EAAE,EAAE,KAAK,EAAE,CAAC,QAAQ,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAA;QAC9E,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAA;QACtC,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAA;IACjD,CAAC,CAAC,CAAA;AACJ,CAAC;AAED,SAAS,uBAAuB;IAC9B,MAAM,QAAQ,GAAG,CAAC,OAA
O,CAAC,GAAG,CAAC,sBAAsB,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;IACnE,OAAO,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,aAAa,CAAA;AACvD,CAAC;AAED,KAAK,UAAU,0BAA0B;IACvC,MAAM,QAAQ,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;IACvE,IAAI,QAAQ,EAAE,CAAC;QACb,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;YACpC,OAAO,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAA;QACxC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAA;QACb,CAAC;IACH,CAAC;IAED,MAAM,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,IAAI,OAAO,CAAC,GAAG,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;IACvE,MAAM,cAAc,GAAG,IAAI;QACzB,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,YAAY,EAAE,OAAO,EAAE,aAAa,EAAE,QAAQ,EAAE,eAAe,CAAC;QAC7E,CAAC,CAAC,IAAI,CAAA;IACR,IAAI,cAAc,EAAE,CAAC;QACnB,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,cAAc,CAAC,CAAA;YAC1C,IAAI,IAAI,CAAC,MAAM,EAAE;gBAAE,OAAO,cAAc,CAAA;QAC1C,CAAC;QAAC,MAAM,CAAC;YACP,SAAS;QACX,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAA;AACb,CAAC;AAED,SAAS,mCAAmC,CAAC,SAAiB;IAC5D,MAAM,IAAI,GAAG,SAAS,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,SAAS,CAAA;IACpD,IAAI,IAAI,GAAG,IAAI;SACZ,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;SACrB,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC;SACtB,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;IACxB,IAAI,GAAG,IAAI,CAAC,IAAI,EAAE,CAAA;IAClB,OAAO,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAA;AACtC,CAAC;AAED,SAAS,8BAA8B,CAAC,SAAiB;IACvD,MAAM,IAAI,GAAG,SAAS,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;IAChE,OAAO,CACL,IAAI,KAAK,YAAY;QACrB,IAAI,KAAK,WAAW;QACpB,IAAI,KAAK,YAAY;QACrB,IAAI,KAAK,WAAW;QACpB,IAAI,KAAK,WAAW;QACpB,IAAI,KAAK,iBAAiB;QAC1B,IAAI,KAAK,YAAY;QACrB,IAAI,KAAK,aAAa;QACtB,IAAI,KAAK,WAAW,CACrB,CAAA;AACH,CAAC"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export { MAX_OPENAI_UPLOAD_BYTES } from './whisper/constants.js';
|
|
2
|
+
export { transcribeMediaFileWithWhisper, transcribeMediaWithWhisper } from './whisper/core.js';
|
|
3
|
+
export { isFfmpegAvailable, probeMediaDurationSecondsWithFfprobe } from './whisper/ffmpeg.js';
|
|
4
|
+
export { isWhisperCppReady, resolveWhisperCppModelNameForDisplay, } from './whisper/whisper-cpp.js';
|
|
5
|
+
//# sourceMappingURL=whisper.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"whisper.js","sourceRoot":"","sources":["../../../src/transcription/whisper.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,uBAAuB,EAAE,MAAM,wBAAwB,CAAA;AAChE,OAAO,EAAE,8BAA8B,EAAE,0BAA0B,EAAE,MAAM,mBAAmB,CAAA;AAC9F,OAAO,EAAE,iBAAiB,EAAE,oCAAoC,EAAE,MAAM,qBAAqB,CAAA;AAM7F,OAAO,EACL,iBAAiB,EACjB,oCAAoC,GACrC,MAAM,0BAA0B,CAAA"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export { createLinkPreviewClient, type LinkPreviewClient, type LinkPreviewClientOptions, } from './link-preview/client.js';
|
|
2
|
+
export { DEFAULT_CACHE_MODE, DEFAULT_MAX_CONTENT_CHARACTERS, DEFAULT_TIMEOUT_MS, type ExtractedLinkContent, type FetchLinkContentOptions, } from './link-preview/content/types.js';
|
|
3
|
+
export type { ConvertHtmlToMarkdown, FirecrawlScrapeResult, LinkPreviewDeps, LinkPreviewProgressEvent, ReadTweetWithBird, ScrapeWithFirecrawl, TranscriptCache, } from './link-preview/deps.js';
|
|
4
|
+
export { ProgressKind } from './link-preview/deps.js';
|
|
5
|
+
export { CACHE_MODES, type CacheMode, type CacheStatus, type TranscriptSource, } from './link-preview/types.js';
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { ExtractedLinkContent, FetchLinkContentOptions } from './content/types.js';
|
|
2
|
+
import type { ConvertHtmlToMarkdown, LinkPreviewDeps, LinkPreviewProgressEvent, ScrapeWithFirecrawl, TranscriptCache } from './deps.js';
|
|
3
|
+
export interface LinkPreviewClient {
|
|
4
|
+
fetchLinkContent(url: string, options?: FetchLinkContentOptions): Promise<ExtractedLinkContent>;
|
|
5
|
+
}
|
|
6
|
+
export interface LinkPreviewClientOptions {
|
|
7
|
+
fetch?: typeof fetch;
|
|
8
|
+
scrapeWithFirecrawl?: ScrapeWithFirecrawl | null;
|
|
9
|
+
apifyApiToken?: string | null;
|
|
10
|
+
ytDlpPath?: string | null;
|
|
11
|
+
falApiKey?: string | null;
|
|
12
|
+
openaiApiKey?: string | null;
|
|
13
|
+
convertHtmlToMarkdown?: ConvertHtmlToMarkdown | null;
|
|
14
|
+
transcriptCache?: TranscriptCache | null;
|
|
15
|
+
readTweetWithBird?: LinkPreviewDeps['readTweetWithBird'];
|
|
16
|
+
onProgress?: ((event: LinkPreviewProgressEvent) => void) | null;
|
|
17
|
+
}
|
|
18
|
+
export declare function createLinkPreviewClient(options?: LinkPreviewClientOptions): LinkPreviewClient;
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
export declare function sanitizeHtmlForMarkdownConversion(html: string): string;
|
|
2
|
+
export declare function extractArticleContent(html: string): string;
|
|
3
|
+
export declare function collectSegmentsFromHtml(html: string): string[];
|
|
4
|
+
export declare function extractPlainText(html: string): string;
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
export interface ContentBudgetResult {
|
|
2
|
+
content: string;
|
|
3
|
+
truncated: boolean;
|
|
4
|
+
totalCharacters: number;
|
|
5
|
+
wordCount: number;
|
|
6
|
+
}
|
|
7
|
+
export declare function normalizeForPrompt(input: string): string;
|
|
8
|
+
export declare function normalizeWhitespace(input: string): string;
|
|
9
|
+
export declare function decodeHtmlEntities(input: string): string;
|
|
10
|
+
export declare function normalizeCandidate(value: string | null | undefined): string | null;
|
|
11
|
+
export declare function clipAtSentenceBoundary(input: string, maxLength: number): string;
|
|
12
|
+
export declare function applyContentBudget(baseContent: string, maxCharacters: number): ContentBudgetResult;
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
export declare const BLOCKED_HTML_HINT_PATTERN: RegExp;
|
|
2
|
+
export declare const MIN_HTML_CONTENT_CHARACTERS = 200;
|
|
3
|
+
export declare const MIN_READABILITY_CONTENT_CHARACTERS = 200;
|
|
4
|
+
export declare const MIN_METADATA_DESCRIPTION_CHARACTERS = 120;
|
|
5
|
+
export declare const READABILITY_RELATIVE_THRESHOLD = 0.6;
|
|
6
|
+
export declare const MIN_HTML_DOCUMENT_CHARACTERS_FOR_FALLBACK = 5000;
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { FirecrawlScrapeResult, LinkPreviewProgressEvent, ScrapeWithFirecrawl } from '../deps.js';
|
|
2
|
+
import type { CacheMode, FirecrawlDiagnostics } from '../types.js';
|
|
3
|
+
export interface FirecrawlFetchResult {
|
|
4
|
+
payload: FirecrawlScrapeResult | null;
|
|
5
|
+
diagnostics: FirecrawlDiagnostics;
|
|
6
|
+
}
|
|
7
|
+
export declare function fetchHtmlDocument(fetchImpl: typeof fetch, url: string, { timeoutMs, onProgress, }?: {
|
|
8
|
+
timeoutMs?: number;
|
|
9
|
+
onProgress?: ((event: LinkPreviewProgressEvent) => void) | null;
|
|
10
|
+
}): Promise<string>;
|
|
11
|
+
export declare function fetchWithFirecrawl(url: string, scrapeWithFirecrawl: ScrapeWithFirecrawl | null, options?: {
|
|
12
|
+
timeoutMs?: number;
|
|
13
|
+
cacheMode?: CacheMode;
|
|
14
|
+
onProgress?: ((event: LinkPreviewProgressEvent) => void) | null;
|
|
15
|
+
reason?: string | null;
|
|
16
|
+
}): Promise<FirecrawlFetchResult>;
|