@steipete/summarize-core 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -0
- package/dist/esm/content/index.js +5 -0
- package/dist/esm/content/index.js.map +1 -0
- package/dist/esm/content/link-preview/client.js +28 -0
- package/dist/esm/content/link-preview/client.js.map +1 -0
- package/dist/esm/content/link-preview/content/article.js +155 -0
- package/dist/esm/content/link-preview/content/article.js.map +1 -0
- package/dist/esm/content/link-preview/content/cleaner.js +55 -0
- package/dist/esm/content/link-preview/content/cleaner.js.map +1 -0
- package/dist/esm/content/link-preview/content/constants.js +7 -0
- package/dist/esm/content/link-preview/content/constants.js.map +1 -0
- package/dist/esm/content/link-preview/content/fetcher.js +124 -0
- package/dist/esm/content/link-preview/content/fetcher.js.map +1 -0
- package/dist/esm/content/link-preview/content/firecrawl.js +86 -0
- package/dist/esm/content/link-preview/content/firecrawl.js.map +1 -0
- package/dist/esm/content/link-preview/content/html.js +162 -0
- package/dist/esm/content/link-preview/content/html.js.map +1 -0
- package/dist/esm/content/link-preview/content/index.js +345 -0
- package/dist/esm/content/link-preview/content/index.js.map +1 -0
- package/dist/esm/content/link-preview/content/jsonld.js +77 -0
- package/dist/esm/content/link-preview/content/jsonld.js.map +1 -0
- package/dist/esm/content/link-preview/content/parsers.js +77 -0
- package/dist/esm/content/link-preview/content/parsers.js.map +1 -0
- package/dist/esm/content/link-preview/content/podcast-utils.js +79 -0
- package/dist/esm/content/link-preview/content/podcast-utils.js.map +1 -0
- package/dist/esm/content/link-preview/content/readability.js +53 -0
- package/dist/esm/content/link-preview/content/readability.js.map +1 -0
- package/dist/esm/content/link-preview/content/twitter-utils.js +68 -0
- package/dist/esm/content/link-preview/content/twitter-utils.js.map +1 -0
- package/dist/esm/content/link-preview/content/types.js +4 -0
- package/dist/esm/content/link-preview/content/types.js.map +1 -0
- package/dist/esm/content/link-preview/content/utils.js +164 -0
- package/dist/esm/content/link-preview/content/utils.js.map +1 -0
- package/dist/esm/content/link-preview/content/video.js +96 -0
- package/dist/esm/content/link-preview/content/video.js.map +1 -0
- package/dist/esm/content/link-preview/content/youtube.js +82 -0
- package/dist/esm/content/link-preview/content/youtube.js.map +1 -0
- package/dist/esm/content/link-preview/deps.js +20 -0
- package/dist/esm/content/link-preview/deps.js.map +1 -0
- package/dist/esm/content/link-preview/fetch-with-timeout.js +35 -0
- package/dist/esm/content/link-preview/fetch-with-timeout.js.map +1 -0
- package/dist/esm/content/link-preview/types.js +2 -0
- package/dist/esm/content/link-preview/types.js.map +1 -0
- package/dist/esm/content/transcript/cache.js +79 -0
- package/dist/esm/content/transcript/cache.js.map +1 -0
- package/dist/esm/content/transcript/index.js +130 -0
- package/dist/esm/content/transcript/index.js.map +1 -0
- package/dist/esm/content/transcript/normalize.js +43 -0
- package/dist/esm/content/transcript/normalize.js.map +1 -0
- package/dist/esm/content/transcript/providers/generic.js +11 -0
- package/dist/esm/content/transcript/providers/generic.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/apple-flow.js +222 -0
- package/dist/esm/content/transcript/providers/podcast/apple-flow.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/apple.js +38 -0
- package/dist/esm/content/transcript/providers/podcast/apple.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/constants.js +8 -0
- package/dist/esm/content/transcript/providers/podcast/constants.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/flow-context.js +2 -0
- package/dist/esm/content/transcript/providers/podcast/flow-context.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/itunes.js +134 -0
- package/dist/esm/content/transcript/providers/podcast/itunes.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/json.js +34 -0
- package/dist/esm/content/transcript/providers/podcast/json.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/media.js +345 -0
- package/dist/esm/content/transcript/providers/podcast/media.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/results.js +28 -0
- package/dist/esm/content/transcript/providers/podcast/results.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/rss.js +253 -0
- package/dist/esm/content/transcript/providers/podcast/rss.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/spotify-flow.js +218 -0
- package/dist/esm/content/transcript/providers/podcast/spotify-flow.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/spotify.js +113 -0
- package/dist/esm/content/transcript/providers/podcast/spotify.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast.js +222 -0
- package/dist/esm/content/transcript/providers/podcast.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/api.js +257 -0
- package/dist/esm/content/transcript/providers/youtube/api.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/apify.js +55 -0
- package/dist/esm/content/transcript/providers/youtube/apify.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/captions.js +409 -0
- package/dist/esm/content/transcript/providers/youtube/captions.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/yt-dlp.js +166 -0
- package/dist/esm/content/transcript/providers/youtube/yt-dlp.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube.js +173 -0
- package/dist/esm/content/transcript/providers/youtube.js.map +1 -0
- package/dist/esm/content/transcript/types.js +2 -0
- package/dist/esm/content/transcript/types.js.map +1 -0
- package/dist/esm/content/transcript/utils.js +259 -0
- package/dist/esm/content/transcript/utils.js.map +1 -0
- package/dist/esm/index.js +4 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/esm/language.js +126 -0
- package/dist/esm/language.js.map +1 -0
- package/dist/esm/prompts/cli.js +20 -0
- package/dist/esm/prompts/cli.js.map +1 -0
- package/dist/esm/prompts/file.js +48 -0
- package/dist/esm/prompts/file.js.map +1 -0
- package/dist/esm/prompts/index.js +4 -0
- package/dist/esm/prompts/index.js.map +1 -0
- package/dist/esm/prompts/link-summary.js +116 -0
- package/dist/esm/prompts/link-summary.js.map +1 -0
- package/dist/esm/shared/contracts.js +2 -0
- package/dist/esm/shared/contracts.js.map +1 -0
- package/dist/esm/transcription/whisper/constants.js +8 -0
- package/dist/esm/transcription/whisper/constants.js.map +1 -0
- package/dist/esm/transcription/whisper/core.js +303 -0
- package/dist/esm/transcription/whisper/core.js.map +1 -0
- package/dist/esm/transcription/whisper/fal.js +41 -0
- package/dist/esm/transcription/whisper/fal.js.map +1 -0
- package/dist/esm/transcription/whisper/ffmpeg.js +179 -0
- package/dist/esm/transcription/whisper/ffmpeg.js.map +1 -0
- package/dist/esm/transcription/whisper/openai.js +47 -0
- package/dist/esm/transcription/whisper/openai.js.map +1 -0
- package/dist/esm/transcription/whisper/types.js +2 -0
- package/dist/esm/transcription/whisper/types.js.map +1 -0
- package/dist/esm/transcription/whisper/utils.js +63 -0
- package/dist/esm/transcription/whisper/utils.js.map +1 -0
- package/dist/esm/transcription/whisper/whisper-cpp.js +227 -0
- package/dist/esm/transcription/whisper/whisper-cpp.js.map +1 -0
- package/dist/esm/transcription/whisper.js +5 -0
- package/dist/esm/transcription/whisper.js.map +1 -0
- package/dist/types/content/index.d.ts +5 -0
- package/dist/types/content/link-preview/client.d.ts +18 -0
- package/dist/types/content/link-preview/content/article.d.ts +4 -0
- package/dist/types/content/link-preview/content/cleaner.d.ts +12 -0
- package/dist/types/content/link-preview/content/constants.d.ts +6 -0
- package/dist/types/content/link-preview/content/fetcher.d.ts +16 -0
- package/dist/types/content/link-preview/content/firecrawl.d.ts +14 -0
- package/dist/types/content/link-preview/content/html.d.ts +17 -0
- package/dist/types/content/link-preview/content/index.d.ts +4 -0
- package/dist/types/content/link-preview/content/jsonld.d.ts +6 -0
- package/dist/types/content/link-preview/content/parsers.d.ts +7 -0
- package/dist/types/content/link-preview/content/podcast-utils.d.ts +7 -0
- package/dist/types/content/link-preview/content/readability.d.ts +8 -0
- package/dist/types/content/link-preview/content/twitter-utils.d.ts +4 -0
- package/dist/types/content/link-preview/content/types.d.ts +61 -0
- package/dist/types/content/link-preview/content/utils.d.ts +17 -0
- package/dist/types/content/link-preview/content/video.d.ts +5 -0
- package/dist/types/content/link-preview/content/youtube.d.ts +1 -0
- package/dist/types/content/link-preview/deps.d.ts +167 -0
- package/dist/types/content/link-preview/fetch-with-timeout.d.ts +4 -0
- package/dist/types/content/link-preview/types.d.ts +37 -0
- package/dist/types/content/transcript/cache.d.ts +29 -0
- package/dist/types/content/transcript/index.d.ts +9 -0
- package/dist/types/content/transcript/normalize.d.ts +3 -0
- package/dist/types/content/transcript/providers/generic.d.ts +3 -0
- package/dist/types/content/transcript/providers/podcast/apple-flow.d.ts +4 -0
- package/dist/types/content/transcript/providers/podcast/apple.d.ts +6 -0
- package/dist/types/content/transcript/providers/podcast/constants.d.ts +7 -0
- package/dist/types/content/transcript/providers/podcast/flow-context.d.ts +11 -0
- package/dist/types/content/transcript/providers/podcast/itunes.d.ts +17 -0
- package/dist/types/content/transcript/providers/podcast/json.d.ts +8 -0
- package/dist/types/content/transcript/providers/podcast/media.d.ts +42 -0
- package/dist/types/content/transcript/providers/podcast/results.d.ts +10 -0
- package/dist/types/content/transcript/providers/podcast/rss.d.ts +22 -0
- package/dist/types/content/transcript/providers/podcast/spotify-flow.d.ts +3 -0
- package/dist/types/content/transcript/providers/podcast/spotify.d.ts +24 -0
- package/dist/types/content/transcript/providers/podcast.d.ts +20 -0
- package/dist/types/content/transcript/providers/youtube/api.d.ts +26 -0
- package/dist/types/content/transcript/providers/youtube/apify.d.ts +1 -0
- package/dist/types/content/transcript/providers/youtube/captions.d.ts +7 -0
- package/dist/types/content/transcript/providers/youtube/yt-dlp.d.ts +17 -0
- package/dist/types/content/transcript/providers/youtube.d.ts +3 -0
- package/dist/types/content/transcript/types.d.ts +30 -0
- package/dist/types/content/transcript/utils.d.ts +8 -0
- package/dist/types/index.d.ts +4 -0
- package/dist/types/language.d.ts +25 -0
- package/dist/types/prompts/cli.d.ts +10 -0
- package/dist/types/prompts/file.d.ts +17 -0
- package/dist/types/prompts/index.d.ts +4 -0
- package/dist/types/prompts/link-summary.d.ts +29 -0
- package/dist/types/shared/contracts.d.ts +2 -0
- package/dist/types/transcription/whisper/constants.d.ts +7 -0
- package/dist/types/transcription/whisper/core.d.ts +20 -0
- package/dist/types/transcription/whisper/fal.d.ts +1 -0
- package/dist/types/transcription/whisper/ffmpeg.d.ts +16 -0
- package/dist/types/transcription/whisper/openai.d.ts +2 -0
- package/dist/types/transcription/whisper/types.d.ts +17 -0
- package/dist/types/transcription/whisper/utils.d.ts +5 -0
- package/dist/types/transcription/whisper/whisper-cpp.d.ts +9 -0
- package/dist/types/transcription/whisper.d.ts +5 -0
- package/package.json +54 -0
|
@@ -0,0 +1,409 @@
|
|
|
1
|
+
import { fetchWithTimeout } from '../../../link-preview/fetch-with-timeout.js';
|
|
2
|
+
import { decodeHtmlEntities, sanitizeYoutubeJsonResponse } from '../../utils.js';
|
|
3
|
+
import { extractYoutubeiBootstrap } from './api.js';
|
|
4
|
+
/**
 * Default HTTP headers used by the caption download helpers in this module:
 * a desktop Chrome User-Agent string and an English Accept-Language value.
 */
const REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
    'Accept-Language': 'en-US,en;q=0.9',
};
|
|
8
|
+
// Identifier searched for in the watch-page HTML to locate the embedded player response.
const YT_INITIAL_PLAYER_RESPONSE_TOKEN = 'ytInitialPlayerResponse';
// Captures the INNERTUBE_API_KEY value in either of two spellings:
//   group 1 — plain JSON:            "INNERTUBE_API_KEY":"<key>"
//   group 2 — backslash-escaped JSON: INNERTUBE_API_KEY\":\"<key>\"
const INNERTUBE_API_KEY_REGEX = /"INNERTUBE_API_KEY":"([^"]+)"|INNERTUBE_API_KEY\\":\\"([^\\"]+)\\"/;
|
|
10
|
+
/**
 * Returns the first brace-balanced `{...}` substring of `source` that starts
 * at or after `startAt`.
 *
 * Braces inside single- or double-quoted strings are ignored, and a backslash
 * inside a string escapes the following character.
 *
 * @param {string} source - Text to scan.
 * @param {number} startAt - Index from which to look for the opening brace.
 * @returns {string|null} The balanced object text, or null when there is no
 *   opening brace or the braces never balance.
 */
function extractBalancedJsonObject(source, startAt) {
    const openAt = source.indexOf('{', startAt);
    if (openAt === -1) {
        return null;
    }
    let nesting = 0;
    // Quote character of the string literal currently being scanned, or null when outside a string.
    let activeQuote = null;
    let skipNext = false;
    for (let pos = openAt; pos < source.length; pos += 1) {
        const ch = source[pos];
        if (activeQuote !== null) {
            if (skipNext) {
                skipNext = false;
            }
            else if (ch === '\\') {
                skipNext = true;
            }
            else if (ch === activeQuote) {
                activeQuote = null;
            }
            continue;
        }
        switch (ch) {
            case '"':
            case "'":
                activeQuote = ch;
                break;
            case '{':
                nesting += 1;
                break;
            case '}':
                nesting -= 1;
                if (nesting === 0) {
                    return source.slice(openAt, pos + 1);
                }
                break;
            default:
                break;
        }
    }
    return null;
}
|
|
57
|
+
/**
 * Extracts the `ytInitialPlayerResponse` object embedded in watch-page HTML.
 *
 * Each occurrence of the token is examined in order: the next `=` after the
 * token is located, the first brace-balanced object after it is extracted and
 * parsed as JSON. The first occurrence that yields an object wins. Earlier
 * occurrences that are mere references (for example
 * `window.ytInitialPlayerResponse || function () {...}`) no longer prevent
 * the real assignment further down the page from being found.
 *
 * @param {string} html - Page HTML.
 * @returns {object|null} The parsed player response, or null when no
 *   occurrence of the token is followed by a parseable JSON object.
 */
function extractInitialPlayerResponse(html) {
    let searchFrom = 0;
    while (searchFrom < html.length) {
        const tokenIndex = html.indexOf(YT_INITIAL_PLAYER_RESPONSE_TOKEN, searchFrom);
        if (tokenIndex < 0) {
            return null;
        }
        searchFrom = tokenIndex + YT_INITIAL_PLAYER_RESPONSE_TOKEN.length;
        const assignmentIndex = html.indexOf('=', tokenIndex);
        if (assignmentIndex < 0) {
            // No '=' after this occurrence means none after any later one either.
            return null;
        }
        const objectText = extractBalancedJsonObject(html, assignmentIndex);
        if (!objectText) {
            continue;
        }
        try {
            const parsed = JSON.parse(objectText);
            if (isObjectLike(parsed)) {
                return parsed;
            }
        }
        catch {
            // Not JSON (e.g. a function body); fall through to the next occurrence.
        }
    }
    return null;
}
|
|
78
|
+
/** True for any non-null value whose `typeof` is 'object' (plain objects and arrays included). */
const isObjectLike = (value) => value !== null && typeof value === 'object';
|
|
79
|
+
/**
 * Pulls the Innertube API key out of page HTML using INNERTUBE_API_KEY_REGEX.
 *
 * @param {string} html - Page HTML.
 * @returns {string|null} The trimmed key, or null when the pattern does not
 *   match or the captured value is blank.
 */
function extractInnertubeApiKey(html) {
    const [, plainKey, escapedKey] = html.match(INNERTUBE_API_KEY_REGEX) ?? [];
    const candidate = plainKey ?? escapedKey;
    if (typeof candidate !== 'string') {
        return null;
    }
    const trimmed = candidate.trim();
    return trimmed.length > 0 ? trimmed : null;
}
|
|
84
|
+
/**
 * Requests the youtubei `player` endpoint with an ANDROID client context and
 * extracts a transcript from the returned payload.
 *
 * The API key is taken from the page HTML. Any failure (missing key, non-OK
 * response, non-object JSON, thrown error) results in null.
 *
 * @param fetchImpl - Fetch implementation forwarded to fetchWithTimeout.
 * @param {{ html: string, videoId: string }} params - Page HTML and video id.
 * @returns {Promise<string|null>} Whatever extractTranscriptFromPlayerPayload
 *   yields for the response, or null on failure.
 */
async function fetchTranscriptViaAndroidPlayer(fetchImpl, { html, videoId }) {
    const apiKey = extractInnertubeApiKey(html);
    if (!apiKey) {
        return null;
    }
    try {
        const endpoint = `https://www.youtube.com/youtubei/v1/player?key=${apiKey}`;
        const requestHeaders = {
            'Content-Type': 'application/json',
            'User-Agent': REQUEST_HEADERS['User-Agent'] ??
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
            'Accept-Language': REQUEST_HEADERS['Accept-Language'] ?? 'en-US,en;q=0.9',
            Accept: 'application/json',
        };
        const requestPayload = {
            context: {
                client: {
                    clientName: 'ANDROID',
                    clientVersion: '20.10.38',
                },
            },
            videoId,
        };
        const response = await fetchWithTimeout(fetchImpl, endpoint, {
            method: 'POST',
            headers: requestHeaders,
            body: JSON.stringify(requestPayload),
        });
        if (!response.ok) {
            return null;
        }
        const playerPayload = await response.json();
        return isObjectLike(playerPayload)
            ? await extractTranscriptFromPlayerPayload(fetchImpl, playerPayload)
            : null;
    }
    catch {
        // Best-effort fallback path: network and parse errors simply mean "no transcript".
        return null;
    }
}
|
|
123
|
+
/**
 * Resolves a transcript for a video from its caption tracks, trying sources in order:
 *
 * 1. the player response embedded in the page HTML;
 * 2. a youtubei `player` request built from the page's bootstrap config;
 * 3. the ANDROID-client player request (used whenever step 2 is unavailable,
 *    fails, or produces no transcript).
 *
 * @param fetchImpl - Fetch implementation forwarded to fetchWithTimeout.
 * @param {{ html: string, originalUrl: string, videoId: string }} params
 * @returns {Promise<string|null>} The transcript, or the ANDROID fallback's result.
 */
export const fetchTranscriptFromCaptionTracks = async (fetchImpl, { html, originalUrl, videoId }) => {
    const viaAndroidPlayer = () => fetchTranscriptViaAndroidPlayer(fetchImpl, { html, videoId });

    const embeddedPayload = extractInitialPlayerResponse(html);
    if (embeddedPayload) {
        const embeddedTranscript = await extractTranscriptFromPlayerPayload(fetchImpl, embeddedPayload);
        if (embeddedTranscript) {
            return embeddedTranscript;
        }
    }

    const bootstrap = extractYoutubeiBootstrap(html);
    if (!bootstrap) {
        return await viaAndroidPlayer();
    }
    const { apiKey, clientName, clientVersion, context, pageCl, pageLabel, visitorData, xsrfToken } = bootstrap;
    if (!apiKey) {
        return await viaAndroidPlayer();
    }

    // Reuse the page's own request context, adding the original URL to its client section.
    const webClient = isObjectLike(context.client) ? context.client : {};
    const requestBody = {
        context: {
            ...context,
            client: {
                ...webClient,
                originalUrl,
            },
        },
        videoId,
        playbackContext: {
            contentPlaybackContext: {
                html5Preference: 'HTML5_PREF_WANTS',
            },
        },
        contentCheckOk: true,
        racyCheckOk: true,
    };

    try {
        const headers = {
            'Content-Type': 'application/json',
            'User-Agent': REQUEST_HEADERS['User-Agent'] ??
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
            Accept: 'application/json',
            Origin: 'https://www.youtube.com',
            Referer: originalUrl,
            'X-Goog-AuthUser': '0',
            'X-Youtube-Bootstrap-Logged-In': 'false',
        };
        // Optional headers are only sent when the bootstrap config supplied a usable value.
        const optionalHeaders = [
            ['X-Youtube-Client-Name', clientName],
            ['X-Youtube-Client-Version', clientVersion],
            ['X-Goog-Visitor-Id', visitorData],
            ['X-Youtube-Page-CL', typeof pageCl === 'number' && Number.isFinite(pageCl) ? String(pageCl) : null],
            ['X-Youtube-Page-Label', pageLabel],
            ['X-Youtube-Identity-Token', xsrfToken],
        ];
        for (const [headerName, headerValue] of optionalHeaders) {
            if (headerValue) {
                headers[headerName] = headerValue;
            }
        }

        const response = await fetchWithTimeout(fetchImpl, `https://www.youtube.com/youtubei/v1/player?key=${apiKey}`, {
            method: 'POST',
            headers,
            body: JSON.stringify(requestBody),
        });
        if (!response.ok) {
            return await viaAndroidPlayer();
        }
        const playerPayload = JSON.parse(sanitizeYoutubeJsonResponse(await response.text()));
        if (!isObjectLike(playerPayload)) {
            return await viaAndroidPlayer();
        }
        const transcript = await extractTranscriptFromPlayerPayload(fetchImpl, playerPayload);
        if (transcript) {
            return transcript;
        }
        return await viaAndroidPlayer();
    }
    catch {
        // Network or JSON failures on the web path are not fatal; try the ANDROID client instead.
        return await viaAndroidPlayer();
    }
};
|
|
214
|
+
const extractTranscriptFromPlayerPayload = async (fetchImpl, payload) => {
|
|
215
|
+
const payloadRecord = payload;
|
|
216
|
+
const captionsCandidate = payloadRecord.captions;
|
|
217
|
+
const captions = isObjectLike(captionsCandidate) ? captionsCandidate : null;
|
|
218
|
+
const rendererCandidate = (captions ? captions.playerCaptionsTracklistRenderer : null) ??
|
|
219
|
+
payloadRecord.playerCaptionsTracklistRenderer;
|
|
220
|
+
const renderer = isObjectLike(rendererCandidate)
|
|
221
|
+
? rendererCandidate
|
|
222
|
+
: null;
|
|
223
|
+
const captionTracks = Array.isArray(renderer?.captionTracks)
|
|
224
|
+
? renderer?.captionTracks
|
|
225
|
+
: null;
|
|
226
|
+
const automaticTracks = Array.isArray(renderer?.automaticCaptions)
|
|
227
|
+
? renderer?.automaticCaptions
|
|
228
|
+
: null;
|
|
229
|
+
const orderedTracks = [];
|
|
230
|
+
if (captionTracks) {
|
|
231
|
+
orderedTracks.push(...captionTracks.filter((track) => isObjectLike(track)));
|
|
232
|
+
}
|
|
233
|
+
if (automaticTracks) {
|
|
234
|
+
orderedTracks.push(...automaticTracks.filter((track) => isObjectLike(track)));
|
|
235
|
+
}
|
|
236
|
+
const filteredTracks = orderedTracks.filter((track) => isObjectLike(track));
|
|
237
|
+
const sortedTracks = [...filteredTracks].toSorted((a, b) => {
|
|
238
|
+
const aTrack = a;
|
|
239
|
+
const bTrack = b;
|
|
240
|
+
const aKind = typeof aTrack.kind === 'string' ? aTrack.kind : '';
|
|
241
|
+
const bKind = typeof bTrack.kind === 'string' ? bTrack.kind : '';
|
|
242
|
+
// Prefer manual captions over auto-generated (ASR).
|
|
243
|
+
if (aKind === 'asr' && bKind !== 'asr') {
|
|
244
|
+
return 1;
|
|
245
|
+
}
|
|
246
|
+
if (bKind === 'asr' && aKind !== 'asr') {
|
|
247
|
+
return -1;
|
|
248
|
+
}
|
|
249
|
+
const aLang = typeof aTrack.languageCode === 'string' ? aTrack.languageCode : '';
|
|
250
|
+
const bLang = typeof bTrack.languageCode === 'string' ? bTrack.languageCode : '';
|
|
251
|
+
if (aLang === 'en' && bLang !== 'en') {
|
|
252
|
+
return -1;
|
|
253
|
+
}
|
|
254
|
+
if (bLang === 'en' && aLang !== 'en') {
|
|
255
|
+
return 1;
|
|
256
|
+
}
|
|
257
|
+
return 0;
|
|
258
|
+
});
|
|
259
|
+
// De-dupe after sorting so manual/English wins within each language.
|
|
260
|
+
const seenLanguages = new Set();
|
|
261
|
+
const normalizedTracks = [];
|
|
262
|
+
for (const candidate of sortedTracks) {
|
|
263
|
+
const trackRecord = candidate;
|
|
264
|
+
const languageCandidate = trackRecord.languageCode;
|
|
265
|
+
const lang = typeof languageCandidate === 'string' ? languageCandidate.toLowerCase() : '';
|
|
266
|
+
if (lang && seenLanguages.has(lang)) {
|
|
267
|
+
continue;
|
|
268
|
+
}
|
|
269
|
+
if (lang) {
|
|
270
|
+
seenLanguages.add(lang);
|
|
271
|
+
}
|
|
272
|
+
normalizedTracks.push(candidate);
|
|
273
|
+
}
|
|
274
|
+
return await findFirstTranscript(fetchImpl, normalizedTracks, 0);
|
|
275
|
+
};
|
|
276
|
+
/**
 * Tries tracks one at a time, starting at `index`, and returns the first
 * truthy transcript produced by downloadCaptionTrack.
 *
 * @param fetchImpl - Fetch implementation forwarded to downloadCaptionTrack.
 * @param {Array<object>} tracks - Candidate caption tracks, in priority order.
 * @param {number} index - Position of the first track to try.
 * @returns {Promise<string|null>} The first transcript found, or null when every track fails.
 */
const findFirstTranscript = async (fetchImpl, tracks, index) => {
    for (let cursor = index; cursor < tracks.length; cursor += 1) {
        // Sequential on purpose: later tracks are only fetched when earlier ones yield nothing.
        const transcript = await downloadCaptionTrack(fetchImpl, tracks[cursor] ?? {});
        if (transcript) {
            return transcript;
        }
    }
    return null;
};
|
|
286
|
+
/**
 * Downloads the transcript for a single caption track.
 *
 * The track URL (`baseUrl`, else `url`) is first requested with
 * `fmt=json3&alt=json`. The body is parsed as a JSON transcript, then as XML.
 * A non-OK response, empty body, unparseable body or thrown error all fall
 * back to downloadXmlTranscript on the unmodified track URL.
 *
 * @param fetchImpl - Fetch implementation forwarded to fetchWithTimeout.
 * @param {object} track - Caption track record.
 * @returns {Promise<string|null>} Transcript text, or null when the track has
 *   no usable URL or nothing could be parsed.
 */
const downloadCaptionTrack = async (fetchImpl, track) => {
    let baseUrl = null;
    if (typeof track.baseUrl === 'string') {
        baseUrl = track.baseUrl;
    }
    else if (typeof track.url === 'string') {
        baseUrl = track.url;
    }
    if (!baseUrl) {
        return null;
    }

    let json3Url;
    try {
        const trackUrl = new URL(baseUrl);
        trackUrl.searchParams.set('fmt', 'json3');
        trackUrl.searchParams.set('alt', 'json');
        json3Url = trackUrl.toString();
    }
    catch {
        // baseUrl is not an absolute URL; append the parameters textually instead.
        const joiner = baseUrl.includes('?') ? '&' : '?';
        json3Url = `${baseUrl}${joiner}fmt=json3&alt=json`;
    }

    const viaXml = () => downloadXmlTranscript(fetchImpl, baseUrl);
    try {
        const response = await fetchWithTimeout(fetchImpl, json3Url, {
            headers: REQUEST_HEADERS,
        });
        if (!response.ok) {
            return await viaXml();
        }
        const body = await response.text();
        if (body.length === 0) {
            return await viaXml();
        }
        return parseJsonTranscript(body) || parseXmlTranscript(body) || (await viaXml());
    }
    catch {
        return await viaXml();
    }
};
|
|
333
|
+
/**
 * Downloads a caption track without any explicit `fmt` query parameter and
 * parses the body, trying the JSON transcript parser first and the XML parser
 * second.
 *
 * Every `fmt` parameter is removed from the URL, including one that is the
 * first query parameter (`?fmt=...`), which the previous `&fmt=`-only pattern
 * left in place.
 *
 * @param fetchImpl - Fetch implementation forwarded to fetchWithTimeout.
 * @param {string} baseUrl - Caption track URL.
 * @returns {Promise<string|null>} Transcript text, or null on a non-OK
 *   response, an unparseable body, or a thrown error.
 */
const downloadXmlTranscript = async (fetchImpl, baseUrl) => {
    const xmlUrl = baseUrl
        // Drop `fmt` wherever it follows another parameter...
        .replaceAll(/&fmt=[^&]+/g, '')
        // ...and when it leads the query string, keeping '?' only if parameters remain.
        .replace(/\?fmt=[^&]+(&|$)/, (_match, tail) => (tail ? '?' : ''));
    try {
        const response = await fetchWithTimeout(fetchImpl, xmlUrl, { headers: REQUEST_HEADERS });
        if (!response.ok) {
            return null;
        }
        const text = await response.text();
        const jsonResult = parseJsonTranscript(text);
        if (jsonResult) {
            return jsonResult;
        }
        return parseXmlTranscript(text);
    }
    catch {
        // Best-effort download: a failed request just means no transcript from this track.
        return null;
    }
};
|
|
351
|
+
/**
 * Parses a JSON caption payload into plain transcript text.
 *
 * The payload must be an object with an `events` array. For each event that
 * has a `segs` array, the string `utf8` fields of its segments are
 * concatenated and trimmed; non-empty results become one line each.
 *
 * @param {string} raw - Response body.
 * @returns {string|null} Lines joined with '\n', or null when the body is not
 *   valid JSON, has no `events` array, or yields no text.
 */
const parseJsonTranscript = (raw) => {
    try {
        const payload = JSON.parse(raw);
        if (!isObjectLike(payload) || !Array.isArray(payload.events)) {
            return null;
        }
        const segmentText = (seg) => (isObjectLike(seg) && typeof seg.utf8 === 'string' ? seg.utf8 : '');
        const lines = payload.events
            .filter((event) => isObjectLike(event) && Array.isArray(event.segs))
            .map((event) => event.segs.map(segmentText).join('').trim())
            .filter((line) => line.length > 0);
        const transcript = lines.join('\n').trim();
        return transcript.length > 0 ? transcript : null;
    }
    catch {
        return null;
    }
};
|
|
394
|
+
/**
 * Parses an XML caption document into plain transcript text.
 *
 * The content of every `<text ...>...</text>` element (matched
 * case-insensitively) is entity-decoded, has runs of whitespace collapsed to
 * a single space, and is trimmed; non-empty results become one line each.
 *
 * @param {string} xml - Response body.
 * @returns {string|null} Lines joined with '\n', or null when no text was found.
 */
const parseXmlTranscript = (xml) => {
    const lines = [];
    for (const [, inner = ''] of xml.matchAll(/<text[^>]*>([\s\S]*?)<\/text>/gi)) {
        const cleaned = decodeHtmlEntities(inner).replaceAll(/\s+/g, ' ').trim();
        if (cleaned.length > 0) {
            lines.push(cleaned);
        }
    }
    const transcript = lines.join('\n').trim();
    return transcript.length > 0 ? transcript : null;
};
|
|
409
|
+
//# sourceMappingURL=captions.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"captions.js","sourceRoot":"","sources":["../../../../../../src/content/transcript/providers/youtube/captions.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,6CAA6C,CAAA;AAC9E,OAAO,EAAE,kBAAkB,EAAE,2BAA2B,EAAE,MAAM,gBAAgB,CAAA;AAChF,OAAO,EAAE,wBAAwB,EAAE,MAAM,UAAU,CAAA;AAQnD,MAAM,eAAe,GAA2B;IAC9C,YAAY,EACV,iHAAiH;IACnH,iBAAiB,EAAE,gBAAgB;CACpC,CAAA;AAED,MAAM,gCAAgC,GAAG,yBAAyB,CAAA;AAClE,MAAM,uBAAuB,GAAG,oEAAoE,CAAA;AAEpG,SAAS,yBAAyB,CAAC,MAAc,EAAE,OAAe;IAChE,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,CAAC,GAAG,EAAE,OAAO,CAAC,CAAA;IAC1C,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;QACd,OAAO,IAAI,CAAA;IACb,CAAC;IAED,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,IAAI,QAAQ,GAAG,KAAK,CAAA;IACpB,IAAI,KAAK,GAAqB,IAAI,CAAA;IAClC,IAAI,QAAQ,GAAG,KAAK,CAAA;IAEpB,KAAK,IAAI,CAAC,GAAG,KAAK,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QAC9C,MAAM,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,CAAA;QACpB,IAAI,CAAC,EAAE,EAAE,CAAC;YACR,SAAQ;QACV,CAAC;QAED,IAAI,QAAQ,EAAE,CAAC;YACb,IAAI,QAAQ,EAAE,CAAC;gBACb,QAAQ,GAAG,KAAK,CAAA;gBAChB,SAAQ;YACV,CAAC;YACD,IAAI,EAAE,KAAK,IAAI,EAAE,CAAC;gBAChB,QAAQ,GAAG,IAAI,CAAA;gBACf,SAAQ;YACV,CAAC;YACD,IAAI,KAAK,IAAI,EAAE,KAAK,KAAK,EAAE,CAAC;gBAC1B,QAAQ,GAAG,KAAK,CAAA;gBAChB,KAAK,GAAG,IAAI,CAAA;YACd,CAAC;YACD,SAAQ;QACV,CAAC;QAED,IAAI,EAAE,KAAK,GAAG,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;YAC7B,QAAQ,GAAG,IAAI,CAAA;YACf,KAAK,GAAG,EAAE,CAAA;YACV,SAAQ;QACV,CAAC;QAED,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;YACf,KAAK,IAAI,CAAC,CAAA;YACV,SAAQ;QACV,CAAC;QACD,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;YACf,KAAK,IAAI,CAAC,CAAA;YACV,IAAI,KAAK,KAAK,CAAC,EAAE,CAAC;gBAChB,OAAO,MAAM,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC,GAAG,CAAC,CAAC,CAAA;YACnC,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAA;AACb,CAAC;AAED,SAAS,4BAA4B,CAAC,IAAY;IAChD,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,gCAAgC,CAAC,CAAA;IACjE,IAAI,UAAU,GAAG,CAAC,EAAE,CAAC;QACnB,OAAO,IAAI,CAAA;IACb,CAAC;IACD,MAAM,eAAe,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,UAAU,CAAC,CAAA;IACrD,IAAI,eAAe,GAAG,CAAC,EAAE,CAAC;QACxB,OAAO,IAAI,CAAA;IACb,CAAC;IACD,MAAM,UAAU,GAAG,yBAAyB,CAAC,IAAI,EAAE,eA
Ae,CAAC,CAAA;IACnE,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,OAAO,IAAI,CAAA;IACb,CAAC;IAED,IAAI,CAAC;QACH,MAAM,MAAM,GAAY,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAA;QAC9C,OAAO,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAA;IAC7C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED,MAAM,YAAY,GAAG,CAAC,KAAc,EAAoC,EAAE,CACxE,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,CAAA;AAE7C,SAAS,sBAAsB,CAAC,IAAY;IAC1C,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,uBAAuB,CAAC,CAAA;IACjD,MAAM,GAAG,GAAG,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,CAAA;IAC5C,OAAO,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAA;AAC7E,CAAC;AAoBD,KAAK,UAAU,+BAA+B,CAC5C,SAAuB,EACvB,EAAE,IAAI,EAAE,OAAO,EAAqC;IAEpD,MAAM,MAAM,GAAG,sBAAsB,CAAC,IAAI,CAAC,CAAA;IAC3C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,IAAI,CAAA;IACb,CAAC;IAED,IAAI,CAAC;QACH,MAAM,SAAS,GACb,eAAe,CAAC,YAAY,CAAC;YAC7B,iHAAiH,CAAA;QAEnH,MAAM,QAAQ,GAAG,MAAM,gBAAgB,CACrC,SAAS,EACT,kDAAkD,MAAM,EAAE,EAC1D;YACE,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,YAAY,EAAE,SAAS;gBACvB,iBAAiB,EAAE,eAAe,CAAC,iBAAiB,CAAC,IAAI,gBAAgB;gBACzE,MAAM,EAAE,kBAAkB;aAC3B;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,OAAO,EAAE;oBACP,MAAM,EAAE;wBACN,UAAU,EAAE,SAAS;wBACrB,aAAa,EAAE,UAAU;qBAC1B;iBACF;gBACD,OAAO;aACR,CAAC;SACH,CACF,CAAA;QAED,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,OAAO,IAAI,CAAA;QACb,CAAC;QAED,MAAM,MAAM,GAAY,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAA;QAC7C,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1B,OAAO,IAAI,CAAA;QACb,CAAC;QAED,OAAO,MAAM,kCAAkC,CAAC,SAAS,EAAE,MAAM,CAAC,CAAA;IACpE,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED,MAAM,CAAC,MAAM,gCAAgC,GAAG,KAAK,EACnD,SAAuB,EACvB,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAA4B,EAChC,EAAE;IAC1B,MAAM,qBAAqB,GAAG,4BAA4B,CAAC,IAAI,CAAC,CAAA;IAChE,IAAI,qBAAqB,EAAE,CAAC;QAC1B,MAAM,UAAU,GAAG,MAAM,kCAAkC,CAAC,SAAS,EAAE,qBAAqB,CAAC,CAAA;QAC7F,IAAI,UAAU,EAAE,CAAC;YACf,OAAO,UAAU,CAAA;QACnB,CAAC;IACH,CAAC;IAED,MAAM,SAAS,GAAG,wBAAw
B,CAAC,IAAI,CAAC,CAAA;IAChD,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,OAAO,MAAM,+BAA+B,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAA;IAC5E,CAAC;IAED,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,aAAa,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,WAAW,EAAE,SAAS,EAAE,GAC7F,SAAS,CAAA;IACX,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,MAAM,+BAA+B,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAA;IAC5E,CAAC;IAED,MAAM,aAAa,GAAG,OAA+B,CAAA;IACrD,MAAM,aAAa,GAAG,YAAY,CAAC,aAAa,CAAC,MAAM,CAAC;QACtD,CAAC,CAAE,aAAa,CAAC,MAAkC;QACnD,CAAC,CAAC,EAAE,CAAA;IACN,MAAM,WAAW,GAA4B;QAC3C,OAAO,EAAE;YACP,GAAG,aAAa;YAChB,MAAM,EAAE;gBACN,GAAG,aAAa;gBAChB,WAAW;aACZ;SACF;QACD,OAAO;QACP,eAAe,EAAE;YACf,sBAAsB,EAAE;gBACtB,eAAe,EAAE,kBAAkB;aACpC;SACF;QACD,cAAc,EAAE,IAAI;QACpB,WAAW,EAAE,IAAI;KAClB,CAAA;IAED,IAAI,CAAC;QACH,MAAM,SAAS,GACb,eAAe,CAAC,YAAY,CAAC;YAC7B,iHAAiH,CAAA;QACnH,MAAM,OAAO,GAA2B;YACtC,cAAc,EAAE,kBAAkB;YAClC,YAAY,EAAE,SAAS;YACvB,MAAM,EAAE,kBAAkB;YAC1B,MAAM,EAAE,yBAAyB;YACjC,OAAO,EAAE,WAAW;YACpB,iBAAiB,EAAE,GAAG;YACtB,+BAA+B,EAAE,OAAO;SACzC,CAAA;QAED,IAAI,UAAU,EAAE,CAAC;YACf,OAAO,CAAC,uBAAuB,CAAC,GAAG,UAAU,CAAA;QAC/C,CAAC;QACD,IAAI,aAAa,EAAE,CAAC;YAClB,OAAO,CAAC,0BAA0B,CAAC,GAAG,aAAa,CAAA;QACrD,CAAC;QACD,IAAI,WAAW,EAAE,CAAC;YAChB,OAAO,CAAC,mBAAmB,CAAC,GAAG,WAAW,CAAA;QAC5C,CAAC;QACD,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1D,OAAO,CAAC,mBAAmB,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,CAAA;QAC/C,CAAC;QACD,IAAI,SAAS,EAAE,CAAC;YACd,OAAO,CAAC,sBAAsB,CAAC,GAAG,SAAS,CAAA;QAC7C,CAAC;QACD,IAAI,SAAS,EAAE,CAAC;YACd,OAAO,CAAC,0BAA0B,CAAC,GAAG,SAAS,CAAA;QACjD,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,gBAAgB,CACrC,SAAS,EACT,kDAAkD,MAAM,EAAE,EAC1D;YACE,MAAM,EAAE,MAAM;YACd,OAAO;YACP,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC;SAClC,CACF,CAAA;QAED,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,OAAO,MAAM,+BAA+B,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAA;QAC5E,CAAC;QAED,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAA;QACjC,MAAM,SAAS,GAAG,2BAA2B,CAAC,GAAG,CAAC,CAAA;QAClD,MAAM,MAAM,GAAY,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;QAC7C,IAAI,CAAC,YAAY,CAAC,
MAAM,CAAC,EAAE,CAAC;YAC1B,OAAO,MAAM,+BAA+B,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAA;QAC5E,CAAC;QAED,MAAM,UAAU,GAAG,MAAM,kCAAkC,CAAC,SAAS,EAAE,MAAM,CAAC,CAAA;QAC9E,IAAI,UAAU,EAAE,CAAC;YACf,OAAO,UAAU,CAAA;QACnB,CAAC;QAED,OAAO,MAAM,+BAA+B,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAA;IAC5E,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,MAAM,+BAA+B,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAA;IAC5E,CAAC;AACH,CAAC,CAAA;AAED,MAAM,kCAAkC,GAAG,KAAK,EAC9C,SAAuB,EACvB,OAAgC,EACR,EAAE;IAC1B,MAAM,aAAa,GAAG,OAA0B,CAAA;IAEhD,MAAM,iBAAiB,GAAG,aAAa,CAAC,QAAQ,CAAA;IAChD,MAAM,QAAQ,GAAG,YAAY,CAAC,iBAAiB,CAAC,CAAC,CAAC,CAAE,iBAAqC,CAAC,CAAC,CAAC,IAAI,CAAA;IAEhG,MAAM,iBAAiB,GACrB,CAAC,QAAQ,CAAC,CAAC,CAAE,QAA4B,CAAC,+BAA+B,CAAC,CAAC,CAAC,IAAI,CAAC;QACjF,aAAa,CAAC,+BAA+B,CAAA;IAE/C,MAAM,QAAQ,GAAG,YAAY,CAAC,iBAAiB,CAAC;QAC9C,CAAC,CAAE,iBAAyC;QAC5C,CAAC,CAAC,IAAI,CAAA;IACR,MAAM,aAAa,GAAG,KAAK,CAAC,OAAO,CAAC,QAAQ,EAAE,aAAa,CAAC;QAC1D,CAAC,CAAE,QAAQ,EAAE,aAA2B;QACxC,CAAC,CAAC,IAAI,CAAA;IACR,MAAM,eAAe,GAAG,KAAK,CAAC,OAAO,CAAC,QAAQ,EAAE,iBAAiB,CAAC;QAChE,CAAC,CAAE,QAAQ,EAAE,iBAA+B;QAC5C,CAAC,CAAC,IAAI,CAAA;IAER,MAAM,aAAa,GAA8B,EAAE,CAAA;IACnD,IAAI,aAAa,EAAE,CAAC;QAClB,aAAa,CAAC,IAAI,CAChB,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,KAAK,EAAoC,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,CAC1F,CAAA;IACH,CAAC;IACD,IAAI,eAAe,EAAE,CAAC;QACpB,aAAa,CAAC,IAAI,CAChB,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC,KAAK,EAAoC,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,CAC5F,CAAA;IACH,CAAC;IACD,MAAM,cAAc,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,KAAK,EAAoC,EAAE,CACtF,YAAY,CAAC,KAAK,CAAC,CACpB,CAAA;IAED,MAAM,YAAY,GAAG,CAAC,GAAG,cAAc,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACzD,MAAM,MAAM,GAAG,CAAuB,CAAA;QACtC,MAAM,MAAM,GAAG,CAAuB,CAAA;QACtC,MAAM,KAAK,GAAG,OAAO,MAAM,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;QAChE,MAAM,KAAK,GAAG,OAAO,MAAM,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;QAChE,oDAAoD;QACpD,IAAI,KAAK,KAAK,KAAK,IAAI,KAAK,KAAK,KAAK,EAAE,CAAC;YACvC,OAAO,CAAC,CAAA;QACV,CAAC;QACD,IAAI,KAAK,
KAAK,KAAK,IAAI,KAAK,KAAK,KAAK,EAAE,CAAC;YACvC,OAAO,CAAC,CAAC,CAAA;QACX,CAAC;QACD,MAAM,KAAK,GAAG,OAAO,MAAM,CAAC,YAAY,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,CAAA;QAChF,MAAM,KAAK,GAAG,OAAO,MAAM,CAAC,YAAY,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,CAAA;QAChF,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YACrC,OAAO,CAAC,CAAC,CAAA;QACX,CAAC;QACD,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YACrC,OAAO,CAAC,CAAA;QACV,CAAC;QACD,OAAO,CAAC,CAAA;IACV,CAAC,CAAC,CAAA;IAEF,qEAAqE;IACrE,MAAM,aAAa,GAAG,IAAI,GAAG,EAAU,CAAA;IACvC,MAAM,gBAAgB,GAA8B,EAAE,CAAA;IACtD,KAAK,MAAM,SAAS,IAAI,YAAY,EAAE,CAAC;QACrC,MAAM,WAAW,GAAG,SAA+B,CAAA;QACnD,MAAM,iBAAiB,GAAG,WAAW,CAAC,YAAY,CAAA;QAClD,MAAM,IAAI,GAAG,OAAO,iBAAiB,KAAK,QAAQ,CAAC,CAAC,CAAC,iBAAiB,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,CAAA;QACzF,IAAI,IAAI,IAAI,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;YACpC,SAAQ;QACV,CAAC;QACD,IAAI,IAAI,EAAE,CAAC;YACT,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACzB,CAAC;QACD,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;IAClC,CAAC;IAED,OAAO,MAAM,mBAAmB,CAAC,SAAS,EAAE,gBAAgB,EAAE,CAAC,CAAC,CAAA;AAClE,CAAC,CAAA;AAED,MAAM,mBAAmB,GAAG,KAAK,EAC/B,SAAuB,EACvB,MAA0C,EAC1C,KAAa,EACW,EAAE;IAC1B,IAAI,KAAK,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;QAC3B,OAAO,IAAI,CAAA;IACb,CAAC;IACD,MAAM,SAAS,GAAG,MAAM,oBAAoB,CAAC,SAAS,EAAE,MAAM,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAA;IAC5E,IAAI,SAAS,EAAE,CAAC;QACd,OAAO,SAAS,CAAA;IAClB,CAAC;IACD,OAAO,mBAAmB,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,GAAG,CAAC,CAAC,CAAA;AAC1D,CAAC,CAAA;AAED,MAAM,oBAAoB,GAAG,KAAK,EAChC,SAAuB,EACvB,KAA8B,EACN,EAAE;IAC1B,MAAM,WAAW,GAAG,KAA2B,CAAA;IAC/C,MAAM,OAAO,GACX,OAAO,WAAW,CAAC,OAAO,KAAK,QAAQ;QACrC,CAAC,CAAC,WAAW,CAAC,OAAO;QACrB,CAAC,CAAC,OAAO,WAAW,CAAC,GAAG,KAAK,QAAQ;YACnC,CAAC,CAAC,WAAW,CAAC,GAAG;YACjB,CAAC,CAAC,IAAI,CAAA;IACZ,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO,IAAI,CAAA;IACb,CAAC;IAED,MAAM,QAAQ,GAAG,CAAC,GAAG,EAAE;QACrB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAA;YAC/B,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,EAAE,OAAO,CAAC,CAAA;YACvC,MAAM,CAAC,YAAY
,CAAC,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,CAAA;YACtC,OAAO,MAAM,CAAC,QAAQ,EAAE,CAAA;QAC1B,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,SAAS,GAAG,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAA;YACnD,OAAO,GAAG,OAAO,GAAG,SAAS,oBAAoB,CAAA;QACnD,CAAC;IACH,CAAC,CAAC,EAAE,CAAA;IAEJ,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,gBAAgB,CAAC,SAAS,EAAE,QAAQ,EAAE;YAC3D,OAAO,EAAE,eAAe;SACzB,CAAC,CAAA;QACF,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,OAAO,MAAM,qBAAqB,CAAC,SAAS,EAAE,OAAO,CAAC,CAAA;QACxD,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAA;QAClC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtB,OAAO,MAAM,qBAAqB,CAAC,SAAS,EAAE,OAAO,CAAC,CAAA;QACxD,CAAC;QACD,MAAM,UAAU,GAAG,mBAAmB,CAAC,IAAI,CAAC,CAAA;QAC5C,IAAI,UAAU,EAAE,CAAC;YACf,OAAO,UAAU,CAAA;QACnB,CAAC;QACD,MAAM,WAAW,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAA;QAC5C,IAAI,WAAW,EAAE,CAAC;YAChB,OAAO,WAAW,CAAA;QACpB,CAAC;QACD,OAAO,MAAM,qBAAqB,CAAC,SAAS,EAAE,OAAO,CAAC,CAAA;IACxD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,MAAM,qBAAqB,CAAC,SAAS,EAAE,OAAO,CAAC,CAAA;IACxD,CAAC;AACH,CAAC,CAAA;AAED,MAAM,qBAAqB,GAAG,KAAK,EACjC,SAAuB,EACvB,OAAe,EACS,EAAE;IAC1B,MAAM,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,aAAa,EAAE,EAAE,CAAC,CAAA;IACpD,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,gBAAgB,CAAC,SAAS,EAAE,MAAM,EAAE,EAAE,OAAO,EAAE,eAAe,EAAE,CAAC,CAAA;QACxF,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,OAAO,IAAI,CAAA;QACb,CAAC;QACD,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAA;QAClC,MAAM,UAAU,GAAG,mBAAmB,CAAC,IAAI,CAAC,CAAA;QAC5C,IAAI,UAAU,EAAE,CAAC;YACf,OAAO,UAAU,CAAA;QACnB,CAAC;QACD,OAAO,kBAAkB,CAAC,IAAI,CAAC,CAAA;IACjC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC,CAAA;AAID,MAAM,mBAAmB,GAAG,CAAC,GAAW,EAAiB,EAAE;IACzD,IAAI,CAAC;QACH,MAAM,MAAM,GAAY,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;QACvC,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1B,OAAO,IAAI,CAAA;QACb,CAAC;QACD,MAAM,aAAa,GAAG,MAAwB,CAAA;QAC9C,MAAM,aAAa,GAAG,aAAa,CAAC,MAAM,CAAA;QAC1C,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,aAAa,CAAC,EAAE,CAAC;YAClC,OAAO,IAAI,CAAA;QACb,CAAC;QACD,MAAM,MAAM,GAAG,aAAa,CAAA;QAC5B,MAAM,KAAK,GAAa,EAAE,CAAA;QAC1B
,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,CAAC;gBACzB,SAAQ;YACV,CAAC;YACD,MAAM,WAAW,GAAG,KAA2B,CAAA;YAC/C,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAE,WAAW,CAAC,IAAkB,CAAC,CAAC,CAAC,IAAI,CAAA;YACrF,IAAI,CAAC,IAAI,EAAE,CAAC;gBACV,SAAQ;YACV,CAAC;YACD,MAAM,IAAI,GAAG,IAAI;iBACd,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;gBACX,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC;oBACvB,OAAO,EAAE,CAAA;gBACX,CAAC;gBACD,MAAM,SAAS,GAAG,GAA2B,CAAA;gBAC7C,OAAO,OAAO,SAAS,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;YACjE,CAAC,CAAC;iBACD,IAAI,CAAC,EAAE,CAAC;iBACR,IAAI,EAAE,CAAA;YACT,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACpB,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAClB,CAAC;QACH,CAAC;QACD,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAA;QAC1C,OAAO,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAA;IAClD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC,CAAA;AAED,MAAM,kBAAkB,GAAG,CAAC,GAAW,EAAiB,EAAE;IACxD,MAAM,OAAO,GAAG,iCAAiC,CAAA;IACjD,MAAM,KAAK,GAAa,EAAE,CAAA;IAC1B,IAAI,KAAK,GAA2B,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IACrD,OAAO,KAAK,EAAE,CAAC;QACb,MAAM,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;QAC9B,MAAM,OAAO,GAAG,kBAAkB,CAAC,OAAO,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;QAC1E,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QACrB,CAAC;QACD,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IAC3B,CAAC;IACD,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAA;IAC1C,OAAO,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAA;AAClD,CAAC,CAAA"}
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
import { spawn } from 'node:child_process';
|
|
2
|
+
import { randomUUID } from 'node:crypto';
|
|
3
|
+
import { promises as fs } from 'node:fs';
|
|
4
|
+
import { tmpdir } from 'node:os';
|
|
5
|
+
import { join } from 'node:path';
|
|
6
|
+
import { isWhisperCppReady, probeMediaDurationSecondsWithFfprobe, transcribeMediaFileWithWhisper, } from '../../../../transcription/whisper.js';
|
|
7
|
+
import { ProgressKind } from '../../../link-preview/deps.js';
|
|
8
|
+
// Upper bound (in milliseconds; 300_000 ms = 5 minutes) on a single yt-dlp run before downloadAudio sends it SIGTERM and rejects.
const YT_DLP_TIMEOUT_MS = 300_000;
|
|
9
|
+
// Cap on how much yt-dlp stderr is retained for error messages. NOTE: downloadAudio compares this against the decoded string's length, so it is effectively a character (UTF-16 unit) limit rather than a byte limit.
const MAX_STDERR_BYTES = 8192;
|
|
10
|
+
/**
 * Downloads a URL's audio track with yt-dlp into a temporary MP3 file and
 * transcribes it with whichever Whisper provider is available.
 *
 * The function never throws: every failure is reported through the `error`
 * field of the returned object. The temporary file is removed on all paths.
 *
 * @param {object} options
 * @param {string | null | undefined} options.ytDlpPath - yt-dlp executable; a falsy value yields a "not configured" error.
 * @param {string | null | undefined} options.openaiApiKey - Forwarded to the transcription helper.
 * @param {string | null | undefined} options.falApiKey - Forwarded to the transcription helper.
 * @param {string} options.url - Media URL to download.
 * @param {Function} [options.onProgress] - Optional progress callback; ignored unless it is a function.
 * @returns {Promise<{ text: unknown, provider: unknown, error: unknown, notes: string[] }>}
 *   `text`/`provider`/`error` come from the transcription helper on success;
 *   on failure `text` and `provider` are `null` and `error` is an `Error`.
 */
export const fetchTranscriptWithYtDlp = async ({ ytDlpPath, openaiApiKey, falApiKey, url, onProgress, }) => {
    const notes = [];
    // Single place that shapes the failure result so every early exit agrees.
    const failure = (error) => ({ text: null, provider: null, error, notes });

    if (!ytDlpPath) {
        return failure(new Error('yt-dlp is not configured (set YT_DLP_PATH or ensure yt-dlp is on PATH)'));
    }
    const hasLocalWhisper = await isWhisperCppReady();
    if (!openaiApiKey && !falApiKey && !hasLocalWhisper) {
        return failure(new Error('No transcription providers available (install whisper-cpp or set OPENAI_API_KEY or FAL_KEY)'));
    }

    const progress = typeof onProgress === 'function' ? onProgress : null;
    // Labels reported in the "whisper start" progress event. The final
    // fallbacks ('unknown' / null) are unreachable after the guard above but
    // are kept so the expressions stay total.
    const providerHint = hasLocalWhisper
        ? 'cpp'
        : openaiApiKey && falApiKey
            ? 'openai->fal'
            : openaiApiKey
                ? 'openai'
                : falApiKey
                    ? 'fal'
                    : 'unknown';
    const modelId = hasLocalWhisper
        ? 'whisper.cpp'
        : openaiApiKey && falApiKey
            ? 'whisper-1->fal-ai/wizper'
            : openaiApiKey
                ? 'whisper-1'
                : falApiKey
                    ? 'fal-ai/wizper'
                    : null;

    const outputFile = join(tmpdir(), `summarize-${randomUUID()}.mp3`);
    // Prefix used if the current stage throws. It is switched once the
    // download has produced a file, so probing/transcription failures are no
    // longer misreported as download failures.
    let failurePrefix = 'yt-dlp failed to download audio';
    try {
        progress?.({
            kind: ProgressKind.TranscriptMediaDownloadStart,
            url,
            service: 'youtube',
            mediaUrl: url,
            totalBytes: null,
        });
        await downloadAudio(ytDlpPath, url, outputFile);
        // A missing output file still counts as a download failure.
        const stat = await fs.stat(outputFile);
        failurePrefix = 'Failed to transcribe audio downloaded by yt-dlp';
        progress?.({
            kind: ProgressKind.TranscriptMediaDownloadDone,
            url,
            service: 'youtube',
            downloadedBytes: stat.size,
            totalBytes: null,
        });

        const probedDurationSeconds = await probeMediaDurationSecondsWithFfprobe(outputFile);
        progress?.({
            kind: ProgressKind.TranscriptWhisperStart,
            url,
            service: 'youtube',
            providerHint,
            modelId,
            totalDurationSeconds: probedDurationSeconds,
            parts: null,
        });
        const result = await transcribeMediaFileWithWhisper({
            filePath: outputFile,
            mediaType: 'audio/mpeg',
            filename: 'audio.mp3',
            openaiApiKey,
            falApiKey,
            totalDurationSeconds: probedDurationSeconds,
            // Re-emit the helper's progress in this module's event shape.
            onProgress: (event) => {
                progress?.({
                    kind: ProgressKind.TranscriptWhisperProgress,
                    url,
                    service: 'youtube',
                    processedDurationSeconds: event.processedDurationSeconds,
                    totalDurationSeconds: event.totalDurationSeconds,
                    partIndex: event.partIndex,
                    parts: event.parts,
                });
            },
        });
        if (result.notes.length > 0) {
            notes.push(...result.notes);
        }
        return { text: result.text, provider: result.provider, error: result.error, notes };
    }
    catch (error) {
        return failure(wrapError(failurePrefix, error));
    }
    finally {
        // Best-effort cleanup: the file may not exist if the download failed.
        await fs.unlink(outputFile).catch(() => { });
    }
};
|
|
111
|
+
/**
 * Spawns yt-dlp to extract the audio of `url` as MP3 into `outputFile`.
 *
 * @param {string} ytDlpPath - Executable to spawn.
 * @param {string} url - Media URL, passed as the only positional argument.
 * @param {string} outputFile - Destination path passed via `-o`.
 * @returns {Promise<void>} Resolves when the process exits with code 0.
 *   Rejects with an `Error` on non-zero exit, termination by signal, spawn
 *   failure, or when the run exceeds `YT_DLP_TIMEOUT_MS`. Captured stderr
 *   (truncated to `MAX_STDERR_BYTES` characters) is appended to exit errors.
 */
async function downloadAudio(ytDlpPath, url, outputFile) {
    return new Promise((resolve, reject) => {
        const args = [
            '-x',
            '--audio-format',
            'mp3',
            '--no-playlist',
            '--retries',
            '3',
            '--no-warnings',
            '-o',
            outputFile,
            // End-of-options marker: without it a URL that starts with "-"
            // would be parsed by yt-dlp as a command-line option.
            '--',
            url,
        ];
        const proc = spawn(ytDlpPath, args, { stdio: ['ignore', 'ignore', 'pipe'] });

        let stderr = '';
        let settled = false;
        // Settle exactly once; later events (e.g. 'close' after a timeout or
        // after 'error') are ignored instead of re-settling the promise.
        const settle = (error) => {
            if (settled) {
                return;
            }
            settled = true;
            clearTimeout(timeout);
            if (error) {
                reject(error);
            }
            else {
                resolve();
            }
        };

        if (proc.stderr) {
            proc.stderr.setEncoding('utf8');
            proc.stderr.on('data', (chunk) => {
                // Keep only the first MAX_STDERR_BYTES characters of stderr.
                if (stderr.length >= MAX_STDERR_BYTES) {
                    return;
                }
                const remaining = MAX_STDERR_BYTES - stderr.length;
                stderr += chunk.slice(0, remaining);
            });
        }

        const timeout = setTimeout(() => {
            proc.kill('SIGTERM');
            settle(new Error('yt-dlp download timeout'));
        }, YT_DLP_TIMEOUT_MS);

        proc.on('close', (code, signal) => {
            if (code === 0) {
                settle();
                return;
            }
            const detail = stderr.trim();
            const suffix = detail ? `: ${detail}` : '';
            // A null exit code means the process was ended by a signal.
            if (code === null) {
                settle(new Error(`yt-dlp terminated (${signal ?? 'unknown'})${suffix}`));
                return;
            }
            settle(new Error(`yt-dlp exited with code ${code}${suffix}`));
        });

        // Emitted when the process could not be spawned or killed.
        proc.on('error', (error) => {
            settle(error);
        });
    });
}
|
|
160
|
+
/**
 * Builds a new Error whose message is `${prefix}: <detail>` and whose `cause`
 * is the original thrown value.
 *
 * @param {string} prefix - Context placed in front of the original message.
 * @param {unknown} error - The caught value; may be anything, not only an Error.
 * @returns {Error} Wrapper error; `cause` always holds the original value.
 */
function wrapError(prefix, error) {
    if (error instanceof Error) {
        return new Error(`${prefix}: ${error.message}`, { cause: error });
    }
    let detail;
    try {
        detail = String(error);
    }
    catch {
        // String() throws for values without a usable toString (e.g. objects
        // created with Object.create(null)); fall back to the generic tag so
        // wrapping never throws from inside an error-handling path.
        detail = Object.prototype.toString.call(error);
    }
    // Keep the raw thrown value reachable for callers that inspect `cause`.
    return new Error(`${prefix}: ${detail}`, { cause: error });
}
|
|
166
|
+
//# sourceMappingURL=yt-dlp.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"yt-dlp.js","sourceRoot":"","sources":["../../../../../../src/content/transcript/providers/youtube/yt-dlp.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAA;AAC1C,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,SAAS,CAAA;AACxC,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAA;AAChC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAChC,OAAO,EACL,iBAAiB,EACjB,oCAAoC,EAEpC,8BAA8B,GAC/B,MAAM,sCAAsC,CAAA;AAE7C,OAAO,EAAE,YAAY,EAAE,MAAM,+BAA+B,CAAA;AAE5D,MAAM,iBAAiB,GAAG,OAAO,CAAA;AACjC,MAAM,gBAAgB,GAAG,IAAI,CAAA;AAiB7B,MAAM,CAAC,MAAM,wBAAwB,GAAG,KAAK,EAAE,EAC7C,SAAS,EACT,YAAY,EACZ,SAAS,EACT,GAAG,EACH,UAAU,GACG,EAAkC,EAAE;IACjD,MAAM,KAAK,GAAa,EAAE,CAAA;IAE1B,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,OAAO;YACL,IAAI,EAAE,IAAI;YACV,QAAQ,EAAE,IAAI;YACd,KAAK,EAAE,IAAI,KAAK,CAAC,wEAAwE,CAAC;YAC1F,KAAK;SACN,CAAA;IACH,CAAC;IACD,MAAM,eAAe,GAAG,MAAM,iBAAiB,EAAE,CAAA;IACjD,IAAI,CAAC,YAAY,IAAI,CAAC,SAAS,IAAI,CAAC,eAAe,EAAE,CAAC;QACpD,OAAO;YACL,IAAI,EAAE,IAAI;YACV,QAAQ,EAAE,IAAI;YACd,KAAK,EAAE,IAAI,KAAK,CACd,6FAA6F,CAC9F;YACD,KAAK;SACN,CAAA;IACH,CAAC;IAED,MAAM,QAAQ,GAAG,OAAO,UAAU,KAAK,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAA;IACrE,MAAM,YAAY,GAAyD,eAAe;QACxF,CAAC,CAAC,KAAK;QACP,CAAC,CAAC,YAAY,IAAI,SAAS;YACzB,CAAC,CAAC,aAAa;YACf,CAAC,CAAC,YAAY;gBACZ,CAAC,CAAC,QAAQ;gBACV,CAAC,CAAC,SAAS;oBACT,CAAC,CAAC,KAAK;oBACP,CAAC,CAAC,SAAS,CAAA;IACnB,MAAM,OAAO,GAAG,eAAe;QAC7B,CAAC,CAAC,aAAa;QACf,CAAC,CAAC,YAAY,IAAI,SAAS;YACzB,CAAC,CAAC,0BAA0B;YAC5B,CAAC,CAAC,YAAY;gBACZ,CAAC,CAAC,WAAW;gBACb,CAAC,CAAC,SAAS;oBACT,CAAC,CAAC,eAAe;oBACjB,CAAC,CAAC,IAAI,CAAA;IAEd,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,EAAE,EAAE,aAAa,UAAU,EAAE,MAAM,CAAC,CAAA;IAClE,IAAI,CAAC;QACH,QAAQ,EAAE,CAAC;YACT,IAAI,EAAE,YAAY,CAAC,4BAA4B;YAC/C,GAAG;YACH,OAAO,EAAE,SAAS;YAClB,QAAQ,EAAE,GAAG;YACb,UAAU,EAAE,IAAI;SACjB,CAAC,CAAA;QACF,MAAM,aAAa,CAAC,SAAS,EAAE,GAAG,EAAE,UAAU,CAAC,CAAA;QAC/C,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,CAAA;QACtC,QAAQ,EAAE,CAAC;YACT,IAAI,EAAE,YAAY,CAAC,2BAA2B;YAC9C,GAAG;YACH,OAAO,E
AAE,SAAS;YAClB,eAAe,EAAE,IAAI,CAAC,IAAI;YAC1B,UAAU,EAAE,IAAI;SACjB,CAAC,CAAA;QAEF,MAAM,qBAAqB,GAAG,MAAM,oCAAoC,CAAC,UAAU,CAAC,CAAA;QACpF,QAAQ,EAAE,CAAC;YACT,IAAI,EAAE,YAAY,CAAC,sBAAsB;YACzC,GAAG;YACH,OAAO,EAAE,SAAS;YAClB,YAAY;YACZ,OAAO;YACP,oBAAoB,EAAE,qBAAqB;YAC3C,KAAK,EAAE,IAAI;SACZ,CAAC,CAAA;QACF,MAAM,MAAM,GAAG,MAAM,8BAA8B,CAAC;YAClD,QAAQ,EAAE,UAAU;YACpB,SAAS,EAAE,YAAY;YACvB,QAAQ,EAAE,WAAW;YACrB,YAAY;YACZ,SAAS;YACT,oBAAoB,EAAE,qBAAqB;YAC3C,UAAU,EAAE,CAAC,KAAK,EAAE,EAAE;gBACpB,QAAQ,EAAE,CAAC;oBACT,IAAI,EAAE,YAAY,CAAC,yBAAyB;oBAC5C,GAAG;oBACH,OAAO,EAAE,SAAS;oBAClB,wBAAwB,EAAE,KAAK,CAAC,wBAAwB;oBACxD,oBAAoB,EAAE,KAAK,CAAC,oBAAoB;oBAChD,SAAS,EAAE,KAAK,CAAC,SAAS;oBAC1B,KAAK,EAAE,KAAK,CAAC,KAAK;iBACnB,CAAC,CAAA;YACJ,CAAC;SACF,CAAC,CAAA;QACF,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,KAAK,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,CAAA;QACxD,OAAO,EAAE,IAAI,EAAE,MAAM,CAAC,IAAI,EAAE,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,KAAK,EAAE,CAAA;IACrF,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO;YACL,IAAI,EAAE,IAAI;YACV,QAAQ,EAAE,IAAI;YACd,KAAK,EAAE,SAAS,CAAC,iCAAiC,EAAE,KAAK,CAAC;YAC1D,KAAK;SACN,CAAA;IACH,CAAC;YAAS,CAAC;QACT,MAAM,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAA;IAC7C,CAAC;AACH,CAAC,CAAA;AAED,KAAK,UAAU,aAAa,CAAC,SAAiB,EAAE,GAAW,EAAE,UAAkB;IAC7E,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACrC,MAAM,IAAI,GAAG;YACX,IAAI;YACJ,gBAAgB;YAChB,KAAK;YACL,eAAe;YACf,WAAW;YACX,GAAG;YACH,eAAe;YACf,IAAI;YACJ,UAAU;YACV,GAAG;SACJ,CAAA;QAED,MAAM,IAAI,GAAG,KAAK,CAAC,SAAS,EAAE,IAAI,EAAE,EAAE,KAAK,EAAE,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,CAAC,EAAE,CAAC,CAAA;QAC5E,IAAI,MAAM,GAAG,EAAE,CAAA;QAEf,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAA;YAC/B,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE;gBACvC,IAAI,MAAM,CAAC,MAAM,IAAI,gBAAgB;oBAAE,OAAM;gBAC7C,MAAM,SAAS,GAAG,gBAAgB,GAAG,MAAM,CAAC,MAAM,CAAA;gBAClD,MAAM,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAA;YACrC,CAAC,CAAC,CAAA;QACJ,CAAC;QAED,MAAM,OAAO
,GAAG,UAAU,CAAC,GAAG,EAAE;YAC9B,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;YACpB,MAAM,CAAC,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC,CAAA;QAC9C,CAAC,EAAE,iBAAiB,CAAC,CAAA;QAErB,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE;YAChC,YAAY,CAAC,OAAO,CAAC,CAAA;YACrB,IAAI,IAAI,KAAK,CAAC,EAAE,CAAC;gBACf,OAAO,EAAE,CAAA;gBACT,OAAM;YACR,CAAC;YACD,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,EAAE,CAAA;YAC5B,MAAM,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,KAAK,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAA;YAC1C,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;gBAClB,MAAM,CAAC,IAAI,KAAK,CAAC,sBAAsB,MAAM,IAAI,SAAS,IAAI,MAAM,EAAE,CAAC,CAAC,CAAA;gBACxE,OAAM;YACR,CAAC;YACD,MAAM,CAAC,IAAI,KAAK,CAAC,2BAA2B,IAAI,GAAG,MAAM,EAAE,CAAC,CAAC,CAAA;QAC/D,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE;YACzB,YAAY,CAAC,OAAO,CAAC,CAAA;YACrB,MAAM,CAAC,KAAK,CAAC,CAAA;QACf,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;AACJ,CAAC;AAED,SAAS,SAAS,CAAC,MAAc,EAAE,KAAc;IAC/C,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;QAC3B,OAAO,IAAI,KAAK,CAAC,GAAG,MAAM,KAAK,KAAK,CAAC,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAA;IACnE,CAAC;IACD,OAAO,IAAI,KAAK,CAAC,GAAG,MAAM,KAAK,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAA;AACjD,CAAC"}
|