@steipete/summarize 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +52 -0
- package/LICENSE +21 -0
- package/README.md +185 -0
- package/dist/cli.cjs +74333 -0
- package/dist/cli.cjs.map +7 -0
- package/dist/esm/cli-main.js +80 -0
- package/dist/esm/cli-main.js.map +1 -0
- package/dist/esm/cli.js +18 -0
- package/dist/esm/cli.js.map +1 -0
- package/dist/esm/config.js +33 -0
- package/dist/esm/config.js.map +1 -0
- package/dist/esm/content/asset.js +167 -0
- package/dist/esm/content/asset.js.map +1 -0
- package/dist/esm/content/index.js +4 -0
- package/dist/esm/content/index.js.map +1 -0
- package/dist/esm/content/link-preview/client.js +20 -0
- package/dist/esm/content/link-preview/client.js.map +1 -0
- package/dist/esm/content/link-preview/content/article.js +150 -0
- package/dist/esm/content/link-preview/content/article.js.map +1 -0
- package/dist/esm/content/link-preview/content/cleaner.js +55 -0
- package/dist/esm/content/link-preview/content/cleaner.js.map +1 -0
- package/dist/esm/content/link-preview/content/fetcher.js +120 -0
- package/dist/esm/content/link-preview/content/fetcher.js.map +1 -0
- package/dist/esm/content/link-preview/content/index.js +275 -0
- package/dist/esm/content/link-preview/content/index.js.map +1 -0
- package/dist/esm/content/link-preview/content/parsers.js +77 -0
- package/dist/esm/content/link-preview/content/parsers.js.map +1 -0
- package/dist/esm/content/link-preview/content/types.js +4 -0
- package/dist/esm/content/link-preview/content/types.js.map +1 -0
- package/dist/esm/content/link-preview/content/utils.js +127 -0
- package/dist/esm/content/link-preview/content/utils.js.map +1 -0
- package/dist/esm/content/link-preview/content/youtube.js +82 -0
- package/dist/esm/content/link-preview/content/youtube.js.map +1 -0
- package/dist/esm/content/link-preview/deps.js +2 -0
- package/dist/esm/content/link-preview/deps.js.map +1 -0
- package/dist/esm/content/link-preview/fetch-with-timeout.js +35 -0
- package/dist/esm/content/link-preview/fetch-with-timeout.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/cache.js +73 -0
- package/dist/esm/content/link-preview/transcript/cache.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/index.js +95 -0
- package/dist/esm/content/link-preview/transcript/index.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/normalize.js +43 -0
- package/dist/esm/content/link-preview/transcript/normalize.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/providers/generic.js +11 -0
- package/dist/esm/content/link-preview/transcript/providers/generic.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/providers/podcast.js +12 -0
- package/dist/esm/content/link-preview/transcript/providers/podcast.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/providers/twitter.js +12 -0
- package/dist/esm/content/link-preview/transcript/providers/twitter.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/providers/youtube/api.js +257 -0
- package/dist/esm/content/link-preview/transcript/providers/youtube/api.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/providers/youtube/apify.js +55 -0
- package/dist/esm/content/link-preview/transcript/providers/youtube/apify.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/providers/youtube/captions.js +409 -0
- package/dist/esm/content/link-preview/transcript/providers/youtube/captions.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/providers/youtube/ytdlp.js +114 -0
- package/dist/esm/content/link-preview/transcript/providers/youtube/ytdlp.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/providers/youtube.js +74 -0
- package/dist/esm/content/link-preview/transcript/providers/youtube.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/types.js +2 -0
- package/dist/esm/content/link-preview/transcript/types.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/utils.js +193 -0
- package/dist/esm/content/link-preview/transcript/utils.js.map +1 -0
- package/dist/esm/content/link-preview/types.js +2 -0
- package/dist/esm/content/link-preview/types.js.map +1 -0
- package/dist/esm/costs.js +57 -0
- package/dist/esm/costs.js.map +1 -0
- package/dist/esm/firecrawl.js +54 -0
- package/dist/esm/firecrawl.js.map +1 -0
- package/dist/esm/flags.js +97 -0
- package/dist/esm/flags.js.map +1 -0
- package/dist/esm/index.js +4 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/esm/llm/generate-text.js +296 -0
- package/dist/esm/llm/generate-text.js.map +1 -0
- package/dist/esm/llm/google-models.js +112 -0
- package/dist/esm/llm/google-models.js.map +1 -0
- package/dist/esm/llm/html-to-markdown.js +44 -0
- package/dist/esm/llm/html-to-markdown.js.map +1 -0
- package/dist/esm/llm/model-id.js +45 -0
- package/dist/esm/llm/model-id.js.map +1 -0
- package/dist/esm/pricing/litellm.js +25 -0
- package/dist/esm/pricing/litellm.js.map +1 -0
- package/dist/esm/prompts/file.js +14 -0
- package/dist/esm/prompts/file.js.map +1 -0
- package/dist/esm/prompts/index.js +3 -0
- package/dist/esm/prompts/index.js.map +1 -0
- package/dist/esm/prompts/link-summary.js +105 -0
- package/dist/esm/prompts/link-summary.js.map +1 -0
- package/dist/esm/run.js +1674 -0
- package/dist/esm/run.js.map +1 -0
- package/dist/esm/shared/contracts.js +2 -0
- package/dist/esm/shared/contracts.js.map +1 -0
- package/dist/esm/summarizeHome.js +20 -0
- package/dist/esm/summarizeHome.js.map +1 -0
- package/dist/esm/tty/live-markdown.js +52 -0
- package/dist/esm/tty/live-markdown.js.map +1 -0
- package/dist/esm/tty/osc-progress.js +8 -0
- package/dist/esm/tty/osc-progress.js.map +1 -0
- package/dist/esm/tty/spinner.js +33 -0
- package/dist/esm/tty/spinner.js.map +1 -0
- package/dist/esm/version.js +44 -0
- package/dist/esm/version.js.map +1 -0
- package/dist/types/cli-main.d.ts +11 -0
- package/dist/types/cli.d.ts +1 -0
- package/dist/types/config.d.ts +15 -0
- package/dist/types/content/asset.d.ts +44 -0
- package/dist/types/content/index.d.ts +4 -0
- package/dist/types/content/link-preview/client.d.ts +14 -0
- package/dist/types/content/link-preview/content/article.d.ts +4 -0
- package/dist/types/content/link-preview/content/cleaner.d.ts +12 -0
- package/dist/types/content/link-preview/content/fetcher.d.ts +16 -0
- package/dist/types/content/link-preview/content/index.d.ts +4 -0
- package/dist/types/content/link-preview/content/parsers.d.ts +7 -0
- package/dist/types/content/link-preview/content/types.d.ts +44 -0
- package/dist/types/content/link-preview/content/utils.d.ts +16 -0
- package/dist/types/content/link-preview/content/youtube.d.ts +1 -0
- package/dist/types/content/link-preview/deps.d.ts +70 -0
- package/dist/types/content/link-preview/fetch-with-timeout.d.ts +4 -0
- package/dist/types/content/link-preview/transcript/cache.d.ts +29 -0
- package/dist/types/content/link-preview/transcript/index.d.ts +9 -0
- package/dist/types/content/link-preview/transcript/normalize.d.ts +3 -0
- package/dist/types/content/link-preview/transcript/providers/generic.d.ts +3 -0
- package/dist/types/content/link-preview/transcript/providers/podcast.d.ts +3 -0
- package/dist/types/content/link-preview/transcript/providers/twitter.d.ts +3 -0
- package/dist/types/content/link-preview/transcript/providers/youtube/api.d.ts +26 -0
- package/dist/types/content/link-preview/transcript/providers/youtube/apify.d.ts +1 -0
- package/dist/types/content/link-preview/transcript/providers/youtube/captions.d.ts +7 -0
- package/dist/types/content/link-preview/transcript/providers/youtube/ytdlp.d.ts +3 -0
- package/dist/types/content/link-preview/transcript/providers/youtube.d.ts +3 -0
- package/dist/types/content/link-preview/transcript/types.d.ts +23 -0
- package/dist/types/content/link-preview/transcript/utils.d.ts +7 -0
- package/dist/types/content/link-preview/types.d.ts +36 -0
- package/dist/types/costs.d.ts +31 -0
- package/dist/types/firecrawl.d.ts +5 -0
- package/dist/types/flags.d.ts +23 -0
- package/dist/types/index.d.ts +4 -0
- package/dist/types/llm/generate-text.d.ts +43 -0
- package/dist/types/llm/google-models.d.ts +10 -0
- package/dist/types/llm/html-to-markdown.d.ts +15 -0
- package/dist/types/llm/model-id.d.ts +14 -0
- package/dist/types/pricing/litellm.d.ts +13 -0
- package/dist/types/prompts/file.d.ts +6 -0
- package/dist/types/prompts/index.d.ts +3 -0
- package/dist/types/prompts/link-summary.d.ts +27 -0
- package/dist/types/run.d.ts +8 -0
- package/dist/types/shared/contracts.d.ts +2 -0
- package/dist/types/summarizeHome.d.ts +6 -0
- package/dist/types/tty/live-markdown.d.ts +10 -0
- package/dist/types/tty/osc-progress.d.ts +3 -0
- package/dist/types/tty/spinner.d.ts +10 -0
- package/dist/types/version.d.ts +2 -0
- package/docs/README.md +11 -0
- package/docs/config.md +28 -0
- package/docs/extract-only.md +13 -0
- package/docs/firecrawl.md +17 -0
- package/docs/llm.md +33 -0
- package/docs/openai.md +18 -0
- package/docs/site/.nojekyll +1 -0
- package/docs/site/404.html +37 -0
- package/docs/site/assets/site.css +577 -0
- package/docs/site/assets/site.js +69 -0
- package/docs/site/docs/config.html +73 -0
- package/docs/site/docs/extract-only.html +79 -0
- package/docs/site/docs/firecrawl.html +72 -0
- package/docs/site/docs/index.html +89 -0
- package/docs/site/docs/llm.html +70 -0
- package/docs/site/docs/openai.html +66 -0
- package/docs/site/docs/website.html +70 -0
- package/docs/site/docs/youtube.html +62 -0
- package/docs/site/index.html +125 -0
- package/docs/website.md +27 -0
- package/docs/youtube.md +32 -0
- package/package.json +76 -0
|
@@ -0,0 +1,409 @@
|
|
|
1
|
+
import { fetchWithTimeout } from '../../../fetch-with-timeout.js';
|
|
2
|
+
import { decodeHtmlEntities, sanitizeYoutubeJsonResponse } from '../../utils.js';
|
|
3
|
+
import { extractYoutubeiBootstrap } from './api.js';
|
|
4
|
+
// Shared HTTP headers for this module: sent as-is when downloading caption
// tracks, and the source of the User-Agent (and, for the ANDROID request,
// Accept-Language) sent to the youtubei player endpoint.
const REQUEST_HEADERS = {
|
|
5
|
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
|
|
6
|
+
'Accept-Language': 'en-US,en;q=0.9',
|
|
7
|
+
};
|
|
8
|
+
// Identifier searched for in the page HTML to locate the embedded player
// response object (see extractInitialPlayerResponse).
const YT_INITIAL_PLAYER_RESPONSE_TOKEN = 'ytInitialPlayerResponse';
|
|
9
|
+
// Matches the INNERTUBE_API_KEY value in page HTML. Capture group 1 is the
// plain JSON form ("INNERTUBE_API_KEY":"..."); capture group 2 is the form
// whose quotes are backslash-escaped (INNERTUBE_API_KEY\":\"...\").
const INNERTUBE_API_KEY_REGEX = /"INNERTUBE_API_KEY":"([^"]+)"|INNERTUBE_API_KEY\\":\\"([^\\"]+)\\"/;
|
|
10
|
+
/**
 * Returns the brace-balanced `{ ... }` substring that begins at the first `{`
 * found at or after `startAt`.
 *
 * Braces inside single- or double-quoted strings are ignored, and a backslash
 * inside a string escapes the character that follows it.
 *
 * @param {string} source - text to scan.
 * @param {number} startAt - index at which the search for `{` begins.
 * @returns {string|null} the balanced object text, or null when there is no
 *   `{` or the braces never balance before the end of `source`.
 */
function extractBalancedJsonObject(source, startAt) {
  const openIndex = source.indexOf('{', startAt);
  if (openIndex < 0) {
    return null;
  }

  let nesting = 0;
  // The quote character of the string currently being scanned, or null when outside a string.
  let activeQuote = null;
  let skipNext = false;

  for (let cursor = openIndex; cursor < source.length; cursor += 1) {
    const char = source[cursor];
    if (!char) {
      continue;
    }

    if (activeQuote !== null) {
      if (skipNext) {
        skipNext = false;
      } else if (char === '\\') {
        skipNext = true;
      } else if (char === activeQuote) {
        activeQuote = null;
      }
      continue;
    }

    switch (char) {
      case '"':
      case "'":
        activeQuote = char;
        break;
      case '{':
        nesting += 1;
        break;
      case '}':
        nesting -= 1;
        if (nesting === 0) {
          return source.slice(openIndex, cursor + 1);
        }
        break;
      default:
        break;
    }
  }

  return null;
}
|
|
57
|
+
/**
 * Extracts the `ytInitialPlayerResponse` object embedded in page HTML.
 *
 * Every mention of the token is tried in document order: the first `=` after
 * the mention is located, the balanced `{ ... }` following it is extracted and
 * parsed as JSON. The first mention that yields an object wins. Trying later
 * mentions matters because the token can appear earlier in the page without
 * being the assignment (for example inside a guard expression), in which case
 * the text picked up after it is unrelated and does not parse.
 *
 * @param {string} html - page HTML.
 * @returns {object|null} the parsed player response, or null when no mention
 *   of the token leads to a parseable object.
 */
function extractInitialPlayerResponse(html) {
  let tokenIndex = html.indexOf(YT_INITIAL_PLAYER_RESPONSE_TOKEN);
  while (tokenIndex >= 0) {
    const assignmentIndex = html.indexOf('=', tokenIndex);
    if (assignmentIndex < 0) {
      // No '=' after this mention means there is none after any later mention either.
      return null;
    }
    const objectText = extractBalancedJsonObject(html, assignmentIndex);
    if (objectText) {
      try {
        const parsed = JSON.parse(objectText);
        if (isObjectLike(parsed)) {
          return parsed;
        }
      } catch {
        // The braces after this mention were not JSON (e.g. a code block); try the next mention.
      }
    }
    tokenIndex = html.indexOf(
      YT_INITIAL_PLAYER_RESPONSE_TOKEN,
      tokenIndex + YT_INITIAL_PLAYER_RESPONSE_TOKEN.length,
    );
  }
  return null;
}
|
|
78
|
+
/** Returns true when `value` is a non-null value whose `typeof` is 'object' (arrays included). */
const isObjectLike = (value) => {
  if (value === null) {
    return false;
  }
  return typeof value === 'object';
};
|
|
79
|
+
/**
 * Pulls the INNERTUBE_API_KEY value out of page HTML.
 *
 * @param {string} html - page HTML.
 * @returns {string|null} the trimmed key, or null when the pattern does not
 *   match or the captured value is blank.
 */
function extractInnertubeApiKey(html) {
  const found = html.match(INNERTUBE_API_KEY_REGEX);
  if (!found) {
    return null;
  }
  // Group 1 is the plain-quoted form, group 2 the backslash-escaped form.
  const [, plainKey, escapedKey] = found;
  const rawKey = plainKey ?? escapedKey;
  if (typeof rawKey !== 'string') {
    return null;
  }
  const trimmedKey = rawKey.trim();
  return trimmedKey.length > 0 ? trimmedKey : null;
}
|
|
84
|
+
/**
 * Fallback transcript lookup: POSTs to the youtubei player endpoint as the
 * ANDROID client and reads caption tracks from the JSON response.
 *
 * @param {Function} fetchImpl - fetch implementation forwarded to fetchWithTimeout.
 * @param {{ html: string, videoId: string }} params - page HTML (scanned for
 *   the INNERTUBE_API_KEY) and the id of the video to look up.
 * @returns {Promise<string|null>} the transcript text, or null when no API key
 *   is found, the request fails or is not OK, the response is not an object,
 *   or no caption track yields text.
 */
async function fetchTranscriptViaAndroidPlayer(fetchImpl, { html, videoId }) {
  const apiKey = extractInnertubeApiKey(html);
  if (!apiKey) {
    return null;
  }
  try {
    const userAgent = REQUEST_HEADERS['User-Agent'] ??
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36';
    // The key is scraped from page HTML, so encode it instead of trusting it
    // to be free of query-string metacharacters such as '&' or '#'.
    const endpoint = `https://www.youtube.com/youtubei/v1/player?key=${encodeURIComponent(apiKey)}`;
    const response = await fetchWithTimeout(fetchImpl, endpoint, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'User-Agent': userAgent,
        'Accept-Language': REQUEST_HEADERS['Accept-Language'] ?? 'en-US,en;q=0.9',
        Accept: 'application/json',
      },
      body: JSON.stringify({
        context: {
          client: {
            clientName: 'ANDROID',
            clientVersion: '20.10.38',
          },
        },
        videoId,
      }),
    });
    if (!response.ok) {
      return null;
    }
    const parsed = await response.json();
    if (!isObjectLike(parsed)) {
      return null;
    }
    return await extractTranscriptFromPlayerPayload(fetchImpl, parsed);
  } catch {
    // Best-effort fallback: any network or parse failure means "no transcript".
    return null;
  }
}
|
|
123
|
+
/**
 * Resolves a transcript for a video from its caption tracks.
 *
 * Strategy, in order:
 *  1. caption tracks listed in the player response embedded in `html`;
 *  2. caption tracks from a youtubei player request built from the page's
 *     bootstrap config (API key, client context and identifying headers);
 *  3. the ANDROID-client player request, used whenever step 2 cannot be
 *     attempted, fails, or yields no transcript.
 *
 * @param {Function} fetchImpl - fetch implementation forwarded to fetchWithTimeout.
 * @param {{ html: string, originalUrl: string, videoId: string }} params - page
 *   HTML, the URL the page was loaded from (sent as Referer and as
 *   `context.client.originalUrl`), and the video id.
 * @returns {Promise<string|null>} the transcript text, or null when every
 *   strategy comes up empty.
 */
export const fetchTranscriptFromCaptionTracks = async (fetchImpl, { html, originalUrl, videoId }) => {
  // Single definition of the last-resort strategy used by every bail-out below.
  const fallbackToAndroidPlayer = () => fetchTranscriptViaAndroidPlayer(fetchImpl, { html, videoId });

  const initialPlayerResponse = extractInitialPlayerResponse(html);
  if (initialPlayerResponse) {
    const embeddedTranscript = await extractTranscriptFromPlayerPayload(fetchImpl, initialPlayerResponse);
    if (embeddedTranscript) {
      return embeddedTranscript;
    }
  }

  const bootstrap = extractYoutubeiBootstrap(html);
  if (!bootstrap) {
    return await fallbackToAndroidPlayer();
  }
  const { apiKey, clientName, clientVersion, context, pageCl, pageLabel, visitorData, xsrfToken } = bootstrap;
  if (!apiKey) {
    return await fallbackToAndroidPlayer();
  }

  // Guard against a missing/non-object context so the request can still be
  // attempted (and the fallback reached) instead of throwing here.
  const contextRecord = isObjectLike(context) ? context : {};
  const clientContext = isObjectLike(contextRecord.client) ? contextRecord.client : {};
  const requestBody = {
    context: {
      ...contextRecord,
      client: {
        ...clientContext,
        originalUrl,
      },
    },
    videoId,
    playbackContext: {
      contentPlaybackContext: {
        html5Preference: 'HTML5_PREF_WANTS',
      },
    },
    contentCheckOk: true,
    racyCheckOk: true,
  };

  let transcript = null;
  try {
    const userAgent = REQUEST_HEADERS['User-Agent'] ??
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36';
    const headers = {
      'Content-Type': 'application/json',
      'User-Agent': userAgent,
      Accept: 'application/json',
      Origin: 'https://www.youtube.com',
      Referer: originalUrl,
      'X-Goog-AuthUser': '0',
      'X-Youtube-Bootstrap-Logged-In': 'false',
    };
    // Optional headers are only sent when the bootstrap config supplied a value.
    if (clientName) {
      headers['X-Youtube-Client-Name'] = clientName;
    }
    if (clientVersion) {
      headers['X-Youtube-Client-Version'] = clientVersion;
    }
    if (visitorData) {
      headers['X-Goog-Visitor-Id'] = visitorData;
    }
    if (typeof pageCl === 'number' && Number.isFinite(pageCl)) {
      headers['X-Youtube-Page-CL'] = String(pageCl);
    }
    if (pageLabel) {
      headers['X-Youtube-Page-Label'] = pageLabel;
    }
    if (xsrfToken) {
      headers['X-Youtube-Identity-Token'] = xsrfToken;
    }

    // The key comes from page HTML; encode it before placing it in the query string.
    const endpoint = `https://www.youtube.com/youtubei/v1/player?key=${encodeURIComponent(apiKey)}`;
    const response = await fetchWithTimeout(fetchImpl, endpoint, {
      method: 'POST',
      headers,
      body: JSON.stringify(requestBody),
    });
    if (response.ok) {
      const raw = await response.text();
      const parsed = JSON.parse(sanitizeYoutubeJsonResponse(raw));
      if (isObjectLike(parsed)) {
        transcript = await extractTranscriptFromPlayerPayload(fetchImpl, parsed);
      }
    }
  } catch {
    // Network or parse failure on the web-client request: fall through to the fallback.
    transcript = null;
  }

  if (transcript) {
    return transcript;
  }
  // Invoked outside the try so a failure inside the fallback is not retried.
  return await fallbackToAndroidPlayer();
};
|
|
214
|
+
const extractTranscriptFromPlayerPayload = async (fetchImpl, payload) => {
|
|
215
|
+
const payloadRecord = payload;
|
|
216
|
+
const captionsCandidate = payloadRecord.captions;
|
|
217
|
+
const captions = isObjectLike(captionsCandidate) ? captionsCandidate : null;
|
|
218
|
+
const rendererCandidate = (captions ? captions.playerCaptionsTracklistRenderer : null) ??
|
|
219
|
+
payloadRecord.playerCaptionsTracklistRenderer;
|
|
220
|
+
const renderer = isObjectLike(rendererCandidate)
|
|
221
|
+
? rendererCandidate
|
|
222
|
+
: null;
|
|
223
|
+
const captionTracks = Array.isArray(renderer?.captionTracks)
|
|
224
|
+
? renderer?.captionTracks
|
|
225
|
+
: null;
|
|
226
|
+
const automaticTracks = Array.isArray(renderer?.automaticCaptions)
|
|
227
|
+
? renderer?.automaticCaptions
|
|
228
|
+
: null;
|
|
229
|
+
const orderedTracks = [];
|
|
230
|
+
if (captionTracks) {
|
|
231
|
+
orderedTracks.push(...captionTracks.filter((track) => isObjectLike(track)));
|
|
232
|
+
}
|
|
233
|
+
if (automaticTracks) {
|
|
234
|
+
orderedTracks.push(...automaticTracks.filter((track) => isObjectLike(track)));
|
|
235
|
+
}
|
|
236
|
+
const seenLanguages = new Set();
|
|
237
|
+
const normalizedTracks = [];
|
|
238
|
+
for (const candidate of orderedTracks) {
|
|
239
|
+
if (!isObjectLike(candidate)) {
|
|
240
|
+
continue;
|
|
241
|
+
}
|
|
242
|
+
const trackRecord = candidate;
|
|
243
|
+
const languageCandidate = trackRecord.languageCode;
|
|
244
|
+
const lang = typeof languageCandidate === 'string' ? languageCandidate.toLowerCase() : '';
|
|
245
|
+
if (lang && seenLanguages.has(lang)) {
|
|
246
|
+
continue;
|
|
247
|
+
}
|
|
248
|
+
if (lang) {
|
|
249
|
+
seenLanguages.add(lang);
|
|
250
|
+
}
|
|
251
|
+
normalizedTracks.push(candidate);
|
|
252
|
+
}
|
|
253
|
+
const sortedTracks = [...normalizedTracks].toSorted((a, b) => {
|
|
254
|
+
const aTrack = a;
|
|
255
|
+
const bTrack = b;
|
|
256
|
+
const aKind = typeof aTrack.kind === 'string' ? aTrack.kind : '';
|
|
257
|
+
const bKind = typeof bTrack.kind === 'string' ? bTrack.kind : '';
|
|
258
|
+
if (aKind === 'asr' && bKind !== 'asr') {
|
|
259
|
+
return -1;
|
|
260
|
+
}
|
|
261
|
+
if (bKind === 'asr' && aKind !== 'asr') {
|
|
262
|
+
return 1;
|
|
263
|
+
}
|
|
264
|
+
const aLang = typeof aTrack.languageCode === 'string' ? aTrack.languageCode : '';
|
|
265
|
+
const bLang = typeof bTrack.languageCode === 'string' ? bTrack.languageCode : '';
|
|
266
|
+
if (aLang === 'en' && bLang !== 'en') {
|
|
267
|
+
return -1;
|
|
268
|
+
}
|
|
269
|
+
if (bLang === 'en' && aLang !== 'en') {
|
|
270
|
+
return 1;
|
|
271
|
+
}
|
|
272
|
+
return 0;
|
|
273
|
+
});
|
|
274
|
+
return await findFirstTranscript(fetchImpl, sortedTracks, 0);
|
|
275
|
+
};
|
|
276
|
+
/**
 * Tries caption tracks one at a time, starting at `index`, and returns the
 * first non-empty download result.
 *
 * @param {Function} fetchImpl - fetch implementation passed to the downloader.
 * @param {Array<object>} tracks - candidate tracks in priority order.
 * @param {number} index - position of the first track to try.
 * @returns {Promise<string|null>} the first truthy transcript, or null when
 *   every remaining track fails.
 */
const findFirstTranscript = async (fetchImpl, tracks, index) => {
  for (let position = index; position < tracks.length; position += 1) {
    // Tracks are attempted sequentially so later ones are only fetched on failure.
    const transcript = await downloadCaptionTrack(fetchImpl, tracks[position] ?? {});
    if (transcript) {
      return transcript;
    }
  }
  return null;
};
|
|
286
|
+
/**
 * Downloads one caption track and returns its text.
 *
 * The track URL (`baseUrl`, else `url`) is first requested with
 * `fmt=json3&alt=json`. A usable response body is parsed as a JSON transcript
 * and, failing that, as an XML transcript. If the request fails, is not OK,
 * returns an empty body, or the body parses to nothing, the XML download of
 * the original URL is used instead.
 *
 * @param {Function} fetchImpl - fetch implementation forwarded to fetchWithTimeout.
 * @param {object} track - caption track record carrying `baseUrl` or `url`.
 * @returns {Promise<string|null>} transcript text, or null when the track has
 *   no usable URL or nothing could be extracted.
 */
const downloadCaptionTrack = async (fetchImpl, track) => {
  let sourceUrl = null;
  if (typeof track.baseUrl === 'string') {
    sourceUrl = track.baseUrl;
  } else if (typeof track.url === 'string') {
    sourceUrl = track.url;
  }
  if (!sourceUrl) {
    return null;
  }

  let json3Url;
  try {
    const withFormat = new URL(sourceUrl);
    withFormat.searchParams.set('fmt', 'json3');
    withFormat.searchParams.set('alt', 'json');
    json3Url = withFormat.toString();
  } catch {
    // Not an absolute URL: append the parameters textually instead.
    const joiner = sourceUrl.includes('?') ? '&' : '?';
    json3Url = `${sourceUrl}${joiner}fmt=json3&alt=json`;
  }

  try {
    const response = await fetchWithTimeout(fetchImpl, json3Url, {
      headers: REQUEST_HEADERS,
    });
    if (response.ok) {
      const body = await response.text();
      if (body.length > 0) {
        // The XML parser is only consulted when the JSON parser finds nothing.
        const transcript = parseJsonTranscript(body) || parseXmlTranscript(body);
        if (transcript) {
          return transcript;
        }
      }
    }
  } catch {
    // Fall through to the XML download below.
  }
  return await downloadXmlTranscript(fetchImpl, sourceUrl);
};
|
|
333
|
+
/**
 * Removes every `fmt` parameter from a URL's query string without otherwise
 * re-encoding the URL (other parameters are left byte-for-byte intact).
 */
const stripFmtQueryParam = (url) => {
  const queryStart = url.indexOf('?');
  if (queryStart < 0) {
    // No query string: keep the historical textual behaviour.
    return url.replaceAll(/&fmt=[^&]+/g, '');
  }
  const base = url.slice(0, queryStart);
  const keptPairs = url
    .slice(queryStart + 1)
    .split('&')
    .filter((pair) => pair !== 'fmt' && !pair.startsWith('fmt='));
  return keptPairs.length > 0 ? `${base}?${keptPairs.join('&')}` : base;
};
/**
 * Downloads a caption track in its default format (the `fmt` parameter is
 * stripped from the URL) and extracts the transcript text.
 *
 * `fmt` is removed wherever it sits in the query string, including when it is
 * the first parameter (`?fmt=...`), which a plain `&fmt=` match would miss.
 * The body is tried as a JSON transcript first and as XML second.
 *
 * @param {Function} fetchImpl - fetch implementation forwarded to fetchWithTimeout.
 * @param {string} baseUrl - caption track URL.
 * @returns {Promise<string|null>} transcript text, or null when the request
 *   fails, is not OK, or the body contains no transcript text.
 */
const downloadXmlTranscript = async (fetchImpl, baseUrl) => {
  const xmlUrl = stripFmtQueryParam(baseUrl);
  try {
    const response = await fetchWithTimeout(fetchImpl, xmlUrl, { headers: REQUEST_HEADERS });
    if (!response.ok) {
      return null;
    }
    const text = await response.text();
    const jsonResult = parseJsonTranscript(text);
    if (jsonResult) {
      return jsonResult;
    }
    return parseXmlTranscript(text);
  } catch {
    // Best-effort download: a network failure simply means no transcript.
    return null;
  }
};
|
|
351
|
+
/**
 * Parses a JSON caption payload (`{ events: [{ segs: [{ utf8 }] }] }`) into
 * plain text.
 *
 * Each event contributes one line: the concatenation of its segments' string
 * `utf8` values, trimmed. Events without a `segs` array, and events whose
 * text is blank, are skipped.
 *
 * @param {string} raw - response body.
 * @returns {string|null} lines joined with '\n', or null when `raw` is not
 *   JSON, has no `events` array, or yields no text.
 */
const parseJsonTranscript = (raw) => {
  // Text of a single event; '' when the event carries no usable segments.
  const readEventText = (event) => {
    if (!isObjectLike(event) || !Array.isArray(event.segs)) {
      return '';
    }
    let combined = '';
    for (const segment of event.segs) {
      if (isObjectLike(segment) && typeof segment.utf8 === 'string') {
        combined += segment.utf8;
      }
    }
    return combined.trim();
  };

  try {
    const payload = JSON.parse(raw);
    if (!isObjectLike(payload) || !Array.isArray(payload.events)) {
      return null;
    }
    const joined = payload.events
      .map((event) => readEventText(event))
      .filter((line) => line.length > 0)
      .join('\n')
      .trim();
    return joined.length > 0 ? joined : null;
  } catch {
    return null;
  }
};
|
|
394
|
+
/**
 * Extracts transcript text from an XML caption document.
 *
 * The inner content of every `<text ...>...</text>` element is entity-decoded,
 * has runs of whitespace collapsed to a single space, and is trimmed; blank
 * results are dropped.
 *
 * @param {string} xml - response body.
 * @returns {string|null} the remaining lines joined with '\n', or null when
 *   no element yields text.
 */
const parseXmlTranscript = (xml) => {
  const textElements = String(xml).matchAll(/<text[^>]*>([\s\S]*?)<\/text>/gi);
  const joined = Array.from(textElements, (element) =>
    decodeHtmlEntities(element[1] ?? '').replaceAll(/\s+/g, ' ').trim(),
  )
    .filter((line) => line.length > 0)
    .join('\n')
    .trim();
  return joined.length > 0 ? joined : null;
};
|
|
409
|
+
//# sourceMappingURL=captions.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"captions.js","sourceRoot":"","sources":["../../../../../../../src/content/link-preview/transcript/providers/youtube/captions.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,gCAAgC,CAAA;AACjE,OAAO,EAAE,kBAAkB,EAAE,2BAA2B,EAAE,MAAM,gBAAgB,CAAA;AAChF,OAAO,EAAE,wBAAwB,EAAE,MAAM,UAAU,CAAA;AAQnD,MAAM,eAAe,GAA2B;IAC9C,YAAY,EACV,iHAAiH;IACnH,iBAAiB,EAAE,gBAAgB;CACpC,CAAA;AAED,MAAM,gCAAgC,GAAG,yBAAyB,CAAA;AAClE,MAAM,uBAAuB,GAAG,oEAAoE,CAAA;AAEpG,SAAS,yBAAyB,CAAC,MAAc,EAAE,OAAe;IAChE,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,CAAC,GAAG,EAAE,OAAO,CAAC,CAAA;IAC1C,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;QACd,OAAO,IAAI,CAAA;IACb,CAAC;IAED,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,IAAI,QAAQ,GAAG,KAAK,CAAA;IACpB,IAAI,KAAK,GAAqB,IAAI,CAAA;IAClC,IAAI,QAAQ,GAAG,KAAK,CAAA;IAEpB,KAAK,IAAI,CAAC,GAAG,KAAK,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QAC9C,MAAM,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,CAAA;QACpB,IAAI,CAAC,EAAE,EAAE,CAAC;YACR,SAAQ;QACV,CAAC;QAED,IAAI,QAAQ,EAAE,CAAC;YACb,IAAI,QAAQ,EAAE,CAAC;gBACb,QAAQ,GAAG,KAAK,CAAA;gBAChB,SAAQ;YACV,CAAC;YACD,IAAI,EAAE,KAAK,IAAI,EAAE,CAAC;gBAChB,QAAQ,GAAG,IAAI,CAAA;gBACf,SAAQ;YACV,CAAC;YACD,IAAI,KAAK,IAAI,EAAE,KAAK,KAAK,EAAE,CAAC;gBAC1B,QAAQ,GAAG,KAAK,CAAA;gBAChB,KAAK,GAAG,IAAI,CAAA;YACd,CAAC;YACD,SAAQ;QACV,CAAC;QAED,IAAI,EAAE,KAAK,GAAG,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;YAC7B,QAAQ,GAAG,IAAI,CAAA;YACf,KAAK,GAAG,EAAE,CAAA;YACV,SAAQ;QACV,CAAC;QAED,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;YACf,KAAK,IAAI,CAAC,CAAA;YACV,SAAQ;QACV,CAAC;QACD,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;YACf,KAAK,IAAI,CAAC,CAAA;YACV,IAAI,KAAK,KAAK,CAAC,EAAE,CAAC;gBAChB,OAAO,MAAM,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC,GAAG,CAAC,CAAC,CAAA;YACnC,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAA;AACb,CAAC;AAED,SAAS,4BAA4B,CAAC,IAAY;IAChD,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,gCAAgC,CAAC,CAAA;IACjE,IAAI,UAAU,GAAG,CAAC,EAAE,CAAC;QACnB,OAAO,IAAI,CAAA;IACb,CAAC;IACD,MAAM,eAAe,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,UAAU,CAAC,CAAA;IACrD,IAAI,eAAe,GAAG,CAAC,EAAE,CAAC;QACxB,OAAO,IAAI,CAAA;IACb,CAAC;IACD,MAAM,UAAU,GAAG,yBAAyB,C
AAC,IAAI,EAAE,eAAe,CAAC,CAAA;IACnE,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,OAAO,IAAI,CAAA;IACb,CAAC;IAED,IAAI,CAAC;QACH,MAAM,MAAM,GAAY,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAA;QAC9C,OAAO,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAA;IAC7C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED,MAAM,YAAY,GAAG,CAAC,KAAc,EAAoC,EAAE,CACxE,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,CAAA;AAE7C,SAAS,sBAAsB,CAAC,IAAY;IAC1C,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,uBAAuB,CAAC,CAAA;IACjD,MAAM,GAAG,GAAG,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,CAAA;IAC5C,OAAO,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAA;AAC7E,CAAC;AAoBD,KAAK,UAAU,+BAA+B,CAC5C,SAAuB,EACvB,EAAE,IAAI,EAAE,OAAO,EAAqC;IAEpD,MAAM,MAAM,GAAG,sBAAsB,CAAC,IAAI,CAAC,CAAA;IAC3C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,IAAI,CAAA;IACb,CAAC;IAED,IAAI,CAAC;QACH,MAAM,SAAS,GACb,eAAe,CAAC,YAAY,CAAC;YAC7B,iHAAiH,CAAA;QAEnH,MAAM,QAAQ,GAAG,MAAM,gBAAgB,CACrC,SAAS,EACT,kDAAkD,MAAM,EAAE,EAC1D;YACE,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,YAAY,EAAE,SAAS;gBACvB,iBAAiB,EAAE,eAAe,CAAC,iBAAiB,CAAC,IAAI,gBAAgB;gBACzE,MAAM,EAAE,kBAAkB;aAC3B;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,OAAO,EAAE;oBACP,MAAM,EAAE;wBACN,UAAU,EAAE,SAAS;wBACrB,aAAa,EAAE,UAAU;qBAC1B;iBACF;gBACD,OAAO;aACR,CAAC;SACH,CACF,CAAA;QAED,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,OAAO,IAAI,CAAA;QACb,CAAC;QAED,MAAM,MAAM,GAAY,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAA;QAC7C,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1B,OAAO,IAAI,CAAA;QACb,CAAC;QAED,OAAO,MAAM,kCAAkC,CAAC,SAAS,EAAE,MAAM,CAAC,CAAA;IACpE,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED,MAAM,CAAC,MAAM,gCAAgC,GAAG,KAAK,EACnD,SAAuB,EACvB,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAA4B,EAChC,EAAE;IAC1B,MAAM,qBAAqB,GAAG,4BAA4B,CAAC,IAAI,CAAC,CAAA;IAChE,IAAI,qBAAqB,EAAE,CAAC;QAC1B,MAAM,UAAU,GAAG,MAAM,kCAAkC,CAAC,SAAS,EAAE,qBAAqB,CAAC,CAAA;QAC7F,IAAI,UAAU,EAAE,CAAC;YACf,OAAO,UAAU,CAAA;QACnB,CAAC;IACH,CAAC;IAED,MAAM
,SAAS,GAAG,wBAAwB,CAAC,IAAI,CAAC,CAAA;IAChD,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,OAAO,MAAM,+BAA+B,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAA;IAC5E,CAAC;IAED,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,aAAa,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,WAAW,EAAE,SAAS,EAAE,GAC7F,SAAS,CAAA;IACX,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,MAAM,+BAA+B,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAA;IAC5E,CAAC;IAED,MAAM,aAAa,GAAG,OAA+B,CAAA;IACrD,MAAM,aAAa,GAAG,YAAY,CAAC,aAAa,CAAC,MAAM,CAAC;QACtD,CAAC,CAAE,aAAa,CAAC,MAAkC;QACnD,CAAC,CAAC,EAAE,CAAA;IACN,MAAM,WAAW,GAA4B;QAC3C,OAAO,EAAE;YACP,GAAG,aAAa;YAChB,MAAM,EAAE;gBACN,GAAG,aAAa;gBAChB,WAAW;aACZ;SACF;QACD,OAAO;QACP,eAAe,EAAE;YACf,sBAAsB,EAAE;gBACtB,eAAe,EAAE,kBAAkB;aACpC;SACF;QACD,cAAc,EAAE,IAAI;QACpB,WAAW,EAAE,IAAI;KAClB,CAAA;IAED,IAAI,CAAC;QACH,MAAM,SAAS,GACb,eAAe,CAAC,YAAY,CAAC;YAC7B,iHAAiH,CAAA;QACnH,MAAM,OAAO,GAA2B;YACtC,cAAc,EAAE,kBAAkB;YAClC,YAAY,EAAE,SAAS;YACvB,MAAM,EAAE,kBAAkB;YAC1B,MAAM,EAAE,yBAAyB;YACjC,OAAO,EAAE,WAAW;YACpB,iBAAiB,EAAE,GAAG;YACtB,+BAA+B,EAAE,OAAO;SACzC,CAAA;QAED,IAAI,UAAU,EAAE,CAAC;YACf,OAAO,CAAC,uBAAuB,CAAC,GAAG,UAAU,CAAA;QAC/C,CAAC;QACD,IAAI,aAAa,EAAE,CAAC;YAClB,OAAO,CAAC,0BAA0B,CAAC,GAAG,aAAa,CAAA;QACrD,CAAC;QACD,IAAI,WAAW,EAAE,CAAC;YAChB,OAAO,CAAC,mBAAmB,CAAC,GAAG,WAAW,CAAA;QAC5C,CAAC;QACD,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1D,OAAO,CAAC,mBAAmB,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,CAAA;QAC/C,CAAC;QACD,IAAI,SAAS,EAAE,CAAC;YACd,OAAO,CAAC,sBAAsB,CAAC,GAAG,SAAS,CAAA;QAC7C,CAAC;QACD,IAAI,SAAS,EAAE,CAAC;YACd,OAAO,CAAC,0BAA0B,CAAC,GAAG,SAAS,CAAA;QACjD,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,gBAAgB,CACrC,SAAS,EACT,kDAAkD,MAAM,EAAE,EAC1D;YACE,MAAM,EAAE,MAAM;YACd,OAAO;YACP,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC;SAClC,CACF,CAAA;QAED,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,OAAO,MAAM,+BAA+B,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAA;QAC5E,CAAC;QAED,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAA;QACjC,MAAM,SAAS,GAAG,2BAA2B,CAAC,GAAG,CAAC,CAAA;QAClD,MAAM,MAAM,GAAY,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;QAC7C,IAAI
,CAAC,YAAY,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1B,OAAO,MAAM,+BAA+B,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAA;QAC5E,CAAC;QAED,MAAM,UAAU,GAAG,MAAM,kCAAkC,CAAC,SAAS,EAAE,MAAM,CAAC,CAAA;QAC9E,IAAI,UAAU,EAAE,CAAC;YACf,OAAO,UAAU,CAAA;QACnB,CAAC;QAED,OAAO,MAAM,+BAA+B,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAA;IAC5E,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,MAAM,+BAA+B,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAA;IAC5E,CAAC;AACH,CAAC,CAAA;AAED,MAAM,kCAAkC,GAAG,KAAK,EAC9C,SAAuB,EACvB,OAAgC,EACR,EAAE;IAC1B,MAAM,aAAa,GAAG,OAA0B,CAAA;IAEhD,MAAM,iBAAiB,GAAG,aAAa,CAAC,QAAQ,CAAA;IAChD,MAAM,QAAQ,GAAG,YAAY,CAAC,iBAAiB,CAAC,CAAC,CAAC,CAAE,iBAAqC,CAAC,CAAC,CAAC,IAAI,CAAA;IAEhG,MAAM,iBAAiB,GACrB,CAAC,QAAQ,CAAC,CAAC,CAAE,QAA4B,CAAC,+BAA+B,CAAC,CAAC,CAAC,IAAI,CAAC;QACjF,aAAa,CAAC,+BAA+B,CAAA;IAE/C,MAAM,QAAQ,GAAG,YAAY,CAAC,iBAAiB,CAAC;QAC9C,CAAC,CAAE,iBAAyC;QAC5C,CAAC,CAAC,IAAI,CAAA;IACR,MAAM,aAAa,GAAG,KAAK,CAAC,OAAO,CAAC,QAAQ,EAAE,aAAa,CAAC;QAC1D,CAAC,CAAE,QAAQ,EAAE,aAA2B;QACxC,CAAC,CAAC,IAAI,CAAA;IACR,MAAM,eAAe,GAAG,KAAK,CAAC,OAAO,CAAC,QAAQ,EAAE,iBAAiB,CAAC;QAChE,CAAC,CAAE,QAAQ,EAAE,iBAA+B;QAC5C,CAAC,CAAC,IAAI,CAAA;IAER,MAAM,aAAa,GAA8B,EAAE,CAAA;IACnD,IAAI,aAAa,EAAE,CAAC;QAClB,aAAa,CAAC,IAAI,CAChB,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,KAAK,EAAoC,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,CAC1F,CAAA;IACH,CAAC;IACD,IAAI,eAAe,EAAE,CAAC;QACpB,aAAa,CAAC,IAAI,CAChB,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC,KAAK,EAAoC,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,CAC5F,CAAA;IACH,CAAC;IACD,MAAM,aAAa,GAAG,IAAI,GAAG,EAAU,CAAA;IACvC,MAAM,gBAAgB,GAA8B,EAAE,CAAA;IACtD,KAAK,MAAM,SAAS,IAAI,aAAa,EAAE,CAAC;QACtC,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,EAAE,CAAC;YAC7B,SAAQ;QACV,CAAC;QACD,MAAM,WAAW,GAAG,SAA+B,CAAA;QACnD,MAAM,iBAAiB,GAAG,WAAW,CAAC,YAAY,CAAA;QAClD,MAAM,IAAI,GAAG,OAAO,iBAAiB,KAAK,QAAQ,CAAC,CAAC,CAAC,iBAAiB,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,CAAA;QACzF,IAAI,IAAI,IAAI,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;YACpC,SAAQ;QACV,CAAC;QACD,IAAI,IAAI,EAAE,CAAC;YACT,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACzB,CAAC;QACD,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC,CA
AA;IAClC,CAAC;IAED,MAAM,YAAY,GAAG,CAAC,GAAG,gBAAgB,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QAC3D,MAAM,MAAM,GAAG,CAAuB,CAAA;QACtC,MAAM,MAAM,GAAG,CAAuB,CAAA;QACtC,MAAM,KAAK,GAAG,OAAO,MAAM,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;QAChE,MAAM,KAAK,GAAG,OAAO,MAAM,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;QAChE,IAAI,KAAK,KAAK,KAAK,IAAI,KAAK,KAAK,KAAK,EAAE,CAAC;YACvC,OAAO,CAAC,CAAC,CAAA;QACX,CAAC;QACD,IAAI,KAAK,KAAK,KAAK,IAAI,KAAK,KAAK,KAAK,EAAE,CAAC;YACvC,OAAO,CAAC,CAAA;QACV,CAAC;QACD,MAAM,KAAK,GAAG,OAAO,MAAM,CAAC,YAAY,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,CAAA;QAChF,MAAM,KAAK,GAAG,OAAO,MAAM,CAAC,YAAY,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,CAAA;QAChF,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YACrC,OAAO,CAAC,CAAC,CAAA;QACX,CAAC;QACD,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YACrC,OAAO,CAAC,CAAA;QACV,CAAC;QACD,OAAO,CAAC,CAAA;IACV,CAAC,CAAC,CAAA;IAEF,OAAO,MAAM,mBAAmB,CAAC,SAAS,EAAE,YAAY,EAAE,CAAC,CAAC,CAAA;AAC9D,CAAC,CAAA;AAED,MAAM,mBAAmB,GAAG,KAAK,EAC/B,SAAuB,EACvB,MAA0C,EAC1C,KAAa,EACW,EAAE;IAC1B,IAAI,KAAK,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;QAC3B,OAAO,IAAI,CAAA;IACb,CAAC;IACD,MAAM,SAAS,GAAG,MAAM,oBAAoB,CAAC,SAAS,EAAE,MAAM,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAA;IAC5E,IAAI,SAAS,EAAE,CAAC;QACd,OAAO,SAAS,CAAA;IAClB,CAAC;IACD,OAAO,mBAAmB,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,GAAG,CAAC,CAAC,CAAA;AAC1D,CAAC,CAAA;AAED,MAAM,oBAAoB,GAAG,KAAK,EAChC,SAAuB,EACvB,KAA8B,EACN,EAAE;IAC1B,MAAM,WAAW,GAAG,KAA2B,CAAA;IAC/C,MAAM,OAAO,GACX,OAAO,WAAW,CAAC,OAAO,KAAK,QAAQ;QACrC,CAAC,CAAC,WAAW,CAAC,OAAO;QACrB,CAAC,CAAC,OAAO,WAAW,CAAC,GAAG,KAAK,QAAQ;YACnC,CAAC,CAAC,WAAW,CAAC,GAAG;YACjB,CAAC,CAAC,IAAI,CAAA;IACZ,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO,IAAI,CAAA;IACb,CAAC;IAED,MAAM,QAAQ,GAAG,CAAC,GAAG,EAAE;QACrB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAA;YAC/B,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,EAAE,OAAO,CAAC,CAAA;YACvC,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,CAAA;Y
ACtC,OAAO,MAAM,CAAC,QAAQ,EAAE,CAAA;QAC1B,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,SAAS,GAAG,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAA;YACnD,OAAO,GAAG,OAAO,GAAG,SAAS,oBAAoB,CAAA;QACnD,CAAC;IACH,CAAC,CAAC,EAAE,CAAA;IAEJ,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,gBAAgB,CAAC,SAAS,EAAE,QAAQ,EAAE;YAC3D,OAAO,EAAE,eAAe;SACzB,CAAC,CAAA;QACF,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,OAAO,MAAM,qBAAqB,CAAC,SAAS,EAAE,OAAO,CAAC,CAAA;QACxD,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAA;QAClC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtB,OAAO,MAAM,qBAAqB,CAAC,SAAS,EAAE,OAAO,CAAC,CAAA;QACxD,CAAC;QACD,MAAM,UAAU,GAAG,mBAAmB,CAAC,IAAI,CAAC,CAAA;QAC5C,IAAI,UAAU,EAAE,CAAC;YACf,OAAO,UAAU,CAAA;QACnB,CAAC;QACD,MAAM,WAAW,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAA;QAC5C,IAAI,WAAW,EAAE,CAAC;YAChB,OAAO,WAAW,CAAA;QACpB,CAAC;QACD,OAAO,MAAM,qBAAqB,CAAC,SAAS,EAAE,OAAO,CAAC,CAAA;IACxD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,MAAM,qBAAqB,CAAC,SAAS,EAAE,OAAO,CAAC,CAAA;IACxD,CAAC;AACH,CAAC,CAAA;AAED,MAAM,qBAAqB,GAAG,KAAK,EACjC,SAAuB,EACvB,OAAe,EACS,EAAE;IAC1B,MAAM,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,aAAa,EAAE,EAAE,CAAC,CAAA;IACpD,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,gBAAgB,CAAC,SAAS,EAAE,MAAM,EAAE,EAAE,OAAO,EAAE,eAAe,EAAE,CAAC,CAAA;QACxF,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,OAAO,IAAI,CAAA;QACb,CAAC;QACD,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAA;QAClC,MAAM,UAAU,GAAG,mBAAmB,CAAC,IAAI,CAAC,CAAA;QAC5C,IAAI,UAAU,EAAE,CAAC;YACf,OAAO,UAAU,CAAA;QACnB,CAAC;QACD,OAAO,kBAAkB,CAAC,IAAI,CAAC,CAAA;IACjC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC,CAAA;AAID,MAAM,mBAAmB,GAAG,CAAC,GAAW,EAAiB,EAAE;IACzD,IAAI,CAAC;QACH,MAAM,MAAM,GAAY,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;QACvC,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1B,OAAO,IAAI,CAAA;QACb,CAAC;QACD,MAAM,aAAa,GAAG,MAAwB,CAAA;QAC9C,MAAM,aAAa,GAAG,aAAa,CAAC,MAAM,CAAA;QAC1C,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,aAAa,CAAC,EAAE,CAAC;YAClC,OAAO,IAAI,CAAA;QACb,CAAC;QACD,MAAM,MAAM,GAAG,aAAa,CAAA;QAC5B,MAAM,KAAK,GAAa,EAAE,CAAA;QAC1B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,
IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,CAAC;gBACzB,SAAQ;YACV,CAAC;YACD,MAAM,WAAW,GAAG,KAA2B,CAAA;YAC/C,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAE,WAAW,CAAC,IAAkB,CAAC,CAAC,CAAC,IAAI,CAAA;YACrF,IAAI,CAAC,IAAI,EAAE,CAAC;gBACV,SAAQ;YACV,CAAC;YACD,MAAM,IAAI,GAAG,IAAI;iBACd,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;gBACX,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC;oBACvB,OAAO,EAAE,CAAA;gBACX,CAAC;gBACD,MAAM,SAAS,GAAG,GAA2B,CAAA;gBAC7C,OAAO,OAAO,SAAS,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;YACjE,CAAC,CAAC;iBACD,IAAI,CAAC,EAAE,CAAC;iBACR,IAAI,EAAE,CAAA;YACT,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACpB,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAClB,CAAC;QACH,CAAC;QACD,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAA;QAC1C,OAAO,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAA;IAClD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC,CAAA;AAED,MAAM,kBAAkB,GAAG,CAAC,GAAW,EAAiB,EAAE;IACxD,MAAM,OAAO,GAAG,iCAAiC,CAAA;IACjD,MAAM,KAAK,GAAa,EAAE,CAAA;IAC1B,IAAI,KAAK,GAA2B,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IACrD,OAAO,KAAK,EAAE,CAAC;QACb,MAAM,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;QAC9B,MAAM,OAAO,GAAG,kBAAkB,CAAC,OAAO,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;QAC1E,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QACrB,CAAC;QACD,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IAC3B,CAAC;IACD,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAA;IAC1C,OAAO,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAA;AAClD,CAAC,CAAA"}
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import { execFile } from 'node:child_process';
|
|
2
|
+
import { promisify } from 'node:util';
|
|
3
|
+
import { fetchWithTimeout } from '../../../fetch-with-timeout.js';
|
|
4
|
+
import { sanitizeYoutubeJsonResponse } from '../../utils.js';
|
|
5
|
+
// Promise-returning variant of child_process.execFile, produced by util.promisify.
const execFileAsync = promisify(execFile);
|
|
6
|
+
/**
 * Reports whether `value` is a non-null object that is not an array.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} True only for non-null, non-array objects.
 */
const isRecord = (value) => {
    if (value === null || Array.isArray(value)) {
        return false;
    }
    return typeof value === 'object';
};
|
|
7
|
+
/**
 * Extracts plain text from a "json3"-style caption payload.
 *
 * The payload must be a JSON object with an `events` array. Each event may
 * carry a `segs` array whose items hold their text in a string `utf8` field.
 * The segments of one event are concatenated and trimmed; events that still
 * have text are joined with newlines.
 *
 * @param {string} raw - JSON text to parse.
 * @returns {string | null} The transcript, or null when the input is not valid
 *   JSON, has no `events` array, or yields no text at all.
 */
const parseJson3Transcript = (raw) => {
    // Text of a single segment; anything without a string `utf8` adds nothing.
    const segmentText = (segment) => {
        if (isRecord(segment) && typeof segment.utf8 === 'string') {
            return segment.utf8;
        }
        return '';
    };
    try {
        const payload = JSON.parse(raw);
        if (!isRecord(payload) || !Array.isArray(payload.events)) {
            return null;
        }
        const collected = [];
        for (const entry of payload.events) {
            if (!isRecord(entry) || !Array.isArray(entry.segs)) {
                continue;
            }
            let eventText = '';
            for (const segment of entry.segs) {
                eventText += segmentText(segment);
            }
            eventText = eventText.trim();
            if (eventText.length > 0) {
                collected.push(eventText);
            }
        }
        const joined = collected.join('\n').trim();
        return joined.length > 0 ? joined : null;
    }
    catch {
        // Malformed JSON is treated the same as "no transcript".
        return null;
    }
};
|
|
48
|
+
/**
 * Chooses a caption download URL from a parsed info object.
 *
 * Reads `info.subtitles` and `info.automatic_captions`, each treated as a map
 * of language code to an array of track entries (`{ ext, url, ... }`).
 * Languages are ranked: exact "en" first, then "en-*", then any other code
 * starting with "en", then everything else. The sort is stable, so among
 * equally ranked languages `subtitles` entries stay ahead of
 * `automatic_captions` entries. For each language in that order a "json3"
 * track is preferred over a "vtt" track.
 *
 * @param {object} info - Parsed info object.
 * @returns {string | null} The first usable caption URL, or null if none.
 */
function pickCaptionUrl(info) {
    // Lower rank means more preferred.
    const rankLanguage = (code) => {
        const normalizedCode = code.toLowerCase();
        if (normalizedCode === 'en') {
            return 0;
        }
        if (!normalizedCode.startsWith('en')) {
            return 10;
        }
        return normalizedCode.startsWith('en-') ? 1 : 2;
    };
    const languageTracks = [info.subtitles, info.automatic_captions]
        .filter((group) => isRecord(group))
        .flatMap((group) => Object.entries(group));
    const ranked = languageTracks.toSorted((left, right) => rankLanguage(left[0]) - rankLanguage(right[0]));
    for (const [, tracks] of ranked) {
        if (!Array.isArray(tracks)) {
            continue;
        }
        const usable = tracks.filter((track) => isRecord(track));
        for (const format of ['json3', 'vtt']) {
            // Only the first track of this format with a string URL is considered;
            // if that URL is empty we move on to the next format, not the next track.
            const match = usable.find((track) => track.ext === format && typeof track.url === 'string');
            if (match !== undefined && match.url.length > 0) {
                return match.url;
            }
        }
    }
    return null;
}
|
|
84
|
+
// Fallback timeout (ms) used when the caller does not supply a finite `timeoutMs`.
const YT_DLP_DEFAULT_TIMEOUT_MS = 60_000;
// execFile caps captured stdout at 1 MiB by default and rejects once exceeded.
// The single-JSON dump lists every format and caption track and can be larger
// than that, so the cap is raised explicitly.
const YT_DLP_MAX_BUFFER_BYTES = 64 * 1024 * 1024;

/**
 * Best-effort transcript lookup that shells out to the `yt-dlp` binary.
 *
 * Steps: run `yt-dlp --dump-single-json` for `url`, pick a caption URL from the
 * dumped metadata via `pickCaptionUrl`, download it with `fetchWithTimeout`,
 * and parse the body with `parseJson3Transcript`.
 *
 * NOTE(review): `pickCaptionUrl` can return a "vtt" URL when no "json3" track
 * exists; such a body is not JSON, so `parseJson3Transcript` will presumably
 * return null for it.
 *
 * @param {Function} fetchImpl - Fetch implementation handed to `fetchWithTimeout`.
 * @param {string} url - Video URL passed to yt-dlp.
 * @param {{ timeoutMs?: number }} [options] - `timeoutMs` bounds the yt-dlp run
 *   and (when positive) the caption download; defaults to 60 seconds.
 * @returns {Promise<string | null>} Transcript text, or null on any failure
 *   (yt-dlp missing or failing, timeout, unparsable output, no captions,
 *   non-OK download response). This function never rejects.
 */
export async function fetchTranscriptWithYtDlp(fetchImpl, url, { timeoutMs } = {}) {
    const processTimeoutMs = typeof timeoutMs === 'number' && Number.isFinite(timeoutMs)
        ? timeoutMs
        : YT_DLP_DEFAULT_TIMEOUT_MS;
    // A non-positive value means "no timeout" for execFile; for the download keep
    // the default instead of passing a value that could abort immediately.
    const downloadTimeoutMs = processTimeoutMs > 0 ? processTimeoutMs : YT_DLP_DEFAULT_TIMEOUT_MS;
    try {
        const { stdout } = await execFileAsync(
            'yt-dlp',
            // `--` ends option parsing so a URL starting with "-" cannot be
            // interpreted as a yt-dlp flag.
            ['--dump-single-json', '--no-playlist', '--no-warnings', '--', url],
            { timeout: processTimeoutMs, maxBuffer: YT_DLP_MAX_BUFFER_BYTES },
        );
        const info = JSON.parse(stdout);
        if (!isRecord(info)) {
            return null;
        }
        const captionUrl = pickCaptionUrl(info);
        if (!captionUrl) {
            return null;
        }
        const response = await fetchWithTimeout(fetchImpl, captionUrl, undefined, downloadTimeoutMs);
        if (!response.ok) {
            return null;
        }
        const raw = await response.text();
        return parseJson3Transcript(sanitizeYoutubeJsonResponse(raw));
    }
    catch {
        // Deliberately best-effort: a missing binary (ENOENT), a timeout, a
        // non-zero exit, invalid JSON, or a network error all mean "no transcript
        // from this provider", so callers can fall through to other providers.
        return null;
    }
}
|
|
114
|
+
//# sourceMappingURL=ytdlp.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ytdlp.js","sourceRoot":"","sources":["../../../../../../../src/content/link-preview/transcript/providers/youtube/ytdlp.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAA;AAC7C,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAA;AAErC,OAAO,EAAE,gBAAgB,EAAE,MAAM,gCAAgC,CAAA;AACjE,OAAO,EAAE,2BAA2B,EAAE,MAAM,gBAAgB,CAAA;AAE5D,MAAM,aAAa,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAA;AAazC,MAAM,QAAQ,GAAG,CAAC,KAAc,EAAoC,EAAE,CACpE,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAA;AAEtE,MAAM,oBAAoB,GAAG,CAAC,GAAW,EAAiB,EAAE;IAI1D,IAAI,CAAC;QACH,MAAM,MAAM,GAAY,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;QACvC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YACtB,OAAO,IAAI,CAAA;QACb,CAAC;QAED,MAAM,aAAa,GAAI,MAAkC,CAAC,MAAM,CAAA;QAChE,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,aAAa,CAAC,EAAE,CAAC;YAClC,OAAO,IAAI,CAAA;QACb,CAAC;QAED,MAAM,KAAK,GAAa,EAAE,CAAA;QAC1B,KAAK,MAAM,KAAK,IAAI,aAAa,EAAE,CAAC;YAClC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBACrB,SAAQ;YACV,CAAC;YACD,MAAM,WAAW,GAAG,KAA2B,CAAA;YAC/C,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAE,WAAW,CAAC,IAAkB,CAAC,CAAC,CAAC,IAAI,CAAA;YACrF,IAAI,CAAC,IAAI,EAAE,CAAC;gBACV,SAAQ;YACV,CAAC;YACD,MAAM,IAAI,GAAG,IAAI;iBACd,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;gBACX,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;oBACnB,OAAO,EAAE,CAAA;gBACX,CAAC;gBACD,MAAM,SAAS,GAAG,GAA2B,CAAA;gBAC7C,OAAO,OAAO,SAAS,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;YACjE,CAAC,CAAC;iBACD,IAAI,CAAC,EAAE,CAAC;iBACR,IAAI,EAAE,CAAA;YACT,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACpB,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAClB,CAAC;QACH,CAAC;QAED,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAA;QAC1C,OAAO,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAA;IAClD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC,CAAA;AAED,SAAS,cAAc,CAAC,IAAe;IACrC,MAAM,OAAO,GAAG,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,kBAAkB,CAAC,CAAA;IACzD,MAAM,UAAU,GAA6B,EAAE,CAAA;IAE/C,KAAK,MAAM,MAAM,IAAI,OAA
O,EAAE,CAAC;QAC7B,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAE,SAAQ;QAC/B,KAAK,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;YACrD,UAAU,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAA;QAClC,CAAC;IACH,CAAC;IAED,MAAM,kBAAkB,GAAG,CAAC,IAAY,EAAU,EAAE;QAClD,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAA;QAChC,IAAI,KAAK,KAAK,IAAI;YAAE,OAAO,CAAC,CAAA;QAC5B,IAAI,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC;YAAE,OAAO,CAAC,CAAA;QACrC,IAAI,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC;YAAE,OAAO,CAAC,CAAA;QACpC,OAAO,EAAE,CAAA;IACX,CAAC,CAAA;IAED,MAAM,MAAM,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,kBAAkB,CAAC,CAAC,CAAC,GAAG,kBAAkB,CAAC,CAAC,CAAC,CAAC,CAAA;IAE/F,KAAK,MAAM,CAAC,EAAE,OAAO,CAAC,IAAI,MAAM,EAAE,CAAC;QACjC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC;YAAE,SAAQ;QACrC,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,KAAK,EAA8B,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAA;QACzF,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,GAAG,KAAK,OAAO,IAAI,OAAO,KAAK,CAAC,GAAG,KAAK,QAAQ,CAAC,CAAA;QAChG,IAAI,KAAK,EAAE,GAAG,IAAI,OAAO,KAAK,CAAC,GAAG,KAAK,QAAQ,EAAE,CAAC;YAChD,OAAO,KAAK,CAAC,GAAG,CAAA;QAClB,CAAC;QACD,MAAM,GAAG,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,GAAG,KAAK,KAAK,IAAI,OAAO,KAAK,CAAC,GAAG,KAAK,QAAQ,CAAC,CAAA;QAC5F,IAAI,GAAG,EAAE,GAAG,IAAI,OAAO,GAAG,CAAC,GAAG,KAAK,QAAQ,EAAE,CAAC;YAC5C,OAAO,GAAG,CAAC,GAAG,CAAA;QAChB,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAA;AACb,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,wBAAwB,CAC5C,SAAuB,EACvB,GAAW,EACX,EAAE,SAAS,KAA6B,EAAE;IAE1C,IAAI,CAAC;QACH,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,aAAa,CACpC,QAAQ,EACR,CAAC,oBAAoB,EAAE,eAAe,EAAE,eAAe,EAAE,GAAG,CAAC,EAC7D,EAAE,OAAO,EAAE,OAAO,SAAS,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,EAAE,CAC9F,CAAA;QAED,MAAM,MAAM,GAAY,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAA;QAC1C,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YACtB,OAAO,IAAI,CAAA;QACb,CAAC;QAED,MAAM,IAAI,GAAG,MAAmB,CAAA;QAChC,MAAM,UAAU,GAAG,cAAc,CAAC,IAAI,CAAC,CAAA;QACvC,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,OAAO
,IAAI,CAAA;QACb,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,gBAAgB,CAAC,SAAS,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,CAAC,CAAA;QACjF,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,OAAO,IAAI,CAAA;QACb,CAAC;QAED,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAA;QACjC,MAAM,SAAS,GAAG,2BAA2B,CAAC,GAAG,CAAC,CAAA;QAClD,OAAO,oBAAoB,CAAC,SAAS,CAAC,CAAA;IACxC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,GACR,KAAK,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,MAAM,IAAI,KAAK;YACnD,CAAC,CAAE,KAA4B,CAAC,IAAI;YACpC,CAAC,CAAC,IAAI,CAAA;QACV,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;YACtB,OAAO,IAAI,CAAA;QACb,CAAC;QACD,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import { normalizeTranscriptText } from '../normalize.js';
|
|
2
|
+
import { extractYouTubeVideoId } from '../utils.js';
|
|
3
|
+
import { extractYoutubeiTranscriptConfig, fetchTranscriptFromTranscriptEndpoint, } from './youtube/api.js';
|
|
4
|
+
import { fetchTranscriptWithApify } from './youtube/apify.js';
|
|
5
|
+
import { fetchTranscriptFromCaptionTracks } from './youtube/captions.js';
|
|
6
|
+
// Loose substring match; only used for inputs that are not absolute http(s) URLs.
const YOUTUBE_URL_PATTERN = /youtube\.com|youtu\.be/i;
// Registrable hosts accepted by canHandle; subdomains of these are accepted too.
const YOUTUBE_HOSTS = ['youtube.com', 'youtu.be'];

/**
 * Decides whether this provider should handle the given URL.
 *
 * For absolute http(s) URLs the decision is made on the parsed hostname, so a
 * YouTube domain that merely appears in the path or query string (or as the
 * tail of an unrelated domain such as "notyoutube.com") is not accepted.
 * Anything else (scheme-less strings such as "youtu.be/abc", other schemes,
 * unparsable input) falls back to the loose pattern match.
 *
 * @param {{ url: string }} context - Object carrying the URL to check.
 * @returns {boolean} True when the URL points at YouTube.
 */
export const canHandle = ({ url }) => {
    let parsed = null;
    try {
        parsed = new URL(url);
    }
    catch {
        parsed = null;
    }
    if (parsed === null || (parsed.protocol !== 'http:' && parsed.protocol !== 'https:')) {
        return YOUTUBE_URL_PATTERN.test(url);
    }
    // Drop a trailing root dot ("youtube.com.") before comparing.
    const hostname = parsed.hostname.toLowerCase().replace(/\.$/, '');
    return YOUTUBE_HOSTS.some((host) => hostname === host || hostname.endsWith(`.${host}`));
};
|
|
8
|
+
/**
 * Tries the available transcript providers in order and returns the first hit.
 *
 * Order: unless `options.youtubeTranscriptMode` is 'apify', the "youtubei"
 * endpoint is tried (only when a config can be extracted from the HTML) and
 * then the caption tracks; unless the mode is 'web', Apify is tried next.
 * Every provider that is actually attempted is recorded in
 * `attemptedProviders`.
 *
 * @param {object} context - Reads `html`, `url` and optional `resourceKey`.
 * @param {object} options - Reads `youtubeTranscriptMode`, `fetch` and
 *   `apifyApiToken`.
 * @returns {Promise<object>} `{ text, source, metadata?, attemptedProviders }`.
 *   `text` and `source` are both null when there is no HTML or no video id;
 *   `source` is 'unavailable' when every attempted provider came back empty.
 */
export const fetchTranscript = async (context, options) => {
    const attemptedProviders = [];
    const { html, url } = context;
    const mode = options.youtubeTranscriptMode;

    // Result used when the preconditions (HTML and a video id) are not met.
    const skipped = () => ({ text: null, source: null, attemptedProviders });
    // Result for a provider that produced transcript text.
    const found = (provider, rawTranscript) => ({
        text: normalizeTranscriptText(rawTranscript),
        source: provider,
        metadata: { provider },
        attemptedProviders,
    });

    if (!html) {
        return skipped();
    }
    const rawVideoId = context.resourceKey ?? extractYouTubeVideoId(url);
    const videoId = typeof rawVideoId === 'string' ? rawVideoId.trim() : '';
    if (videoId.length === 0) {
        return skipped();
    }

    const webProvidersEnabled = mode !== 'apify';
    const apifyEnabled = mode !== 'web';

    if (webProvidersEnabled) {
        const youtubeiConfig = extractYoutubeiTranscriptConfig(html);
        if (youtubeiConfig) {
            attemptedProviders.push('youtubei');
            const endpointTranscript = await fetchTranscriptFromTranscriptEndpoint(options.fetch, {
                config: youtubeiConfig,
                originalUrl: url,
            });
            if (endpointTranscript) {
                return found('youtubei', endpointTranscript);
            }
        }
        attemptedProviders.push('captionTracks');
        const trackTranscript = await fetchTranscriptFromCaptionTracks(options.fetch, {
            html,
            originalUrl: url,
            videoId,
        });
        if (trackTranscript) {
            return found('captionTracks', trackTranscript);
        }
    }

    if (apifyEnabled) {
        attemptedProviders.push('apify');
        const remoteTranscript = await fetchTranscriptWithApify(options.fetch, options.apifyApiToken, url);
        if (remoteTranscript) {
            return found('apify', remoteTranscript);
        }
    }

    attemptedProviders.push('unavailable');
    return {
        text: null,
        source: 'unavailable',
        metadata: { provider: 'youtube', reason: 'no_transcript_available' },
        attemptedProviders,
    };
};
|
|
74
|
+
//# sourceMappingURL=youtube.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"youtube.js","sourceRoot":"","sources":["../../../../../../src/content/link-preview/transcript/providers/youtube.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,uBAAuB,EAAE,MAAM,iBAAiB,CAAA;AAOzD,OAAO,EAAE,qBAAqB,EAAE,MAAM,aAAa,CAAA;AACnD,OAAO,EACL,+BAA+B,EAC/B,qCAAqC,GACtC,MAAM,kBAAkB,CAAA;AACzB,OAAO,EAAE,wBAAwB,EAAE,MAAM,oBAAoB,CAAA;AAC7D,OAAO,EAAE,gCAAgC,EAAE,MAAM,uBAAuB,CAAA;AAExE,MAAM,mBAAmB,GAAG,yBAAyB,CAAA;AAErD,MAAM,CAAC,MAAM,SAAS,GAAG,CAAC,EAAE,GAAG,EAAmB,EAAW,EAAE,CAAC,mBAAmB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;AAE7F,MAAM,CAAC,MAAM,eAAe,GAAG,KAAK,EAClC,OAAwB,EACxB,OAA6B,EACJ,EAAE;IAC3B,MAAM,kBAAkB,GAAuB,EAAE,CAAA;IACjD,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,OAAO,CAAA;IAC7B,MAAM,IAAI,GAAG,OAAO,CAAC,qBAAqB,CAAA;IAE1C,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,kBAAkB,EAAE,CAAA;IACzD,CAAC;IAED,MAAM,yBAAyB,GAAG,OAAO,CAAC,WAAW,IAAI,qBAAqB,CAAC,GAAG,CAAC,CAAA;IACnF,MAAM,gBAAgB,GACpB,OAAO,yBAAyB,KAAK,QAAQ,IAAI,yBAAyB,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC;QAC1F,CAAC,CAAC,yBAAyB,CAAC,IAAI,EAAE;QAClC,CAAC,CAAC,IAAI,CAAA;IACV,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,kBAAkB,EAAE,CAAA;IACzD,CAAC;IAED,IAAI,IAAI,KAAK,OAAO,EAAE,CAAC;QACrB,MAAM,MAAM,GAAG,+BAA+B,CAAC,IAAI,CAAC,CAAA;QACpD,IAAI,MAAM,EAAE,CAAC;YACX,kBAAkB,CAAC,IAAI,CAAC,UAAU,CAAC,CAAA;YACnC,MAAM,UAAU,GAAG,MAAM,qCAAqC,CAAC,OAAO,CAAC,KAAK,EAAE;gBAC5E,MAAM;gBACN,WAAW,EAAE,GAAG;aACjB,CAAC,CAAA;YACF,IAAI,UAAU,EAAE,CAAC;gBACf,OAAO;oBACL,IAAI,EAAE,uBAAuB,CAAC,UAAU,CAAC;oBACzC,MAAM,EAAE,UAAU;oBAClB,QAAQ,EAAE,EAAE,QAAQ,EAAE,UAAU,EAAE;oBAClC,kBAAkB;iBACnB,CAAA;YACH,CAAC;QACH,CAAC;QAED,kBAAkB,CAAC,IAAI,CAAC,eAAe,CAAC,CAAA;QACxC,MAAM,iBAAiB,GAAG,MAAM,gCAAgC,CAAC,OAAO,CAAC,KAAK,EAAE;YAC9E,IAAI;YACJ,WAAW,EAAE,GAAG;YAChB,OAAO,EAAE,gBAAgB;SAC1B,CAAC,CAAA;QACF,IAAI,iBAAiB,EAAE,CAAC;YACtB,OAAO;gBACL,IAAI,EAAE,uBAAuB,CAAC,iBAAiB,CAAC;gBAChD,MAAM,EAAE,eAAe;gBACvB,QAAQ,EAAE,EAAE,QAAQ,EAAE,eAAe,EAAE;gBACvC,kBAAkB;aACnB,CAAA;QACH,CAAC;IACH,CAAC;IAED,IAAI
,IAAI,KAAK,KAAK,EAAE,CAAC;QACnB,kBAAkB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QAChC,MAAM,eAAe,GAAG,MAAM,wBAAwB,CACpD,OAAO,CAAC,KAAK,EACb,OAAO,CAAC,aAAa,EACrB,GAAG,CACJ,CAAA;QACD,IAAI,eAAe,EAAE,CAAC;YACpB,OAAO;gBACL,IAAI,EAAE,uBAAuB,CAAC,eAAe,CAAC;gBAC9C,MAAM,EAAE,OAAO;gBACf,QAAQ,EAAE,EAAE,QAAQ,EAAE,OAAO,EAAE;gBAC/B,kBAAkB;aACnB,CAAA;QACH,CAAC;IACH,CAAC;IAED,kBAAkB,CAAC,IAAI,CAAC,aAAa,CAAC,CAAA;IACtC,OAAO;QACL,IAAI,EAAE,IAAI;QACV,MAAM,EAAE,aAAa;QACrB,QAAQ,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,EAAE,yBAAyB,EAAE;QACpE,kBAAkB;KACnB,CAAA;AACH,CAAC,CAAA"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/transcript/types.ts"],"names":[],"mappings":""}
|