@steipete/summarize 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. package/CHANGELOG.md +52 -0
  2. package/LICENSE +21 -0
  3. package/README.md +185 -0
  4. package/dist/cli.cjs +74333 -0
  5. package/dist/cli.cjs.map +7 -0
  6. package/dist/esm/cli-main.js +80 -0
  7. package/dist/esm/cli-main.js.map +1 -0
  8. package/dist/esm/cli.js +18 -0
  9. package/dist/esm/cli.js.map +1 -0
  10. package/dist/esm/config.js +33 -0
  11. package/dist/esm/config.js.map +1 -0
  12. package/dist/esm/content/asset.js +167 -0
  13. package/dist/esm/content/asset.js.map +1 -0
  14. package/dist/esm/content/index.js +4 -0
  15. package/dist/esm/content/index.js.map +1 -0
  16. package/dist/esm/content/link-preview/client.js +20 -0
  17. package/dist/esm/content/link-preview/client.js.map +1 -0
  18. package/dist/esm/content/link-preview/content/article.js +150 -0
  19. package/dist/esm/content/link-preview/content/article.js.map +1 -0
  20. package/dist/esm/content/link-preview/content/cleaner.js +55 -0
  21. package/dist/esm/content/link-preview/content/cleaner.js.map +1 -0
  22. package/dist/esm/content/link-preview/content/fetcher.js +120 -0
  23. package/dist/esm/content/link-preview/content/fetcher.js.map +1 -0
  24. package/dist/esm/content/link-preview/content/index.js +275 -0
  25. package/dist/esm/content/link-preview/content/index.js.map +1 -0
  26. package/dist/esm/content/link-preview/content/parsers.js +77 -0
  27. package/dist/esm/content/link-preview/content/parsers.js.map +1 -0
  28. package/dist/esm/content/link-preview/content/types.js +4 -0
  29. package/dist/esm/content/link-preview/content/types.js.map +1 -0
  30. package/dist/esm/content/link-preview/content/utils.js +127 -0
  31. package/dist/esm/content/link-preview/content/utils.js.map +1 -0
  32. package/dist/esm/content/link-preview/content/youtube.js +82 -0
  33. package/dist/esm/content/link-preview/content/youtube.js.map +1 -0
  34. package/dist/esm/content/link-preview/deps.js +2 -0
  35. package/dist/esm/content/link-preview/deps.js.map +1 -0
  36. package/dist/esm/content/link-preview/fetch-with-timeout.js +35 -0
  37. package/dist/esm/content/link-preview/fetch-with-timeout.js.map +1 -0
  38. package/dist/esm/content/link-preview/transcript/cache.js +73 -0
  39. package/dist/esm/content/link-preview/transcript/cache.js.map +1 -0
  40. package/dist/esm/content/link-preview/transcript/index.js +95 -0
  41. package/dist/esm/content/link-preview/transcript/index.js.map +1 -0
  42. package/dist/esm/content/link-preview/transcript/normalize.js +43 -0
  43. package/dist/esm/content/link-preview/transcript/normalize.js.map +1 -0
  44. package/dist/esm/content/link-preview/transcript/providers/generic.js +11 -0
  45. package/dist/esm/content/link-preview/transcript/providers/generic.js.map +1 -0
  46. package/dist/esm/content/link-preview/transcript/providers/podcast.js +12 -0
  47. package/dist/esm/content/link-preview/transcript/providers/podcast.js.map +1 -0
  48. package/dist/esm/content/link-preview/transcript/providers/twitter.js +12 -0
  49. package/dist/esm/content/link-preview/transcript/providers/twitter.js.map +1 -0
  50. package/dist/esm/content/link-preview/transcript/providers/youtube/api.js +257 -0
  51. package/dist/esm/content/link-preview/transcript/providers/youtube/api.js.map +1 -0
  52. package/dist/esm/content/link-preview/transcript/providers/youtube/apify.js +55 -0
  53. package/dist/esm/content/link-preview/transcript/providers/youtube/apify.js.map +1 -0
  54. package/dist/esm/content/link-preview/transcript/providers/youtube/captions.js +409 -0
  55. package/dist/esm/content/link-preview/transcript/providers/youtube/captions.js.map +1 -0
  56. package/dist/esm/content/link-preview/transcript/providers/youtube/ytdlp.js +114 -0
  57. package/dist/esm/content/link-preview/transcript/providers/youtube/ytdlp.js.map +1 -0
  58. package/dist/esm/content/link-preview/transcript/providers/youtube.js +74 -0
  59. package/dist/esm/content/link-preview/transcript/providers/youtube.js.map +1 -0
  60. package/dist/esm/content/link-preview/transcript/types.js +2 -0
  61. package/dist/esm/content/link-preview/transcript/types.js.map +1 -0
  62. package/dist/esm/content/link-preview/transcript/utils.js +193 -0
  63. package/dist/esm/content/link-preview/transcript/utils.js.map +1 -0
  64. package/dist/esm/content/link-preview/types.js +2 -0
  65. package/dist/esm/content/link-preview/types.js.map +1 -0
  66. package/dist/esm/costs.js +57 -0
  67. package/dist/esm/costs.js.map +1 -0
  68. package/dist/esm/firecrawl.js +54 -0
  69. package/dist/esm/firecrawl.js.map +1 -0
  70. package/dist/esm/flags.js +97 -0
  71. package/dist/esm/flags.js.map +1 -0
  72. package/dist/esm/index.js +4 -0
  73. package/dist/esm/index.js.map +1 -0
  74. package/dist/esm/llm/generate-text.js +296 -0
  75. package/dist/esm/llm/generate-text.js.map +1 -0
  76. package/dist/esm/llm/google-models.js +112 -0
  77. package/dist/esm/llm/google-models.js.map +1 -0
  78. package/dist/esm/llm/html-to-markdown.js +44 -0
  79. package/dist/esm/llm/html-to-markdown.js.map +1 -0
  80. package/dist/esm/llm/model-id.js +45 -0
  81. package/dist/esm/llm/model-id.js.map +1 -0
  82. package/dist/esm/pricing/litellm.js +25 -0
  83. package/dist/esm/pricing/litellm.js.map +1 -0
  84. package/dist/esm/prompts/file.js +14 -0
  85. package/dist/esm/prompts/file.js.map +1 -0
  86. package/dist/esm/prompts/index.js +3 -0
  87. package/dist/esm/prompts/index.js.map +1 -0
  88. package/dist/esm/prompts/link-summary.js +105 -0
  89. package/dist/esm/prompts/link-summary.js.map +1 -0
  90. package/dist/esm/run.js +1674 -0
  91. package/dist/esm/run.js.map +1 -0
  92. package/dist/esm/shared/contracts.js +2 -0
  93. package/dist/esm/shared/contracts.js.map +1 -0
  94. package/dist/esm/summarizeHome.js +20 -0
  95. package/dist/esm/summarizeHome.js.map +1 -0
  96. package/dist/esm/tty/live-markdown.js +52 -0
  97. package/dist/esm/tty/live-markdown.js.map +1 -0
  98. package/dist/esm/tty/osc-progress.js +8 -0
  99. package/dist/esm/tty/osc-progress.js.map +1 -0
  100. package/dist/esm/tty/spinner.js +33 -0
  101. package/dist/esm/tty/spinner.js.map +1 -0
  102. package/dist/esm/version.js +44 -0
  103. package/dist/esm/version.js.map +1 -0
  104. package/dist/types/cli-main.d.ts +11 -0
  105. package/dist/types/cli.d.ts +1 -0
  106. package/dist/types/config.d.ts +15 -0
  107. package/dist/types/content/asset.d.ts +44 -0
  108. package/dist/types/content/index.d.ts +4 -0
  109. package/dist/types/content/link-preview/client.d.ts +14 -0
  110. package/dist/types/content/link-preview/content/article.d.ts +4 -0
  111. package/dist/types/content/link-preview/content/cleaner.d.ts +12 -0
  112. package/dist/types/content/link-preview/content/fetcher.d.ts +16 -0
  113. package/dist/types/content/link-preview/content/index.d.ts +4 -0
  114. package/dist/types/content/link-preview/content/parsers.d.ts +7 -0
  115. package/dist/types/content/link-preview/content/types.d.ts +44 -0
  116. package/dist/types/content/link-preview/content/utils.d.ts +16 -0
  117. package/dist/types/content/link-preview/content/youtube.d.ts +1 -0
  118. package/dist/types/content/link-preview/deps.d.ts +70 -0
  119. package/dist/types/content/link-preview/fetch-with-timeout.d.ts +4 -0
  120. package/dist/types/content/link-preview/transcript/cache.d.ts +29 -0
  121. package/dist/types/content/link-preview/transcript/index.d.ts +9 -0
  122. package/dist/types/content/link-preview/transcript/normalize.d.ts +3 -0
  123. package/dist/types/content/link-preview/transcript/providers/generic.d.ts +3 -0
  124. package/dist/types/content/link-preview/transcript/providers/podcast.d.ts +3 -0
  125. package/dist/types/content/link-preview/transcript/providers/twitter.d.ts +3 -0
  126. package/dist/types/content/link-preview/transcript/providers/youtube/api.d.ts +26 -0
  127. package/dist/types/content/link-preview/transcript/providers/youtube/apify.d.ts +1 -0
  128. package/dist/types/content/link-preview/transcript/providers/youtube/captions.d.ts +7 -0
  129. package/dist/types/content/link-preview/transcript/providers/youtube/ytdlp.d.ts +3 -0
  130. package/dist/types/content/link-preview/transcript/providers/youtube.d.ts +3 -0
  131. package/dist/types/content/link-preview/transcript/types.d.ts +23 -0
  132. package/dist/types/content/link-preview/transcript/utils.d.ts +7 -0
  133. package/dist/types/content/link-preview/types.d.ts +36 -0
  134. package/dist/types/costs.d.ts +31 -0
  135. package/dist/types/firecrawl.d.ts +5 -0
  136. package/dist/types/flags.d.ts +23 -0
  137. package/dist/types/index.d.ts +4 -0
  138. package/dist/types/llm/generate-text.d.ts +43 -0
  139. package/dist/types/llm/google-models.d.ts +10 -0
  140. package/dist/types/llm/html-to-markdown.d.ts +15 -0
  141. package/dist/types/llm/model-id.d.ts +14 -0
  142. package/dist/types/pricing/litellm.d.ts +13 -0
  143. package/dist/types/prompts/file.d.ts +6 -0
  144. package/dist/types/prompts/index.d.ts +3 -0
  145. package/dist/types/prompts/link-summary.d.ts +27 -0
  146. package/dist/types/run.d.ts +8 -0
  147. package/dist/types/shared/contracts.d.ts +2 -0
  148. package/dist/types/summarizeHome.d.ts +6 -0
  149. package/dist/types/tty/live-markdown.d.ts +10 -0
  150. package/dist/types/tty/osc-progress.d.ts +3 -0
  151. package/dist/types/tty/spinner.d.ts +10 -0
  152. package/dist/types/version.d.ts +2 -0
  153. package/docs/README.md +11 -0
  154. package/docs/config.md +28 -0
  155. package/docs/extract-only.md +13 -0
  156. package/docs/firecrawl.md +17 -0
  157. package/docs/llm.md +33 -0
  158. package/docs/openai.md +18 -0
  159. package/docs/site/.nojekyll +1 -0
  160. package/docs/site/404.html +37 -0
  161. package/docs/site/assets/site.css +577 -0
  162. package/docs/site/assets/site.js +69 -0
  163. package/docs/site/docs/config.html +73 -0
  164. package/docs/site/docs/extract-only.html +79 -0
  165. package/docs/site/docs/firecrawl.html +72 -0
  166. package/docs/site/docs/index.html +89 -0
  167. package/docs/site/docs/llm.html +70 -0
  168. package/docs/site/docs/openai.html +66 -0
  169. package/docs/site/docs/website.html +70 -0
  170. package/docs/site/docs/youtube.html +62 -0
  171. package/docs/site/index.html +125 -0
  172. package/docs/website.md +27 -0
  173. package/docs/youtube.md +32 -0
  174. package/package.json +76 -0
@@ -0,0 +1,409 @@
1
+ import { fetchWithTimeout } from '../../../fetch-with-timeout.js';
2
+ import { decodeHtmlEntities, sanitizeYoutubeJsonResponse } from '../../utils.js';
3
+ import { extractYoutubeiBootstrap } from './api.js';
4
// Marker text searched for in the watch-page HTML to locate the embedded
// player response assignment.
const YT_INITIAL_PLAYER_RESPONSE_TOKEN = 'ytInitialPlayerResponse';

// Matches the INNERTUBE_API_KEY value in its plain JSON form (capture group 1)
// or in its backslash-escaped form (capture group 2).
const INNERTUBE_API_KEY_REGEX = /"INNERTUBE_API_KEY":"([^"]+)"|INNERTUBE_API_KEY\\":\\"([^\\"]+)\\"/;

// Headers passed as-is to caption downloads; the player API requests in this
// module read their User-Agent / Accept-Language values from here as well.
const REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
    'Accept-Language': 'en-US,en;q=0.9',
};
10
/**
 * Returns the text of the first brace-balanced `{...}` span in `source`,
 * starting the search at `startAt`.
 *
 * Braces that appear inside single- or double-quoted string literals are not
 * counted, and a backslash inside a literal escapes the following character.
 *
 * @param {string} source - Text to scan.
 * @param {number} startAt - Index from which to look for the opening brace.
 * @returns {string | null} The balanced span (both braces included), or null
 *   when there is no opening brace or the span never closes.
 */
function extractBalancedJsonObject(source, startAt) {
    const openIndex = source.indexOf('{', startAt);
    if (openIndex === -1) {
        return null;
    }
    let nesting = 0;
    // Quote character of the string literal currently being skipped, if any.
    let activeQuote = null;
    let skipNext = false;
    for (let cursor = openIndex; cursor < source.length; cursor += 1) {
        const char = source[cursor];
        if (!char) {
            continue;
        }
        if (activeQuote !== null) {
            if (skipNext) {
                skipNext = false;
            }
            else if (char === '\\') {
                skipNext = true;
            }
            else if (char === activeQuote) {
                activeQuote = null;
            }
            continue;
        }
        switch (char) {
            case '"':
            case "'":
                activeQuote = char;
                break;
            case '{':
                nesting += 1;
                break;
            case '}':
                nesting -= 1;
                if (nesting === 0) {
                    return source.slice(openIndex, cursor + 1);
                }
                break;
            default:
                break;
        }
    }
    return null;
}
57
/**
 * Extracts the player response object embedded in a watch page's HTML.
 *
 * For each occurrence of the `ytInitialPlayerResponse` token, the first `=`
 * after it is located, the next brace-balanced `{...}` span is taken, and that
 * span is parsed as JSON. The first occurrence that yields an object wins.
 *
 * Earlier occurrences that do not lead to parseable JSON (for example a
 * reference to the name that is followed by unrelated script code) are
 * skipped rather than ending the search, so a later real assignment is still
 * found.
 *
 * @param {string} html - Raw page HTML.
 * @returns {object | null} The parsed object, or null when no occurrence
 *   yields one.
 */
function extractInitialPlayerResponse(html) {
    const token = YT_INITIAL_PLAYER_RESPONSE_TOKEN;
    for (let tokenIndex = html.indexOf(token); tokenIndex >= 0; tokenIndex = html.indexOf(token, tokenIndex + token.length)) {
        const assignmentIndex = html.indexOf('=', tokenIndex);
        if (assignmentIndex < 0) {
            // No '=' after this occurrence means none after any later one.
            return null;
        }
        const objectText = extractBalancedJsonObject(html, assignmentIndex);
        if (!objectText) {
            continue;
        }
        try {
            const parsed = JSON.parse(objectText);
            if (isObjectLike(parsed)) {
                return parsed;
            }
        }
        catch {
            // Not JSON (e.g. a function body); try the next occurrence.
        }
    }
    return null;
}
78
/**
 * Reports whether `value` is a non-null value whose `typeof` is 'object'
 * (this includes arrays; functions and primitives are rejected).
 *
 * @param {unknown} value
 * @returns {boolean}
 */
const isObjectLike = (value) => value !== null && typeof value === 'object';
79
/**
 * Pulls the INNERTUBE_API_KEY value out of the page HTML.
 *
 * @param {string} html - Raw page HTML.
 * @returns {string | null} The trimmed key, or null when the pattern does not
 *   match or the captured value is blank.
 */
function extractInnertubeApiKey(html) {
    const found = html.match(INNERTUBE_API_KEY_REGEX);
    // Group 1 is the plain form, group 2 the backslash-escaped form.
    const candidate = found?.[1] ?? found?.[2];
    if (typeof candidate !== 'string') {
        return null;
    }
    const trimmed = candidate.trim();
    return trimmed.length > 0 ? trimmed : null;
}
84
/**
 * Fallback path: requests the player payload from the youtubei `player`
 * endpoint while identifying as the ANDROID client, then extracts a
 * transcript from the returned payload.
 *
 * Every failure (no API key in the HTML, non-OK response, non-object body,
 * thrown error) results in null rather than an exception from the request.
 *
 * @param {Function} fetchImpl - Fetch implementation handed to fetchWithTimeout.
 * @param {{ html: string, videoId: string }} params - Page HTML (source of
 *   the API key) and the video to request.
 * @returns {Promise<string | null>} Transcript text, or null.
 */
async function fetchTranscriptViaAndroidPlayer(fetchImpl, { html, videoId }) {
    const apiKey = extractInnertubeApiKey(html);
    if (!apiKey) {
        return null;
    }
    try {
        const endpoint = `https://www.youtube.com/youtubei/v1/player?key=${apiKey}`;
        const requestInit = {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'User-Agent': REQUEST_HEADERS['User-Agent'] ??
                    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
                'Accept-Language': REQUEST_HEADERS['Accept-Language'] ?? 'en-US,en;q=0.9',
                Accept: 'application/json',
            },
            body: JSON.stringify({
                context: {
                    client: {
                        clientName: 'ANDROID',
                        clientVersion: '20.10.38',
                    },
                },
                videoId,
            }),
        };
        const response = await fetchWithTimeout(fetchImpl, endpoint, requestInit);
        if (!response.ok) {
            return null;
        }
        const payload = await response.json();
        return isObjectLike(payload)
            ? await extractTranscriptFromPlayerPayload(fetchImpl, payload)
            : null;
    }
    catch {
        return null;
    }
}
123
/**
 * Resolves a transcript for a YouTube video by trying, in order:
 *
 * 1. the player response embedded in the page HTML;
 * 2. a `player` request built from the page's youtubei bootstrap data;
 * 3. the ANDROID-client `player` request (fetchTranscriptViaAndroidPlayer).
 *
 * Step 3 is the fallback whenever step 2 cannot be attempted (no bootstrap or
 * no API key) or fails in any way (non-OK response, unparseable body, no
 * transcript, thrown error).
 *
 * @param {Function} fetchImpl - Fetch implementation handed to fetchWithTimeout.
 * @param {{ html: string, originalUrl: string, videoId: string }} params
 * @returns {Promise<string | null>} Transcript text, or null when every
 *   strategy comes up empty.
 */
export const fetchTranscriptFromCaptionTracks = async (fetchImpl, { html, originalUrl, videoId }) => {
    const initialPlayerResponse = extractInitialPlayerResponse(html);
    if (initialPlayerResponse) {
        const transcript = await extractTranscriptFromPlayerPayload(fetchImpl, initialPlayerResponse);
        if (transcript) {
            return transcript;
        }
    }
    const fallBackToAndroidPlayer = () => fetchTranscriptViaAndroidPlayer(fetchImpl, { html, videoId });
    const bootstrap = extractYoutubeiBootstrap(html);
    if (!bootstrap?.apiKey) {
        return await fallBackToAndroidPlayer();
    }
    const { apiKey, clientName, clientVersion, context, pageCl, pageLabel, visitorData, xsrfToken } = bootstrap;
    // The bootstrap is scraped from page HTML, so `context` may be missing or
    // not an object; treat that as an empty context instead of throwing.
    const contextRecord = isObjectLike(context) ? context : {};
    const clientContext = isObjectLike(contextRecord.client) ? contextRecord.client : {};
    const requestBody = {
        context: {
            ...contextRecord,
            client: {
                ...clientContext,
                originalUrl,
            },
        },
        videoId,
        playbackContext: {
            contentPlaybackContext: {
                html5Preference: 'HTML5_PREF_WANTS',
            },
        },
        contentCheckOk: true,
        racyCheckOk: true,
    };
    try {
        const userAgent = REQUEST_HEADERS['User-Agent'] ??
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36';
        const headers = {
            'Content-Type': 'application/json',
            'User-Agent': userAgent,
            Accept: 'application/json',
            Origin: 'https://www.youtube.com',
            Referer: originalUrl,
            'X-Goog-AuthUser': '0',
            'X-Youtube-Bootstrap-Logged-In': 'false',
        };
        // Optional headers are only sent when the bootstrap supplied a value.
        if (clientName) {
            headers['X-Youtube-Client-Name'] = clientName;
        }
        if (clientVersion) {
            headers['X-Youtube-Client-Version'] = clientVersion;
        }
        if (visitorData) {
            headers['X-Goog-Visitor-Id'] = visitorData;
        }
        if (typeof pageCl === 'number' && Number.isFinite(pageCl)) {
            headers['X-Youtube-Page-CL'] = String(pageCl);
        }
        if (pageLabel) {
            headers['X-Youtube-Page-Label'] = pageLabel;
        }
        if (xsrfToken) {
            headers['X-Youtube-Identity-Token'] = xsrfToken;
        }
        const response = await fetchWithTimeout(fetchImpl, `https://www.youtube.com/youtubei/v1/player?key=${apiKey}`, {
            method: 'POST',
            headers,
            body: JSON.stringify(requestBody),
        });
        if (response.ok) {
            const raw = await response.text();
            const parsed = JSON.parse(sanitizeYoutubeJsonResponse(raw));
            if (isObjectLike(parsed)) {
                const transcript = await extractTranscriptFromPlayerPayload(fetchImpl, parsed);
                if (transcript) {
                    return transcript;
                }
            }
        }
    }
    catch {
        // Deliberate best-effort: any failure of the bootstrap-based request
        // falls through to the ANDROID-client fallback below.
    }
    return await fallBackToAndroidPlayer();
};
214
+ const extractTranscriptFromPlayerPayload = async (fetchImpl, payload) => {
215
+ const payloadRecord = payload;
216
+ const captionsCandidate = payloadRecord.captions;
217
+ const captions = isObjectLike(captionsCandidate) ? captionsCandidate : null;
218
+ const rendererCandidate = (captions ? captions.playerCaptionsTracklistRenderer : null) ??
219
+ payloadRecord.playerCaptionsTracklistRenderer;
220
+ const renderer = isObjectLike(rendererCandidate)
221
+ ? rendererCandidate
222
+ : null;
223
+ const captionTracks = Array.isArray(renderer?.captionTracks)
224
+ ? renderer?.captionTracks
225
+ : null;
226
+ const automaticTracks = Array.isArray(renderer?.automaticCaptions)
227
+ ? renderer?.automaticCaptions
228
+ : null;
229
+ const orderedTracks = [];
230
+ if (captionTracks) {
231
+ orderedTracks.push(...captionTracks.filter((track) => isObjectLike(track)));
232
+ }
233
+ if (automaticTracks) {
234
+ orderedTracks.push(...automaticTracks.filter((track) => isObjectLike(track)));
235
+ }
236
+ const seenLanguages = new Set();
237
+ const normalizedTracks = [];
238
+ for (const candidate of orderedTracks) {
239
+ if (!isObjectLike(candidate)) {
240
+ continue;
241
+ }
242
+ const trackRecord = candidate;
243
+ const languageCandidate = trackRecord.languageCode;
244
+ const lang = typeof languageCandidate === 'string' ? languageCandidate.toLowerCase() : '';
245
+ if (lang && seenLanguages.has(lang)) {
246
+ continue;
247
+ }
248
+ if (lang) {
249
+ seenLanguages.add(lang);
250
+ }
251
+ normalizedTracks.push(candidate);
252
+ }
253
+ const sortedTracks = [...normalizedTracks].toSorted((a, b) => {
254
+ const aTrack = a;
255
+ const bTrack = b;
256
+ const aKind = typeof aTrack.kind === 'string' ? aTrack.kind : '';
257
+ const bKind = typeof bTrack.kind === 'string' ? bTrack.kind : '';
258
+ if (aKind === 'asr' && bKind !== 'asr') {
259
+ return -1;
260
+ }
261
+ if (bKind === 'asr' && aKind !== 'asr') {
262
+ return 1;
263
+ }
264
+ const aLang = typeof aTrack.languageCode === 'string' ? aTrack.languageCode : '';
265
+ const bLang = typeof bTrack.languageCode === 'string' ? bTrack.languageCode : '';
266
+ if (aLang === 'en' && bLang !== 'en') {
267
+ return -1;
268
+ }
269
+ if (bLang === 'en' && aLang !== 'en') {
270
+ return 1;
271
+ }
272
+ return 0;
273
+ });
274
+ return await findFirstTranscript(fetchImpl, sortedTracks, 0);
275
+ };
276
/**
 * Tries the tracks one at a time, starting at `index`, and returns the first
 * truthy result produced by downloadCaptionTrack.
 *
 * @param {Function} fetchImpl - Fetch implementation used for the downloads.
 * @param {Array<object>} tracks - Candidate tracks in preference order.
 * @param {number} index - Position of the first track to try.
 * @returns {Promise<string | null>} Transcript text, or null when no track
 *   from `index` onward yields one.
 */
const findFirstTranscript = async (fetchImpl, tracks, index) => {
    for (let position = index; position < tracks.length; position += 1) {
        // Downloads are deliberately sequential: later tracks are only
        // requested when the earlier ones produced nothing.
        const transcript = await downloadCaptionTrack(fetchImpl, tracks[position] ?? {});
        if (transcript) {
            return transcript;
        }
    }
    return null;
};
286
/**
 * Downloads a single caption track and returns its text.
 *
 * The track URL (`baseUrl`, else `url`) is first requested with
 * `fmt=json3&alt=json`. The body is parsed as a JSON transcript, then as an
 * XML transcript. If the request fails, the body is empty, or neither parser
 * yields text, the work is handed to downloadXmlTranscript with the
 * unmodified track URL.
 *
 * @param {Function} fetchImpl - Fetch implementation handed to fetchWithTimeout.
 * @param {object} track - Caption track record.
 * @returns {Promise<string | null>} Transcript text, or null when the track
 *   has no usable URL or nothing could be extracted.
 */
const downloadCaptionTrack = async (fetchImpl, track) => {
    const baseUrl = [track.baseUrl, track.url].find((value) => typeof value === 'string') ?? null;
    if (!baseUrl) {
        return null;
    }

    const buildJson3Url = () => {
        try {
            const parsed = new URL(baseUrl);
            parsed.searchParams.set('fmt', 'json3');
            parsed.searchParams.set('alt', 'json');
            return parsed.toString();
        }
        catch {
            // Not an absolute URL: append the parameters textually instead.
            const separator = baseUrl.includes('?') ? '&' : '?';
            return `${baseUrl}${separator}fmt=json3&alt=json`;
        }
    };
    const fallBackToXml = () => downloadXmlTranscript(fetchImpl, baseUrl);

    try {
        const response = await fetchWithTimeout(fetchImpl, buildJson3Url(), {
            headers: REQUEST_HEADERS,
        });
        if (!response.ok) {
            return await fallBackToXml();
        }
        const body = await response.text();
        if (body.length === 0) {
            return await fallBackToXml();
        }
        return parseJsonTranscript(body) || parseXmlTranscript(body) || (await fallBackToXml());
    }
    catch {
        return await fallBackToXml();
    }
};
333
/**
 * Downloads a caption track with any `fmt` query parameter removed and
 * returns its text.
 *
 * The `fmt` parameter is stripped wherever it sits in the query string,
 * including when it is the first parameter (`?fmt=...`); the rest of the URL
 * is left byte-for-byte unchanged. The body is parsed as a JSON transcript
 * first and as an XML transcript second.
 *
 * @param {Function} fetchImpl - Fetch implementation handed to fetchWithTimeout.
 * @param {string} baseUrl - Caption track URL.
 * @returns {Promise<string | null>} Transcript text, or null on a non-OK
 *   response, a thrown error, or a body neither parser can use.
 */
const downloadXmlTranscript = async (fetchImpl, baseUrl) => {
    const xmlUrl = baseUrl
        // `fmt` anywhere after the first parameter.
        .replaceAll(/&fmt=[^&#]*/g, '')
        // `fmt` as the very first parameter; keep the `?` for what follows.
        .replace(/\?fmt=[^&#]*&?/, '?');
    try {
        const response = await fetchWithTimeout(fetchImpl, xmlUrl, { headers: REQUEST_HEADERS });
        if (!response.ok) {
            return null;
        }
        const text = await response.text();
        const jsonResult = parseJsonTranscript(text);
        if (jsonResult) {
            return jsonResult;
        }
        return parseXmlTranscript(text);
    }
    catch {
        // Best-effort download: a failed request simply yields no transcript.
        return null;
    }
};
351
/**
 * Parses a JSON caption payload into plain text.
 *
 * Each entry of `events` that has a `segs` array contributes one line: the
 * concatenation of its segments' string `utf8` values, trimmed. Blank lines
 * are dropped and the remaining lines are joined with newlines.
 *
 * @param {string} raw - Response body.
 * @returns {string | null} Transcript text, or null when the body is not
 *   JSON, has no `events` array, or contains no text.
 */
const parseJsonTranscript = (raw) => {
    try {
        const decoded = JSON.parse(raw);
        if (!isObjectLike(decoded) || !Array.isArray(decoded.events)) {
            return null;
        }
        const segmentText = (segment) => (isObjectLike(segment) && typeof segment.utf8 === 'string' ? segment.utf8 : '');
        const cueLines = decoded.events
            .filter((event) => isObjectLike(event) && Array.isArray(event.segs))
            .map((event) => event.segs.map(segmentText).join('').trim())
            .filter((line) => line.length > 0);
        const combined = cueLines.join('\n').trim();
        return combined.length > 0 ? combined : null;
    }
    catch {
        return null;
    }
};
394
/**
 * Parses an XML caption payload into plain text.
 *
 * The inner content of every `<text>` element becomes one line after HTML
 * entities are decoded and whitespace runs are collapsed to single spaces.
 * Blank lines are dropped and the remaining lines are joined with newlines.
 *
 * @param {string} xml - Response body.
 * @returns {string | null} Transcript text, or null when no `<text>` element
 *   carries any text.
 */
const parseXmlTranscript = (xml) => {
    const cueLines = [];
    for (const [, inner = ''] of xml.matchAll(/<text[^>]*>([\s\S]*?)<\/text>/gi)) {
        const flattened = decodeHtmlEntities(inner).replaceAll(/\s+/g, ' ').trim();
        if (flattened.length > 0) {
            cueLines.push(flattened);
        }
    }
    const combined = cueLines.join('\n').trim();
    return combined.length > 0 ? combined : null;
};
409
+ //# sourceMappingURL=captions.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"captions.js","sourceRoot":"","sources":["../../../../../../../src/content/link-preview/transcript/providers/youtube/captions.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,gCAAgC,CAAA;AACjE,OAAO,EAAE,kBAAkB,EAAE,2BAA2B,EAAE,MAAM,gBAAgB,CAAA;AAChF,OAAO,EAAE,wBAAwB,EAAE,MAAM,UAAU,CAAA;AAQnD,MAAM,eAAe,GAA2B;IAC9C,YAAY,EACV,iHAAiH;IACnH,iBAAiB,EAAE,gBAAgB;CACpC,CAAA;AAED,MAAM,gCAAgC,GAAG,yBAAyB,CAAA;AAClE,MAAM,uBAAuB,GAAG,oEAAoE,CAAA;AAEpG,SAAS,yBAAyB,CAAC,MAAc,EAAE,OAAe;IAChE,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,CAAC,GAAG,EAAE,OAAO,CAAC,CAAA;IAC1C,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;QACd,OAAO,IAAI,CAAA;IACb,CAAC;IAED,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,IAAI,QAAQ,GAAG,KAAK,CAAA;IACpB,IAAI,KAAK,GAAqB,IAAI,CAAA;IAClC,IAAI,QAAQ,GAAG,KAAK,CAAA;IAEpB,KAAK,IAAI,CAAC,GAAG,KAAK,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QAC9C,MAAM,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,CAAA;QACpB,IAAI,CAAC,EAAE,EAAE,CAAC;YACR,SAAQ;QACV,CAAC;QAED,IAAI,QAAQ,EAAE,CAAC;YACb,IAAI,QAAQ,EAAE,CAAC;gBACb,QAAQ,GAAG,KAAK,CAAA;gBAChB,SAAQ;YACV,CAAC;YACD,IAAI,EAAE,KAAK,IAAI,EAAE,CAAC;gBAChB,QAAQ,GAAG,IAAI,CAAA;gBACf,SAAQ;YACV,CAAC;YACD,IAAI,KAAK,IAAI,EAAE,KAAK,KAAK,EAAE,CAAC;gBAC1B,QAAQ,GAAG,KAAK,CAAA;gBAChB,KAAK,GAAG,IAAI,CAAA;YACd,CAAC;YACD,SAAQ;QACV,CAAC;QAED,IAAI,EAAE,KAAK,GAAG,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;YAC7B,QAAQ,GAAG,IAAI,CAAA;YACf,KAAK,GAAG,EAAE,CAAA;YACV,SAAQ;QACV,CAAC;QAED,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;YACf,KAAK,IAAI,CAAC,CAAA;YACV,SAAQ;QACV,CAAC;QACD,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;YACf,KAAK,IAAI,CAAC,CAAA;YACV,IAAI,KAAK,KAAK,CAAC,EAAE,CAAC;gBAChB,OAAO,MAAM,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC,GAAG,CAAC,CAAC,CAAA;YACnC,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAA;AACb,CAAC;AAED,SAAS,4BAA4B,CAAC,IAAY;IAChD,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,gCAAgC,CAAC,CAAA;IACjE,IAAI,UAAU,GAAG,CAAC,EAAE,CAAC;QACnB,OAAO,IAAI,CAAA;IACb,CAAC;IACD,MAAM,eAAe,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,UAAU,CAAC,CAAA;IACrD,IAAI,eAAe,GAAG,CAAC,EAAE,CAAC;QACxB,OAAO,IAAI,CAAA;IACb,CAAC;IACD,MAAM,UAAU,GAAG,yBAAyB
,CAAC,IAAI,EAAE,eAAe,CAAC,CAAA;IACnE,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,OAAO,IAAI,CAAA;IACb,CAAC;IAED,IAAI,CAAC;QACH,MAAM,MAAM,GAAY,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAA;QAC9C,OAAO,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAA;IAC7C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED,MAAM,YAAY,GAAG,CAAC,KAAc,EAAoC,EAAE,CACxE,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,CAAA;AAE7C,SAAS,sBAAsB,CAAC,IAAY;IAC1C,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,uBAAuB,CAAC,CAAA;IACjD,MAAM,GAAG,GAAG,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,CAAA;IAC5C,OAAO,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAA;AAC7E,CAAC;AAoBD,KAAK,UAAU,+BAA+B,CAC5C,SAAuB,EACvB,EAAE,IAAI,EAAE,OAAO,EAAqC;IAEpD,MAAM,MAAM,GAAG,sBAAsB,CAAC,IAAI,CAAC,CAAA;IAC3C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,IAAI,CAAA;IACb,CAAC;IAED,IAAI,CAAC;QACH,MAAM,SAAS,GACb,eAAe,CAAC,YAAY,CAAC;YAC7B,iHAAiH,CAAA;QAEnH,MAAM,QAAQ,GAAG,MAAM,gBAAgB,CACrC,SAAS,EACT,kDAAkD,MAAM,EAAE,EAC1D;YACE,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,YAAY,EAAE,SAAS;gBACvB,iBAAiB,EAAE,eAAe,CAAC,iBAAiB,CAAC,IAAI,gBAAgB;gBACzE,MAAM,EAAE,kBAAkB;aAC3B;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,OAAO,EAAE;oBACP,MAAM,EAAE;wBACN,UAAU,EAAE,SAAS;wBACrB,aAAa,EAAE,UAAU;qBAC1B;iBACF;gBACD,OAAO;aACR,CAAC;SACH,CACF,CAAA;QAED,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,OAAO,IAAI,CAAA;QACb,CAAC;QAED,MAAM,MAAM,GAAY,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAA;QAC7C,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1B,OAAO,IAAI,CAAA;QACb,CAAC;QAED,OAAO,MAAM,kCAAkC,CAAC,SAAS,EAAE,MAAM,CAAC,CAAA;IACpE,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED,MAAM,CAAC,MAAM,gCAAgC,GAAG,KAAK,EACnD,SAAuB,EACvB,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAA4B,EAChC,EAAE;IAC1B,MAAM,qBAAqB,GAAG,4BAA4B,CAAC,IAAI,CAAC,CAAA;IAChE,IAAI,qBAAqB,EAAE,CAAC;QAC1B,MAAM,UAAU,GAAG,MAAM,kCAAkC,CAAC,SAAS,EAAE,qBAAqB,CAAC,CAAA;QAC7F,IAAI,UAAU,EAAE,CAAC;YACf,OAAO,UAAU,CAAA;QACnB,CAAC;IACH,CAAC;IAED,MA
AM,SAAS,GAAG,wBAAwB,CAAC,IAAI,CAAC,CAAA;IAChD,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,OAAO,MAAM,+BAA+B,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAA;IAC5E,CAAC;IAED,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,aAAa,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,WAAW,EAAE,SAAS,EAAE,GAC7F,SAAS,CAAA;IACX,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,MAAM,+BAA+B,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAA;IAC5E,CAAC;IAED,MAAM,aAAa,GAAG,OAA+B,CAAA;IACrD,MAAM,aAAa,GAAG,YAAY,CAAC,aAAa,CAAC,MAAM,CAAC;QACtD,CAAC,CAAE,aAAa,CAAC,MAAkC;QACnD,CAAC,CAAC,EAAE,CAAA;IACN,MAAM,WAAW,GAA4B;QAC3C,OAAO,EAAE;YACP,GAAG,aAAa;YAChB,MAAM,EAAE;gBACN,GAAG,aAAa;gBAChB,WAAW;aACZ;SACF;QACD,OAAO;QACP,eAAe,EAAE;YACf,sBAAsB,EAAE;gBACtB,eAAe,EAAE,kBAAkB;aACpC;SACF;QACD,cAAc,EAAE,IAAI;QACpB,WAAW,EAAE,IAAI;KAClB,CAAA;IAED,IAAI,CAAC;QACH,MAAM,SAAS,GACb,eAAe,CAAC,YAAY,CAAC;YAC7B,iHAAiH,CAAA;QACnH,MAAM,OAAO,GAA2B;YACtC,cAAc,EAAE,kBAAkB;YAClC,YAAY,EAAE,SAAS;YACvB,MAAM,EAAE,kBAAkB;YAC1B,MAAM,EAAE,yBAAyB;YACjC,OAAO,EAAE,WAAW;YACpB,iBAAiB,EAAE,GAAG;YACtB,+BAA+B,EAAE,OAAO;SACzC,CAAA;QAED,IAAI,UAAU,EAAE,CAAC;YACf,OAAO,CAAC,uBAAuB,CAAC,GAAG,UAAU,CAAA;QAC/C,CAAC;QACD,IAAI,aAAa,EAAE,CAAC;YAClB,OAAO,CAAC,0BAA0B,CAAC,GAAG,aAAa,CAAA;QACrD,CAAC;QACD,IAAI,WAAW,EAAE,CAAC;YAChB,OAAO,CAAC,mBAAmB,CAAC,GAAG,WAAW,CAAA;QAC5C,CAAC;QACD,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1D,OAAO,CAAC,mBAAmB,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,CAAA;QAC/C,CAAC;QACD,IAAI,SAAS,EAAE,CAAC;YACd,OAAO,CAAC,sBAAsB,CAAC,GAAG,SAAS,CAAA;QAC7C,CAAC;QACD,IAAI,SAAS,EAAE,CAAC;YACd,OAAO,CAAC,0BAA0B,CAAC,GAAG,SAAS,CAAA;QACjD,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,gBAAgB,CACrC,SAAS,EACT,kDAAkD,MAAM,EAAE,EAC1D;YACE,MAAM,EAAE,MAAM;YACd,OAAO;YACP,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC;SAClC,CACF,CAAA;QAED,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,OAAO,MAAM,+BAA+B,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAA;QAC5E,CAAC;QAED,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAA;QACjC,MAAM,SAAS,GAAG,2BAA2B,CAAC,GAAG,CAAC,CAAA;QAClD,MAAM,MAAM,GAAY,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;QAC7C,IA
AI,CAAC,YAAY,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1B,OAAO,MAAM,+BAA+B,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAA;QAC5E,CAAC;QAED,MAAM,UAAU,GAAG,MAAM,kCAAkC,CAAC,SAAS,EAAE,MAAM,CAAC,CAAA;QAC9E,IAAI,UAAU,EAAE,CAAC;YACf,OAAO,UAAU,CAAA;QACnB,CAAC;QAED,OAAO,MAAM,+BAA+B,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAA;IAC5E,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,MAAM,+BAA+B,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAA;IAC5E,CAAC;AACH,CAAC,CAAA;AAED,MAAM,kCAAkC,GAAG,KAAK,EAC9C,SAAuB,EACvB,OAAgC,EACR,EAAE;IAC1B,MAAM,aAAa,GAAG,OAA0B,CAAA;IAEhD,MAAM,iBAAiB,GAAG,aAAa,CAAC,QAAQ,CAAA;IAChD,MAAM,QAAQ,GAAG,YAAY,CAAC,iBAAiB,CAAC,CAAC,CAAC,CAAE,iBAAqC,CAAC,CAAC,CAAC,IAAI,CAAA;IAEhG,MAAM,iBAAiB,GACrB,CAAC,QAAQ,CAAC,CAAC,CAAE,QAA4B,CAAC,+BAA+B,CAAC,CAAC,CAAC,IAAI,CAAC;QACjF,aAAa,CAAC,+BAA+B,CAAA;IAE/C,MAAM,QAAQ,GAAG,YAAY,CAAC,iBAAiB,CAAC;QAC9C,CAAC,CAAE,iBAAyC;QAC5C,CAAC,CAAC,IAAI,CAAA;IACR,MAAM,aAAa,GAAG,KAAK,CAAC,OAAO,CAAC,QAAQ,EAAE,aAAa,CAAC;QAC1D,CAAC,CAAE,QAAQ,EAAE,aAA2B;QACxC,CAAC,CAAC,IAAI,CAAA;IACR,MAAM,eAAe,GAAG,KAAK,CAAC,OAAO,CAAC,QAAQ,EAAE,iBAAiB,CAAC;QAChE,CAAC,CAAE,QAAQ,EAAE,iBAA+B;QAC5C,CAAC,CAAC,IAAI,CAAA;IAER,MAAM,aAAa,GAA8B,EAAE,CAAA;IACnD,IAAI,aAAa,EAAE,CAAC;QAClB,aAAa,CAAC,IAAI,CAChB,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,KAAK,EAAoC,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,CAC1F,CAAA;IACH,CAAC;IACD,IAAI,eAAe,EAAE,CAAC;QACpB,aAAa,CAAC,IAAI,CAChB,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC,KAAK,EAAoC,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,CAC5F,CAAA;IACH,CAAC;IACD,MAAM,aAAa,GAAG,IAAI,GAAG,EAAU,CAAA;IACvC,MAAM,gBAAgB,GAA8B,EAAE,CAAA;IACtD,KAAK,MAAM,SAAS,IAAI,aAAa,EAAE,CAAC;QACtC,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,EAAE,CAAC;YAC7B,SAAQ;QACV,CAAC;QACD,MAAM,WAAW,GAAG,SAA+B,CAAA;QACnD,MAAM,iBAAiB,GAAG,WAAW,CAAC,YAAY,CAAA;QAClD,MAAM,IAAI,GAAG,OAAO,iBAAiB,KAAK,QAAQ,CAAC,CAAC,CAAC,iBAAiB,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,CAAA;QACzF,IAAI,IAAI,IAAI,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;YACpC,SAAQ;QACV,CAAC;QACD,IAAI,IAAI,EAAE,CAAC;YACT,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACzB,CAAC;QACD,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC,
CAAA;IAClC,CAAC;IAED,MAAM,YAAY,GAAG,CAAC,GAAG,gBAAgB,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QAC3D,MAAM,MAAM,GAAG,CAAuB,CAAA;QACtC,MAAM,MAAM,GAAG,CAAuB,CAAA;QACtC,MAAM,KAAK,GAAG,OAAO,MAAM,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;QAChE,MAAM,KAAK,GAAG,OAAO,MAAM,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;QAChE,IAAI,KAAK,KAAK,KAAK,IAAI,KAAK,KAAK,KAAK,EAAE,CAAC;YACvC,OAAO,CAAC,CAAC,CAAA;QACX,CAAC;QACD,IAAI,KAAK,KAAK,KAAK,IAAI,KAAK,KAAK,KAAK,EAAE,CAAC;YACvC,OAAO,CAAC,CAAA;QACV,CAAC;QACD,MAAM,KAAK,GAAG,OAAO,MAAM,CAAC,YAAY,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,CAAA;QAChF,MAAM,KAAK,GAAG,OAAO,MAAM,CAAC,YAAY,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,CAAA;QAChF,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YACrC,OAAO,CAAC,CAAC,CAAA;QACX,CAAC;QACD,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YACrC,OAAO,CAAC,CAAA;QACV,CAAC;QACD,OAAO,CAAC,CAAA;IACV,CAAC,CAAC,CAAA;IAEF,OAAO,MAAM,mBAAmB,CAAC,SAAS,EAAE,YAAY,EAAE,CAAC,CAAC,CAAA;AAC9D,CAAC,CAAA;AAED,MAAM,mBAAmB,GAAG,KAAK,EAC/B,SAAuB,EACvB,MAA0C,EAC1C,KAAa,EACW,EAAE;IAC1B,IAAI,KAAK,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;QAC3B,OAAO,IAAI,CAAA;IACb,CAAC;IACD,MAAM,SAAS,GAAG,MAAM,oBAAoB,CAAC,SAAS,EAAE,MAAM,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAA;IAC5E,IAAI,SAAS,EAAE,CAAC;QACd,OAAO,SAAS,CAAA;IAClB,CAAC;IACD,OAAO,mBAAmB,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,GAAG,CAAC,CAAC,CAAA;AAC1D,CAAC,CAAA;AAED,MAAM,oBAAoB,GAAG,KAAK,EAChC,SAAuB,EACvB,KAA8B,EACN,EAAE;IAC1B,MAAM,WAAW,GAAG,KAA2B,CAAA;IAC/C,MAAM,OAAO,GACX,OAAO,WAAW,CAAC,OAAO,KAAK,QAAQ;QACrC,CAAC,CAAC,WAAW,CAAC,OAAO;QACrB,CAAC,CAAC,OAAO,WAAW,CAAC,GAAG,KAAK,QAAQ;YACnC,CAAC,CAAC,WAAW,CAAC,GAAG;YACjB,CAAC,CAAC,IAAI,CAAA;IACZ,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO,IAAI,CAAA;IACb,CAAC;IAED,MAAM,QAAQ,GAAG,CAAC,GAAG,EAAE;QACrB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAA;YAC/B,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,EAAE,OAAO,CAAC,CAAA;YACvC,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,CAAA
;YACtC,OAAO,MAAM,CAAC,QAAQ,EAAE,CAAA;QAC1B,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,SAAS,GAAG,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAA;YACnD,OAAO,GAAG,OAAO,GAAG,SAAS,oBAAoB,CAAA;QACnD,CAAC;IACH,CAAC,CAAC,EAAE,CAAA;IAEJ,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,gBAAgB,CAAC,SAAS,EAAE,QAAQ,EAAE;YAC3D,OAAO,EAAE,eAAe;SACzB,CAAC,CAAA;QACF,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,OAAO,MAAM,qBAAqB,CAAC,SAAS,EAAE,OAAO,CAAC,CAAA;QACxD,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAA;QAClC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtB,OAAO,MAAM,qBAAqB,CAAC,SAAS,EAAE,OAAO,CAAC,CAAA;QACxD,CAAC;QACD,MAAM,UAAU,GAAG,mBAAmB,CAAC,IAAI,CAAC,CAAA;QAC5C,IAAI,UAAU,EAAE,CAAC;YACf,OAAO,UAAU,CAAA;QACnB,CAAC;QACD,MAAM,WAAW,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAA;QAC5C,IAAI,WAAW,EAAE,CAAC;YAChB,OAAO,WAAW,CAAA;QACpB,CAAC;QACD,OAAO,MAAM,qBAAqB,CAAC,SAAS,EAAE,OAAO,CAAC,CAAA;IACxD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,MAAM,qBAAqB,CAAC,SAAS,EAAE,OAAO,CAAC,CAAA;IACxD,CAAC;AACH,CAAC,CAAA;AAED,MAAM,qBAAqB,GAAG,KAAK,EACjC,SAAuB,EACvB,OAAe,EACS,EAAE;IAC1B,MAAM,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,aAAa,EAAE,EAAE,CAAC,CAAA;IACpD,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,gBAAgB,CAAC,SAAS,EAAE,MAAM,EAAE,EAAE,OAAO,EAAE,eAAe,EAAE,CAAC,CAAA;QACxF,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,OAAO,IAAI,CAAA;QACb,CAAC;QACD,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAA;QAClC,MAAM,UAAU,GAAG,mBAAmB,CAAC,IAAI,CAAC,CAAA;QAC5C,IAAI,UAAU,EAAE,CAAC;YACf,OAAO,UAAU,CAAA;QACnB,CAAC;QACD,OAAO,kBAAkB,CAAC,IAAI,CAAC,CAAA;IACjC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC,CAAA;AAID,MAAM,mBAAmB,GAAG,CAAC,GAAW,EAAiB,EAAE;IACzD,IAAI,CAAC;QACH,MAAM,MAAM,GAAY,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;QACvC,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1B,OAAO,IAAI,CAAA;QACb,CAAC;QACD,MAAM,aAAa,GAAG,MAAwB,CAAA;QAC9C,MAAM,aAAa,GAAG,aAAa,CAAC,MAAM,CAAA;QAC1C,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,aAAa,CAAC,EAAE,CAAC;YAClC,OAAO,IAAI,CAAA;QACb,CAAC;QACD,MAAM,MAAM,GAAG,aAAa,CAAA;QAC5B,MAAM,KAAK,GAAa,EAAE,CAAA;QAC1B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3
B,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,CAAC;gBACzB,SAAQ;YACV,CAAC;YACD,MAAM,WAAW,GAAG,KAA2B,CAAA;YAC/C,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAE,WAAW,CAAC,IAAkB,CAAC,CAAC,CAAC,IAAI,CAAA;YACrF,IAAI,CAAC,IAAI,EAAE,CAAC;gBACV,SAAQ;YACV,CAAC;YACD,MAAM,IAAI,GAAG,IAAI;iBACd,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;gBACX,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC;oBACvB,OAAO,EAAE,CAAA;gBACX,CAAC;gBACD,MAAM,SAAS,GAAG,GAA2B,CAAA;gBAC7C,OAAO,OAAO,SAAS,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;YACjE,CAAC,CAAC;iBACD,IAAI,CAAC,EAAE,CAAC;iBACR,IAAI,EAAE,CAAA;YACT,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACpB,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAClB,CAAC;QACH,CAAC;QACD,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAA;QAC1C,OAAO,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAA;IAClD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC,CAAA;AAED,MAAM,kBAAkB,GAAG,CAAC,GAAW,EAAiB,EAAE;IACxD,MAAM,OAAO,GAAG,iCAAiC,CAAA;IACjD,MAAM,KAAK,GAAa,EAAE,CAAA;IAC1B,IAAI,KAAK,GAA2B,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IACrD,OAAO,KAAK,EAAE,CAAC;QACb,MAAM,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;QAC9B,MAAM,OAAO,GAAG,kBAAkB,CAAC,OAAO,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;QAC1E,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QACrB,CAAC;QACD,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IAC3B,CAAC;IACD,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAA;IAC1C,OAAO,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAA;AAClD,CAAC,CAAA"}
@@ -0,0 +1,114 @@
1
+ import { execFile } from 'node:child_process';
2
+ import { promisify } from 'node:util';
3
+ import { fetchWithTimeout } from '../../../fetch-with-timeout.js';
4
+ import { sanitizeYoutubeJsonResponse } from '../../utils.js';
5
// Promise-returning variant of child_process.execFile, so callers can `await` it.
const execFileAsync = promisify(execFile);

/**
 * Tells whether a value is a plain record-like object.
 *
 * @param {unknown} value - Anything, typically the output of JSON.parse.
 * @returns {boolean} true for non-null objects that are not arrays.
 */
const isRecord = (value) => value !== null && typeof value === 'object' && !Array.isArray(value);
7
/**
 * Converts a JSON3 caption payload into plain transcript text.
 *
 * The payload must be a JSON object with an `events` array. Every event that
 * carries a `segs` array contributes one line: the concatenation of the string
 * `utf8` fields of its segments, trimmed. Empty lines are dropped.
 *
 * @param {string} raw - Caption payload as JSON text.
 * @returns {string | null} Lines joined with "\n", or null when the payload is
 *   not valid JSON, has an unexpected shape, or contains no text.
 */
const parseJson3Transcript = (raw) => {
    try {
        const payload = JSON.parse(raw);
        const events = isRecord(payload) ? payload.events : undefined;
        if (!Array.isArray(events)) {
            return null;
        }
        const cueTexts = events
            // Only record-like events that actually carry a segment list count.
            .filter((event) => isRecord(event) && Array.isArray(event.segs))
            .map((event) => event.segs
            .reduce((joined, seg) => (isRecord(seg) && typeof seg.utf8 === 'string' ? joined + seg.utf8 : joined), '')
            .trim())
            .filter((cueText) => cueText.length > 0);
        const transcript = cueTexts.join('\n').trim();
        return transcript.length > 0 ? transcript : null;
    }
    catch {
        // Malformed JSON (or any unexpected failure) means "no transcript".
        return null;
    }
};
48
/**
 * Chooses a caption download URL from parsed yt-dlp metadata.
 *
 * Language keys from `info.subtitles` and then `info.automatic_captions` are
 * collected and ordered by language preference: exactly "en", then "en-*",
 * then any other key starting with "en", then everything else. Keys of equal
 * rank keep their collection order. For the first language that offers a
 * usable track, a `json3` URL is preferred over a `vtt` URL.
 *
 * @param {object} info - Parsed yt-dlp JSON; only `subtitles` and
 *   `automatic_captions` are read.
 * @returns {string | null} The selected caption URL, or null when none exists.
 */
function pickCaptionUrl(info) {
    const rankLanguage = (lang) => {
        const code = lang.toLowerCase();
        if (!code.startsWith('en')) {
            return 10;
        }
        if (code === 'en') {
            return 0;
        }
        return code.startsWith('en-') ? 1 : 2;
    };

    const ranked = [info.subtitles, info.automatic_captions]
        .filter((source) => isRecord(source))
        .flatMap((source) => Object.entries(source))
        .map(([lang, entries]) => ({ rank: rankLanguage(lang), entries }))
        // Array.prototype.sort is stable, so ties keep their collection order.
        .sort((left, right) => left.rank - right.rank);

    for (const { entries } of ranked) {
        if (!Array.isArray(entries)) {
            continue;
        }
        const tracks = entries.filter((entry) => isRecord(entry));
        for (const ext of ['json3', 'vtt']) {
            // Only the first track of each format is considered; an empty URL on
            // that track means the format is skipped for this language.
            const match = tracks.find((track) => track.ext === ext && typeof track.url === 'string');
            if (match?.url) {
                return match.url;
            }
        }
    }
    return null;
}
84
/**
 * Extracts plain text from a WebVTT caption document.
 *
 * Only input that begins with the "WEBVTT" signature (optionally preceded by a
 * byte-order mark) is accepted. Blocks without a timing line ("-->") such as
 * the header, NOTE and STYLE blocks are ignored. Inline tags like `<c>` or
 * `<00:00:01.000>` are stripped, and a line identical to the previously
 * emitted one is dropped so repeated rolling cues do not duplicate text.
 * HTML entities are left untouched.
 *
 * @param {unknown} raw - Caption payload as text.
 * @returns {string | null} Cue text joined with "\n", or null when the input is
 *   not WebVTT or contains no cue text.
 */
const parseVttTranscript = (raw) => {
    if (typeof raw !== 'string' || !/^\uFEFF?WEBVTT/.test(raw)) {
        return null;
    }
    const lines = [];
    for (const block of raw.replace(/\r\n?/g, '\n').split(/\n{2,}/)) {
        const blockLines = block.split('\n');
        const timingIndex = blockLines.findIndex((line) => line.includes('-->'));
        if (timingIndex === -1) {
            continue;
        }
        // Everything after the timing line is the cue payload.
        for (const line of blockLines.slice(timingIndex + 1)) {
            const text = line.replace(/<[^>]*>/g, '').trim();
            if (text.length > 0 && text !== lines.at(-1)) {
                lines.push(text);
            }
        }
    }
    const transcript = lines.join('\n').trim();
    return transcript.length > 0 ? transcript : null;
};

/**
 * Best-effort transcript lookup through the `yt-dlp` command-line tool.
 *
 * Runs `yt-dlp --dump-single-json` for the URL, picks a caption track from the
 * reported metadata, downloads it, and converts it to plain text. JSON3 tracks
 * go through `parseJson3Transcript`; WebVTT tracks (the fallback format chosen
 * by `pickCaptionUrl`) are parsed as WebVTT.
 *
 * @param {Function} fetchImpl - fetch implementation handed to `fetchWithTimeout`.
 * @param {string} url - Video URL passed to yt-dlp.
 * @param {{ timeoutMs?: number }} [options] - `timeoutMs` bounds the yt-dlp
 *   process; a missing or non-finite value falls back to 60 seconds.
 * @returns {Promise<string | null>} Transcript text, or null on any failure
 *   (yt-dlp not installed, timeout, non-zero exit, invalid JSON, no caption
 *   track, HTTP error, unparseable captions). Never rejects.
 */
export async function fetchTranscriptWithYtDlp(fetchImpl, url, { timeoutMs } = {}) {
    const processTimeout = typeof timeoutMs === 'number' && Number.isFinite(timeoutMs) ? timeoutMs : 60_000;
    try {
        const { stdout } = await execFileAsync('yt-dlp', 
        // "--" ends option parsing, so a URL starting with "-" cannot be
        // interpreted by yt-dlp as a command-line option.
        ['--dump-single-json', '--no-playlist', '--no-warnings', '--', url], {
            timeout: processTimeout,
            // execFile defaults to a 1 MiB stdout limit and kills the child when
            // it is exceeded; the metadata dump can be larger than that.
            maxBuffer: 64 * 1024 * 1024,
        });
        const info = JSON.parse(stdout);
        if (!isRecord(info)) {
            return null;
        }
        const captionUrl = pickCaptionUrl(info);
        if (!captionUrl) {
            return null;
        }
        const response = await fetchWithTimeout(fetchImpl, captionUrl, undefined, 60_000);
        if (!response.ok) {
            return null;
        }
        const raw = await response.text();
        // WebVTT is detected by its signature, so JSON payloads are unaffected.
        const vttTranscript = parseVttTranscript(raw);
        if (vttTranscript !== null) {
            return vttTranscript;
        }
        return parseJson3Transcript(sanitizeYoutubeJsonResponse(raw));
    }
    catch {
        // Deliberately best-effort: a missing yt-dlp binary (ENOENT) is handled
        // the same way as every other failure, so callers can fall through to
        // their next provider.
        return null;
    }
}
114
+ //# sourceMappingURL=ytdlp.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ytdlp.js","sourceRoot":"","sources":["../../../../../../../src/content/link-preview/transcript/providers/youtube/ytdlp.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAA;AAC7C,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAA;AAErC,OAAO,EAAE,gBAAgB,EAAE,MAAM,gCAAgC,CAAA;AACjE,OAAO,EAAE,2BAA2B,EAAE,MAAM,gBAAgB,CAAA;AAE5D,MAAM,aAAa,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAA;AAazC,MAAM,QAAQ,GAAG,CAAC,KAAc,EAAoC,EAAE,CACpE,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAA;AAEtE,MAAM,oBAAoB,GAAG,CAAC,GAAW,EAAiB,EAAE;IAI1D,IAAI,CAAC;QACH,MAAM,MAAM,GAAY,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;QACvC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YACtB,OAAO,IAAI,CAAA;QACb,CAAC;QAED,MAAM,aAAa,GAAI,MAAkC,CAAC,MAAM,CAAA;QAChE,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,aAAa,CAAC,EAAE,CAAC;YAClC,OAAO,IAAI,CAAA;QACb,CAAC;QAED,MAAM,KAAK,GAAa,EAAE,CAAA;QAC1B,KAAK,MAAM,KAAK,IAAI,aAAa,EAAE,CAAC;YAClC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBACrB,SAAQ;YACV,CAAC;YACD,MAAM,WAAW,GAAG,KAA2B,CAAA;YAC/C,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAE,WAAW,CAAC,IAAkB,CAAC,CAAC,CAAC,IAAI,CAAA;YACrF,IAAI,CAAC,IAAI,EAAE,CAAC;gBACV,SAAQ;YACV,CAAC;YACD,MAAM,IAAI,GAAG,IAAI;iBACd,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;gBACX,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;oBACnB,OAAO,EAAE,CAAA;gBACX,CAAC;gBACD,MAAM,SAAS,GAAG,GAA2B,CAAA;gBAC7C,OAAO,OAAO,SAAS,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;YACjE,CAAC,CAAC;iBACD,IAAI,CAAC,EAAE,CAAC;iBACR,IAAI,EAAE,CAAA;YACT,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACpB,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAClB,CAAC;QACH,CAAC;QAED,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAA;QAC1C,OAAO,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAA;IAClD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC,CAAA;AAED,SAAS,cAAc,CAAC,IAAe;IACrC,MAAM,OAAO,GAAG,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,kBAAkB,CAAC,CAAA;IACzD,MAAM,UAAU,GAA6B,EAAE,CAAA;IAE/C,KAAK,MAAM,MAAM,IAAI,O
AAO,EAAE,CAAC;QAC7B,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAE,SAAQ;QAC/B,KAAK,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;YACrD,UAAU,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAA;QAClC,CAAC;IACH,CAAC;IAED,MAAM,kBAAkB,GAAG,CAAC,IAAY,EAAU,EAAE;QAClD,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAA;QAChC,IAAI,KAAK,KAAK,IAAI;YAAE,OAAO,CAAC,CAAA;QAC5B,IAAI,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC;YAAE,OAAO,CAAC,CAAA;QACrC,IAAI,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC;YAAE,OAAO,CAAC,CAAA;QACpC,OAAO,EAAE,CAAA;IACX,CAAC,CAAA;IAED,MAAM,MAAM,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,kBAAkB,CAAC,CAAC,CAAC,GAAG,kBAAkB,CAAC,CAAC,CAAC,CAAC,CAAA;IAE/F,KAAK,MAAM,CAAC,EAAE,OAAO,CAAC,IAAI,MAAM,EAAE,CAAC;QACjC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC;YAAE,SAAQ;QACrC,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,KAAK,EAA8B,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAA;QACzF,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,GAAG,KAAK,OAAO,IAAI,OAAO,KAAK,CAAC,GAAG,KAAK,QAAQ,CAAC,CAAA;QAChG,IAAI,KAAK,EAAE,GAAG,IAAI,OAAO,KAAK,CAAC,GAAG,KAAK,QAAQ,EAAE,CAAC;YAChD,OAAO,KAAK,CAAC,GAAG,CAAA;QAClB,CAAC;QACD,MAAM,GAAG,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,GAAG,KAAK,KAAK,IAAI,OAAO,KAAK,CAAC,GAAG,KAAK,QAAQ,CAAC,CAAA;QAC5F,IAAI,GAAG,EAAE,GAAG,IAAI,OAAO,GAAG,CAAC,GAAG,KAAK,QAAQ,EAAE,CAAC;YAC5C,OAAO,GAAG,CAAC,GAAG,CAAA;QAChB,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAA;AACb,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,wBAAwB,CAC5C,SAAuB,EACvB,GAAW,EACX,EAAE,SAAS,KAA6B,EAAE;IAE1C,IAAI,CAAC;QACH,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,aAAa,CACpC,QAAQ,EACR,CAAC,oBAAoB,EAAE,eAAe,EAAE,eAAe,EAAE,GAAG,CAAC,EAC7D,EAAE,OAAO,EAAE,OAAO,SAAS,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,EAAE,CAC9F,CAAA;QAED,MAAM,MAAM,GAAY,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAA;QAC1C,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YACtB,OAAO,IAAI,CAAA;QACb,CAAC;QAED,MAAM,IAAI,GAAG,MAAmB,CAAA;QAChC,MAAM,UAAU,GAAG,cAAc,CAAC,IAAI,CAAC,CAAA;QACvC,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,OA
AO,IAAI,CAAA;QACb,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,gBAAgB,CAAC,SAAS,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,CAAC,CAAA;QACjF,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,OAAO,IAAI,CAAA;QACb,CAAC;QAED,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAA;QACjC,MAAM,SAAS,GAAG,2BAA2B,CAAC,GAAG,CAAC,CAAA;QAClD,OAAO,oBAAoB,CAAC,SAAS,CAAC,CAAA;IACxC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,GACR,KAAK,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,MAAM,IAAI,KAAK;YACnD,CAAC,CAAE,KAA4B,CAAC,IAAI;YACpC,CAAC,CAAC,IAAI,CAAA;QACV,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;YACtB,OAAO,IAAI,CAAA;QACb,CAAC;QACD,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC"}
@@ -0,0 +1,74 @@
1
+ import { normalizeTranscriptText } from '../normalize.js';
2
+ import { extractYouTubeVideoId } from '../utils.js';
3
+ import { extractYoutubeiTranscriptConfig, fetchTranscriptFromTranscriptEndpoint, } from './youtube/api.js';
4
+ import { fetchTranscriptWithApify } from './youtube/apify.js';
5
+ import { fetchTranscriptFromCaptionTracks } from './youtube/captions.js';
6
// Case-insensitive match for the two YouTube host spellings, anywhere in the URL text.
const YOUTUBE_URL_PATTERN = /youtube\.com|youtu\.be/i;

/**
 * Reports whether this provider applies to the given context.
 *
 * @param {{ url: string }} context - Only `url` is read.
 * @returns {boolean} true when the URL text contains "youtube.com" or "youtu.be".
 */
export const canHandle = (context) => {
    const { url } = context;
    return YOUTUBE_URL_PATTERN.test(url);
};
8
/**
 * Resolves a YouTube transcript by trying providers in a fixed order.
 *
 * Unless `options.youtubeTranscriptMode` is 'apify', the web providers run
 * first: the youtubei transcript endpoint (only when a config can be extracted
 * from the page HTML), then the caption tracks. Unless the mode is 'web', Apify
 * is tried afterwards. The first provider that yields a transcript wins and
 * its text is passed through `normalizeTranscriptText`.
 *
 * @param {object} context - Reads `html`, `url` and `resourceKey`.
 * @param {object} options - Reads `youtubeTranscriptMode`, `fetch` and
 *   `apifyApiToken`.
 * @returns {Promise<object>} `{ text, source, attemptedProviders }` plus
 *   `metadata` once a provider was tried. Without HTML or a video id the result
 *   is `{ text: null, source: null, attemptedProviders: [] }`; when every
 *   provider fails, `source` is 'unavailable'.
 */
export const fetchTranscript = async (context, options) => {
    const attemptedProviders = [];
    const { html, url } = context;
    const mode = options.youtubeTranscriptMode;

    // Result builders; all of them share the same attemptedProviders array.
    const notApplicable = () => ({ text: null, source: null, attemptedProviders });
    const foundBy = (provider, rawTranscript) => ({
        text: normalizeTranscriptText(rawTranscript),
        source: provider,
        metadata: { provider },
        attemptedProviders,
    });

    if (!html) {
        return notApplicable();
    }

    const idCandidate = context.resourceKey ?? extractYouTubeVideoId(url);
    const videoId = typeof idCandidate === 'string' ? idCandidate.trim() : '';
    if (videoId.length === 0) {
        return notApplicable();
    }

    const webProvidersEnabled = mode !== 'apify';
    const apifyEnabled = mode !== 'web';

    if (webProvidersEnabled) {
        const config = extractYoutubeiTranscriptConfig(html);
        if (config) {
            attemptedProviders.push('youtubei');
            const endpointTranscript = await fetchTranscriptFromTranscriptEndpoint(options.fetch, {
                config,
                originalUrl: url,
            });
            if (endpointTranscript) {
                return foundBy('youtubei', endpointTranscript);
            }
        }

        attemptedProviders.push('captionTracks');
        const trackTranscript = await fetchTranscriptFromCaptionTracks(options.fetch, {
            html,
            originalUrl: url,
            videoId,
        });
        if (trackTranscript) {
            return foundBy('captionTracks', trackTranscript);
        }
    }

    if (apifyEnabled) {
        attemptedProviders.push('apify');
        const apifyTranscript = await fetchTranscriptWithApify(options.fetch, options.apifyApiToken, url);
        if (apifyTranscript) {
            return foundBy('apify', apifyTranscript);
        }
    }

    attemptedProviders.push('unavailable');
    return {
        text: null,
        source: 'unavailable',
        metadata: { provider: 'youtube', reason: 'no_transcript_available' },
        attemptedProviders,
    };
};
74
+ //# sourceMappingURL=youtube.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"youtube.js","sourceRoot":"","sources":["../../../../../../src/content/link-preview/transcript/providers/youtube.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,uBAAuB,EAAE,MAAM,iBAAiB,CAAA;AAOzD,OAAO,EAAE,qBAAqB,EAAE,MAAM,aAAa,CAAA;AACnD,OAAO,EACL,+BAA+B,EAC/B,qCAAqC,GACtC,MAAM,kBAAkB,CAAA;AACzB,OAAO,EAAE,wBAAwB,EAAE,MAAM,oBAAoB,CAAA;AAC7D,OAAO,EAAE,gCAAgC,EAAE,MAAM,uBAAuB,CAAA;AAExE,MAAM,mBAAmB,GAAG,yBAAyB,CAAA;AAErD,MAAM,CAAC,MAAM,SAAS,GAAG,CAAC,EAAE,GAAG,EAAmB,EAAW,EAAE,CAAC,mBAAmB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;AAE7F,MAAM,CAAC,MAAM,eAAe,GAAG,KAAK,EAClC,OAAwB,EACxB,OAA6B,EACJ,EAAE;IAC3B,MAAM,kBAAkB,GAAuB,EAAE,CAAA;IACjD,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,OAAO,CAAA;IAC7B,MAAM,IAAI,GAAG,OAAO,CAAC,qBAAqB,CAAA;IAE1C,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,kBAAkB,EAAE,CAAA;IACzD,CAAC;IAED,MAAM,yBAAyB,GAAG,OAAO,CAAC,WAAW,IAAI,qBAAqB,CAAC,GAAG,CAAC,CAAA;IACnF,MAAM,gBAAgB,GACpB,OAAO,yBAAyB,KAAK,QAAQ,IAAI,yBAAyB,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC;QAC1F,CAAC,CAAC,yBAAyB,CAAC,IAAI,EAAE;QAClC,CAAC,CAAC,IAAI,CAAA;IACV,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,kBAAkB,EAAE,CAAA;IACzD,CAAC;IAED,IAAI,IAAI,KAAK,OAAO,EAAE,CAAC;QACrB,MAAM,MAAM,GAAG,+BAA+B,CAAC,IAAI,CAAC,CAAA;QACpD,IAAI,MAAM,EAAE,CAAC;YACX,kBAAkB,CAAC,IAAI,CAAC,UAAU,CAAC,CAAA;YACnC,MAAM,UAAU,GAAG,MAAM,qCAAqC,CAAC,OAAO,CAAC,KAAK,EAAE;gBAC5E,MAAM;gBACN,WAAW,EAAE,GAAG;aACjB,CAAC,CAAA;YACF,IAAI,UAAU,EAAE,CAAC;gBACf,OAAO;oBACL,IAAI,EAAE,uBAAuB,CAAC,UAAU,CAAC;oBACzC,MAAM,EAAE,UAAU;oBAClB,QAAQ,EAAE,EAAE,QAAQ,EAAE,UAAU,EAAE;oBAClC,kBAAkB;iBACnB,CAAA;YACH,CAAC;QACH,CAAC;QAED,kBAAkB,CAAC,IAAI,CAAC,eAAe,CAAC,CAAA;QACxC,MAAM,iBAAiB,GAAG,MAAM,gCAAgC,CAAC,OAAO,CAAC,KAAK,EAAE;YAC9E,IAAI;YACJ,WAAW,EAAE,GAAG;YAChB,OAAO,EAAE,gBAAgB;SAC1B,CAAC,CAAA;QACF,IAAI,iBAAiB,EAAE,CAAC;YACtB,OAAO;gBACL,IAAI,EAAE,uBAAuB,CAAC,iBAAiB,CAAC;gBAChD,MAAM,EAAE,eAAe;gBACvB,QAAQ,EAAE,EAAE,QAAQ,EAAE,eAAe,EAAE;gBACvC,kBAAkB;aACnB,CAAA;QACH,CAAC;IACH,CAAC;IAED,IA
AI,IAAI,KAAK,KAAK,EAAE,CAAC;QACnB,kBAAkB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QAChC,MAAM,eAAe,GAAG,MAAM,wBAAwB,CACpD,OAAO,CAAC,KAAK,EACb,OAAO,CAAC,aAAa,EACrB,GAAG,CACJ,CAAA;QACD,IAAI,eAAe,EAAE,CAAC;YACpB,OAAO;gBACL,IAAI,EAAE,uBAAuB,CAAC,eAAe,CAAC;gBAC9C,MAAM,EAAE,OAAO;gBACf,QAAQ,EAAE,EAAE,QAAQ,EAAE,OAAO,EAAE;gBAC/B,kBAAkB;aACnB,CAAA;QACH,CAAC;IACH,CAAC;IAED,kBAAkB,CAAC,IAAI,CAAC,aAAa,CAAC,CAAA;IACtC,OAAO;QACL,IAAI,EAAE,IAAI;QACV,MAAM,EAAE,aAAa;QACrB,QAAQ,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,EAAE,yBAAyB,EAAE;QACpE,kBAAkB;KACnB,CAAA;AACH,CAAC,CAAA"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/transcript/types.ts"],"names":[],"mappings":""}