webpeel 0.13.4 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +120 -162
- package/dist/cli-auth.js +7 -7
- package/dist/cli-auth.js.map +1 -1
- package/dist/cli.js +197 -26
- package/dist/cli.js.map +1 -1
- package/dist/core/auto-extract.d.ts +83 -0
- package/dist/core/auto-extract.d.ts.map +1 -0
- package/dist/core/auto-extract.js +565 -0
- package/dist/core/auto-extract.js.map +1 -0
- package/dist/core/deep-fetch.d.ts +75 -0
- package/dist/core/deep-fetch.d.ts.map +1 -0
- package/dist/core/deep-fetch.js +406 -0
- package/dist/core/deep-fetch.js.map +1 -0
- package/dist/core/domain-extractors.d.ts +34 -0
- package/dist/core/domain-extractors.d.ts.map +1 -0
- package/dist/core/domain-extractors.js +654 -0
- package/dist/core/domain-extractors.js.map +1 -0
- package/dist/core/markdown.d.ts +8 -0
- package/dist/core/markdown.d.ts.map +1 -1
- package/dist/core/markdown.js +25 -0
- package/dist/core/markdown.js.map +1 -1
- package/dist/core/quick-answer.d.ts +28 -0
- package/dist/core/quick-answer.d.ts.map +1 -0
- package/dist/core/quick-answer.js +288 -0
- package/dist/core/quick-answer.js.map +1 -0
- package/dist/core/readability.d.ts +58 -0
- package/dist/core/readability.d.ts.map +1 -0
- package/dist/core/readability.js +496 -0
- package/dist/core/readability.js.map +1 -0
- package/dist/core/search-provider.d.ts.map +1 -1
- package/dist/core/search-provider.js +3 -6
- package/dist/core/search-provider.js.map +1 -1
- package/dist/core/strategies.d.ts.map +1 -1
- package/dist/core/strategies.js +70 -5
- package/dist/core/strategies.js.map +1 -1
- package/dist/core/watch-manager.d.ts +140 -0
- package/dist/core/watch-manager.d.ts.map +1 -0
- package/dist/core/watch-manager.js +348 -0
- package/dist/core/watch-manager.js.map +1 -0
- package/dist/core/youtube.d.ts +91 -0
- package/dist/core/youtube.d.ts.map +1 -0
- package/dist/core/youtube.js +380 -0
- package/dist/core/youtube.js.map +1 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +103 -0
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.js +58 -16
- package/dist/mcp/server.js.map +1 -1
- package/dist/server/app.d.ts.map +1 -1
- package/dist/server/app.js +19 -1
- package/dist/server/app.js.map +1 -1
- package/dist/server/routes/deep-fetch.d.ts +9 -0
- package/dist/server/routes/deep-fetch.d.ts.map +1 -0
- package/dist/server/routes/deep-fetch.js +38 -0
- package/dist/server/routes/deep-fetch.js.map +1 -0
- package/dist/server/routes/extract.d.ts.map +1 -1
- package/dist/server/routes/extract.js +11 -0
- package/dist/server/routes/extract.js.map +1 -1
- package/dist/server/routes/fetch.d.ts.map +1 -1
- package/dist/server/routes/fetch.js +45 -19
- package/dist/server/routes/fetch.js.map +1 -1
- package/dist/server/routes/mcp.d.ts +2 -1
- package/dist/server/routes/mcp.d.ts.map +1 -1
- package/dist/server/routes/mcp.js +307 -38
- package/dist/server/routes/mcp.js.map +1 -1
- package/dist/server/routes/quick-answer.d.ts +9 -0
- package/dist/server/routes/quick-answer.d.ts.map +1 -0
- package/dist/server/routes/quick-answer.js +84 -0
- package/dist/server/routes/quick-answer.js.map +1 -0
- package/dist/server/routes/watch.d.ts +16 -0
- package/dist/server/routes/watch.d.ts.map +1 -0
- package/dist/server/routes/watch.js +219 -0
- package/dist/server/routes/watch.js.map +1 -0
- package/dist/server/routes/youtube.d.ts +7 -0
- package/dist/server/routes/youtube.d.ts.map +1 -0
- package/dist/server/routes/youtube.js +87 -0
- package/dist/server/routes/youtube.js.map +1 -0
- package/dist/types.d.ts +18 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/llms.txt +14 -5
- package/package.json +1 -1
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* YouTube transcript extraction — no API key required.
|
|
3
|
+
*
|
|
4
|
+
* YouTube embeds caption/transcript data directly in the page HTML as JSON
|
|
5
|
+
* (inside ytInitialPlayerResponse). We parse that JSON, extract caption
|
|
6
|
+
* track URLs, fetch the timedtext XML, and return structured transcript data.
|
|
7
|
+
*/
|
|
8
|
+
import { simpleFetch } from './fetcher.js';
|
|
9
|
+
// ---------------------------------------------------------------------------
|
|
10
|
+
// URL parsing
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
/**
 * Extract the video ID from any common YouTube URL format.
 * Returns null if the URL is not a recognisable YouTube URL.
 *
 * Supported formats:
 *   https://www.youtube.com/watch?v=VIDEO_ID
 *   https://youtu.be/VIDEO_ID
 *   https://www.youtube.com/embed/VIDEO_ID
 *   https://www.youtube.com/shorts/VIDEO_ID
 *   https://www.youtube.com/live/VIDEO_ID
 *   https://www.youtube.com/v/VIDEO_ID (old embed format)
 *   https://m.youtube.com/watch?v=VIDEO_ID
 *   URLs with extra params (&t=120, &list=PLxxx, etc.)
 *
 * @param {string} url - Candidate URL; surrounding whitespace is ignored.
 * @returns {string|null} The 11-character video ID, or null when `url` is not
 *   a string, is not an absolute URL, or does not match a supported format.
 */
export function parseYouTubeUrl(url) {
    if (!url || typeof url !== 'string') {
        return null;
    }
    let parsed;
    try {
        parsed = new URL(url.trim());
    }
    catch {
        // `new URL` throws on anything that is not an absolute URL.
        return null;
    }
    // Treat www.youtube.com and m.youtube.com the same as youtube.com.
    const host = parsed.hostname.toLowerCase().replace(/^www\./, '').replace(/^m\./, '');
    // '/embed/ID/extra' -> ['', 'embed', 'ID', 'extra']
    const segments = parsed.pathname.split('/');
    // Path prefixes whose next segment is the video ID.
    const idPathPrefixes = ['embed', 'shorts', 'live', 'v'];
    let candidate = null;
    if (host === 'youtu.be') {
        // https://youtu.be/VIDEO_ID
        candidate = segments[1];
    }
    else if (host === 'youtube.com') {
        if (parsed.pathname === '/watch' || parsed.pathname === '/watch/') {
            // /watch?v=VIDEO_ID
            candidate = parsed.searchParams.get('v');
        }
        else if (segments.length > 2 && idPathPrefixes.includes(segments[1])) {
            // /embed/VIDEO_ID, /shorts/VIDEO_ID, /live/VIDEO_ID, /v/VIDEO_ID
            candidate = segments[2];
        }
    }
    return isValidVideoId(candidate) ? candidate : null;
}
/**
 * Check that `id` is a string of exactly 11 characters drawn from
 * A-Z, a-z, 0-9, underscore and hyphen.
 */
function isValidVideoId(id) {
    return typeof id === 'string' && /^[A-Za-z0-9_-]{11}$/.test(id);
}
|
|
66
|
+
// ---------------------------------------------------------------------------
|
|
67
|
+
// Video info extraction
|
|
68
|
+
// ---------------------------------------------------------------------------
|
|
69
|
+
/**
 * Build a video-metadata record from YouTube watch-page HTML.
 *
 * Values are read from the embedded ytInitialPlayerResponse JSON
 * (`videoDetails` first, then `microformat.playerMicroformatRenderer`).
 * Title and description additionally fall back to the og:title and
 * og:description meta tags. Anything that cannot be found becomes an
 * empty string.
 *
 * @param html - Raw HTML of the video page
 * @returns Object with videoId, title, channel, description, duration,
 *   publishDate, viewCount, likeCount and thumbnail
 */
export function extractVideoInfo(html) {
    const response = extractPlayerResponse(html);
    const details = response?.videoDetails ?? {};
    const micro = response?.microformat?.playerMicroformatRenderer ?? {};
    // URL of the last entry in a holder's thumbnail list, if there is one.
    const lastThumbnailUrl = (holder) => holder.thumbnail?.thumbnails?.slice(-1)[0]?.url;
    const videoId = details.videoId ?? '';
    const rawLength = details.lengthSeconds ?? micro.lengthSeconds ?? '0';
    return {
        videoId,
        title: details.title ??
            micro.title?.simpleText ??
            extractMetaTag(html, 'og:title') ??
            '',
        channel: details.author ?? micro.ownerChannelName ?? '',
        description: details.shortDescription ??
            micro.description?.simpleText ??
            extractMetaTag(html, 'og:description') ??
            '',
        duration: formatDuration(parseInt(rawLength, 10)),
        publishDate: micro.publishDate ?? micro.uploadDate ?? '',
        viewCount: details.viewCount ?? micro.viewCount ?? '',
        // likeCount is often not available without auth
        likeCount: details.likeCount ?? '',
        thumbnail: lastThumbnailUrl(details) ??
            lastThumbnailUrl(micro) ??
            `https://img.youtube.com/vi/${videoId}/maxresdefault.jpg`,
    };
}
|
|
107
|
+
// ---------------------------------------------------------------------------
|
|
108
|
+
// Transcript extraction
|
|
109
|
+
// ---------------------------------------------------------------------------
|
|
110
|
+
/**
 * Download a YouTube video's watch page and return its transcript.
 *
 * Steps: resolve the video ID, fetch the watch page, parse the embedded
 * player response, choose a caption track, fetch that track's XML and
 * split it into segments.
 *
 * @param url - Any URL format accepted by parseYouTubeUrl
 * @param options.language - Preferred language code (default: "en")
 * @returns Object with videoId, title, channel, duration, language,
 *   segments, fullText and availableLanguages
 * @throws Error when the URL is not recognised, the page data cannot be
 *   parsed, or the video exposes no caption tracks
 */
export async function getYouTubeTranscript(url, options = {}) {
    const videoId = parseYouTubeUrl(url);
    if (!videoId) {
        throw new Error(`Not a valid YouTube URL: ${url}`);
    }
    const wantedLanguage = options.language ?? 'en';
    // Always request the canonical watch page, whatever URL shape came in.
    const page = await simpleFetch(`https://www.youtube.com/watch?v=${videoId}`, undefined, 30000);
    const playerResponse = extractPlayerResponse(page.html);
    if (!playerResponse) {
        throw new Error(`Could not parse YouTube page data for video ${videoId}`);
    }
    const tracks = extractCaptionTracks(playerResponse);
    if (tracks.length === 0) {
        throw new Error(`No captions available for video ${videoId}`);
    }
    // Prefer manual over auto-generated, and the requested language over others.
    const track = selectBestTrack(tracks, wantedLanguage);
    const segments = parseCaptionXml(await fetchCaptionXml(track.baseUrl));
    const details = playerResponse.videoDetails ?? {};
    return {
        videoId,
        title: details.title ?? '',
        channel: details.author ?? '',
        duration: formatDuration(parseInt(details.lengthSeconds ?? '0', 10)),
        language: track.languageCode,
        segments,
        // Single-line text: segments joined by spaces, whitespace runs collapsed.
        fullText: segments.map((segment) => segment.text).join(' ').replace(/\s+/g, ' ').trim(),
        availableLanguages: tracks.map((candidate) => candidate.languageCode),
    };
}
|
|
160
|
+
/**
 * Extract the ytInitialPlayerResponse JSON object from page HTML.
 *
 * Strategy:
 *  1. Find each `ytInitialPlayerResponse = {` assignment, walk the braces to
 *     the matching close, and JSON.parse that span. The first span that
 *     parses wins. Walking braces (rather than a lazy regex) stays correct
 *     when string values contain text such as `};var`, and does not depend
 *     on what follows the object.
 *  2. Fallback: locate `"captionTracks"`, parse the object that encloses it,
 *     and expose it under `captions.playerCaptionsTracklistRenderer` so it
 *     has the same shape as a real player response. The parsed object's own
 *     keys are kept at the top level as well.
 *
 * @param {string} html - Raw page HTML
 * @returns {object|null} Parsed object, or null when nothing parseable is
 *   found or `html` is not a string.
 */
export function extractPlayerResponse(html) {
    if (typeof html !== 'string' || html.length === 0) {
        return null;
    }
    // Parse the JSON object whose opening brace sits at `braceStart`; null on failure.
    const parseObjectAt = (braceStart) => {
        const jsonStr = extractJsonObject(html, braceStart);
        if (!jsonStr) {
            return null;
        }
        try {
            return JSON.parse(jsonStr);
        }
        catch {
            // Balanced braces but not JSON (e.g. a JS block); caller tries elsewhere.
            return null;
        }
    };
    // Covers `var ytInitialPlayerResponse = {` and `window["ytInitialPlayerResponse"] = {`.
    const assignment = /ytInitialPlayerResponse["']?\]?\s*=\s*\{/g;
    let match;
    while ((match = assignment.exec(html)) !== null) {
        // The pattern ends on the opening brace.
        const parsed = parseObjectAt(match.index + match[0].length - 1);
        if (parsed !== null) {
            return parsed;
        }
    }
    // Fallback: search for captionTracks directly
    const captionIdx = html.indexOf('"captionTracks"');
    if (captionIdx !== -1) {
        // Walk back to find the enclosing object
        const braceStart = html.lastIndexOf('{', captionIdx);
        if (braceStart !== -1) {
            const enclosing = parseObjectAt(braceStart);
            if (enclosing !== null) {
                if (enclosing.captions === undefined && Array.isArray(enclosing.captionTracks)) {
                    // The enclosing object is the tracklist renderer itself; wrap it
                    // so consumers reading captions.playerCaptionsTracklistRenderer work.
                    return { ...enclosing, captions: { playerCaptionsTracklistRenderer: enclosing } };
                }
                return enclosing;
            }
        }
    }
    return null;
}
/**
 * Extract a complete JSON object starting at position `start` in `str`.
 * Handles nested objects/arrays and string literals.
 *
 * @param {string} str - Text containing the object
 * @param {number} start - Index of the opening brace/bracket
 * @returns {string|null} The substring from `start` through the bracket that
 *   brings the nesting depth back to zero, or null if the text ends first.
 */
function extractJsonObject(str, start) {
    let depth = 0;
    let inString = false;
    let escape = false;
    for (let i = start; i < str.length; i++) {
        const ch = str[i];
        if (escape) {
            // Character after a backslash inside a string: never structural.
            escape = false;
            continue;
        }
        if (ch === '\\' && inString) {
            escape = true;
            continue;
        }
        if (ch === '"') {
            inString = !inString;
            continue;
        }
        if (inString) {
            continue;
        }
        if (ch === '{' || ch === '[') {
            depth++;
        }
        else if (ch === '}' || ch === ']') {
            depth--;
            if (depth === 0) {
                return str.slice(start, i + 1);
            }
        }
    }
    return null;
}
|
|
249
|
+
/**
 * Normalise the caption track list found in a player response.
 *
 * Reads `captions.playerCaptionsTracklistRenderer.captionTracks` and maps
 * each entry to `{ baseUrl, languageCode, name, isAutoGenerated }`. Entries
 * without a baseUrl are dropped. Any error while reading the structure
 * yields an empty array.
 */
function extractCaptionTracks(playerResponse) {
    // A track counts as auto-generated when it is marked kind "asr", its vssId
    // starts with "a.", or its simpleText name contains "auto".
    const looksAutoGenerated = (raw) => {
        if (raw.kind === 'asr') {
            return true;
        }
        if (raw.vssId?.startsWith('a.') ?? false) {
            return true;
        }
        return String(raw.name?.simpleText ?? '').toLowerCase().includes('auto');
    };
    try {
        const rawTracks = playerResponse?.captions?.playerCaptionsTracklistRenderer?.captionTracks;
        if (!Array.isArray(rawTracks)) {
            return [];
        }
        const usable = [];
        for (const raw of rawTracks) {
            const entry = {
                baseUrl: raw.baseUrl ?? '',
                languageCode: (raw.languageCode ?? 'unknown').toLowerCase(),
                name: raw.name?.simpleText ?? raw.name?.runs?.[0]?.text ?? raw.languageCode ?? '',
                isAutoGenerated: looksAutoGenerated(raw),
            };
            if (entry.baseUrl) {
                usable.push(entry);
            }
        }
        return usable;
    }
    catch {
        // Malformed entry (e.g. null track): report no tracks.
        return [];
    }
}
|
|
271
|
+
/**
 * Pick the best caption track for the requested language.
 * Priority: manual track in preferred language > auto-generated in preferred
 * language > any manual > any.
 *
 * Within each "preferred language" tier an exact tag match ("pt-br") beats a
 * match on the primary subtag only ("pt", "pt-pt"). Languages are compared by
 * whole subtag, so "fi" does not match "fil".
 *
 * @param {Array<{languageCode: string, isAutoGenerated: boolean}>} tracks
 * @param {string} preferredLang - Language tag such as "en" or "en-US"
 * @returns The chosen track, or undefined when `tracks` is empty.
 */
function selectBestTrack(tracks, preferredLang) {
    const wanted = String(preferredLang ?? '').trim().toLowerCase();
    const primary = wanted.split('-')[0]; // "en-us" → "en"
    const codeOf = (track) => String(track.languageCode ?? '').toLowerCase();
    const isExact = (track) => wanted !== '' && codeOf(track) === wanted;
    const isSameLanguage = (track) => {
        if (primary === '') {
            return false;
        }
        const code = codeOf(track);
        return code === primary || code.startsWith(`${primary}-`);
    };
    const manual = tracks.filter((track) => !track.isAutoGenerated);
    const auto = tracks.filter((track) => track.isAutoGenerated);
    return (
        // 1. Manual in preferred language
        manual.find(isExact) ??
        manual.find(isSameLanguage) ??
        // 2. Auto-generated in preferred language
        auto.find(isExact) ??
        auto.find(isSameLanguage) ??
        // 3. Any manual track
        manual[0] ??
        // 4. Fall back to first available
        tracks[0]
    );
}
|
|
292
|
+
/**
 * Fetch the caption XML for a caption track.
 *
 * Any `fmt` query parameter on the track URL is removed before fetching, so
 * the response is whatever the endpoint returns without an explicit format.
 * parseCaptionXml expects `<text start="…" dur="…">` elements.
 * NOTE(review): an earlier revision set `fmt=srv3` and then deleted it on the
 * next statement; only the delete ever took effect, so that is what is kept.
 *
 * @param {string} baseUrl - Absolute caption track URL
 * @returns {Promise<string>} The response body (`html` field of the fetch result)
 */
async function fetchCaptionXml(baseUrl) {
    const url = new URL(baseUrl);
    url.searchParams.delete('fmt');
    const result = await simpleFetch(url.toString(), undefined, 15000);
    return result.html;
}
|
|
304
|
+
/**
 * Turn YouTube caption XML into an ordered list of transcript segments.
 *
 * Expected input:
 *   <transcript><text start="0.5" dur="2.1">Hello &amp; world</text>...</transcript>
 *
 * Each <text> element becomes `{ text, start, duration }`, where start and
 * duration are parsed as floats (a missing attribute counts as "0") and text
 * is passed through decodeHtmlEntities. Elements whose decoded text is empty
 * are skipped.
 */
export function parseCaptionXml(xml) {
    // Group 1: the attribute string, group 2: the element body.
    const textElement = /<text\s+([^>]*)>([\s\S]*?)<\/text>/g;
    const segments = [];
    for (let found = textElement.exec(xml); found !== null; found = textElement.exec(xml)) {
        const [, attributes, body] = found;
        const text = decodeHtmlEntities(body.trim());
        if (!text) {
            continue;
        }
        segments.push({
            text,
            start: parseFloat(extractAttr(attributes, 'start') ?? '0'),
            duration: parseFloat(extractAttr(attributes, 'dur') ?? '0'),
        });
    }
    return segments;
}
|
|
326
|
+
/**
 * Extract an attribute value from an HTML/XML attribute string.
 *
 * The attribute name is matched case-insensitively and must be a whole
 * attribute name: it has to sit at the start of the string or after
 * whitespace, so looking up `dur` does not match `xdur="…"`. Both
 * double-quoted and single-quoted values are accepted.
 *
 * @param {string} attrs - Attribute portion of a tag, e.g. `start="0.5" dur="2.1"`
 * @param {string} name - Attribute name to look up
 * @returns {string|null} The attribute value (possibly empty), or null if absent.
 */
function extractAttr(attrs, name) {
    // Escape regex metacharacters so the name is matched literally.
    const escapedName = String(name).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const pattern = new RegExp(`(?:^|\\s)${escapedName}\\s*=\\s*(?:"([^"]*)"|'([^']*)')`, 'i');
    const m = String(attrs ?? '').match(pattern);
    if (!m) {
        return null;
    }
    // Exactly one of the two capture groups participates in a match.
    return m[1] ?? m[2];
}
|
|
334
|
+
/**
 * Decode common HTML entities found in YouTube caption XML.
 *
 * Order of operations:
 *  1. Strip real HTML tags (e.g. <font color="...">) — these appear literally in the XML
 *  2. Decode entities in a single pass: the named entities lt, gt, amp, quot
 *     and apos, plus decimal (&#NNN;) and hexadecimal (&#xHH;) references.
 *
 * Decoding in one pass means the output of one replacement is never decoded
 * again, so an escaped ampersand followed by "quot;" stays as the literal
 * text of that entity instead of turning into a quote character. Numeric
 * references use String.fromCodePoint so characters above U+FFFF (emoji)
 * decode correctly. Unknown names and out-of-range numbers are left untouched.
 *
 * @param {string} text - Text possibly containing tags and entities
 * @returns {string} Decoded text with surrounding whitespace trimmed
 */
export function decodeHtmlEntities(text) {
    const namedEntities = new Map([
        ['lt', '<'],
        ['gt', '>'],
        ['amp', '&'],
        ['quot', '"'],
        ['apos', "'"],
    ]);
    const MAX_CODE_POINT = 0x10ffff;
    return text
        // Step 1: strip real inline HTML tags (literal <...> in the text, not entities)
        .replace(/<[^>]+>/g, '')
        // Step 2: decode every supported entity in one pass
        .replace(/&(#[xX][0-9A-Fa-f]+|#\d+|[A-Za-z]+);/g, (entity, body) => {
            if (body[0] !== '#') {
                return namedEntities.get(body) ?? entity;
            }
            const isHex = body[1] === 'x' || body[1] === 'X';
            const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
            // fromCodePoint throws above U+10FFFF; keep such references verbatim.
            return codePoint <= MAX_CODE_POINT ? String.fromCodePoint(codePoint) : entity;
        })
        .trim();
}
|
|
358
|
+
/**
 * Format seconds into M:SS or H:MM:SS.
 *
 * Fractional seconds are truncated. Anything that is not a positive finite
 * number (0, NaN, negative values, Infinity, null/undefined) yields "0:00".
 *
 * @param {number} seconds - Duration in seconds
 * @returns {string} e.g. "0:59", "12:05", "1:01:01"
 */
export function formatDuration(seconds) {
    const total = Math.floor(Number(seconds));
    if (!Number.isFinite(total) || total <= 0) {
        return '0:00';
    }
    const h = Math.floor(total / 3600);
    const m = Math.floor((total % 3600) / 60);
    const s = total % 60;
    const ss = String(s).padStart(2, '0');
    if (h > 0) {
        return `${h}:${String(m).padStart(2, '0')}:${ss}`;
    }
    return `${m}:${ss}`;
}
|
|
372
|
+
/**
 * Extract a meta tag value from HTML (og:title, og:description, etc.)
 *
 * Looks for a <meta> tag whose `property` or `name` attribute equals
 * `property` and returns its `content` attribute, entity-decoded. Both
 * attribute orders are supported. The content value ends at the quote
 * character that opened it, so an apostrophe inside a double-quoted value
 * (or a double quote inside a single-quoted one) does not cut it short.
 *
 * @param {string} html - Page HTML
 * @param {string} property - Meta property/name to look up (matched literally)
 * @returns {string|null} Decoded content, or null when the tag is missing or
 *   its content is empty.
 */
function extractMetaTag(html, property) {
    // Escape regex metacharacters so the property is matched literally.
    const escaped = property.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // Key attribute: closing quote must equal the opening one.
    const key = `(?:property|name)=(?<q>["'])${escaped}\\k<q>`;
    // Value attribute: double-quoted or single-quoted.
    const value = `content=(?:"(?<dq>[^"]*)"|'(?<sq>[^']*)')`;
    const keyFirst = new RegExp(`<meta[^>]+${key}[^>]+${value}`, 'i');
    const valueFirst = new RegExp(`<meta[^>]+${value}[^>]+${key}`, 'i');
    const m = html.match(keyFirst) ?? html.match(valueFirst);
    if (!m) {
        return null;
    }
    const raw = m.groups.dq ?? m.groups.sq;
    return raw ? decodeHtmlEntities(raw) : null;
}
|
|
380
|
+
//# sourceMappingURL=youtube.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"youtube.js","sourceRoot":"","sources":["../../src/core/youtube.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AA4C3C,8EAA8E;AAC9E,cAAc;AACd,8EAA8E;AAE9E;;;;;;;;;;GAUG;AACH,MAAM,UAAU,eAAe,CAAC,GAAW;IACzC,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC;IAEjD,IAAI,MAAW,CAAC;IAChB,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;IAC/B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;IAED,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IAErF,IAAI,IAAI,KAAK,UAAU,EAAE,CAAC;QACxB,4BAA4B;QAC5B,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAClD,OAAO,cAAc,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;IACxC,CAAC;IAED,IAAI,IAAI,KAAK,aAAa,EAAE,CAAC;QAC3B,oBAAoB;QACpB,IAAI,MAAM,CAAC,QAAQ,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;YAClE,MAAM,EAAE,GAAG,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACxC,OAAO,EAAE,IAAI,cAAc,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;QAC9C,CAAC;QAED,kBAAkB;QAClB,IAAI,MAAM,CAAC,QAAQ,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;YAC1C,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YACzC,OAAO,EAAE,IAAI,cAAc,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;QAC9C,CAAC;QAED,mBAAmB;QACnB,IAAI,MAAM,CAAC,QAAQ,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAC3C,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YACzC,OAAO,EAAE,IAAI,cAAc,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;QAC9C,CAAC;QAED,iCAAiC;QACjC,IAAI,MAAM,CAAC,QAAQ,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;YACtC,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YACzC,OAAO,EAAE,IAAI,cAAc,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;QAC9C,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,cAAc,CAAC,EAAU;IAChC,OAAO,OAAO,EAAE,KAAK,QAAQ,IAAI,qBAAqB,CAAC,IAAI,CA
AC,EAAE,CAAC,CAAC;AAClE,CAAC;AAED,8EAA8E;AAC9E,wBAAwB;AACxB,8EAA8E;AAE9E;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAAC,IAAY;IAC3C,MAAM,cAAc,GAAG,qBAAqB,CAAC,IAAI,CAAC,CAAC;IAEnD,MAAM,YAAY,GAAG,cAAc,EAAE,YAAY,IAAI,EAAE,CAAC;IACxD,MAAM,WAAW,GAAG,cAAc,EAAE,WAAW,EAAE,yBAAyB,IAAI,EAAE,CAAC;IAEjF,MAAM,OAAO,GAAG,YAAY,CAAC,OAAO,IAAI,EAAE,CAAC;IAC3C,MAAM,KAAK,GACT,YAAY,CAAC,KAAK;QAClB,WAAW,CAAC,KAAK,EAAE,UAAU;QAC7B,cAAc,CAAC,IAAI,EAAE,UAAU,CAAC;QAChC,EAAE,CAAC;IACL,MAAM,OAAO,GAAG,YAAY,CAAC,MAAM,IAAI,WAAW,CAAC,gBAAgB,IAAI,EAAE,CAAC;IAC1E,MAAM,aAAa,GAAG,QAAQ,CAAC,YAAY,CAAC,aAAa,IAAI,WAAW,CAAC,aAAa,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;IACnG,MAAM,SAAS,GAAG,YAAY,CAAC,SAAS,IAAI,WAAW,CAAC,SAAS,IAAI,EAAE,CAAC;IACxE,MAAM,WAAW,GAAG,WAAW,CAAC,WAAW,IAAI,WAAW,CAAC,UAAU,IAAI,EAAE,CAAC;IAC5E,MAAM,WAAW,GACf,YAAY,CAAC,gBAAgB;QAC7B,WAAW,CAAC,WAAW,EAAE,UAAU;QACnC,cAAc,CAAC,IAAI,EAAE,gBAAgB,CAAC;QACtC,EAAE,CAAC;IACL,MAAM,SAAS,GACb,YAAY,CAAC,SAAS,EAAE,UAAU,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG;QACrD,WAAW,CAAC,SAAS,EAAE,UAAU,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG;QACpD,8BAA8B,OAAO,oBAAoB,CAAC;IAE5D,gDAAgD;IAChD,MAAM,SAAS,GAAG,YAAY,CAAC,SAAS,IAAI,EAAE,CAAC;IAE/C,OAAO;QACL,OAAO;QACP,KAAK;QACL,OAAO;QACP,WAAW;QACX,QAAQ,EAAE,cAAc,CAAC,aAAa,CAAC;QACvC,WAAW;QACX,SAAS;QACT,SAAS;QACT,SAAS;KACV,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,wBAAwB;AACxB,8EAA8E;AAE9E;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,GAAW,EACX,UAAiC,EAAE;IAEnC,MAAM,OAAO,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC;IACrC,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,4BAA4B,GAAG,EAAE,CAAC,CAAC;IACrD,CAAC;IAED,MAAM,aAAa,GAAG,OAAO,CAAC,QAAQ,IAAI,IAAI,CAAC;IAE/C,uBAAuB;IACvB,MAAM,QAAQ,GAAG,mCAAmC,OAAO,EAAE,CAAC;IAC9D,MAAM,WAAW,GAAG,MAAM,WAAW,CAAC,QAAQ,EAAE,SAAS,EAAE,KAAK,CAAC,CAAC;IAClE,MAAM,IAAI,GAAG,WAAW,CAAC,IAAI,CAAC;IAE9B,0BAA0B;IAC1B,MAAM,cAAc,GAAG,qBAAqB,CAAC,IAAI,CAAC,CAAC;IACnD,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,MAAM,IAAI,KAAK,CAAC,+CAA+C,OAAO,EAAE,CAAC,CAAC;IAC5E,CAAC;IAED,qBAAqB;IACrB,MAAM,YAAY,GAAG,cAAc,CAAC,YAAY,IAAI,EAAE,CAAC;IACvD,M
AAM,KAAK,GAAG,YAAY,CAAC,KAAK,IAAI,EAAE,CAAC;IACvC,MAAM,OAAO,GAAG,YAAY,CAAC,MAAM,IAAI,EAAE,CAAC;IAC1C,MAAM,aAAa,GAAG,QAAQ,CAAC,YAAY,CAAC,aAAa,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;IAEtE,yBAAyB;IACzB,MAAM,aAAa,GAAmB,oBAAoB,CAAC,cAAc,CAAC,CAAC;IAC3E,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/B,MAAM,IAAI,KAAK,CAAC,mCAAmC,OAAO,EAAE,CAAC,CAAC;IAChE,CAAC;IAED,MAAM,kBAAkB,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC;IAElE,kFAAkF;IAClF,MAAM,aAAa,GAAG,eAAe,CAAC,aAAa,EAAE,aAAa,CAAC,CAAC;IAEpE,wBAAwB;IACxB,MAAM,UAAU,GAAG,MAAM,eAAe,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;IAEhE,iBAAiB;IACjB,MAAM,QAAQ,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;IAE7C,MAAM,QAAQ,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IAEjF,OAAO;QACL,OAAO;QACP,KAAK;QACL,OAAO;QACP,QAAQ,EAAE,cAAc,CAAC,aAAa,CAAC;QACvC,QAAQ,EAAE,aAAa,CAAC,YAAY;QACpC,QAAQ;QACR,QAAQ;QACR,kBAAkB;KACnB,CAAC;AACJ,CAAC;AAaD;;GAEG;AACH,MAAM,UAAU,qBAAqB,CAAC,IAAY;IAChD,qDAAqD;IACrD,MAAM,QAAQ,GAAG;QACf,+CAA+C;QAC/C,oEAAoE;QACpE,6DAA6D;QAC7D,6DAA6D;KAC9D,CAAC;IAEF,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAClC,IAAI,KAAK,EAAE,CAAC;YACV,IAAI,CAAC;gBACH,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9B,CAAC;YAAC,MAAM,CAAC;gBACP,0DAA0D;gBAC1D,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,yBAAyB,CAAC,CAAC;gBACtD,IAAI,KAAK,KAAK,CAAC,CAAC;oBAAE,SAAS;gBAC3B,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;gBAC5C,IAAI,UAAU,KAAK,CAAC,CAAC;oBAAE,SAAS;gBAChC,MAAM,OAAO,GAAG,iBAAiB,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;gBACpD,IAAI,OAAO,EAAE,CAAC;oBACZ,IAAI,CAAC;wBACH,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;oBAC7B,CAAC;oBAAC,MAAM,CAAC;wBACP,kCAAkC;oBACpC,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,8CAA8C;IAC9C,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,iBAAiB,CAAC,CAAC;IACnD,IAAI,UAAU,KAAK,CAAC,CAAC,EAAE,CAAC;QACtB,yCAAyC;QACzC,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;QACrD,IAAI,UAAU,KAAK,CAAC,
CAAC,EAAE,CAAC;YACtB,MAAM,OAAO,GAAG,iBAAiB,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;YACpD,IAAI,OAAO,EAAE,CAAC;gBACZ,IAAI,CAAC;oBACH,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;gBAC7B,CAAC;gBAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;GAGG;AACH,SAAS,iBAAiB,CAAC,GAAW,EAAE,KAAa;IACnD,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,QAAQ,GAAG,KAAK,CAAC;IACrB,IAAI,MAAM,GAAG,KAAK,CAAC;IAEnB,KAAK,IAAI,CAAC,GAAG,KAAK,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,MAAM,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;QAElB,IAAI,MAAM,EAAE,CAAC;YACX,MAAM,GAAG,KAAK,CAAC;YACf,SAAS;QACX,CAAC;QAED,IAAI,EAAE,KAAK,IAAI,IAAI,QAAQ,EAAE,CAAC;YAC5B,MAAM,GAAG,IAAI,CAAC;YACd,SAAS;QACX,CAAC;QAED,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;YACf,QAAQ,GAAG,CAAC,QAAQ,CAAC;YACrB,SAAS;QACX,CAAC;QAED,IAAI,QAAQ;YAAE,SAAS;QAEvB,IAAI,EAAE,KAAK,GAAG,IAAI,EAAE,KAAK,GAAG;YAAE,KAAK,EAAE,CAAC;aACjC,IAAI,EAAE,KAAK,GAAG,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;YAClC,KAAK,EAAE,CAAC;YACR,IAAI,KAAK,KAAK,CAAC,EAAE,CAAC;gBAChB,OAAO,GAAG,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;YACjC,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,SAAS,oBAAoB,CAAC,cAAmC;IAC/D,IAAI,CAAC;QACH,MAAM,MAAM,GACV,cAAc,EAAE,QAAQ,EAAE,+BAA+B,EAAE,aAAa,CAAC;QAC3E,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC;YAAE,OAAO,EAAE,CAAC;QAEtC,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC;YAC7B,OAAO,EAAE,CAAC,CAAC,OAAO,IAAI,EAAE;YACxB,YAAY,EAAE,CAAC,CAAC,CAAC,YAAY,IAAI,SAAS,CAAC,CAAC,WAAW,EAAE;YACzD,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,UAAU,IAAI,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,IAAI,CAAC,CAAC,YAAY,IAAI,EAAE;YAC3E,eAAe,EACb,CAAC,CAAC,CAAC,IAAI,KAAK,KAAK,CAAC;gBAClB,CAAC,CAAC,CAAC,KAAK,EAAE,UAAU,CAAC,IAAI,CAAC,IAAI,KAAK,CAAC;gBACpC,MAAM,CAAC,CAAC,CAAC,IAAI,EAAE,UAAU,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAC/D,KAAK;SACR,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;IAC7B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,SAAS,eAAe,C
AAC,MAAsB,EAAE,aAAqB;IACpE,MAAM,IAAI,GAAG,aAAa,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,iBAAiB;IAEzE,kCAAkC;IAClC,MAAM,UAAU,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,eAAe,IAAI,CAAC,CAAC,YAAY,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;IAC3F,IAAI,UAAU;QAAE,OAAO,UAAU,CAAC;IAElC,0CAA0C;IAC1C,MAAM,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,eAAe,IAAI,CAAC,CAAC,YAAY,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;IACxF,IAAI,QAAQ;QAAE,OAAO,QAAQ,CAAC;IAE9B,sBAAsB;IACtB,MAAM,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;IACvD,IAAI,SAAS;QAAE,OAAO,SAAS,CAAC;IAEhC,kCAAkC;IAClC,OAAO,MAAM,CAAC,CAAC,CAAC,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,eAAe,CAAC,OAAe;IAC5C,gDAAgD;IAChD,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;IAC7B,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,CAAE,6BAA6B;IACnE,iCAAiC;IACjC,GAAG,CAAC,YAAY,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAE/B,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,KAAK,CAAC,CAAC;IACnE,OAAO,MAAM,CAAC,IAAI,CAAC;AACrB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,eAAe,CAAC,GAAW;IACzC,MAAM,QAAQ,GAAwB,EAAE,CAAC;IAEzC,kDAAkD;IAClD,MAAM,SAAS,GAAG,qCAAqC,CAAC;IACxD,IAAI,KAA6B,CAAC;IAElC,OAAO,CAAC,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC9C,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACvB,MAAM,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QAEzB,MAAM,KAAK,GAAG,UAAU,CAAC,WAAW,CAAC,KAAK,EAAE,OAAO,CAAC,IAAI,GAAG,CAAC,CAAC;QAC7D,MAAM,QAAQ,GAAG,UAAU,CAAC,WAAW,CAAC,KAAK,EAAE,KAAK,CAAC,IAAI,GAAG,CAAC,CAAC;QAC9D,MAAM,IAAI,GAAG,kBAAkB,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;QAEhD,IAAI,IAAI,EAAE,CAAC;YACT,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC;QAC3C,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,SAAS,WAAW,CAAC,KAAa,EAAE,IAAY;IAC9C,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,GAAG,IAAI,YAAY,EAAE,GAAG,CAAC,CAAC;IACnD,MAAM,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAC7B,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AACzB,CA
AC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,kBAAkB,CAAC,IAAY;IAC7C,OAAO,IAAI;QACT,gFAAgF;SAC/E,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC;QACxB,+BAA+B;SAC9B,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,CAAC,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC;SAC1E,OAAO,CAAC,qBAAqB,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,CAAC;SAClF,IAAI,EAAE,CAAC;AACZ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,OAAe;IAC5C,IAAI,CAAC,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC;QAAE,OAAO,MAAM,CAAC;IAC9C,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC;IACrC,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC;IAC5C,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,EAAE,CAAC,CAAC;IAEnC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;QACV,OAAO,GAAG,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;IAC5E,CAAC;IACD,OAAO,GAAG,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;AAC9C,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,IAAY,EAAE,QAAgB;IACpD,MAAM,KAAK,GAAG,IAAI,MAAM,CACtB,mCAAmC,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,KAAK,CAAC,mCAAmC,EACnG,GAAG,CACJ,CAAC;IACF,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC,KAAK,CACvC,IAAI,MAAM,CAAC,gEAAgE,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,KAAK,CAAC,MAAM,EAAE,GAAG,CAAC,CACrH,CAAC;IACF,OAAO,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AAC7C,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
import { cleanup, warmup, closePool, scrollAndWait, closeProfileBrowser } from './core/fetcher.js';
|
|
7
7
|
import type { PeelOptions, PeelResult } from './types.js';
|
|
8
8
|
export * from './types.js';
|
|
9
|
+
export { getDomainExtractor, extractDomainData, type DomainExtractResult, type DomainExtractor } from './core/domain-extractors.js';
|
|
9
10
|
export { crawl, type CrawlOptions, type CrawlResult, type CrawlProgress } from './core/crawler.js';
|
|
10
11
|
export { discoverSitemap, type SitemapUrl, type SitemapResult } from './core/sitemap.js';
|
|
11
12
|
export { mapDomain, type MapOptions, type MapResult } from './core/map.js';
|
|
@@ -23,11 +24,14 @@ export { RateGovernor, formatDuration, type RateConfig, type RateState, type Can
|
|
|
23
24
|
export { ApplicationTracker, type ApplicationRecord, type ApplicationFilter, type ApplicationStats, type ApplicationStatus, } from './core/application-tracker.js';
|
|
24
25
|
export { applyToJob, loadApplications, saveApplication, getApplicationsToday, updateApplicationStatus, type ApplyProfile, type ApplyOptions, type ApplyProgressEvent, type DetectedField, type ApplyResult, type ApplicationRecord as ApplyApplicationRecord, } from './core/apply.js';
|
|
25
26
|
export { extractListings, type ListingItem } from './core/extract-listings.js';
|
|
27
|
+
export { parseYouTubeUrl, extractVideoInfo, extractPlayerResponse, parseCaptionXml, decodeHtmlEntities, getYouTubeTranscript, type TranscriptSegment, type YouTubeTranscript, type YouTubeVideoInfo, } from './core/youtube.js';
|
|
26
28
|
export { formatTable } from './core/table-format.js';
|
|
27
29
|
export { findNextPageUrl } from './core/paginate.js';
|
|
28
30
|
export { distillToBudget, budgetListings, TOKENS_PER_LISTING_ITEM } from './core/budget.js';
|
|
29
31
|
export { watch, parseDuration, parseAssertion, type WatchOptions, type Assertion, type WatchCheckResult, type AssertionResult, } from './core/watch.js';
|
|
30
32
|
export { diffUrl, type DiffOptions, type DiffResult, type DiffChange, } from './core/diff.js';
|
|
33
|
+
export { extractReadableContent, type ReadabilityResult, type ReadabilityOptions } from './core/readability.js';
|
|
34
|
+
export { quickAnswer, type QuickAnswerOptions, type QuickAnswerResult } from './core/quick-answer.js';
|
|
31
35
|
/**
|
|
32
36
|
* Fetch and extract content from a URL
|
|
33
37
|
*
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAQH,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,aAAa,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAQH,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,aAAa,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AASnG,OAAO,KAAK,EAAE,WAAW,EAAE,UAAU,EAAa,MAAM,YAAY,CAAC;AAErE,cAAc,YAAY,CAAC;AAC3B,OAAO,EAAE,kBAAkB,EAAE,iBAAiB,EAAE,KAAK,mBAAmB,EAAE,KAAK,eAAe,EAAE,MAAM,6BAA6B,CAAC;AACpI,OAAO,EAAE,KAAK,EAAE,KAAK,YAAY,EAAE,KAAK,WAAW,EAAE,KAAK,aAAa,EAAE,MAAM,mBAAmB,CAAC;AACnG,OAAO,EAAE,eAAe,EAAE,KAAK,UAAU,EAAE,KAAK,aAAa,EAAE,MAAM,mBAAmB,CAAC;AACzF,OAAO,EAAE,SAAS,EAAE,KAAK,UAAU,EAAE,KAAK,SAAS,EAAE,MAAM,eAAe,CAAC;AAC3E,OAAO,EAAE,eAAe,EAAE,KAAK,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAC3E,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,cAAc,EAAE,KAAK,YAAY,EAAE,KAAK,QAAQ,EAAE,MAAM,2BAA2B,CAAC;AACvH,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACnD,OAAO,EAAE,uBAAuB,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,KAAK,wBAAwB,EAAE,MAAM,qBAAqB,CAAC;AAClI,OAAO,EAAE,iBAAiB,EAAE,KAAK,oBAAoB,EAAE,KAAK,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AAClH,OAAO,EAAE,QAAQ,EAAE,KAAK,YAAY,EAAE,KAAK,WAAW,EAAE,KAAK,aAAa,EAAE,KAAK,gBAAgB,EAAE,KAAK,UAAU,EAAE,KAAK,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7J,OAAO,EAAE,gBAAgB,EAAE,KAAK,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAC9E,OAAO,EACL,iBAAiB,EACjB,kBAAkB,EAClB,mBAAmB,EACnB,KAAK,cAAc,EACnB,KAAK,gBAAgB,EACrB,KAAK,eAAe,EACpB,KAAK,gBAAgB,GACtB,MAAM,2BAA2B,CAAC;AACnC,OAAO,EACL,cAAc,EACd,KAAK,aAAa,EAClB,KAAK,cAAc,EACnB,KAAK,cAAc,EACnB,KAAK,aAAa,EAClB,KAAK,UAAU,GAChB,MAAM,kBAAkB,CAAC;AAE1B,OAAO,EAAE,UAAU,EAAE,KAAK,OAAO,EAAE,KAAK,SAAS,EAAE,KAAK,gBAAgB,EAAE,KAAK,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACvH,OAAO,EACL,YAAY,EACZ,cAAc,EACd,KAAK,UAAU,EACf,KAAK,SAAS,EACd,KAAK,cAAc,GACpB,MAAM,yBAAyB,CAAC;AACjC,OAAO,EACL,kBAAkB,EAClB,KAAK,iBAAiB,EACtB,KAAK,iBAAiB,EACtB,KAAK,gBAAgB,EACrB,KAAK,iBAAiB,GACvB,MAAM,+BAA+B,CAAC;AACvC,OAAO,EACL,UAAU,EACV,gBAAgB,EAChB,eAAe,EACf,oBAAoB,EACpB,uBAAuB,EACvB,KAAK,YAAY,EACjB,KAAK,YAAY,EACjB,KAAK,kBAAkB,EACvB,KAAK,aAAa,EAClB,KAAK,WAAW,EAChB,KAAK,iBAAiB,IAAI,sBAAsB,GACjD,MAAM,iBAAiB,CAAC;AAEzB,OAAO,EAAE,eAAe,EAAE,KAAK,WAAW,EAAE,MAAM,4BAA4B,CAAC;
AAC/E,OAAO,EACL,eAAe,EACf,gBAAgB,EAChB,qBAAqB,EACrB,eAAe,EACf,kBAAkB,EAClB,oBAAoB,EACpB,KAAK,iBAAiB,EACtB,KAAK,iBAAiB,EACtB,KAAK,gBAAgB,GACtB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAAE,WAAW,EAAE,MAAM,wBAAwB,CAAC;AACrD,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,uBAAuB,EAAE,MAAM,kBAAkB,CAAC;AAC5F,OAAO,EACL,KAAK,EACL,aAAa,EACb,cAAc,EACd,KAAK,YAAY,EACjB,KAAK,SAAS,EACd,KAAK,gBAAgB,EACrB,KAAK,eAAe,GACrB,MAAM,iBAAiB,CAAC;AACzB,OAAO,EACL,OAAO,EACP,KAAK,WAAW,EAChB,KAAK,UAAU,EACf,KAAK,UAAU,GAChB,MAAM,gBAAgB,CAAC;AACxB,OAAO,EAAE,sBAAsB,EAAE,KAAK,iBAAiB,EAAE,KAAK,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AAChH,OAAO,EAAE,WAAW,EAAE,KAAK,kBAAkB,EAAE,KAAK,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAEtG;;;;;;;;;;;;;;;GAeG;AACH,wBAAsB,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,WAAgB,GAAG,OAAO,CAAC,UAAU,CAAC,CA4etF;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,SAAS,CAC7B,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,GAAE,WAAW,GAAG;IAAE,WAAW,CAAC,EAAE,MAAM,CAAA;CAAO,GACnD,OAAO,CAAC,CAAC,UAAU,GAAG;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,EAAE,CAAC,CAwB1D;AAED;;;GAGG;AACH,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,aAAa,EAAE,mBAAmB,EAAE,CAAC;AAC1E,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAChF,OAAO,EACL,qBAAqB,EACrB,WAAW,EACX,qBAAqB,GACtB,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EACL,UAAU,EACV,cAAc,EACd,SAAS,EACT,YAAY,EACZ,SAAS,EACT,iBAAiB,EACjB,UAAU,EACV,WAAW,EACX,oBAAoB,EACpB,aAAa,EACb,WAAW,EACX,eAAe,EACf,WAAW,EACX,KAAK,WAAW,GACjB,MAAM,iBAAiB,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -13,7 +13,12 @@ import { cleanup, warmup, closePool, scrollAndWait, closeProfileBrowser } from '
|
|
|
13
13
|
import { autoScroll as runAutoScroll } from './core/actions.js';
|
|
14
14
|
import { extractStructured } from './core/extract.js';
|
|
15
15
|
import { isPdfContentType, isDocxContentType, extractDocumentToFormat } from './core/documents.js';
|
|
16
|
+
import { parseYouTubeUrl, getYouTubeTranscript } from './core/youtube.js';
|
|
17
|
+
import { extractDomainData, getDomainExtractor } from './core/domain-extractors.js';
|
|
18
|
+
import { extractReadableContent } from './core/readability.js';
|
|
19
|
+
import { quickAnswer as runQuickAnswer } from './core/quick-answer.js';
|
|
16
20
|
export * from './types.js';
|
|
21
|
+
export { getDomainExtractor, extractDomainData } from './core/domain-extractors.js';
|
|
17
22
|
export { crawl } from './core/crawler.js';
|
|
18
23
|
export { discoverSitemap } from './core/sitemap.js';
|
|
19
24
|
export { mapDomain } from './core/map.js';
|
|
@@ -32,11 +37,14 @@ export { ApplicationTracker, } from './core/application-tracker.js';
|
|
|
32
37
|
export { applyToJob, loadApplications, saveApplication, getApplicationsToday, updateApplicationStatus, } from './core/apply.js';
|
|
33
38
|
// Human behavior exports — see bottom of file for full export
|
|
34
39
|
export { extractListings } from './core/extract-listings.js';
|
|
40
|
+
export { parseYouTubeUrl, extractVideoInfo, extractPlayerResponse, parseCaptionXml, decodeHtmlEntities, getYouTubeTranscript, } from './core/youtube.js';
|
|
35
41
|
export { formatTable } from './core/table-format.js';
|
|
36
42
|
export { findNextPageUrl } from './core/paginate.js';
|
|
37
43
|
export { distillToBudget, budgetListings, TOKENS_PER_LISTING_ITEM } from './core/budget.js';
|
|
38
44
|
export { watch, parseDuration, parseAssertion, } from './core/watch.js';
|
|
39
45
|
export { diffUrl, } from './core/diff.js';
|
|
46
|
+
export { extractReadableContent } from './core/readability.js';
|
|
47
|
+
export { quickAnswer } from './core/quick-answer.js';
|
|
40
48
|
/**
|
|
41
49
|
* Fetch and extract content from a URL
|
|
42
50
|
*
|
|
@@ -90,6 +98,59 @@ export async function peel(url, options = {}) {
|
|
|
90
98
|
if (autoScrollOpts) {
|
|
91
99
|
render = true;
|
|
92
100
|
}
|
|
101
|
+
// ---------------------------------------------------------------------------
|
|
102
|
+
// YouTube special case — extract transcript instead of fetching the page HTML
|
|
103
|
+
// ---------------------------------------------------------------------------
|
|
104
|
+
const ytVideoId = parseYouTubeUrl(url);
|
|
105
|
+
if (ytVideoId) {
|
|
106
|
+
const ytStartTime = Date.now();
|
|
107
|
+
try {
|
|
108
|
+
const transcript = await getYouTubeTranscript(url, {
|
|
109
|
+
language: options.language ?? 'en',
|
|
110
|
+
});
|
|
111
|
+
// Build a clean markdown representation of the video + transcript
|
|
112
|
+
const videoInfoLines = [
|
|
113
|
+
`# ${transcript.title}`,
|
|
114
|
+
'',
|
|
115
|
+
`**Channel:** ${transcript.channel}`,
|
|
116
|
+
`**Duration:** ${transcript.duration}`,
|
|
117
|
+
`**Language:** ${transcript.language}`,
|
|
118
|
+
transcript.availableLanguages.length > 1
|
|
119
|
+
? `**Available Languages:** ${transcript.availableLanguages.join(', ')}`
|
|
120
|
+
: '',
|
|
121
|
+
'',
|
|
122
|
+
'## Transcript',
|
|
123
|
+
'',
|
|
124
|
+
transcript.fullText,
|
|
125
|
+
].filter(l => l !== undefined);
|
|
126
|
+
const videoInfoContent = videoInfoLines.join('\n');
|
|
127
|
+
const elapsed = Date.now() - ytStartTime;
|
|
128
|
+
const tokens = estimateTokens(videoInfoContent);
|
|
129
|
+
const fingerprint = createHash('sha256').update(videoInfoContent).digest('hex').slice(0, 16);
|
|
130
|
+
return {
|
|
131
|
+
url: `https://www.youtube.com/watch?v=${ytVideoId}`,
|
|
132
|
+
title: transcript.title,
|
|
133
|
+
content: videoInfoContent,
|
|
134
|
+
metadata: {
|
|
135
|
+
description: `YouTube video by ${transcript.channel}, duration ${transcript.duration}`,
|
|
136
|
+
author: transcript.channel,
|
|
137
|
+
},
|
|
138
|
+
links: [`https://www.youtube.com/watch?v=${ytVideoId}`],
|
|
139
|
+
tokens,
|
|
140
|
+
method: 'simple',
|
|
141
|
+
elapsed,
|
|
142
|
+
contentType: 'youtube',
|
|
143
|
+
quality: 1.0,
|
|
144
|
+
fingerprint,
|
|
145
|
+
extracted: undefined,
|
|
146
|
+
structured: transcript,
|
|
147
|
+
};
|
|
148
|
+
}
|
|
149
|
+
catch (_ytError) {
|
|
150
|
+
// If transcript extraction fails (no captions, page changed, etc.),
|
|
151
|
+
// fall through to the normal HTML fetch pipeline below.
|
|
152
|
+
}
|
|
153
|
+
}
|
|
93
154
|
try {
|
|
94
155
|
// Fetch the page (keep browser open if branding extraction or autoScroll is needed)
|
|
95
156
|
const needsBranding = options.branding && render;
|
|
@@ -284,6 +345,20 @@ export async function peel(url, options = {}) {
|
|
|
284
345
|
links = [...new Set(found)];
|
|
285
346
|
quality = 1.0;
|
|
286
347
|
}
|
|
348
|
+
// Readability mode
|
|
349
|
+
let readabilityResult;
|
|
350
|
+
if (options.readable && isHTML && fetchResult.html) {
|
|
351
|
+
const readResult = extractReadableContent(fetchResult.html, fetchResult.url);
|
|
352
|
+
readabilityResult = readResult;
|
|
353
|
+
content = readResult.content;
|
|
354
|
+
metadata = {
|
|
355
|
+
...metadata,
|
|
356
|
+
title: readResult.title || metadata?.title,
|
|
357
|
+
author: readResult.author || undefined,
|
|
358
|
+
publishedDate: readResult.date || undefined,
|
|
359
|
+
};
|
|
360
|
+
title = readResult.title || title;
|
|
361
|
+
}
|
|
287
362
|
// Extract images if requested
|
|
288
363
|
let imagesList;
|
|
289
364
|
if (extractImagesFlag && isHTML) {
|
|
@@ -314,6 +389,31 @@ export async function peel(url, options = {}) {
|
|
|
314
389
|
format === 'text' ? 'text' : 'markdown';
|
|
315
390
|
content = distillToBudget(content, options.budget, budgetFormat);
|
|
316
391
|
}
|
|
392
|
+
// Domain-aware structured extraction (Twitter, Reddit, GitHub, HN)
|
|
393
|
+
// Fires when URL matches a known domain. Replaces content with clean markdown.
|
|
394
|
+
let domainData;
|
|
395
|
+
if (getDomainExtractor(fetchResult.url)) {
|
|
396
|
+
try {
|
|
397
|
+
const ddResult = await extractDomainData(fetchResult.html, fetchResult.url);
|
|
398
|
+
if (ddResult) {
|
|
399
|
+
domainData = ddResult;
|
|
400
|
+
content = ddResult.cleanContent;
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
catch {
|
|
404
|
+
// Domain extraction failure is non-fatal; continue with normal content
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
// Quick answer (LLM-free)
|
|
408
|
+
let quickAnswerResult;
|
|
409
|
+
if (options.question && content) {
|
|
410
|
+
const qa = runQuickAnswer({
|
|
411
|
+
question: options.question,
|
|
412
|
+
content,
|
|
413
|
+
url: fetchResult.url,
|
|
414
|
+
});
|
|
415
|
+
quickAnswerResult = qa;
|
|
416
|
+
}
|
|
317
417
|
// Calculate elapsed time, tokens, and fingerprint
|
|
318
418
|
const elapsed = Date.now() - startTime;
|
|
319
419
|
const tokens = estimateTokens(content);
|
|
@@ -390,6 +490,9 @@ export async function peel(url, options = {}) {
|
|
|
390
490
|
summary: summaryText,
|
|
391
491
|
images: imagesList,
|
|
392
492
|
...(prunedPercent !== undefined ? { prunedPercent } : {}),
|
|
493
|
+
...(domainData !== undefined ? { domainData } : {}),
|
|
494
|
+
...(readabilityResult !== undefined ? { readability: readabilityResult } : {}),
|
|
495
|
+
...(quickAnswerResult !== undefined ? { quickAnswer: quickAnswerResult } : {}),
|
|
393
496
|
};
|
|
394
497
|
}
|
|
395
498
|
catch (error) {
|