@pagepocket/lighterceptor 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +137 -0
- package/dist/examples/aggregate-requests.d.ts +2 -0
- package/dist/examples/aggregate-requests.d.ts.map +1 -0
- package/dist/examples/aggregate-requests.js +34 -0
- package/dist/examples/aggregate-requests.js.map +1 -0
- package/dist/examples/basic-lighterceptor.d.ts +2 -0
- package/dist/examples/basic-lighterceptor.d.ts.map +1 -0
- package/dist/examples/basic-lighterceptor.js +43 -0
- package/dist/examples/basic-lighterceptor.js.map +1 -0
- package/dist/examples/custom-interceptor.d.ts +2 -0
- package/dist/examples/custom-interceptor.d.ts.map +1 -0
- package/dist/examples/custom-interceptor.js +57 -0
- package/dist/examples/custom-interceptor.js.map +1 -0
- package/dist/examples/real-world-moon.d.ts +2 -0
- package/dist/examples/real-world-moon.d.ts.map +1 -0
- package/dist/examples/real-world-moon.js +69 -0
- package/dist/examples/real-world-moon.js.map +1 -0
- package/dist/examples/recursive-crawl.d.ts +2 -0
- package/dist/examples/recursive-crawl.d.ts.map +1 -0
- package/dist/examples/recursive-crawl.js +87 -0
- package/dist/examples/recursive-crawl.js.map +1 -0
- package/dist/src/dom.d.ts +9 -0
- package/dist/src/dom.d.ts.map +1 -0
- package/dist/src/dom.js +608 -0
- package/dist/src/dom.js.map +1 -0
- package/dist/src/index.d.ts +6 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/index.js +3 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/lighterceptor.d.ts +40 -0
- package/dist/src/lighterceptor.d.ts.map +1 -0
- package/dist/src/lighterceptor.js +697 -0
- package/dist/src/lighterceptor.js.map +1 -0
- package/dist/src/resource-loader.d.ts +8 -0
- package/dist/src/resource-loader.d.ts.map +1 -0
- package/dist/src/resource-loader.js +43 -0
- package/dist/src/resource-loader.js.map +1 -0
- package/dist/src/types.d.ts +10 -0
- package/dist/src/types.d.ts.map +1 -0
- package/dist/src/types.js +2 -0
- package/dist/src/types.js.map +1 -0
- package/package.json +30 -0
|
@@ -0,0 +1,697 @@
|
|
|
1
|
+
import { createJSDOMWithInterceptor } from "./dom.js";
|
|
2
|
+
// Fallback wait (in milliseconds) used by Lighterceptor#run when `options.settleTimeMs` is not provided.
const DEFAULT_SETTLE_MS = 50;
|
|
3
|
+
/**
 * Collects the URLs referenced by a piece of HTML, CSS or JavaScript.
 *
 * The input is either an absolute http(s) URL (fetched first) or the raw
 * text itself. HTML is loaded into a jsdom window created through
 * `createJSDOMWithInterceptor`; CSS and JavaScript are scanned with regular
 * expressions.
 *
 * Options read by `run()`:
 *  - `settleTimeMs`: how long to wait after the DOM scan of each HTML
 *    document before continuing (defaults to `DEFAULT_SETTLE_MS`).
 *  - `recursion`: when true, discovered HTML/CSS/JS resources are fetched and
 *    analyzed as well (defaults to false).
 *  - `requestOnly`: when true, discovered URLs are listed but not fetched for
 *    `networkRecords` (defaults to false).
 *  - `baseUrl`: base used to resolve relative URLs; falls back to the input
 *    URL when the input is an absolute http(s) URL.
 */
export class Lighterceptor {
    input;
    options;
    /**
     * @param {string} input Absolute http(s) URL, or raw HTML/CSS/JS text.
     * @param {object} [options] See the class description for the recognized keys.
     */
    constructor(input, options = {}) {
        this.input = input;
        this.options = options;
    }
    /**
     * Runs the analysis.
     *
     * @returns {Promise<{title: (string|undefined), capturedAt: string, requests: Array, networkRecords: Array}>}
     *   `title` is the document title when the top-level input is HTML,
     *   `capturedAt` is the ISO timestamp taken when the run started,
     *   `requests` lists every discovered URL with its source and timestamp,
     *   and `networkRecords` holds one entry per fetched URL (always empty
     *   when `requestOnly` is set). If the input is a URL that cannot be
     *   fetched (or has an empty body), the result has no title and empty lists.
     */
    async run() {
        const requests = [];
        const networkRecords = [];
        const capturedAt = new Date().toISOString();
        const settleTimeMs = this.options.settleTimeMs ?? DEFAULT_SETTLE_MS;
        const recursive = this.options.recursion ?? false;
        const requestOnly = this.options.requestOnly ?? false;
        const baseUrl = this.options.baseUrl;
        // Queue of { url, kind } entries awaiting recursive analysis.
        const pending = [];
        // URLs that have already been queued, so each is analyzed at most once.
        const processed = new Set();
        const resourceCache = new Map();
        const responseCache = new Map();
        // Promises of in-flight network recordings; awaited before returning.
        const pendingNetwork = [];
        // Fetches `url` at most once per run. The cached promise never rejects:
        // failures resolve to `{ ok: false, error }`.
        const fetchWithCache = (url) => {
            const existing = responseCache.get(url);
            if (existing) {
                return existing;
            }
            const promise = (async () => {
                if (typeof fetch !== "function") {
                    return {
                        ok: false,
                        error: "fetch-unavailable"
                    };
                }
                try {
                    const response = await fetch(url);
                    // The body can only be consumed once, so read the bytes from the
                    // response and the text from a clone.
                    const cloned = response.clone();
                    const [buffer, text] = await Promise.all([response.arrayBuffer(), cloned.text()]);
                    const headers = {};
                    response.headers.forEach((value, key) => {
                        headers[key] = value;
                    });
                    const bodyEncoding = resolveBodyEncoding(response.headers.get("content-type") ?? undefined);
                    const body = bodyEncoding === "base64"
                        ? Buffer.from(buffer).toString("base64")
                        : decodeText(Buffer.from(buffer), response.headers.get("content-type") ?? undefined, text);
                    const responseRecord = {
                        status: response.status,
                        statusText: response.statusText,
                        headers,
                        body,
                        bodyEncoding
                    };
                    return {
                        ok: response.ok,
                        response: responseRecord,
                        contentType: response.headers.get("content-type") ?? undefined,
                        text: bodyEncoding === "text" ? body : text,
                        buffer: Buffer.from(buffer)
                    };
                }
                catch (error) {
                    return {
                        ok: false,
                        error: error instanceof Error ? error.message : String(error)
                    };
                }
            })();
            responseCache.set(url, promise);
            return promise;
        };
        // Fetches `url` in the background and appends a network record once the
        // fetch settles. Skipped entirely in request-only mode.
        const recordNetwork = (url, source) => {
            if (requestOnly || isSkippableUrl(url)) {
                return;
            }
            const task = fetchWithCache(url).then((result) => {
                const record = {
                    url,
                    source,
                    method: "GET",
                    timestamp: Date.now()
                };
                if (result.response) {
                    record.response = result.response;
                }
                if (!result.ok) {
                    record.error = result.error ?? "request-failed";
                }
                networkRecords.push(record);
            });
            pendingNetwork.push(task);
        };
        // Resolves `url` against `baseUrl` (when given), lists it in `requests`
        // and schedules its network recording.
        const recordUrl = (url, source, baseUrl) => {
            const resolved = resolveUrl(baseUrl, url);
            if (!resolved) {
                return;
            }
            requests.push({
                url: resolved,
                source,
                timestamp: Date.now()
            });
            recordNetwork(resolved, source);
        };
        // Queues a resource for recursive analysis; no-op unless recursion is on.
        const enqueue = (url, kind) => {
            if (!recursive || isSkippableUrl(url)) {
                return;
            }
            if (processed.has(url)) {
                return;
            }
            processed.add(url);
            pending.push({ url, kind });
        };
        // Records every URL referenced by a stylesheet; @import targets are also
        // queued so that the imported stylesheet gets analyzed.
        const recordCssUrls = (cssText, baseUrl) => {
            const { imports, urls } = extractCssDependencies(cssText);
            for (const url of imports) {
                const resolved = resolveUrl(baseUrl, url);
                if (!resolved) {
                    continue;
                }
                recordUrl(resolved, "css");
                enqueue(resolved, "css");
            }
            for (const url of urls) {
                const resolved = resolveUrl(baseUrl, url);
                if (!resolved) {
                    continue;
                }
                recordUrl(resolved, "css");
            }
        };
        // Records the URLs found by statically scanning JavaScript source text.
        const analyzeJs = (jsText, baseUrl) => {
            const { fetches, imports, importScripts, xhrs } = extractJsDependencies(jsText);
            for (const url of imports) {
                const resolved = resolveUrl(baseUrl, url);
                if (!resolved) {
                    continue;
                }
                recordUrl(resolved, "resource");
                enqueue(resolved, inferResourceKindFromUrl(resolved) ?? "js");
            }
            for (const url of importScripts) {
                const resolved = resolveUrl(baseUrl, url);
                if (!resolved) {
                    continue;
                }
                recordUrl(resolved, "resource");
                enqueue(resolved, "js");
            }
            for (const url of fetches) {
                const resolved = resolveUrl(baseUrl, url);
                if (!resolved) {
                    continue;
                }
                recordUrl(resolved, "fetch");
                // Kind may be undefined here; it is then detected after loading.
                enqueue(resolved, inferResourceKindFromUrl(resolved));
            }
            for (const url of xhrs) {
                const resolved = resolveUrl(baseUrl, url);
                if (!resolved) {
                    continue;
                }
                recordUrl(resolved, "xhr");
                enqueue(resolved, inferResourceKindFromUrl(resolved));
            }
        };
        // Loads HTML into jsdom (page scripts are executed), records every
        // resource URL found in the resulting DOM, then waits `settleTimeMs`.
        // Returns the document title only when `captureTitle` is true.
        const analyzeHtml = async (htmlText, baseUrl, captureTitle = false) => {
            const dom = createJSDOMWithInterceptor({
                html: htmlText,
                domOptions: {
                    pretendToBeVisual: true,
                    runScripts: "dangerously",
                    url: baseUrl,
                    beforeParse(window) {
                        // Builds the canned 200 response handed to page scripts that call fetch().
                        const createStubResponse = (url) => {
                            const normalizedUrl = url.toLowerCase();
                            // NOTE(review): these two URL shapes get hard-coded JSON bodies; the
                            // reason is not visible in this file — confirm before changing.
                            const bodyText = normalizedUrl.endsWith("/figma/manifest.json")
                                ? JSON.stringify({ figures: [], svgs: [] })
                                : normalizedUrl.includes("/features/") && normalizedUrl.endsWith(".json")
                                    ? JSON.stringify({
                                        isDead: true,
                                        statistics: {},
                                        examples_quantiles: []
                                    })
                                    : "";
                            const encoder = typeof TextEncoder === "function" ? new TextEncoder() : undefined;
                            const buffer = encoder ? encoder.encode(bodyText).buffer : new ArrayBuffer(0);
                            // Use the window's Headers class when it exists, otherwise an inert stand-in.
                            const headers = typeof window.Headers === "function"
                                ? new window.Headers()
                                : {
                                    append: () => { },
                                    delete: () => { },
                                    get: () => null,
                                    getSetCookie: () => [],
                                    has: () => false,
                                    set: () => { },
                                    forEach: () => { },
                                    keys: () => [][Symbol.iterator](),
                                    values: () => [][Symbol.iterator](),
                                    entries: () => [][Symbol.iterator](),
                                    [Symbol.iterator]: () => [][Symbol.iterator]()
                                };
                            const response = {
                                ok: true,
                                status: 200,
                                statusText: "OK",
                                headers,
                                json: async () => {
                                    if (!bodyText) {
                                        return {};
                                    }
                                    try {
                                        return JSON.parse(bodyText);
                                    }
                                    catch {
                                        return {};
                                    }
                                },
                                text: async () => bodyText,
                                arrayBuffer: async () => buffer,
                                clone: () => createStubResponse(url)
                            };
                            return response;
                        };
                        // Replace the page's fetch with a stub that never touches the network.
                        window.fetch = ((input) => {
                            let url = "";
                            if (typeof input === "string") {
                                url = input;
                            }
                            else if (input instanceof URL) {
                                url = input.toString();
                            }
                            else if (input !== null && typeof input === "object" && "url" in input) {
                                // Request-like object.
                                url = String(input.url);
                            }
                            else if (input !== undefined && input !== null) {
                                // Anything else is stringified, as fetch() itself does. This also
                                // covers URL objects created inside the jsdom window, which are not
                                // instances of Node's global URL. Guarding the `in` check above
                                // keeps fetch() / fetch(null) / fetch(123) from throwing.
                                url = String(input);
                            }
                            return Promise.resolve(createStubResponse(url));
                        });
                        // XHR requests from page scripts are never actually sent.
                        window.XMLHttpRequest.prototype.send = function send() { };
                    }
                },
                // Called with each URL and an options object carrying `referrer`,
                // `source` and `element`; the returned buffer is the resource body.
                interceptor: async (url, options) => {
                    const resolved = resolveUrl(options.referrer, url);
                    if (!resolved) {
                        return Buffer.from("");
                    }
                    const source = options.source ?? "unknown";
                    recordUrl(resolved, source);
                    const element = options.element;
                    const tagName = element?.tagName?.toLowerCase();
                    if (recursive && tagName === "script") {
                        // In recursive mode scripts receive their real content.
                        const result = await fetchWithCache(resolved);
                        if (result.ok && result.buffer) {
                            return result.buffer;
                        }
                    }
                    if (recursive) {
                        if (source === "fetch" || source === "xhr") {
                            enqueue(resolved, inferResourceKindFromUrl(resolved));
                        }
                        else if (source === "resource") {
                            const kind = inferKindFromElement(options.element);
                            if (kind) {
                                enqueue(resolved, kind);
                            }
                        }
                    }
                    // Everything else gets an empty body.
                    return Buffer.from("");
                }
            });
            const { document } = dom.window;
            document.querySelectorAll("img").forEach((img) => {
                if (img instanceof dom.window.HTMLImageElement && img.src) {
                    recordUrl(img.src, "img", baseUrl);
                }
            });
            document.querySelectorAll("img[srcset]").forEach((img) => {
                if (!(img instanceof dom.window.HTMLImageElement)) {
                    return;
                }
                const srcset = img.getAttribute("srcset");
                if (!srcset) {
                    return;
                }
                for (const url of parseSrcsetUrls(srcset)) {
                    recordUrl(url, "img", baseUrl);
                }
            });
            document.querySelectorAll("source[src]").forEach((source) => {
                const src = source.getAttribute("src");
                if (src) {
                    recordUrl(src, "resource", baseUrl);
                }
            });
            document.querySelectorAll("source[srcset]").forEach((source) => {
                const srcset = source.getAttribute("srcset");
                if (srcset) {
                    for (const url of parseSrcsetUrls(srcset)) {
                        recordUrl(url, "resource", baseUrl);
                    }
                }
            });
            document.querySelectorAll("script[src]").forEach((script) => {
                if (script instanceof dom.window.HTMLScriptElement && script.src) {
                    recordUrl(script.src, "resource", baseUrl);
                    enqueue(script.src, "js");
                }
            });
            document.querySelectorAll("iframe[src]").forEach((iframe) => {
                if (iframe instanceof dom.window.HTMLIFrameElement && iframe.src) {
                    recordUrl(iframe.src, "resource", baseUrl);
                    enqueue(iframe.src, "html");
                }
            });
            document.querySelectorAll("video[src], audio[src]").forEach((media) => {
                const src = media.getAttribute("src");
                if (src) {
                    recordUrl(src, "resource", baseUrl);
                }
            });
            document.querySelectorAll("video[poster]").forEach((video) => {
                const poster = video.getAttribute("poster");
                if (poster) {
                    recordUrl(poster, "resource", baseUrl);
                }
            });
            document.querySelectorAll("track[src]").forEach((track) => {
                const src = track.getAttribute("src");
                if (src) {
                    recordUrl(src, "resource", baseUrl);
                }
            });
            document.querySelectorAll("embed[src]").forEach((embed) => {
                const src = embed.getAttribute("src");
                if (src) {
                    recordUrl(src, "resource", baseUrl);
                }
            });
            document.querySelectorAll("object[data]").forEach((object) => {
                const data = object.getAttribute("data");
                if (data) {
                    recordUrl(data, "resource", baseUrl);
                }
            });
            document.querySelectorAll("[style]").forEach((element) => {
                const cssText = element.getAttribute("style");
                if (cssText) {
                    recordCssUrls(cssText, baseUrl);
                }
            });
            document.querySelectorAll("style").forEach((style) => {
                if (style.textContent) {
                    recordCssUrls(style.textContent, baseUrl);
                }
            });
            document.querySelectorAll("link[rel]").forEach((link) => {
                if (!(link instanceof dom.window.HTMLLinkElement)) {
                    return;
                }
                const rel = link.getAttribute("rel") ?? "";
                if (shouldInterceptLinkRel(rel)) {
                    const href = link.getAttribute("href") ?? link.href;
                    if (href) {
                        const resolvedHref = resolveUrl(baseUrl, href) ?? href;
                        recordUrl(resolvedHref, "resource");
                        if (rel.toLowerCase().includes("stylesheet")) {
                            enqueue(resolvedHref, "css");
                        }
                        else if (rel.toLowerCase().includes("preload")) {
                            const kind = inferResourceKindFromUrl(resolvedHref);
                            if (kind) {
                                enqueue(resolvedHref, kind);
                            }
                        }
                    }
                }
                if (rel.toLowerCase().includes("preload")) {
                    const imagesrcset = link.getAttribute("imagesrcset");
                    if (imagesrcset) {
                        for (const url of parseSrcsetUrls(imagesrcset)) {
                            recordUrl(url, "resource", baseUrl);
                        }
                    }
                }
            });
            // Leave time for page scripts and the interceptor to report further requests.
            await new Promise((resolve) => setTimeout(resolve, settleTimeMs));
            if (captureTitle) {
                return dom.window.document.title || undefined;
            }
            return undefined;
        };
        // Loads a resource as text for recursive analysis; resolves to null when
        // the fetch failed or the body is empty.
        const loadResource = async (url) => {
            const existing = resourceCache.get(url);
            if (existing) {
                return existing;
            }
            const loader = fetchWithCache(url).then((result) => {
                if (!result.ok || !result.text) {
                    return null;
                }
                return {
                    text: result.text,
                    contentType: result.contentType,
                    buffer: result.buffer
                };
            });
            resourceCache.set(url, loader);
            return loader;
        };
        // Drains the queue one entry at a time; analyzing an entry may queue more.
        const processPending = async () => {
            while (pending.length > 0) {
                const next = pending.shift();
                if (!next) {
                    continue;
                }
                const result = await loadResource(next.url);
                if (!result) {
                    continue;
                }
                const kind = next.kind ?? detectResourceKind(next.url, result.contentType, result.text);
                if (!kind) {
                    continue;
                }
                if (kind === "html") {
                    await analyzeHtml(result.text, next.url);
                    continue;
                }
                if (kind === "css") {
                    recordCssUrls(result.text, next.url);
                    continue;
                }
                analyzeJs(result.text, next.url);
            }
        };
        // When the input is an absolute http(s) URL, fetch it and analyze the body instead.
        const initialUrl = parseAbsoluteUrl(this.input);
        const effectiveBaseUrl = baseUrl ?? initialUrl ?? undefined;
        let initialInput = this.input;
        if (initialUrl) {
            const result = await fetchWithCache(initialUrl);
            if (!result.ok || !result.text) {
                return {
                    title: undefined,
                    capturedAt,
                    requests,
                    networkRecords: requestOnly ? [] : networkRecords
                };
            }
            initialInput = result.text;
        }
        const inputKind = detectInputKind(initialInput);
        let title;
        if (inputKind === "html") {
            title = await analyzeHtml(initialInput, effectiveBaseUrl, true);
        }
        else if (inputKind === "css") {
            recordCssUrls(initialInput, effectiveBaseUrl);
        }
        else {
            analyzeJs(initialInput, effectiveBaseUrl);
        }
        await processPending();
        // Make sure every background fetch has produced its network record.
        await Promise.allSettled(pendingNetwork);
        return {
            title,
            capturedAt,
            requests,
            networkRecords: requestOnly ? [] : networkRecords
        };
    }
}
|
|
472
|
+
/**
 * Turns `url` into a normalized absolute URL string.
 *
 * @param {string|undefined} baseUrl Base to resolve against; ignored when falsy.
 * @param {string} url URL to resolve.
 * @returns {string|undefined} `undefined` for an empty `url`; the normalized
 *   URL when parsing succeeds; otherwise `url` unchanged.
 */
function resolveUrl(baseUrl, url) {
    if (!url) {
        return undefined;
    }
    const constructorArgs = baseUrl ? [url, baseUrl] : [url];
    let normalized;
    try {
        normalized = new URL(...constructorArgs).toString();
    }
    catch {
        // Not parseable (e.g. relative URL without a usable base): hand it back as-is.
        normalized = url;
    }
    return normalized;
}
|
|
491
|
+
/**
 * Recognizes absolute http(s) URLs.
 *
 * @param {string} value Candidate string.
 * @returns {string|undefined} The normalized URL when `value` parses as an
 *   absolute URL with the `http:` or `https:` protocol, otherwise `undefined`.
 */
function parseAbsoluteUrl(value) {
    let candidate;
    try {
        candidate = new URL(value);
    }
    catch {
        return undefined;
    }
    const isWebProtocol = ["http:", "https:"].includes(candidate.protocol);
    return isWebProtocol ? candidate.toString() : undefined;
}
|
|
503
|
+
/**
 * Tells whether `url` uses a scheme that is never fetched or queued:
 * `data:`, `javascript:` or `about:` (matched case-insensitively).
 *
 * @param {string} url
 * @returns {boolean}
 */
function isSkippableUrl(url) {
    const candidate = url.toLowerCase();
    return ["data:", "javascript:", "about:"].some((scheme) => candidate.startsWith(scheme));
}
|
|
507
|
+
/**
 * Guesses a resource kind from the file extension of `url`.
 *
 * Everything from the first `?` on, and then from the first `#` on, is
 * ignored. The extension is the text after the last `.` of what remains
 * (the whole remainder when there is no dot), compared case-insensitively.
 *
 * @param {string} url
 * @returns {"html"|"css"|"js"|undefined}
 */
function inferResourceKindFromUrl(url) {
    const queryStart = url.indexOf("?");
    const withoutQuery = queryStart === -1 ? url : url.slice(0, queryStart);
    const hashStart = withoutQuery.indexOf("#");
    const path = hashStart === -1 ? withoutQuery : withoutQuery.slice(0, hashStart);
    const suffix = path.slice(path.lastIndexOf(".") + 1).toLowerCase();
    switch (suffix) {
        case "html":
        case "htm":
            return "html";
        case "css":
            return "css";
        case "js":
        case "mjs":
        case "cjs":
            return "js";
        default:
            return undefined;
    }
}
|
|
524
|
+
/**
 * Decides how a loaded resource should be analyzed.
 *
 * Checks, in order: the content type, the URL's file extension, and finally
 * the leading characters / contents of the text.
 *
 * @param {string} url Resource URL.
 * @param {string|undefined} contentType Value of the Content-Type header, if any.
 * @param {string} text Resource body.
 * @returns {"html"|"css"|"js"|undefined} `undefined` when nothing matches.
 */
function detectResourceKind(url, contentType, text) {
    const mime = (contentType ?? "").toLowerCase();
    const mimeRules = [
        ["text/html", "html"],
        ["text/css", "css"],
        ["javascript", "js"]
    ];
    for (const [needle, kind] of mimeRules) {
        if (mime.includes(needle)) {
            return kind;
        }
    }
    const fromExtension = inferResourceKindFromUrl(url);
    if (fromExtension) {
        return fromExtension;
    }
    const head = text.trimStart();
    // Any markup-looking start (doctype, <html>, or another tag) counts as HTML.
    if (head.startsWith("<")) {
        return "html";
    }
    if (head.startsWith("@") || head.includes("url(")) {
        return "css";
    }
    return looksLikeJavaScript(head) ? "js" : undefined;
}
|
|
554
|
+
/**
 * Classifies raw input text by sniffing its content.
 *
 * @param {string} input
 * @returns {"html"|"css"|"js"} "html" when the text (after leading whitespace)
 *   starts with `<`; "css" when it starts with `@` or contains `url(`;
 *   "js" for everything else.
 */
function detectInputKind(input) {
    const head = input.trimStart();
    const firstChar = head.charAt(0);
    if (firstChar === "<") {
        return "html";
    }
    const looksLikeCss = firstChar === "@" || head.includes("url(");
    return looksLikeCss ? "css" : "js";
}
|
|
564
|
+
/**
 * Heuristic: does `text` contain something that looks like JavaScript?
 *
 * @param {string} text
 * @returns {boolean} true when the text contains an `import`/`export`
 *   keyword, a `const`/`let`/`var`/`function` keyword, a `fetch(` or
 *   `importScripts(` call, or the identifier `XMLHttpRequest`.
 */
function looksLikeJavaScript(text) {
    const signatures = [
        /\b(import|export)\b/,
        /\b(const|let|var|function)\b/,
        /\bfetch\s*\(/,
        /\bXMLHttpRequest\b/,
        /\bimportScripts\s*\(/
    ];
    return signatures.some((signature) => signature.test(text));
}
|
|
571
|
+
/**
 * Derives the resource kind from the element that triggered a request.
 *
 * @param {unknown} element Element-like object (needs `tagName`, and
 *   `getAttribute` for `<link>`).
 * @returns {"html"|"css"|"js"|undefined} "js" for `<script>`, "html" for
 *   `<iframe>`; for `<link>`: "css" when `rel` contains "stylesheet", or —
 *   when `rel` contains "preload"/"prefetch" — "css" for `as="style"` and
 *   "js" for `as="script"`. `undefined` in every other case.
 */
function inferKindFromElement(element) {
    if (typeof element !== "object" || !element) {
        return undefined;
    }
    const hasStringTag = "tagName" in element && typeof element.tagName === "string";
    const tag = hasStringTag ? element.tagName.toLowerCase() : "";
    switch (tag) {
        case "script":
            return "js";
        case "iframe":
            return "html";
        case "link":
            break;
        default:
            return undefined;
    }
    if (!("getAttribute" in element)) {
        return undefined;
    }
    const readAttribute = (name) => String(element.getAttribute(name) ?? "").toLowerCase();
    const rel = readAttribute("rel");
    const asValue = readAttribute("as");
    if (rel.includes("stylesheet")) {
        return "css";
    }
    const isResourceHint = rel.includes("preload") || rel.includes("prefetch");
    if (!isResourceHint) {
        return undefined;
    }
    if (asValue === "style") {
        return "css";
    }
    if (asValue === "script") {
        return "js";
    }
    return undefined;
}
|
|
601
|
+
/**
 * Extracts the URLs referenced by a piece of CSS.
 *
 * @param {string} cssText Stylesheet source or inline `style` attribute text.
 * @returns {{imports: string[], urls: string[]}} `imports` holds the targets
 *   of `@import` rules (quoted or `url(...)` form); `urls` holds every other
 *   `url(...)` reference. Both lists are in source order and may contain
 *   repeated URLs when the stylesheet references them more than once.
 */
function extractCssDependencies(cssText) {
    const imports = [];
    const urls = [];
    const importPattern = /@import\s+(?:url\(\s*)?(['"]?)([^'")\s]+)\1\s*\)?/gi;
    const urlPattern = /url\(\s*(['"]?)(.*?)\1\s*\)/gi;
    // [start, end) offsets of every @import match. A `url(...)` that lies inside
    // one of them is the import target itself and must not be reported again
    // as a plain url() reference.
    const importSpans = [];
    for (const match of cssText.matchAll(importPattern)) {
        importSpans.push([match.index, match.index + match[0].length]);
        const url = match[2].trim();
        if (url.length > 0) {
            imports.push(url);
        }
    }
    for (const match of cssText.matchAll(urlPattern)) {
        const isImportTarget = importSpans.some(([start, end]) => match.index >= start && match.index < end);
        if (isImportTarget) {
            continue;
        }
        const url = match[2].trim();
        if (url.length > 0) {
            urls.push(url);
        }
    }
    return { imports, urls };
}
|
|
621
|
+
/**
 * Statically scans JavaScript source for URLs given as plain string literals.
 *
 * This is a regular-expression scan, not a parser: only single- or
 * double-quoted literal arguments are found.
 *
 * @param {string} jsText JavaScript source text.
 * @returns {{imports: string[], importScripts: string[], fetches: string[], xhrs: string[]}}
 *   De-duplicated lists:
 *   - `imports`: static `import` specifiers, then dynamic `import("...")`
 *     specifiers, then `export ... from "..."` re-export specifiers;
 *   - `importScripts`: single-argument `importScripts("...")` calls;
 *   - `fetches`: first argument of `fetch("...")` calls;
 *   - `xhrs`: second argument of `.open("METHOD", "...")` calls.
 */
function extractJsDependencies(jsText) {
    const imports = new Set();
    const importScripts = new Set();
    const fetches = new Set();
    const xhrs = new Set();
    // Call patterns tolerate whitespace before "(" (e.g. `fetch ("/x")`).
    const scans = [
        [/\bimport\s+(?:[^'"]+from\s+)?['"]([^'"]+)['"]/g, imports],
        [/\bimport\s*\(\s*['"]([^'"]+)['"]\s*\)/g, imports],
        // Re-exports load their source module too:
        // `export * from`, `export * as ns from`, `export { a, b as c } from`.
        [/\bexport\s*(?:\*(?:\s*as\s+[\w$]+)?|\{[^}]*\})\s*from\s*['"]([^'"]+)['"]/g, imports],
        [/\bimportScripts\s*\(\s*['"]([^'"]+)['"]\s*\)/g, importScripts],
        [/\bfetch\s*\(\s*['"]([^'"]+)['"]/g, fetches],
        [/\.open\s*\(\s*['"][^'"]+['"]\s*,\s*['"]([^'"]+)['"]/g, xhrs]
    ];
    for (const [pattern, bucket] of scans) {
        for (const match of jsText.matchAll(pattern)) {
            bucket.add(match[1]);
        }
    }
    return {
        imports: [...imports],
        importScripts: [...importScripts],
        fetches: [...fetches],
        xhrs: [...xhrs]
    };
}
|
|
654
|
+
/**
 * Extracts the image URLs from a `srcset` / `imagesrcset` attribute value.
 *
 * Follows the candidate rules of the HTML srcset syntax instead of splitting
 * on every comma: a URL is a run of non-whitespace characters, so commas
 * inside it (`data:` URIs, CDN paths such as `w_100,h_100`) belong to the
 * URL. Commas only separate candidates when they trail the URL or follow
 * its descriptors.
 *
 * @param {string} value Raw attribute value.
 * @returns {string[]} Candidate URLs in source order, descriptors removed.
 */
function parseSrcsetUrls(value) {
    const isWhitespace = (char) => /\s/.test(char);
    const urls = [];
    const length = value.length;
    let position = 0;
    while (position < length) {
        // Skip separators in front of the next candidate.
        while (position < length && (isWhitespace(value[position]) || value[position] === ",")) {
            position += 1;
        }
        if (position >= length) {
            break;
        }
        const urlStart = position;
        while (position < length && !isWhitespace(value[position])) {
            position += 1;
        }
        let url = value.slice(urlStart, position);
        if (url.endsWith(",")) {
            // Trailing commas end the candidate; it has no descriptors.
            url = url.replace(/,+$/, "");
        }
        else {
            // Skip the descriptors: everything up to the next comma that is not
            // inside parentheses.
            let depth = 0;
            while (position < length) {
                const char = value[position];
                if (char === "(") {
                    depth += 1;
                }
                else if (char === ")") {
                    depth = Math.max(0, depth - 1);
                }
                else if (char === "," && depth === 0) {
                    break;
                }
                position += 1;
            }
        }
        if (url.length > 0) {
            urls.push(url);
        }
    }
    return urls;
}
|
|
660
|
+
/**
 * Tells whether a `<link>` with the given `rel` value points at a resource
 * worth recording.
 *
 * @param {string} rel Raw `rel` attribute value.
 * @returns {boolean} true when `rel` contains (case-insensitively)
 *   "preload", "prefetch", "stylesheet" or "icon".
 */
function shouldInterceptLinkRel(rel) {
    const lowered = rel.toLowerCase();
    const keywords = ["preload", "prefetch", "stylesheet", "icon"];
    return keywords.some((keyword) => lowered.includes(keyword));
}
|
|
667
|
+
/**
 * Chooses how a response body is stored in a network record.
 *
 * @param {string|undefined} contentType Value of the Content-Type header.
 * @returns {"text"|"base64"} "text" when there is no content type, when it
 *   starts with `text/`, or when it mentions json, xml, javascript or svg;
 *   "base64" for everything else.
 */
function resolveBodyEncoding(contentType) {
    if (!contentType) {
        return "text";
    }
    const mime = contentType.toLowerCase();
    const textualMarkers = ["json", "xml", "javascript", "svg"];
    const isTextual = mime.startsWith("text/") || textualMarkers.some((marker) => mime.includes(marker));
    return isTextual ? "text" : "base64";
}
|
|
681
|
+
/**
 * Decodes a response body using the charset announced in its content type.
 *
 * @param {Uint8Array|ArrayBuffer} buffer Raw body bytes.
 * @param {string|undefined} contentType Value of the Content-Type header.
 * @param {string} fallback Text to return when no usable charset is found.
 * @returns {string} The decoded text, or `fallback` when the content type
 *   carries no `charset=` parameter or names an encoding that `TextDecoder`
 *   does not support.
 */
function decodeText(buffer, contentType, fallback) {
    const rawCharset = contentType
        ?.toLowerCase()
        .match(/charset=([^;]+)/)?.[1]
        ?.trim();
    // Parameter values may be quoted (`charset="utf-8"`); TextDecoder needs the bare label.
    const charset = rawCharset?.replace(/^["']|["']$/g, "").trim();
    if (!charset) {
        return fallback;
    }
    try {
        const decoder = new TextDecoder(charset);
        return decoder.decode(buffer);
    }
    catch {
        // Unknown encoding label: keep the text that was already decoded upstream.
        return fallback;
    }
}
|
|
697
|
+
//# sourceMappingURL=lighterceptor.js.map
|