@pagepocket/cli 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +11 -1
- package/package.json +5 -4
- package/dist/lib/content-type.js +0 -36
- package/dist/lib/css-rewrite.js +0 -62
- package/dist/lib/hackers/capture-network.js +0 -64
- package/dist/lib/hackers/index.js +0 -22
- package/dist/lib/hackers/preload-fetch.js +0 -56
- package/dist/lib/hackers/preload-image.js +0 -61
- package/dist/lib/hackers/preload-xhr.js +0 -59
- package/dist/lib/hackers/replay-beacon.js +0 -21
- package/dist/lib/hackers/replay-dom-rewrite.js +0 -295
- package/dist/lib/hackers/replay-eventsource.js +0 -25
- package/dist/lib/hackers/replay-fetch.js +0 -33
- package/dist/lib/hackers/replay-image.js +0 -48
- package/dist/lib/hackers/replay-svg-image.js +0 -89
- package/dist/lib/hackers/replay-websocket.js +0 -26
- package/dist/lib/hackers/replay-xhr.js +0 -91
- package/dist/lib/hackers/types.js +0 -2
- package/dist/lib/network-records.js +0 -69
- package/dist/lib/replay-script.js +0 -346
- package/dist/lib/resources.js +0 -131
- package/dist/lib/stages/download.js +0 -61
- package/dist/lib/stages/index.js +0 -235
- package/dist/lib/stages/intercept.js +0 -23
- package/dist/lib/stages/trigger.js +0 -56
- package/dist/lib/stages/visit.js +0 -24
- package/dist/lib/types.js +0 -2
- package/dist/preload.js +0 -60
- package/dist/stages/build-snapshot-data.js +0 -14
- package/dist/stages/build-snapshot.js +0 -30
- package/dist/stages/capture-network.js +0 -19
- package/dist/stages/download-resources.js +0 -48
- package/dist/stages/fetch-html.js +0 -69
- package/dist/stages/rewrite-links.js +0 -145
|
@@ -1,346 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.buildReplayScript = void 0;
|
|
4
|
-
const hackers_1 = require("./hackers");
|
|
5
|
-
const buildReplayScript = (requestsPath, baseUrl) => {
|
|
6
|
-
const basePayload = JSON.stringify(baseUrl);
|
|
7
|
-
const requestsPayload = JSON.stringify(requestsPath);
|
|
8
|
-
const context = { stage: "replay" };
|
|
9
|
-
const hackerScripts = hackers_1.replayHackers
|
|
10
|
-
.map((hacker) => ` // hacker:${hacker.id}\n${hacker.build(context)}`)
|
|
11
|
-
.join("\n");
|
|
12
|
-
return `
|
|
13
|
-
<script>
|
|
14
|
-
(function(){
|
|
15
|
-
// Load the snapshot metadata before patching runtime APIs.
|
|
16
|
-
const baseUrl = ${basePayload};
|
|
17
|
-
const requestsUrl = ${requestsPayload};
|
|
18
|
-
const __pagepocketOriginalFetch = window.fetch ? window.fetch.bind(window) : null;
|
|
19
|
-
|
|
20
|
-
const loadSnapshot = async () => {
|
|
21
|
-
try {
|
|
22
|
-
if (!__pagepocketOriginalFetch) {
|
|
23
|
-
throw new Error("Fetch is unavailable");
|
|
24
|
-
}
|
|
25
|
-
const response = await __pagepocketOriginalFetch(requestsUrl);
|
|
26
|
-
if (!response.ok) {
|
|
27
|
-
throw new Error("Failed to load snapshot metadata");
|
|
28
|
-
}
|
|
29
|
-
return await response.json();
|
|
30
|
-
} catch {
|
|
31
|
-
return {
|
|
32
|
-
url: baseUrl,
|
|
33
|
-
title: "",
|
|
34
|
-
capturedAt: "",
|
|
35
|
-
fetchXhrRecords: [],
|
|
36
|
-
networkRecords: [],
|
|
37
|
-
resources: []
|
|
38
|
-
};
|
|
39
|
-
}
|
|
40
|
-
};
|
|
41
|
-
|
|
42
|
-
// Soften JSON parse failures to avoid halting replay flows.
|
|
43
|
-
const originalResponseJson = Response && Response.prototype && Response.prototype.json;
|
|
44
|
-
if (originalResponseJson) {
|
|
45
|
-
Response.prototype.json = function(...args) {
|
|
46
|
-
try {
|
|
47
|
-
return originalResponseJson.apply(this, args).catch(() => null);
|
|
48
|
-
} catch {
|
|
49
|
-
return Promise.resolve(null);
|
|
50
|
-
}
|
|
51
|
-
};
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
// Guard to reapply patches if overwritten later.
|
|
55
|
-
const ensureReplayPatches = () => {
|
|
56
|
-
try {
|
|
57
|
-
if (!window.fetch.__pagepocketOriginal && typeof __pagepocketOriginalFetch === "function") {
|
|
58
|
-
window.fetch.__pagepocketOriginal = __pagepocketOriginalFetch;
|
|
59
|
-
}
|
|
60
|
-
} catch {}
|
|
61
|
-
try {
|
|
62
|
-
if (!XMLHttpRequest.prototype.send.__pagepocketOriginal) {
|
|
63
|
-
XMLHttpRequest.prototype.send.__pagepocketOriginal = XMLHttpRequest.prototype.send;
|
|
64
|
-
}
|
|
65
|
-
} catch {}
|
|
66
|
-
};
|
|
67
|
-
|
|
68
|
-
let records = [];
|
|
69
|
-
let networkRecords = [];
|
|
70
|
-
const byKey = new Map();
|
|
71
|
-
|
|
72
|
-
const localResourceSet = new Set();
|
|
73
|
-
const resourceUrlMap = new Map();
|
|
74
|
-
|
|
75
|
-
const normalizeUrl = (input) => {
|
|
76
|
-
try { return new URL(input, baseUrl).toString(); } catch { return input; }
|
|
77
|
-
};
|
|
78
|
-
|
|
79
|
-
let baseOrigin = "";
|
|
80
|
-
let baseDir = "";
|
|
81
|
-
try {
|
|
82
|
-
const parsedBase = new URL(baseUrl);
|
|
83
|
-
baseOrigin = parsedBase.origin;
|
|
84
|
-
baseDir = new URL(".", parsedBase).toString().replace(/\\/$/, "");
|
|
85
|
-
} catch {}
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
const expandUrlVariants = (value) => {
|
|
89
|
-
const variants = [];
|
|
90
|
-
if (typeof value === "string") {
|
|
91
|
-
variants.push(value);
|
|
92
|
-
variants.push(normalizeUrl(value));
|
|
93
|
-
if (baseOrigin && value.startsWith("/")) {
|
|
94
|
-
variants.push(baseOrigin + value);
|
|
95
|
-
if (baseDir) variants.push(baseDir + value);
|
|
96
|
-
} else if (baseDir) {
|
|
97
|
-
variants.push(baseDir + (value.startsWith("/") ? value : "/" + value));
|
|
98
|
-
}
|
|
99
|
-
try {
|
|
100
|
-
const parsed = new URL(value, baseUrl);
|
|
101
|
-
const pathWithSearch = (parsed.pathname || "") + (parsed.search || "");
|
|
102
|
-
if (baseOrigin && parsed.origin !== baseOrigin) {
|
|
103
|
-
variants.push(baseOrigin + pathWithSearch);
|
|
104
|
-
if (baseDir) {
|
|
105
|
-
const path = pathWithSearch.startsWith("/") ? pathWithSearch : "/" + pathWithSearch;
|
|
106
|
-
variants.push(baseDir + path);
|
|
107
|
-
}
|
|
108
|
-
}
|
|
109
|
-
} catch {}
|
|
110
|
-
}
|
|
111
|
-
return Array.from(new Set(variants.filter(Boolean)));
|
|
112
|
-
};
|
|
113
|
-
|
|
114
|
-
const normalizeBody = (body) => {
|
|
115
|
-
if (body === undefined || body === null) return "";
|
|
116
|
-
if (typeof body === "string") return body;
|
|
117
|
-
try { return String(body); } catch { return ""; }
|
|
118
|
-
};
|
|
119
|
-
|
|
120
|
-
// Build a stable key so requests with identical method/url/body match the same response.
|
|
121
|
-
const makeKey = (method, url, body) => method.toUpperCase() + " " + normalizeUrl(url) + " " + normalizeBody(body);
|
|
122
|
-
const makeVariantKeys = (method, url, body) => {
|
|
123
|
-
return expandUrlVariants(url).map((variant) => makeKey(method, variant, body));
|
|
124
|
-
};
|
|
125
|
-
const normalizeNetworkRecord = (record) => {
|
|
126
|
-
if (!record || typeof record !== "object") {
|
|
127
|
-
return record;
|
|
128
|
-
}
|
|
129
|
-
if (record.response && record.response.body !== undefined) {
|
|
130
|
-
const response = record.response || {};
|
|
131
|
-
const encoding = response.bodyEncoding || "text";
|
|
132
|
-
return {
|
|
133
|
-
url: record.url,
|
|
134
|
-
method: record.method || "GET",
|
|
135
|
-
requestBody: record.requestBody || "",
|
|
136
|
-
status: response.status,
|
|
137
|
-
statusText: response.statusText,
|
|
138
|
-
responseHeaders: response.headers,
|
|
139
|
-
responseBody: encoding === "text" ? response.body : undefined,
|
|
140
|
-
responseBodyBase64: encoding === "base64" ? response.body : undefined,
|
|
141
|
-
responseEncoding: encoding,
|
|
142
|
-
error: record.error,
|
|
143
|
-
timestamp: record.timestamp
|
|
144
|
-
};
|
|
145
|
-
}
|
|
146
|
-
return record;
|
|
147
|
-
};
|
|
148
|
-
|
|
149
|
-
const primeLookups = (snapshot) => {
|
|
150
|
-
records = snapshot.fetchXhrRecords || [];
|
|
151
|
-
networkRecords = (snapshot.networkRecords || []).map(normalizeNetworkRecord);
|
|
152
|
-
byKey.clear();
|
|
153
|
-
localResourceSet.clear();
|
|
154
|
-
resourceUrlMap.clear();
|
|
155
|
-
|
|
156
|
-
for (const record of records) {
|
|
157
|
-
if (!record || !record.url || !record.method) continue;
|
|
158
|
-
const keys = makeVariantKeys(record.method, record.url, record.requestBody || "");
|
|
159
|
-
for (const key of keys) {
|
|
160
|
-
if (!byKey.has(key)) {
|
|
161
|
-
byKey.set(key, record);
|
|
162
|
-
}
|
|
163
|
-
}
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
for (const record of networkRecords) {
|
|
167
|
-
if (!record || !record.url || !record.method) continue;
|
|
168
|
-
const keys = makeVariantKeys(record.method, record.url, record.requestBody || "");
|
|
169
|
-
for (const key of keys) {
|
|
170
|
-
if (!byKey.has(key)) {
|
|
171
|
-
byKey.set(key, record);
|
|
172
|
-
}
|
|
173
|
-
}
|
|
174
|
-
}
|
|
175
|
-
|
|
176
|
-
// Track local resource files and map original URLs to local paths.
|
|
177
|
-
const resourceList = snapshot.resources || [];
|
|
178
|
-
for (const item of resourceList) {
|
|
179
|
-
if (!item || !item.localPath) continue;
|
|
180
|
-
localResourceSet.add(item.localPath);
|
|
181
|
-
localResourceSet.add("./" + item.localPath);
|
|
182
|
-
|
|
183
|
-
if (item.url) {
|
|
184
|
-
const variants = expandUrlVariants(item.url);
|
|
185
|
-
for (const variant of variants) {
|
|
186
|
-
resourceUrlMap.set(variant, item.localPath);
|
|
187
|
-
}
|
|
188
|
-
}
|
|
189
|
-
}
|
|
190
|
-
};
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
const ready = (async () => {
|
|
194
|
-
// Deserialize the snapshot and prepare lookup tables for offline responses.
|
|
195
|
-
const snapshot = (await loadSnapshot()) || {};
|
|
196
|
-
primeLookups(snapshot);
|
|
197
|
-
return snapshot;
|
|
198
|
-
})();
|
|
199
|
-
|
|
200
|
-
const isLocalResource = (value) => {
|
|
201
|
-
if (!value) return false;
|
|
202
|
-
if (value.startsWith("data:") || value.startsWith("blob:")) return true;
|
|
203
|
-
return localResourceSet.has(value);
|
|
204
|
-
};
|
|
205
|
-
|
|
206
|
-
// Lookup helpers for request records and local assets.
|
|
207
|
-
const findRecord = (method, url, body) => {
|
|
208
|
-
const variants = expandUrlVariants(url);
|
|
209
|
-
for (const variant of variants) {
|
|
210
|
-
const key = makeKey(method, variant, body);
|
|
211
|
-
if (byKey.has(key)) return byKey.get(key);
|
|
212
|
-
}
|
|
213
|
-
for (const variant of variants) {
|
|
214
|
-
const fallbackKey = makeKey(method, variant, "");
|
|
215
|
-
if (byKey.has(fallbackKey)) return byKey.get(fallbackKey);
|
|
216
|
-
}
|
|
217
|
-
for (const variant of variants) {
|
|
218
|
-
const getKey = makeKey("GET", variant, "");
|
|
219
|
-
if (byKey.has(getKey)) return byKey.get(getKey);
|
|
220
|
-
}
|
|
221
|
-
return null;
|
|
222
|
-
};
|
|
223
|
-
|
|
224
|
-
const findByUrl = (url) => {
|
|
225
|
-
if (isLocalResource(url)) return null;
|
|
226
|
-
const variants = expandUrlVariants(url);
|
|
227
|
-
for (const variant of variants) {
|
|
228
|
-
const direct = byKey.get(makeKey("GET", variant, ""));
|
|
229
|
-
if (direct) return direct;
|
|
230
|
-
}
|
|
231
|
-
// Attempt a looser match: ignore querystring if needed.
|
|
232
|
-
for (const variant of variants) {
|
|
233
|
-
try {
|
|
234
|
-
const withoutQuery = new URL(variant).origin + new URL(variant).pathname;
|
|
235
|
-
const direct = byKey.get(makeKey("GET", withoutQuery, ""));
|
|
236
|
-
if (direct) return direct;
|
|
237
|
-
} catch {}
|
|
238
|
-
}
|
|
239
|
-
return null;
|
|
240
|
-
};
|
|
241
|
-
|
|
242
|
-
const findLocalPath = (url) => {
|
|
243
|
-
if (!url) return null;
|
|
244
|
-
const variants = expandUrlVariants(url);
|
|
245
|
-
for (const variant of variants) {
|
|
246
|
-
const hit = resourceUrlMap.get(variant);
|
|
247
|
-
if (hit) return hit;
|
|
248
|
-
}
|
|
249
|
-
for (const variant of variants) {
|
|
250
|
-
try {
|
|
251
|
-
const withoutQuery = new URL(variant).origin + new URL(variant).pathname;
|
|
252
|
-
const hit = resourceUrlMap.get(withoutQuery);
|
|
253
|
-
if (hit) return hit;
|
|
254
|
-
} catch {}
|
|
255
|
-
}
|
|
256
|
-
// If still not found, fallback to data URLs if present.
|
|
257
|
-
return null;
|
|
258
|
-
};
|
|
259
|
-
|
|
260
|
-
// Safe property injection for emulating XHR state transitions.
|
|
261
|
-
const defineProp = (obj, key, value) => {
|
|
262
|
-
try {
|
|
263
|
-
Object.defineProperty(obj, key, { value, configurable: true });
|
|
264
|
-
} catch {}
|
|
265
|
-
};
|
|
266
|
-
|
|
267
|
-
// Base64 helpers for binary payloads.
|
|
268
|
-
const decodeBase64 = (input) => {
|
|
269
|
-
try {
|
|
270
|
-
const binary = atob(input || "");
|
|
271
|
-
const bytes = new Uint8Array(binary.length);
|
|
272
|
-
|
|
273
|
-
Array.from(binary).forEach((char, index) => {
|
|
274
|
-
bytes[index] = char.charCodeAt(0);
|
|
275
|
-
});
|
|
276
|
-
|
|
277
|
-
return bytes;
|
|
278
|
-
} catch {
|
|
279
|
-
return new Uint8Array();
|
|
280
|
-
}
|
|
281
|
-
};
|
|
282
|
-
|
|
283
|
-
const bytesToBase64 = (bytes) => {
|
|
284
|
-
const binary = Array.from(bytes, (value) => String.fromCharCode(value)).join("");
|
|
285
|
-
return btoa(binary);
|
|
286
|
-
};
|
|
287
|
-
|
|
288
|
-
const textToBase64 = (text) => {
|
|
289
|
-
try {
|
|
290
|
-
const bytes = new TextEncoder().encode(text || "");
|
|
291
|
-
return bytesToBase64(bytes);
|
|
292
|
-
} catch {
|
|
293
|
-
return btoa(text || "");
|
|
294
|
-
}
|
|
295
|
-
};
|
|
296
|
-
|
|
297
|
-
// Resolve a content type from recorded response headers.
|
|
298
|
-
const getContentType = (record) => {
|
|
299
|
-
const headers = record.responseHeaders || {};
|
|
300
|
-
for (const key in headers) {
|
|
301
|
-
if (key.toLowerCase() === "content-type") {
|
|
302
|
-
return headers[key] || "application/octet-stream";
|
|
303
|
-
}
|
|
304
|
-
}
|
|
305
|
-
return "application/octet-stream";
|
|
306
|
-
};
|
|
307
|
-
|
|
308
|
-
// Turn a recorded response into a data URL for inline usage.
|
|
309
|
-
const toDataUrl = (record, fallbackType) => {
|
|
310
|
-
if (!record) return "";
|
|
311
|
-
const contentType = getContentType(record) || fallbackType || "application/octet-stream";
|
|
312
|
-
if (record.responseEncoding === "base64" && record.responseBodyBase64) {
|
|
313
|
-
return "data:" + contentType + ";base64," + record.responseBodyBase64;
|
|
314
|
-
}
|
|
315
|
-
|
|
316
|
-
if (record.responseBody) {
|
|
317
|
-
return "data:" + contentType + ";base64," + textToBase64(record.responseBody);
|
|
318
|
-
}
|
|
319
|
-
return "data:" + (fallbackType || "application/octet-stream") + ",";
|
|
320
|
-
};
|
|
321
|
-
|
|
322
|
-
// Build a real Response object from the recorded payload.
|
|
323
|
-
const responseFromRecord = (record) => {
|
|
324
|
-
const headers = new Headers(record.responseHeaders || {});
|
|
325
|
-
if (record.responseEncoding === "base64" && record.responseBodyBase64) {
|
|
326
|
-
const bytes = decodeBase64(record.responseBodyBase64);
|
|
327
|
-
return new Response(bytes, {
|
|
328
|
-
status: record.status || 200,
|
|
329
|
-
statusText: record.statusText || "OK",
|
|
330
|
-
headers
|
|
331
|
-
});
|
|
332
|
-
}
|
|
333
|
-
const bodyText = record.responseBody || "";
|
|
334
|
-
return new Response(bodyText, {
|
|
335
|
-
status: record.status || 200,
|
|
336
|
-
statusText: record.statusText || "OK",
|
|
337
|
-
headers
|
|
338
|
-
});
|
|
339
|
-
};
|
|
340
|
-
|
|
341
|
-
${hackerScripts}
|
|
342
|
-
})();
|
|
343
|
-
</script>
|
|
344
|
-
`;
|
|
345
|
-
};
|
|
346
|
-
exports.buildReplayScript = buildReplayScript;
|
package/dist/lib/resources.js
DELETED
|
@@ -1,131 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
-
if (k2 === undefined) k2 = k;
|
|
4
|
-
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
-
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
-
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
-
}
|
|
8
|
-
Object.defineProperty(o, k2, desc);
|
|
9
|
-
}) : (function(o, m, k, k2) {
|
|
10
|
-
if (k2 === undefined) k2 = k;
|
|
11
|
-
o[k2] = m[k];
|
|
12
|
-
}));
|
|
13
|
-
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
-
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
-
}) : function(o, v) {
|
|
16
|
-
o["default"] = v;
|
|
17
|
-
});
|
|
18
|
-
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
-
var ownKeys = function(o) {
|
|
20
|
-
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
-
var ar = [];
|
|
22
|
-
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
-
return ar;
|
|
24
|
-
};
|
|
25
|
-
return ownKeys(o);
|
|
26
|
-
};
|
|
27
|
-
return function (mod) {
|
|
28
|
-
if (mod && mod.__esModule) return mod;
|
|
29
|
-
var result = {};
|
|
30
|
-
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
-
__setModuleDefault(result, mod);
|
|
32
|
-
return result;
|
|
33
|
-
};
|
|
34
|
-
})();
|
|
35
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
36
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
37
|
-
};
|
|
38
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
-
exports.applyResourceMapToDom = exports.downloadResource = exports.extractResourceUrls = exports.toAbsoluteUrl = void 0;
|
|
40
|
-
const node_crypto_1 = __importDefault(require("node:crypto"));
|
|
41
|
-
const promises_1 = __importDefault(require("node:fs/promises"));
|
|
42
|
-
const node_path_1 = __importDefault(require("node:path"));
|
|
43
|
-
const cheerio = __importStar(require("cheerio"));
|
|
44
|
-
const content_type_1 = require("./content-type");
|
|
45
|
-
const toAbsoluteUrl = (baseUrl, resourceUrl) => {
|
|
46
|
-
try {
|
|
47
|
-
return new URL(resourceUrl, baseUrl).toString();
|
|
48
|
-
}
|
|
49
|
-
catch {
|
|
50
|
-
return resourceUrl;
|
|
51
|
-
}
|
|
52
|
-
};
|
|
53
|
-
exports.toAbsoluteUrl = toAbsoluteUrl;
|
|
54
|
-
const extractResourceUrls = (html, baseUrl) => {
|
|
55
|
-
const $ = cheerio.load(html);
|
|
56
|
-
const urls = [];
|
|
57
|
-
const collect = (selector, attr) => {
|
|
58
|
-
$(selector).each((_, element) => {
|
|
59
|
-
const value = $(element).attr(attr);
|
|
60
|
-
if (value) {
|
|
61
|
-
urls.push({ attr, element });
|
|
62
|
-
}
|
|
63
|
-
});
|
|
64
|
-
};
|
|
65
|
-
collect("script[src]", "src");
|
|
66
|
-
collect("link[rel=stylesheet][href]", "href");
|
|
67
|
-
collect("link[rel=icon][href]", "href");
|
|
68
|
-
collect("img[src]", "src");
|
|
69
|
-
collect("source[src]", "src");
|
|
70
|
-
collect("video[src]", "src");
|
|
71
|
-
collect("audio[src]", "src");
|
|
72
|
-
const srcsetItems = [];
|
|
73
|
-
$("img[srcset], source[srcset]").each((_, element) => {
|
|
74
|
-
const value = $(element).attr("srcset");
|
|
75
|
-
if (value) {
|
|
76
|
-
srcsetItems.push({ element, value });
|
|
77
|
-
}
|
|
78
|
-
});
|
|
79
|
-
const resourceUrls = urls.map(({ attr, element }) => {
|
|
80
|
-
const value = $(element).attr(attr) || "";
|
|
81
|
-
return {
|
|
82
|
-
attr,
|
|
83
|
-
element,
|
|
84
|
-
url: (0, exports.toAbsoluteUrl)(baseUrl, value)
|
|
85
|
-
};
|
|
86
|
-
});
|
|
87
|
-
return { $, resourceUrls, srcsetItems };
|
|
88
|
-
};
|
|
89
|
-
exports.extractResourceUrls = extractResourceUrls;
|
|
90
|
-
const downloadResource = async (url, outputDir, referer) => {
|
|
91
|
-
const headers = {};
|
|
92
|
-
if (referer) {
|
|
93
|
-
headers.referer = referer;
|
|
94
|
-
}
|
|
95
|
-
const response = await fetch(url, { redirect: "follow", headers });
|
|
96
|
-
const contentType = response.headers.get("content-type");
|
|
97
|
-
const buffer = Buffer.from(await response.arrayBuffer());
|
|
98
|
-
const urlPath = new URL(url).pathname;
|
|
99
|
-
const ext = node_path_1.default.extname(urlPath) || (0, content_type_1.extensionFromContentType)(contentType);
|
|
100
|
-
const filename = `${node_crypto_1.default.createHash("sha1").update(url).digest("hex")}${ext}`;
|
|
101
|
-
const outputPath = node_path_1.default.join(outputDir, filename);
|
|
102
|
-
await promises_1.default.writeFile(outputPath, buffer);
|
|
103
|
-
return { outputPath, filename, contentType, size: buffer.length };
|
|
104
|
-
};
|
|
105
|
-
exports.downloadResource = downloadResource;
|
|
106
|
-
const applyResourceMapToDom = ($, resourceUrls, srcsetItems, baseUrl, resourceMap, assetsDirName) => {
|
|
107
|
-
for (const resource of resourceUrls) {
|
|
108
|
-
const local = resourceMap.get(resource.url);
|
|
109
|
-
if (!local) {
|
|
110
|
-
continue;
|
|
111
|
-
}
|
|
112
|
-
$(resource.element).attr(resource.attr, node_path_1.default.join(assetsDirName, local));
|
|
113
|
-
}
|
|
114
|
-
for (const item of srcsetItems) {
|
|
115
|
-
const parts = item.value.split(",").map((part) => part.trim());
|
|
116
|
-
const rewritten = parts
|
|
117
|
-
.map((part) => {
|
|
118
|
-
const [url, descriptor] = part.split(/\s+/, 2);
|
|
119
|
-
const absolute = (0, exports.toAbsoluteUrl)(baseUrl, url);
|
|
120
|
-
const local = resourceMap.get(absolute);
|
|
121
|
-
if (!local) {
|
|
122
|
-
return part;
|
|
123
|
-
}
|
|
124
|
-
const nextUrl = node_path_1.default.join(assetsDirName, local);
|
|
125
|
-
return descriptor ? `${nextUrl} ${descriptor}` : nextUrl;
|
|
126
|
-
})
|
|
127
|
-
.join(", ");
|
|
128
|
-
$(item.element).attr("srcset", rewritten);
|
|
129
|
-
}
|
|
130
|
-
};
|
|
131
|
-
exports.applyResourceMapToDom = applyResourceMapToDom;
|
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.runDownloadStage = void 0;
|
|
7
|
-
const ora_1 = __importDefault(require("ora"));
|
|
8
|
-
const node_path_1 = __importDefault(require("node:path"));
|
|
9
|
-
const resources_1 = require("../resources");
|
|
10
|
-
const css_rewrite_1 = require("../css-rewrite");
|
|
11
|
-
const runDownloadStage = async (html, targetUrl, networkRecords, resourcesDir, assetsDirName) => {
|
|
12
|
-
const downloadSpinner = (0, ora_1.default)("Downloading resources").start();
|
|
13
|
-
const dataUrlMap = (0, css_rewrite_1.buildDataUrlMap)(networkRecords);
|
|
14
|
-
const { $, resourceUrls, srcsetItems } = (0, resources_1.extractResourceUrls)(html, targetUrl);
|
|
15
|
-
const resourceMap = new Map();
|
|
16
|
-
const resourceMeta = [];
|
|
17
|
-
let downloadedCount = 0;
|
|
18
|
-
let failedCount = 0;
|
|
19
|
-
for (const resource of resourceUrls) {
|
|
20
|
-
const url = resource.url;
|
|
21
|
-
if (!url || resourceMap.has(url)) {
|
|
22
|
-
continue;
|
|
23
|
-
}
|
|
24
|
-
try {
|
|
25
|
-
const resourceLabel = (() => {
|
|
26
|
-
try {
|
|
27
|
-
const pathname = new URL(url).pathname;
|
|
28
|
-
const basename = node_path_1.default.basename(pathname);
|
|
29
|
-
return basename || url;
|
|
30
|
-
}
|
|
31
|
-
catch {
|
|
32
|
-
return url;
|
|
33
|
-
}
|
|
34
|
-
})();
|
|
35
|
-
downloadSpinner.text = `Downloading ${resourceLabel}`;
|
|
36
|
-
const { filename, contentType, size, outputPath } = await (0, resources_1.downloadResource)(url, resourcesDir, targetUrl);
|
|
37
|
-
if ((contentType && contentType.includes("text/css")) || outputPath.endsWith(".css")) {
|
|
38
|
-
await (0, css_rewrite_1.rewriteCssUrls)(outputPath, url, dataUrlMap);
|
|
39
|
-
}
|
|
40
|
-
resourceMap.set(url, filename);
|
|
41
|
-
resourceMeta.push({
|
|
42
|
-
url,
|
|
43
|
-
localPath: node_path_1.default.join(assetsDirName, filename),
|
|
44
|
-
contentType,
|
|
45
|
-
size
|
|
46
|
-
});
|
|
47
|
-
downloadedCount += 1;
|
|
48
|
-
}
|
|
49
|
-
catch {
|
|
50
|
-
failedCount += 1;
|
|
51
|
-
continue;
|
|
52
|
-
}
|
|
53
|
-
}
|
|
54
|
-
const downloadSummary = failedCount > 0
|
|
55
|
-
? `Resources downloaded (${downloadedCount} saved, ${failedCount} failed)`
|
|
56
|
-
: `Resources downloaded (${downloadedCount} saved)`;
|
|
57
|
-
downloadSpinner.succeed(downloadSummary);
|
|
58
|
-
(0, resources_1.applyResourceMapToDom)($, resourceUrls, srcsetItems, targetUrl, resourceMap, assetsDirName);
|
|
59
|
-
return { resourceMap, resourceMeta, html: $.html(), $ };
|
|
60
|
-
};
|
|
61
|
-
exports.runDownloadStage = runDownloadStage;
|