@pagepocket/cli 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +11 -1
- package/package.json +5 -4
- package/dist/lib/content-type.js +0 -36
- package/dist/lib/css-rewrite.js +0 -62
- package/dist/lib/hackers/capture-network.js +0 -64
- package/dist/lib/hackers/index.js +0 -22
- package/dist/lib/hackers/preload-fetch.js +0 -56
- package/dist/lib/hackers/preload-image.js +0 -61
- package/dist/lib/hackers/preload-xhr.js +0 -59
- package/dist/lib/hackers/replay-beacon.js +0 -21
- package/dist/lib/hackers/replay-dom-rewrite.js +0 -295
- package/dist/lib/hackers/replay-eventsource.js +0 -25
- package/dist/lib/hackers/replay-fetch.js +0 -33
- package/dist/lib/hackers/replay-image.js +0 -48
- package/dist/lib/hackers/replay-svg-image.js +0 -89
- package/dist/lib/hackers/replay-websocket.js +0 -26
- package/dist/lib/hackers/replay-xhr.js +0 -91
- package/dist/lib/hackers/types.js +0 -2
- package/dist/lib/network-records.js +0 -69
- package/dist/lib/replay-script.js +0 -346
- package/dist/lib/resources.js +0 -131
- package/dist/lib/stages/download.js +0 -61
- package/dist/lib/stages/index.js +0 -235
- package/dist/lib/stages/intercept.js +0 -23
- package/dist/lib/stages/trigger.js +0 -56
- package/dist/lib/stages/visit.js +0 -24
- package/dist/lib/types.js +0 -2
- package/dist/preload.js +0 -60
- package/dist/stages/build-snapshot-data.js +0 -14
- package/dist/stages/build-snapshot.js +0 -30
- package/dist/stages/capture-network.js +0 -19
- package/dist/stages/download-resources.js +0 -48
- package/dist/stages/fetch-html.js +0 -69
- package/dist/stages/rewrite-links.js +0 -145
package/dist/lib/stages/index.js
DELETED
|
@@ -1,235 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.runDownloadStage = exports.runTriggerStage = exports.runVisitStage = exports.runInterceptStage = exports.setupInterception = void 0;
|
|
7
|
-
const ora_1 = __importDefault(require("ora"));
|
|
8
|
-
const node_path_1 = __importDefault(require("node:path"));
|
|
9
|
-
const resources_1 = require("../resources");
|
|
10
|
-
const css_rewrite_1 = require("../css-rewrite");
|
|
11
|
-
const hackers_1 = require("../hackers");
|
|
12
|
-
/**
 * Applies the capture-stage hackers to a page.
 *
 * Delegates to `applyCaptureHackers` from the hackers module, passing a
 * context object tagged with the "capture" stage.
 *
 * @param {object} page - Browser page handle forwarded to the hackers untouched.
 * @param {Array} networkRecords - Collection forwarded to the hackers
 *   (presumably where captured traffic is recorded; confirm against the hackers module).
 * @returns {Promise<void>} Resolves once the hackers have been applied.
 */
const setupInterception = async (page, networkRecords) => {
    const { applyCaptureHackers } = hackers_1;
    const captureContext = { stage: "capture", page, networkRecords };
    await applyCaptureHackers(captureContext);
};
|
|
15
|
-
exports.setupInterception = setupInterception;
|
|
16
|
-
/**
 * Reports the interception stage on the terminal.
 *
 * Starts a spinner and immediately marks it as succeeded; no other work is
 * performed here.
 *
 * @returns {void}
 */
const runInterceptStage = () => {
    const createSpinner = ora_1.default;
    createSpinner("Intercepting network requests")
        .start()
        .succeed("Intercepted network requests");
};
|
|
20
|
-
exports.runInterceptStage = runInterceptStage;
|
|
21
|
-
/**
 * Navigates the page to the target URL and waits for a `<body>` element.
 *
 * The spinner is always resolved: it succeeds once the body is present and is
 * marked as failed (before the error is re-thrown) when navigation or the
 * selector wait rejects, so a failed visit never leaves a spinner running.
 *
 * @param {object} page - Puppeteer-style page exposing `goto` and `waitForSelector`.
 * @param {string} targetUrl - URL to open.
 * @param {number} navigationTimeoutMs - Timeout passed to `page.goto`.
 * @returns {Promise<*>} Whatever `page.goto` resolved with.
 * @throws Re-throws any error raised by `page.goto` or `page.waitForSelector`.
 */
const runVisitStage = async (page, targetUrl, navigationTimeoutMs) => {
    const spinner = (0, ora_1.default)("Visiting the target site").start();
    try {
        const response = await page.goto(targetUrl, {
            waitUntil: "domcontentloaded",
            timeout: navigationTimeoutMs
        });
        await page.waitForSelector("body", { timeout: 15000 });
        spinner.succeed("Visited the target site");
        return response;
    }
    catch (error) {
        spinner.fail("Failed to visit the target site");
        throw error;
    }
};
|
|
31
|
-
exports.runVisitStage = runVisitStage;
|
|
32
|
-
/**
 * Provokes lazily-loaded requests by hovering every element, then waits for
 * the page to go quiet.
 *
 * Sequence: wait for `<body>`, settle, dispatch synthetic hover events on every
 * element, settle again. The spinner is marked as failed (and the error
 * re-thrown) if any non-best-effort step rejects.
 *
 * @param {object} page - Puppeteer-style page exposing `waitForSelector`,
 *   `waitForNetworkIdle`, `waitForFunction` and `evaluate`.
 * @param {number} pendingTimeoutMs - Timeout for the pending-request wait.
 * @returns {Promise<void>}
 * @throws Re-throws errors from `waitForSelector` or `evaluate`.
 */
const runTriggerStage = async (page, pendingTimeoutMs) => {
    const spinner = (0, ora_1.default)("Triggering additional requests").start();
    // Best-effort settle: both waits may time out without aborting the stage,
    // then a fixed 2s grace period is always observed.
    const waitUntilQuiet = async () => {
        await page.waitForNetworkIdle({ idleTime: 5000, timeout: 30000 }).catch(() => undefined);
        await page
            .waitForFunction(() => window.__pagepocketPendingRequests === 0, {
            timeout: pendingTimeoutMs
        })
            .catch(() => undefined);
        await new Promise((resolve) => setTimeout(resolve, 2000));
    };
    try {
        await page.waitForSelector("body", { timeout: 15000 });
        await waitUntilQuiet();
        // This callback is handed to page.evaluate, so it must stay
        // self-contained (no references to outer variables).
        await page.evaluate(() => {
            const events = ["mouseover", "mouseenter", "mousemove"];
            const elements = Array.from(document.querySelectorAll("*"));
            for (const el of elements) {
                try {
                    const rect = el.getBoundingClientRect();
                    const hasSize = rect && rect.width >= 1 && rect.height >= 1;
                    // Aim at the element centre; elements without a box get (0, 0).
                    const clientX = hasSize ? rect.left + rect.width / 2 : 0;
                    const clientY = hasSize ? rect.top + rect.height / 2 : 0;
                    for (const type of events) {
                        const evt = new MouseEvent(type, {
                            bubbles: true,
                            cancelable: true,
                            view: window,
                            clientX,
                            clientY
                        });
                        el.dispatchEvent(evt);
                    }
                }
                catch {
                    // Deliberately ignored: a failure on one element must not
                    // stop the remaining elements from being hovered.
                }
            }
        });
        await waitUntilQuiet();
        spinner.succeed("Triggered additional requests");
    }
    catch (error) {
        spinner.fail("Failed to trigger additional requests");
        throw error;
    }
};
|
|
74
|
-
exports.runTriggerStage = runTriggerStage;
|
|
75
|
-
/**
 * Downloads every resource referenced by the HTML and rewrites the document
 * to point at the local copies.
 *
 * Resources are fetched one at a time. A failure on an individual resource is
 * counted and skipped; a failure anywhere else (URL extraction, DOM rewrite)
 * marks the spinner as failed and is re-thrown so the spinner never keeps
 * running after an error.
 *
 * @param {string} html - Source document.
 * @param {string} targetUrl - URL the document was loaded from.
 * @param {Array} networkRecords - Records handed to `buildDataUrlMap`.
 * @param {string} resourcesDir - Directory passed to `downloadResource`.
 * @param {string} assetsDirName - Directory name used to build each `localPath`.
 * @returns {Promise<{resourceMap: Map<string, string>, resourceMeta: Array<object>, html: string}>}
 * @throws Re-throws errors raised outside the per-resource download attempt.
 */
const runDownloadStage = async (html, targetUrl, networkRecords, resourcesDir, assetsDirName) => {
    const downloadSpinner = (0, ora_1.default)("Downloading resources").start();
    // Spinner label: the last path segment of the URL, or the raw URL when it
    // cannot be parsed or has no file name.
    const describeResource = (url) => {
        try {
            const pathname = new URL(url).pathname;
            return node_path_1.default.basename(pathname) || url;
        }
        catch {
            return url;
        }
    };
    try {
        const dataUrlMap = (0, css_rewrite_1.buildDataUrlMap)(networkRecords);
        const { $, resourceUrls, srcsetItems } = (0, resources_1.extractResourceUrls)(html, targetUrl);
        const resourceMap = new Map();
        const resourceMeta = [];
        let downloadedCount = 0;
        let failedCount = 0;
        for (const resource of resourceUrls) {
            const url = resource.url;
            // Skip empty URLs and URLs that were already saved.
            if (!url || resourceMap.has(url)) {
                continue;
            }
            try {
                downloadSpinner.text = `Downloading ${describeResource(url)}`;
                const { filename, contentType, size, outputPath } = await (0, resources_1.downloadResource)(url, resourcesDir, targetUrl);
                if ((contentType && contentType.includes("text/css")) || outputPath.endsWith(".css")) {
                    await (0, css_rewrite_1.rewriteCssUrls)(outputPath, url, dataUrlMap);
                }
                resourceMap.set(url, filename);
                resourceMeta.push({
                    url,
                    localPath: node_path_1.default.join(assetsDirName, filename),
                    contentType,
                    size
                });
                downloadedCount += 1;
            }
            catch {
                // Best-effort: one broken resource must not abort the snapshot.
                failedCount += 1;
            }
        }
        (0, resources_1.applyResourceMapToDom)($, resourceUrls, srcsetItems, targetUrl, resourceMap, assetsDirName);
        const rewrittenHtml = $.html();
        const downloadSummary = failedCount > 0
            ? `Resources downloaded (${downloadedCount} saved, ${failedCount} failed)`
            : `Resources downloaded (${downloadedCount} saved)`;
        downloadSpinner.succeed(downloadSummary);
        return { resourceMap, resourceMeta, html: rewrittenHtml };
    }
    catch (error) {
        downloadSpinner.fail("Failed to download resources");
        throw error;
    }
};
|
|
125
|
-
exports.runDownloadStage = runDownloadStage;
|
|
126
|
-
// NOTE: this module previously repeated the definitions (and `exports.*`
// assignments) of runInterceptStage, runVisitStage, runTriggerStage and
// runDownloadStage at this point. Declaring the same `const` binding twice in
// one scope is a SyntaxError that prevents the module from loading at all, so
// the second copies were removed. The definitions and exports earlier in this
// module remain the single implementation of each stage.
|
package/dist/lib/stages/intercept.js
DELETED
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.runInterceptStage = exports.setupInterception = void 0;
|
|
7
|
-
const ora_1 = __importDefault(require("ora"));
|
|
8
|
-
const hackers_1 = require("../hackers");
|
|
9
|
-
/**
 * Hands the page over to the capture-stage hackers.
 *
 * Builds a context tagged with the "capture" stage and awaits
 * `applyCaptureHackers` from the hackers module.
 *
 * @param {object} page - Browser page handle, passed through unchanged.
 * @param {Array} networkRecords - Collection passed through to the hackers
 *   (presumably filled with captured traffic; confirm against the hackers module).
 * @returns {Promise<void>}
 */
const setupInterception = async (page, networkRecords) => {
    const context = { stage: "capture", page, networkRecords };
    const { applyCaptureHackers } = hackers_1;
    await applyCaptureHackers(context);
};
|
|
12
|
-
exports.setupInterception = setupInterception;
|
|
13
|
-
/**
 * Reports the interception stage on the terminal.
 *
 * Starts a spinner and marks it as succeeded. Should reporting success throw,
 * the spinner is marked as failed and the error is re-thrown.
 *
 * @returns {Promise<void>}
 */
const runInterceptStage = async () => {
    const createSpinner = ora_1.default;
    const progress = createSpinner("Intercepting network requests").start();
    try {
        progress.succeed("Intercepted network requests");
    }
    catch (failure) {
        progress.fail("Failed to intercept network requests");
        throw failure;
    }
};
|
|
23
|
-
exports.runInterceptStage = runInterceptStage;
|
package/dist/lib/stages/trigger.js
DELETED
|
@@ -1,56 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.runTriggerStage = void 0;
|
|
7
|
-
const ora_1 = __importDefault(require("ora"));
|
|
8
|
-
/**
 * Hovers every element on the page to provoke extra requests, waiting for the
 * page to settle before and after.
 *
 * @param {object} page - Puppeteer-style page exposing `waitForSelector`,
 *   `waitForNetworkIdle`, `waitForFunction` and `evaluate`.
 * @param {number} pendingTimeoutMs - Timeout for the pending-request wait.
 * @returns {Promise<void>}
 * @throws Re-throws errors from `waitForSelector` or `evaluate` after marking
 *   the spinner as failed.
 */
const runTriggerStage = async (page, pendingTimeoutMs) => {
    const progress = (0, ora_1.default)("Triggering additional requests").start();
    const ignore = () => undefined;
    const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
    // Both waits are best-effort (rejections are ignored); the 2s pause always runs.
    const settle = async () => {
        await page.waitForNetworkIdle({ idleTime: 5000, timeout: 30000 }).catch(ignore);
        await page
            .waitForFunction(() => window.__pagepocketPendingRequests === 0, { timeout: pendingTimeoutMs })
            .catch(ignore);
        await pause(2000);
    };
    // Passed to page.evaluate, so it may only reference names it declares itself.
    const hoverEveryElement = () => {
        const hoverTypes = ["mouseover", "mouseenter", "mousemove"];
        for (const node of Array.from(document.querySelectorAll("*"))) {
            try {
                const box = node.getBoundingClientRect();
                const hasBox = box && box.width >= 1 && box.height >= 1;
                // Centre of the element, or the origin when it has no box.
                const clientX = hasBox ? box.left + box.width / 2 : 0;
                const clientY = hasBox ? box.top + box.height / 2 : 0;
                hoverTypes.forEach((type) => {
                    node.dispatchEvent(new MouseEvent(type, {
                        bubbles: true,
                        cancelable: true,
                        view: window,
                        clientX,
                        clientY
                    }));
                });
            }
            catch { }
        }
    };
    try {
        await page.waitForSelector("body", { timeout: 15000 });
        await settle();
        await page.evaluate(hoverEveryElement);
        await settle();
        progress.succeed("Triggered additional requests");
    }
    catch (failure) {
        progress.fail("Failed to trigger additional requests");
        throw failure;
    }
};
|
|
56
|
-
exports.runTriggerStage = runTriggerStage;
|
package/dist/lib/stages/visit.js
DELETED
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.runVisitStage = void 0;
|
|
7
|
-
const ora_1 = __importDefault(require("ora"));
|
|
8
|
-
/**
 * Opens the target URL and waits until a `<body>` element exists.
 *
 * @param {object} page - Puppeteer-style page exposing `goto` and `waitForSelector`.
 * @param {string} targetUrl - URL to open.
 * @param {number} navigationTimeoutMs - Timeout passed to `page.goto`.
 * @returns {Promise<*>} Whatever `page.goto` resolved with.
 * @throws Re-throws navigation or selector errors after marking the spinner as failed.
 */
const runVisitStage = async (page, targetUrl, navigationTimeoutMs) => {
    const progress = (0, ora_1.default)("Visiting the target site").start();
    const navigationOptions = {
        waitUntil: "domcontentloaded",
        timeout: navigationTimeoutMs
    };
    try {
        const navigationResponse = await page.goto(targetUrl, navigationOptions);
        await page.waitForSelector("body", { timeout: 15000 });
        progress.succeed("Visited the target site");
        return navigationResponse;
    }
    catch (failure) {
        progress.fail("Failed to visit the target site");
        throw failure;
    }
};
|
|
24
|
-
exports.runVisitStage = runVisitStage;
|
package/dist/lib/types.js
DELETED
package/dist/preload.js
DELETED
|
@@ -1,60 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.buildPreloadScript = void 0;
|
|
4
|
-
const hackers_1 = require("./lib/hackers");
|
|
5
|
-
/**
 * Builds the source text of the script that is injected into the page.
 *
 * The generated IIFE runs at most once per window (guarded by
 * `window.__pagepocketPatched`), initialises `window.__pagepocketRecords` and
 * `window.__pagepocketPendingRequests`, declares shared helpers, and then
 * inlines the snippet produced by every preload hacker.
 *
 * @returns {string} JavaScript source of the preload script.
 */
const buildPreloadScript = () => {
    const context = { stage: "preload" };
    // One labelled snippet per hacker, in registration order.
    const snippets = [];
    for (const hacker of hackers_1.preloadHackers) {
        snippets.push(` // hacker:${hacker.id}\n${hacker.build(context)}`);
    }
    const hackerScripts = snippets.join("\n");
    return `
(function () {
if (window.__pagepocketPatched) {
return;
}
Object.defineProperty(window, "__pagepocketPatched", { value: true });

const records = [];
window.__pagepocketRecords = records;
window.__pagepocketPendingRequests = 0;

const toAbsoluteUrl = (input) => {
try {
return new URL(input, window.location.href).toString();
} catch {
return input;
}
};

const normalizeBody = (body) => {
if (body === undefined || body === null) {
return "";
}
if (typeof body === "string") {
return body;
}
if (body instanceof ArrayBuffer) {
try {
return new TextDecoder().decode(body);
} catch {
return "";
}
}
if (body instanceof Blob) {
return "";
}
return String(body);
};

const trackPendingStart = () => {
window.__pagepocketPendingRequests += 1;
};
const trackPendingEnd = () => {
window.__pagepocketPendingRequests = Math.max(0, window.__pagepocketPendingRequests - 1);
};

${hackerScripts}
})();
`;
};
|
|
60
|
-
exports.buildPreloadScript = buildPreloadScript;
|
package/dist/stages/build-snapshot-data.js
DELETED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.buildSnapshotData = void 0;
|
|
4
|
-
/**
 * Assembles the snapshot metadata record.
 *
 * Copies the relevant fields from `input` (by reference) and stamps the
 * record with the current time as an ISO-8601 string.
 *
 * @param {object} input
 * @param {string} input.targetUrl - Stored as `url`.
 * @param {string} input.title - Stored as `title`.
 * @param {Array} input.fetchXhrRecords - Stored as `fetchXhrRecords`.
 * @param {Array} input.capturedNetworkRecords - Stored as `networkRecords`.
 * @param {Array} input.resources - Stored as `resources`.
 * @returns {{url: string, title: string, capturedAt: string, fetchXhrRecords: Array, networkRecords: Array, resources: Array}}
 */
const buildSnapshotData = (input) => {
    const snapshot = {};
    snapshot.url = input.targetUrl;
    snapshot.title = input.title;
    snapshot.capturedAt = new Date().toISOString();
    snapshot.fetchXhrRecords = input.fetchXhrRecords;
    snapshot.networkRecords = input.capturedNetworkRecords;
    snapshot.resources = input.resources;
    return snapshot;
};
|
|
14
|
-
exports.buildSnapshotData = buildSnapshotData;
|
package/dist/stages/build-snapshot.js
DELETED
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.buildSnapshotHtml = void 0;
|
|
7
|
-
const node_path_1 = __importDefault(require("node:path"));
|
|
8
|
-
const replay_script_1 = require("../lib/replay-script");
|
|
9
|
-
/**
 * Produces the final snapshot HTML: injects the replay script and, when a
 * favicon data URL is supplied, points the document's icon at it.
 *
 * The replay script is prepended to `<head>` (or to the document root when
 * there is no head). An existing `link[rel="icon"]` has its `href` replaced;
 * otherwise a new `<link rel="icon">` is appended. The new link is built
 * through the attribute API instead of string concatenation so that quotes or
 * angle brackets inside the URL are escaped on serialisation rather than
 * being parsed as markup.
 *
 * @param {object} input
 * @param {Function} input.$ - Loaded cheerio document (callable, with `root()` and `html()`).
 * @param {string} input.outputRequestsPath - Only its base name is passed to the replay script builder.
 * @param {string} input.targetUrl - Passed to the replay script builder.
 * @param {string} [input.faviconDataUrl] - Favicon URL to embed; skipped when falsy.
 * @returns {string} Serialised HTML of the modified document.
 */
const buildSnapshotHtml = (input) => {
    const requestsFileName = node_path_1.default.basename(input.outputRequestsPath);
    const replayScript = (0, replay_script_1.buildReplayScript)(requestsFileName, input.targetUrl);
    const head = input.$("head");
    if (head.length) {
        head.prepend(replayScript);
    }
    else {
        input.$.root().prepend(replayScript);
    }
    if (input.faviconDataUrl) {
        const existingIcon = input.$('link[rel="icon"]');
        if (existingIcon.length) {
            existingIcon.attr("href", input.faviconDataUrl);
        }
        else {
            const link = input.$("<link>")
                .attr("rel", "icon")
                .attr("href", input.faviconDataUrl);
            if (head.length) {
                head.append(link);
            }
            else {
                input.$.root().append(link);
            }
        }
    }
    return input.$.html();
};
|
|
30
|
-
exports.buildSnapshotHtml = buildSnapshotHtml;
|
package/dist/stages/capture-network.js
DELETED
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.captureNetwork = void 0;
|
|
4
|
-
const lib_1 = require("@pagepocket/lib");
|
|
5
|
-
const lighterceptor_1 = require("@pagepocket/lighterceptor");
|
|
6
|
-
/**
 * Runs a recursive Lighterceptor capture against the target URL and maps the
 * captured records through `mapCapturedNetworkRecords`.
 *
 * The captured title replaces `currentTitle` only when `currentTitle` is the
 * literal placeholder "snapshot" and a captured title is available.
 *
 * @param {string} targetUrl - URL handed to Lighterceptor.
 * @param {string} currentTitle - Title determined so far.
 * @returns {Promise<{networkRecords: *, capturedNetworkRecords: Array, capturedTitle: *, title: string}>}
 */
const captureNetwork = async (targetUrl, currentTitle) => {
    const interceptor = new lighterceptor_1.Lighterceptor(targetUrl, { recursion: true });
    const result = await interceptor.run();
    // A capture without records is treated as an empty list.
    const capturedNetworkRecords = result.networkRecords ?? [];
    const networkRecords = (0, lib_1.mapCapturedNetworkRecords)(capturedNetworkRecords);
    const capturedTitle = result.title;
    let title = currentTitle;
    if (currentTitle === "snapshot" && capturedTitle) {
        title = capturedTitle;
    }
    return { networkRecords, capturedNetworkRecords, capturedTitle, title };
};
|
|
19
|
-
exports.captureNetwork = captureNetwork;
|
package/dist/stages/download-resources.js
DELETED
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.downloadResources = void 0;
|
|
7
|
-
const node_path_1 = __importDefault(require("node:path"));
|
|
8
|
-
const css_rewrite_1 = require("../lib/css-rewrite");
|
|
9
|
-
const resources_1 = require("../lib/resources");
|
|
10
|
-
/**
 * Downloads every resource referenced by `input.html`, one at a time, and
 * rewrites the parsed document to reference the local copies.
 *
 * Empty and already-saved URLs are skipped. A resource whose download (or CSS
 * rewrite) throws is counted as failed and otherwise ignored.
 *
 * @param {object} input
 * @param {string} input.html - Source document.
 * @param {string} input.targetUrl - URL the document was loaded from.
 * @param {Array} input.networkRecords - Records handed to `buildDataUrlMap`.
 * @param {string} input.resourcesDir - Directory passed to `downloadResource`.
 * @param {string} input.assetsDirName - Directory name used to build each `localPath`.
 * @returns {Promise<{$: Function, resourceMeta: Array<object>, downloadedCount: number, failedCount: number}>}
 */
const downloadResources = async (input) => {
    const inlineDataUrls = (0, css_rewrite_1.buildDataUrlMap)(input.networkRecords);
    const extracted = (0, resources_1.extractResourceUrls)(input.html, input.targetUrl);
    const { $, resourceUrls, srcsetItems } = extracted;
    const savedByUrl = new Map();
    const resourceMeta = [];
    const tally = { saved: 0, failed: 0 };
    for (const resource of resourceUrls) {
        const url = resource.url;
        const shouldFetch = Boolean(url) && !savedByUrl.has(url);
        if (shouldFetch) {
            try {
                const saved = await (0, resources_1.downloadResource)(url, input.resourcesDir, input.targetUrl);
                const { filename, contentType, size, outputPath } = saved;
                // Stylesheets are recognised by content type or by file extension.
                const isStylesheet = (contentType && contentType.includes("text/css")) || outputPath.endsWith(".css");
                if (isStylesheet) {
                    await (0, css_rewrite_1.rewriteCssUrls)(outputPath, url, inlineDataUrls);
                }
                savedByUrl.set(url, filename);
                const localPath = node_path_1.default.join(input.assetsDirName, filename);
                resourceMeta.push({ url, localPath, contentType, size });
                tally.saved += 1;
            }
            catch {
                tally.failed += 1;
            }
        }
    }
    (0, resources_1.applyResourceMapToDom)($, resourceUrls, srcsetItems, input.targetUrl, savedByUrl, input.assetsDirName);
    return {
        $,
        resourceMeta,
        downloadedCount: tally.saved,
        failedCount: tally.failed
    };
};
|
|
48
|
-
exports.downloadResources = downloadResources;
|
package/dist/stages/fetch-html.js
DELETED
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
-
if (k2 === undefined) k2 = k;
|
|
4
|
-
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
-
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
-
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
-
}
|
|
8
|
-
Object.defineProperty(o, k2, desc);
|
|
9
|
-
}) : (function(o, m, k, k2) {
|
|
10
|
-
if (k2 === undefined) k2 = k;
|
|
11
|
-
o[k2] = m[k];
|
|
12
|
-
}));
|
|
13
|
-
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
-
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
-
}) : function(o, v) {
|
|
16
|
-
o["default"] = v;
|
|
17
|
-
});
|
|
18
|
-
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
-
var ownKeys = function(o) {
|
|
20
|
-
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
-
var ar = [];
|
|
22
|
-
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
-
return ar;
|
|
24
|
-
};
|
|
25
|
-
return ownKeys(o);
|
|
26
|
-
};
|
|
27
|
-
return function (mod) {
|
|
28
|
-
if (mod && mod.__esModule) return mod;
|
|
29
|
-
var result = {};
|
|
30
|
-
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
-
__setModuleDefault(result, mod);
|
|
32
|
-
return result;
|
|
33
|
-
};
|
|
34
|
-
})();
|
|
35
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
36
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
37
|
-
};
|
|
38
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
-
exports.fetchHtml = void 0;
|
|
40
|
-
const cheerio = __importStar(require("cheerio"));
|
|
41
|
-
const got_1 = __importDefault(require("got"));
|
|
42
|
-
/**
 * Fetches the target page with browser-like request headers and extracts its title.
 *
 * Header names are kept in lower case throughout, so each header appears
 * exactly once and an override always replaces the matching default whatever
 * casing the caller used. The defaults previously listed both
 * "accept-language" and "Accept-Language" with different values; the single
 * entry kept here carries the value of the later, mixed-case entry, which is
 * the one that wins when header names are lower-cased in insertion order.
 *
 * @param {string} targetUrl - Page to fetch; also sent as the `referer`.
 * @param {number} timeoutMs - Whole-request timeout in milliseconds.
 * @param {Object<string, string>} [headersOverride] - Extra or replacement headers.
 * @returns {Promise<{html: string, title: string}>} Response body and the text
 *   of its first `<title>`, or "snapshot" when that text is empty.
 * @throws {Error} `HTTP <status> <message>` when the status is outside 200-299.
 */
const fetchHtml = async (targetUrl, timeoutMs, headersOverride) => {
    const headers = {
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
        accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "accept-language": "zh-CN,zh;q=0.9",
        referer: targetUrl,
        "sec-fetch-site": "same-origin",
        "sec-fetch-mode": "navigate",
        "sec-fetch-dest": "document"
    };
    for (const [name, value] of Object.entries(headersOverride ?? {})) {
        headers[name.toLowerCase()] = value;
    }
    const response = await (0, got_1.default)(targetUrl, {
        headers,
        followRedirect: true,
        // Status handling is done below so the error message stays under our control.
        throwHttpErrors: false,
        timeout: { request: timeoutMs }
    });
    if (response.statusCode < 200 || response.statusCode >= 300) {
        const statusText = response.statusMessage ? ` ${response.statusMessage}` : "";
        throw new Error(`HTTP ${response.statusCode}${statusText}`);
    }
    const html = response.body;
    const $initial = cheerio.load(html);
    const title = $initial("title").first().text() || "snapshot";
    return { html, title };
};
|
|
69
|
-
exports.fetchHtml = fetchHtml;
|