@pagepocket/cli 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/dist/cli.js +11 -1
  2. package/package.json +5 -4
  3. package/dist/lib/content-type.js +0 -36
  4. package/dist/lib/css-rewrite.js +0 -62
  5. package/dist/lib/hackers/capture-network.js +0 -64
  6. package/dist/lib/hackers/index.js +0 -22
  7. package/dist/lib/hackers/preload-fetch.js +0 -56
  8. package/dist/lib/hackers/preload-image.js +0 -61
  9. package/dist/lib/hackers/preload-xhr.js +0 -59
  10. package/dist/lib/hackers/replay-beacon.js +0 -21
  11. package/dist/lib/hackers/replay-dom-rewrite.js +0 -295
  12. package/dist/lib/hackers/replay-eventsource.js +0 -25
  13. package/dist/lib/hackers/replay-fetch.js +0 -33
  14. package/dist/lib/hackers/replay-image.js +0 -48
  15. package/dist/lib/hackers/replay-svg-image.js +0 -89
  16. package/dist/lib/hackers/replay-websocket.js +0 -26
  17. package/dist/lib/hackers/replay-xhr.js +0 -91
  18. package/dist/lib/hackers/types.js +0 -2
  19. package/dist/lib/network-records.js +0 -69
  20. package/dist/lib/replay-script.js +0 -346
  21. package/dist/lib/resources.js +0 -131
  22. package/dist/lib/stages/download.js +0 -61
  23. package/dist/lib/stages/index.js +0 -235
  24. package/dist/lib/stages/intercept.js +0 -23
  25. package/dist/lib/stages/trigger.js +0 -56
  26. package/dist/lib/stages/visit.js +0 -24
  27. package/dist/lib/types.js +0 -2
  28. package/dist/preload.js +0 -60
  29. package/dist/stages/build-snapshot-data.js +0 -14
  30. package/dist/stages/build-snapshot.js +0 -30
  31. package/dist/stages/capture-network.js +0 -19
  32. package/dist/stages/download-resources.js +0 -48
  33. package/dist/stages/fetch-html.js +0 -69
  34. package/dist/stages/rewrite-links.js +0 -145
@@ -1,235 +0,0 @@
1
- "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
- Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.runDownloadStage = exports.runTriggerStage = exports.runVisitStage = exports.runInterceptStage = exports.setupInterception = void 0;
7
- const ora_1 = __importDefault(require("ora"));
8
- const node_path_1 = __importDefault(require("node:path"));
9
- const resources_1 = require("../resources");
10
- const css_rewrite_1 = require("../css-rewrite");
11
- const hackers_1 = require("../hackers");
12
- const setupInterception = async (page, networkRecords) => {
13
- await (0, hackers_1.applyCaptureHackers)({ stage: "capture", page, networkRecords });
14
- };
15
- exports.setupInterception = setupInterception;
16
- const runInterceptStage = () => {
17
- const spinner = (0, ora_1.default)("Intercepting network requests").start();
18
- spinner.succeed("Intercepted network requests");
19
- };
20
- exports.runInterceptStage = runInterceptStage;
21
- const runVisitStage = async (page, targetUrl, navigationTimeoutMs) => {
22
- const spinner = (0, ora_1.default)("Visiting the target site").start();
23
- const response = await page.goto(targetUrl, {
24
- waitUntil: "domcontentloaded",
25
- timeout: navigationTimeoutMs
26
- });
27
- await page.waitForSelector("body", { timeout: 15000 });
28
- spinner.succeed("Visited the target site");
29
- return response;
30
- };
31
- exports.runVisitStage = runVisitStage;
32
- const runTriggerStage = async (page, pendingTimeoutMs) => {
33
- const spinner = (0, ora_1.default)("Triggering additional requests").start();
34
- await page.waitForSelector("body", { timeout: 15000 });
35
- await page.waitForNetworkIdle({ idleTime: 5000, timeout: 30000 }).catch(() => undefined);
36
- await page
37
- .waitForFunction(() => window.__pagepocketPendingRequests === 0, {
38
- timeout: pendingTimeoutMs
39
- })
40
- .catch(() => undefined);
41
- await new Promise((resolve) => setTimeout(resolve, 2000));
42
- await page.evaluate(() => {
43
- const events = ["mouseover", "mouseenter", "mousemove"];
44
- const elements = Array.from(document.querySelectorAll("*"));
45
- for (const el of elements) {
46
- try {
47
- const rect = el.getBoundingClientRect();
48
- const hasSize = rect && rect.width >= 1 && rect.height >= 1;
49
- const clientX = hasSize ? rect.left + rect.width / 2 : 0;
50
- const clientY = hasSize ? rect.top + rect.height / 2 : 0;
51
- for (const type of events) {
52
- const evt = new MouseEvent(type, {
53
- bubbles: true,
54
- cancelable: true,
55
- view: window,
56
- clientX,
57
- clientY
58
- });
59
- el.dispatchEvent(evt);
60
- }
61
- }
62
- catch { }
63
- }
64
- });
65
- await page.waitForNetworkIdle({ idleTime: 5000, timeout: 30000 }).catch(() => undefined);
66
- await page
67
- .waitForFunction(() => window.__pagepocketPendingRequests === 0, {
68
- timeout: pendingTimeoutMs
69
- })
70
- .catch(() => undefined);
71
- await new Promise((resolve) => setTimeout(resolve, 2000));
72
- spinner.succeed("Triggered additional requests");
73
- };
74
- exports.runTriggerStage = runTriggerStage;
75
- const runDownloadStage = async (html, targetUrl, networkRecords, resourcesDir, assetsDirName) => {
76
- const downloadSpinner = (0, ora_1.default)("Downloading resources").start();
77
- const dataUrlMap = (0, css_rewrite_1.buildDataUrlMap)(networkRecords);
78
- const { $, resourceUrls, srcsetItems } = (0, resources_1.extractResourceUrls)(html, targetUrl);
79
- const resourceMap = new Map();
80
- const resourceMeta = [];
81
- let downloadedCount = 0;
82
- let failedCount = 0;
83
- for (const resource of resourceUrls) {
84
- const url = resource.url;
85
- if (!url || resourceMap.has(url)) {
86
- continue;
87
- }
88
- try {
89
- const resourceLabel = (() => {
90
- try {
91
- const pathname = new URL(url).pathname;
92
- const basename = node_path_1.default.basename(pathname);
93
- return basename || url;
94
- }
95
- catch {
96
- return url;
97
- }
98
- })();
99
- downloadSpinner.text = `Downloading ${resourceLabel}`;
100
- const { filename, contentType, size, outputPath } = await (0, resources_1.downloadResource)(url, resourcesDir, targetUrl);
101
- if ((contentType && contentType.includes("text/css")) || outputPath.endsWith(".css")) {
102
- await (0, css_rewrite_1.rewriteCssUrls)(outputPath, url, dataUrlMap);
103
- }
104
- resourceMap.set(url, filename);
105
- resourceMeta.push({
106
- url,
107
- localPath: node_path_1.default.join(assetsDirName, filename),
108
- contentType,
109
- size
110
- });
111
- downloadedCount += 1;
112
- }
113
- catch {
114
- failedCount += 1;
115
- continue;
116
- }
117
- }
118
- const downloadSummary = failedCount > 0
119
- ? `Resources downloaded (${downloadedCount} saved, ${failedCount} failed)`
120
- : `Resources downloaded (${downloadedCount} saved)`;
121
- downloadSpinner.succeed(downloadSummary);
122
- (0, resources_1.applyResourceMapToDom)($, resourceUrls, srcsetItems, targetUrl, resourceMap, assetsDirName);
123
- return { resourceMap, resourceMeta, html: $.html() };
124
- };
125
- exports.runDownloadStage = runDownloadStage;
126
- const runInterceptStage = async () => {
127
- const spinner = (0, ora_1.default)("Intercepting network requests").start();
128
- spinner.succeed("Intercepted network requests");
129
- };
130
- exports.runInterceptStage = runInterceptStage;
131
- const runVisitStage = async (page, targetUrl, navigationTimeoutMs) => {
132
- const spinner = (0, ora_1.default)("Visiting the target site").start();
133
- const response = await page.goto(targetUrl, {
134
- waitUntil: "domcontentloaded",
135
- timeout: navigationTimeoutMs
136
- });
137
- await page.waitForSelector("body", { timeout: 15000 });
138
- spinner.succeed("Visited the target site");
139
- return response;
140
- };
141
- exports.runVisitStage = runVisitStage;
142
- const runTriggerStage = async (page, pendingTimeoutMs) => {
143
- const spinner = (0, ora_1.default)("Triggering additional requests").start();
144
- await page.waitForSelector("body", { timeout: 15000 });
145
- await page.waitForNetworkIdle({ idleTime: 5000, timeout: 30000 }).catch(() => undefined);
146
- await page
147
- .waitForFunction(() => window.__pagepocketPendingRequests === 0, {
148
- timeout: pendingTimeoutMs
149
- })
150
- .catch(() => undefined);
151
- await new Promise((resolve) => setTimeout(resolve, 2000));
152
- await page.evaluate(() => {
153
- const events = ["mouseover", "mouseenter", "mousemove"];
154
- const elements = Array.from(document.querySelectorAll("*"));
155
- for (const el of elements) {
156
- try {
157
- const rect = el.getBoundingClientRect();
158
- const hasSize = rect && rect.width >= 1 && rect.height >= 1;
159
- const clientX = hasSize ? rect.left + rect.width / 2 : 0;
160
- const clientY = hasSize ? rect.top + rect.height / 2 : 0;
161
- for (const type of events) {
162
- const evt = new MouseEvent(type, {
163
- bubbles: true,
164
- cancelable: true,
165
- view: window,
166
- clientX,
167
- clientY
168
- });
169
- el.dispatchEvent(evt);
170
- }
171
- }
172
- catch { }
173
- }
174
- });
175
- await page.waitForNetworkIdle({ idleTime: 5000, timeout: 30000 }).catch(() => undefined);
176
- await page
177
- .waitForFunction(() => window.__pagepocketPendingRequests === 0, {
178
- timeout: pendingTimeoutMs
179
- })
180
- .catch(() => undefined);
181
- await new Promise((resolve) => setTimeout(resolve, 2000));
182
- spinner.succeed("Triggered additional requests");
183
- };
184
- exports.runTriggerStage = runTriggerStage;
185
- const runDownloadStage = async (html, targetUrl, networkRecords, resourcesDir, assetsDirName) => {
186
- const downloadSpinner = (0, ora_1.default)("Downloading resources").start();
187
- const dataUrlMap = (0, css_rewrite_1.buildDataUrlMap)(networkRecords);
188
- const { $, resourceUrls, srcsetItems } = (0, resources_1.extractResourceUrls)(html, targetUrl);
189
- const resourceMap = new Map();
190
- const resourceMeta = [];
191
- let downloadedCount = 0;
192
- let failedCount = 0;
193
- for (const resource of resourceUrls) {
194
- const url = resource.url;
195
- if (!url || resourceMap.has(url)) {
196
- continue;
197
- }
198
- try {
199
- const resourceLabel = (() => {
200
- try {
201
- const pathname = new URL(url).pathname;
202
- const basename = node_path_1.default.basename(pathname);
203
- return basename || url;
204
- }
205
- catch {
206
- return url;
207
- }
208
- })();
209
- downloadSpinner.text = `Downloading ${resourceLabel}`;
210
- const { filename, contentType, size, outputPath } = await (0, resources_1.downloadResource)(url, resourcesDir, targetUrl);
211
- if ((contentType && contentType.includes("text/css")) || outputPath.endsWith(".css")) {
212
- await (0, css_rewrite_1.rewriteCssUrls)(outputPath, url, dataUrlMap);
213
- }
214
- resourceMap.set(url, filename);
215
- resourceMeta.push({
216
- url,
217
- localPath: node_path_1.default.join(assetsDirName, filename),
218
- contentType,
219
- size
220
- });
221
- downloadedCount += 1;
222
- }
223
- catch {
224
- failedCount += 1;
225
- continue;
226
- }
227
- }
228
- const downloadSummary = failedCount > 0
229
- ? `Resources downloaded (${downloadedCount} saved, ${failedCount} failed)`
230
- : `Resources downloaded (${downloadedCount} saved)`;
231
- downloadSpinner.succeed(downloadSummary);
232
- (0, resources_1.applyResourceMapToDom)($, resourceUrls, srcsetItems, targetUrl, resourceMap, assetsDirName);
233
- return { resourceMap, resourceMeta, html: $.html() };
234
- };
235
- exports.runDownloadStage = runDownloadStage;
@@ -1,23 +0,0 @@
1
- "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
- Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.runInterceptStage = exports.setupInterception = void 0;
7
- const ora_1 = __importDefault(require("ora"));
8
- const hackers_1 = require("../hackers");
9
- const setupInterception = async (page, networkRecords) => {
10
- await (0, hackers_1.applyCaptureHackers)({ stage: "capture", page, networkRecords });
11
- };
12
- exports.setupInterception = setupInterception;
13
- const runInterceptStage = async () => {
14
- const spinner = (0, ora_1.default)("Intercepting network requests").start();
15
- try {
16
- spinner.succeed("Intercepted network requests");
17
- }
18
- catch (error) {
19
- spinner.fail("Failed to intercept network requests");
20
- throw error;
21
- }
22
- };
23
- exports.runInterceptStage = runInterceptStage;
@@ -1,56 +0,0 @@
1
- "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
- Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.runTriggerStage = void 0;
7
- const ora_1 = __importDefault(require("ora"));
8
- const runTriggerStage = async (page, pendingTimeoutMs) => {
9
- const spinner = (0, ora_1.default)("Triggering additional requests").start();
10
- try {
11
- await page.waitForSelector("body", { timeout: 15000 });
12
- await page.waitForNetworkIdle({ idleTime: 5000, timeout: 30000 }).catch(() => undefined);
13
- await page
14
- .waitForFunction(() => window.__pagepocketPendingRequests === 0, {
15
- timeout: pendingTimeoutMs
16
- })
17
- .catch(() => undefined);
18
- await new Promise((resolve) => setTimeout(resolve, 2000));
19
- await page.evaluate(() => {
20
- const events = ["mouseover", "mouseenter", "mousemove"];
21
- const elements = Array.from(document.querySelectorAll("*"));
22
- for (const el of elements) {
23
- try {
24
- const rect = el.getBoundingClientRect();
25
- const hasSize = rect && rect.width >= 1 && rect.height >= 1;
26
- const clientX = hasSize ? rect.left + rect.width / 2 : 0;
27
- const clientY = hasSize ? rect.top + rect.height / 2 : 0;
28
- for (const type of events) {
29
- const evt = new MouseEvent(type, {
30
- bubbles: true,
31
- cancelable: true,
32
- view: window,
33
- clientX,
34
- clientY
35
- });
36
- el.dispatchEvent(evt);
37
- }
38
- }
39
- catch { }
40
- }
41
- });
42
- await page.waitForNetworkIdle({ idleTime: 5000, timeout: 30000 }).catch(() => undefined);
43
- await page
44
- .waitForFunction(() => window.__pagepocketPendingRequests === 0, {
45
- timeout: pendingTimeoutMs
46
- })
47
- .catch(() => undefined);
48
- await new Promise((resolve) => setTimeout(resolve, 2000));
49
- spinner.succeed("Triggered additional requests");
50
- }
51
- catch (error) {
52
- spinner.fail("Failed to trigger additional requests");
53
- throw error;
54
- }
55
- };
56
- exports.runTriggerStage = runTriggerStage;
@@ -1,24 +0,0 @@
1
- "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
- Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.runVisitStage = void 0;
7
- const ora_1 = __importDefault(require("ora"));
8
- const runVisitStage = async (page, targetUrl, navigationTimeoutMs) => {
9
- const spinner = (0, ora_1.default)("Visiting the target site").start();
10
- try {
11
- const response = await page.goto(targetUrl, {
12
- waitUntil: "domcontentloaded",
13
- timeout: navigationTimeoutMs
14
- });
15
- await page.waitForSelector("body", { timeout: 15000 });
16
- spinner.succeed("Visited the target site");
17
- return response;
18
- }
19
- catch (error) {
20
- spinner.fail("Failed to visit the target site");
21
- throw error;
22
- }
23
- };
24
- exports.runVisitStage = runVisitStage;
package/dist/lib/types.js DELETED
@@ -1,2 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
package/dist/preload.js DELETED
@@ -1,60 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.buildPreloadScript = void 0;
4
- const hackers_1 = require("./lib/hackers");
5
- const buildPreloadScript = () => {
6
- const context = { stage: "preload" };
7
- const hackerScripts = hackers_1.preloadHackers
8
- .map((hacker) => ` // hacker:${hacker.id}\n${hacker.build(context)}`)
9
- .join("\n");
10
- return `
11
- (function () {
12
- if (window.__pagepocketPatched) {
13
- return;
14
- }
15
- Object.defineProperty(window, "__pagepocketPatched", { value: true });
16
-
17
- const records = [];
18
- window.__pagepocketRecords = records;
19
- window.__pagepocketPendingRequests = 0;
20
-
21
- const toAbsoluteUrl = (input) => {
22
- try {
23
- return new URL(input, window.location.href).toString();
24
- } catch {
25
- return input;
26
- }
27
- };
28
-
29
- const normalizeBody = (body) => {
30
- if (body === undefined || body === null) {
31
- return "";
32
- }
33
- if (typeof body === "string") {
34
- return body;
35
- }
36
- if (body instanceof ArrayBuffer) {
37
- try {
38
- return new TextDecoder().decode(body);
39
- } catch {
40
- return "";
41
- }
42
- }
43
- if (body instanceof Blob) {
44
- return "";
45
- }
46
- return String(body);
47
- };
48
-
49
- const trackPendingStart = () => {
50
- window.__pagepocketPendingRequests += 1;
51
- };
52
- const trackPendingEnd = () => {
53
- window.__pagepocketPendingRequests = Math.max(0, window.__pagepocketPendingRequests - 1);
54
- };
55
-
56
- ${hackerScripts}
57
- })();
58
- `;
59
- };
60
- exports.buildPreloadScript = buildPreloadScript;
@@ -1,14 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.buildSnapshotData = void 0;
4
- const buildSnapshotData = (input) => {
5
- return {
6
- url: input.targetUrl,
7
- title: input.title,
8
- capturedAt: new Date().toISOString(),
9
- fetchXhrRecords: input.fetchXhrRecords,
10
- networkRecords: input.capturedNetworkRecords,
11
- resources: input.resources
12
- };
13
- };
14
- exports.buildSnapshotData = buildSnapshotData;
@@ -1,30 +0,0 @@
1
- "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
- Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.buildSnapshotHtml = void 0;
7
- const node_path_1 = __importDefault(require("node:path"));
8
- const replay_script_1 = require("../lib/replay-script");
9
- const buildSnapshotHtml = (input) => {
10
- const replayScript = (0, replay_script_1.buildReplayScript)(node_path_1.default.basename(input.outputRequestsPath), input.targetUrl);
11
- const head = input.$("head");
12
- if (head.length) {
13
- head.prepend(replayScript);
14
- }
15
- else {
16
- input.$.root().prepend(replayScript);
17
- }
18
- if (input.faviconDataUrl) {
19
- const existingIcon = input.$('link[rel="icon"]');
20
- if (existingIcon.length) {
21
- existingIcon.attr("href", input.faviconDataUrl);
22
- }
23
- else {
24
- const link = '<link rel="icon" href="' + input.faviconDataUrl + '" />';
25
- head.length ? head.append(link) : input.$.root().append(link);
26
- }
27
- }
28
- return input.$.html();
29
- };
30
- exports.buildSnapshotHtml = buildSnapshotHtml;
@@ -1,19 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.captureNetwork = void 0;
4
- const lib_1 = require("@pagepocket/lib");
5
- const lighterceptor_1 = require("@pagepocket/lighterceptor");
6
- const captureNetwork = async (targetUrl, currentTitle) => {
7
- const result = await new lighterceptor_1.Lighterceptor(targetUrl, { recursion: true }).run();
8
- const capturedNetworkRecords = (result.networkRecords ?? []);
9
- const networkRecords = (0, lib_1.mapCapturedNetworkRecords)(capturedNetworkRecords);
10
- const capturedTitle = result.title;
11
- const title = currentTitle === "snapshot" && capturedTitle ? capturedTitle : currentTitle;
12
- return {
13
- networkRecords,
14
- capturedNetworkRecords,
15
- capturedTitle,
16
- title
17
- };
18
- };
19
- exports.captureNetwork = captureNetwork;
@@ -1,48 +0,0 @@
1
- "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
- Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.downloadResources = void 0;
7
- const node_path_1 = __importDefault(require("node:path"));
8
- const css_rewrite_1 = require("../lib/css-rewrite");
9
- const resources_1 = require("../lib/resources");
10
- const downloadResources = async (input) => {
11
- const dataUrlMap = (0, css_rewrite_1.buildDataUrlMap)(input.networkRecords);
12
- const { $, resourceUrls, srcsetItems } = (0, resources_1.extractResourceUrls)(input.html, input.targetUrl);
13
- const resourceMap = new Map();
14
- const resourceMeta = [];
15
- let downloadedCount = 0;
16
- let failedCount = 0;
17
- for (const resource of resourceUrls) {
18
- const url = resource.url;
19
- if (!url || resourceMap.has(url)) {
20
- continue;
21
- }
22
- try {
23
- const { filename, contentType, size, outputPath } = await (0, resources_1.downloadResource)(url, input.resourcesDir, input.targetUrl);
24
- if ((contentType && contentType.includes("text/css")) || outputPath.endsWith(".css")) {
25
- await (0, css_rewrite_1.rewriteCssUrls)(outputPath, url, dataUrlMap);
26
- }
27
- resourceMap.set(url, filename);
28
- resourceMeta.push({
29
- url,
30
- localPath: node_path_1.default.join(input.assetsDirName, filename),
31
- contentType,
32
- size
33
- });
34
- downloadedCount += 1;
35
- }
36
- catch {
37
- failedCount += 1;
38
- }
39
- }
40
- (0, resources_1.applyResourceMapToDom)($, resourceUrls, srcsetItems, input.targetUrl, resourceMap, input.assetsDirName);
41
- return {
42
- $,
43
- resourceMeta,
44
- downloadedCount,
45
- failedCount
46
- };
47
- };
48
- exports.downloadResources = downloadResources;
@@ -1,69 +0,0 @@
1
- "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
- Object.defineProperty(o, "default", { enumerable: true, value: v });
15
- }) : function(o, v) {
16
- o["default"] = v;
17
- });
18
- var __importStar = (this && this.__importStar) || (function () {
19
- var ownKeys = function(o) {
20
- ownKeys = Object.getOwnPropertyNames || function (o) {
21
- var ar = [];
22
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
- return ar;
24
- };
25
- return ownKeys(o);
26
- };
27
- return function (mod) {
28
- if (mod && mod.__esModule) return mod;
29
- var result = {};
30
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
- __setModuleDefault(result, mod);
32
- return result;
33
- };
34
- })();
35
- var __importDefault = (this && this.__importDefault) || function (mod) {
36
- return (mod && mod.__esModule) ? mod : { "default": mod };
37
- };
38
- Object.defineProperty(exports, "__esModule", { value: true });
39
- exports.fetchHtml = void 0;
40
- const cheerio = __importStar(require("cheerio"));
41
- const got_1 = __importDefault(require("got"));
42
- const fetchHtml = async (targetUrl, timeoutMs, headersOverride) => {
43
- const headers = {
44
- "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
45
- accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
46
- "accept-language": "en-US,en;q=0.9",
47
- referer: targetUrl,
48
- "Accept-Language": "zh-CN,zh;q=0.9",
49
- "Sec-Fetch-Site": "same-origin",
50
- "Sec-Fetch-Mode": "navigate",
51
- "Sec-Fetch-Dest": "document",
52
- ...headersOverride
53
- };
54
- const response = await (0, got_1.default)(targetUrl, {
55
- headers,
56
- followRedirect: true,
57
- throwHttpErrors: false,
58
- timeout: { request: timeoutMs }
59
- });
60
- if (response.statusCode < 200 || response.statusCode >= 300) {
61
- const statusText = response.statusMessage ? ` ${response.statusMessage}` : "";
62
- throw new Error(`HTTP ${response.statusCode}${statusText}`);
63
- }
64
- const html = response.body;
65
- const $initial = cheerio.load(html);
66
- const title = $initial("title").first().text() || "snapshot";
67
- return { html, title };
68
- };
69
- exports.fetchHtml = fetchHtml;