@pagepocket/cli 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -14
- package/dist/cli.js +21 -96
- package/dist/stages/build-snapshot-data.js +1 -1
- package/dist/stages/capture-network.js +4 -4
- package/dist/stages/prepare-output.js +6 -13
- package/dist/stages/write-snapshot.js +1 -6
- package/package.json +3 -8
package/README.md
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
# @pagepocket/cli
|
|
2
2
|
|
|
3
|
-
CLI for capturing offline snapshots of web pages
|
|
4
|
-
|
|
5
|
-
script so the snapshot works offline.
|
|
3
|
+
CLI for capturing offline snapshots of web pages using the PagePocket library and
|
|
4
|
+
NetworkInterceptorAdapter event streams.
|
|
6
5
|
|
|
7
6
|
## Install
|
|
8
7
|
|
|
@@ -19,18 +18,15 @@ pp https://example.com -o ./snapshots
|
|
|
19
18
|
|
|
20
19
|
## Output
|
|
21
20
|
|
|
22
|
-
Snapshots are written to the
|
|
21
|
+
Snapshots are written to a folder named after the page title (or `snapshot`) inside
|
|
22
|
+
the output directory (default: current directory). Example layout:
|
|
23
23
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
Environment variables:
|
|
31
|
-
|
|
32
|
-
- `PAGEPOCKET_FETCH_TIMEOUT_MS` (default: `60000`)
|
|
33
|
-
- `PAGEPOCKET_FETCH_HEADERS` (JSON string of extra headers)
|
|
24
|
+
```
|
|
25
|
+
<output>/<title>/index.html
|
|
26
|
+
<output>/<title>/api.json
|
|
27
|
+
<output>/<title>/<same-origin paths>
|
|
28
|
+
<output>/<title>/external_resources/<cross-origin paths>
|
|
29
|
+
```
|
|
34
30
|
|
|
35
31
|
## Development
|
|
36
32
|
|
package/dist/cli.js
CHANGED
|
@@ -3,116 +3,41 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
const node_path_1 = __importDefault(require("node:path"));
|
|
7
6
|
const core_1 = require("@oclif/core");
|
|
8
|
-
const chalk_1 = __importDefault(require("chalk"));
|
|
9
7
|
const lib_1 = require("@pagepocket/lib");
|
|
10
|
-
const
|
|
11
|
-
const
|
|
12
|
-
const capture_network_1 = require("./stages/capture-network");
|
|
13
|
-
const fetch_html_1 = require("./stages/fetch-html");
|
|
8
|
+
const lighterceptor_adapter_1 = require("@pagepocket/lighterceptor-adapter");
|
|
9
|
+
const chalk_1 = __importDefault(require("chalk"));
|
|
14
10
|
const prepare_output_1 = require("./stages/prepare-output");
|
|
15
11
|
const write_snapshot_1 = require("./stages/write-snapshot");
|
|
12
|
+
const with_spinner_1 = require("./utils/with-spinner");
|
|
16
13
|
class PagepocketCommand extends core_1.Command {
|
|
17
14
|
async run() {
|
|
18
15
|
const { args, flags } = await this.parse(PagepocketCommand);
|
|
19
16
|
const targetUrl = args.url;
|
|
20
17
|
const outputFlag = flags.output ? flags.output.trim() : undefined;
|
|
21
|
-
const
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
return headers;
|
|
37
|
-
}
|
|
38
|
-
catch {
|
|
39
|
-
throw new Error("Invalid PAGEPOCKET_FETCH_HEADERS JSON.");
|
|
40
|
-
}
|
|
41
|
-
})();
|
|
42
|
-
const fetchXhrRecords = [];
|
|
43
|
-
const fetched = await (0, with_spinner_1.withSpinner)(async () => (0, fetch_html_1.fetchHtml)(targetUrl, fetchTimeoutMs, headersOverride), "Fetching the target HTML");
|
|
44
|
-
const networkStage = await (async () => {
|
|
45
|
-
try {
|
|
46
|
-
return await (0, with_spinner_1.withSpinner)(async () => (0, capture_network_1.captureNetwork)(targetUrl, fetched.title), "Capturing network requests with lighterceptor");
|
|
47
|
-
}
|
|
48
|
-
catch {
|
|
49
|
-
return {
|
|
50
|
-
networkRecords: [],
|
|
51
|
-
lighterceptorNetworkRecords: [],
|
|
52
|
-
capturedTitle: undefined,
|
|
53
|
-
title: fetched.title
|
|
54
|
-
};
|
|
55
|
-
}
|
|
56
|
-
})();
|
|
57
|
-
const outputPaths = await (0, with_spinner_1.withSpinner)(async () => (0, prepare_output_1.prepareOutputPaths)(networkStage.title, outputFlag), "Preparing output paths");
|
|
58
|
-
const downloadStage = await (0, with_spinner_1.withSpinner)(async () => {
|
|
59
|
-
const originalCwd = process.cwd();
|
|
60
|
-
const shouldRestoreCwd = outputPaths.baseDir !== originalCwd;
|
|
61
|
-
try {
|
|
62
|
-
if (shouldRestoreCwd) {
|
|
63
|
-
process.chdir(outputPaths.baseDir);
|
|
64
|
-
}
|
|
65
|
-
const seedSnapshot = {
|
|
66
|
-
url: targetUrl,
|
|
67
|
-
title: networkStage.title,
|
|
68
|
-
capturedAt: new Date().toISOString(),
|
|
69
|
-
fetchXhrRecords,
|
|
70
|
-
networkRecords: networkStage.lighterceptorNetworkRecords,
|
|
71
|
-
resources: []
|
|
72
|
-
};
|
|
73
|
-
const pagepocket = new lib_1.PagePocket(fetched.html, seedSnapshot, {
|
|
74
|
-
assetsDirName: outputPaths.assetsDirName,
|
|
75
|
-
baseUrl: targetUrl,
|
|
76
|
-
requestsPath: node_path_1.default.basename(outputPaths.outputRequestsPath)
|
|
77
|
-
});
|
|
78
|
-
const snapshotHtml = await pagepocket.put();
|
|
79
|
-
return {
|
|
80
|
-
snapshotHtml,
|
|
81
|
-
resourceMeta: pagepocket.resources,
|
|
82
|
-
downloadedCount: pagepocket.downloadedCount,
|
|
83
|
-
failedCount: pagepocket.failedCount
|
|
84
|
-
};
|
|
85
|
-
}
|
|
86
|
-
finally {
|
|
87
|
-
if (shouldRestoreCwd) {
|
|
88
|
-
try {
|
|
89
|
-
process.chdir(originalCwd);
|
|
90
|
-
}
|
|
91
|
-
catch {
|
|
92
|
-
// Ignore restore errors to preserve original failure.
|
|
93
|
-
}
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
|
-
}, "Downloading resources");
|
|
97
|
-
const snapshotData = await (0, with_spinner_1.withSpinner)(async () => (0, build_snapshot_data_1.buildSnapshotData)({
|
|
98
|
-
targetUrl,
|
|
99
|
-
title: networkStage.title,
|
|
100
|
-
fetchXhrRecords,
|
|
101
|
-
lighterceptorNetworkRecords: networkStage.lighterceptorNetworkRecords,
|
|
102
|
-
resources: downloadStage.resourceMeta
|
|
103
|
-
}), "Preparing snapshot HTML");
|
|
18
|
+
const headers = {
|
|
19
|
+
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
|
|
20
|
+
accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
|
21
|
+
"accept-language": "en-US,en;q=0.9",
|
|
22
|
+
referer: targetUrl
|
|
23
|
+
};
|
|
24
|
+
const snapshot = await (0, with_spinner_1.withSpinner)(async () => {
|
|
25
|
+
const interceptor = new lighterceptor_adapter_1.LighterceptorAdapter({ headers });
|
|
26
|
+
const pagepocket = lib_1.PagePocket.fromURL(targetUrl);
|
|
27
|
+
return pagepocket.capture({
|
|
28
|
+
interceptor,
|
|
29
|
+
completion: { wait: async () => { } }
|
|
30
|
+
});
|
|
31
|
+
}, "Capturing snapshot");
|
|
32
|
+
const { outputDir } = await (0, with_spinner_1.withSpinner)(() => (0, prepare_output_1.prepareOutputDir)(snapshot.title ?? "snapshot", outputFlag), "Preparing output directory");
|
|
104
33
|
await (0, with_spinner_1.withSpinner)(async () => {
|
|
105
34
|
await (0, write_snapshot_1.writeSnapshotFiles)({
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
snapshotData,
|
|
109
|
-
snapshotHtml: downloadStage.snapshotHtml
|
|
35
|
+
outputDir,
|
|
36
|
+
snapshot
|
|
110
37
|
});
|
|
111
38
|
}, "Writing snapshot files");
|
|
112
39
|
this.log(chalk_1.default.green("All done! Snapshot created."));
|
|
113
|
-
this.log(`
|
|
114
|
-
this.log(`Requests saved to ${chalk_1.default.cyan(outputPaths.outputRequestsPath)}`);
|
|
115
|
-
this.log(`Resources saved to ${chalk_1.default.cyan(outputPaths.resourcesDir)}`);
|
|
40
|
+
this.log(`Snapshot saved to ${chalk_1.default.cyan(outputDir)}`);
|
|
116
41
|
process.exit();
|
|
117
42
|
}
|
|
118
43
|
}
|
|
@@ -7,7 +7,7 @@ const buildSnapshotData = (input) => {
|
|
|
7
7
|
title: input.title,
|
|
8
8
|
capturedAt: new Date().toISOString(),
|
|
9
9
|
fetchXhrRecords: input.fetchXhrRecords,
|
|
10
|
-
networkRecords: input.
|
|
10
|
+
networkRecords: input.capturedNetworkRecords,
|
|
11
11
|
resources: input.resources
|
|
12
12
|
};
|
|
13
13
|
};
|
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.captureNetwork = void 0;
|
|
4
|
-
const lighterceptor_1 = require("lighterceptor");
|
|
5
4
|
const lib_1 = require("@pagepocket/lib");
|
|
5
|
+
const lighterceptor_1 = require("@pagepocket/lighterceptor");
|
|
6
6
|
const captureNetwork = async (targetUrl, currentTitle) => {
|
|
7
7
|
const result = await new lighterceptor_1.Lighterceptor(targetUrl, { recursion: true }).run();
|
|
8
|
-
const
|
|
9
|
-
const networkRecords = (0, lib_1.
|
|
8
|
+
const capturedNetworkRecords = (result.networkRecords ?? []);
|
|
9
|
+
const networkRecords = (0, lib_1.mapCapturedNetworkRecords)(capturedNetworkRecords);
|
|
10
10
|
const capturedTitle = result.title;
|
|
11
11
|
const title = currentTitle === "snapshot" && capturedTitle ? capturedTitle : currentTitle;
|
|
12
12
|
return {
|
|
13
13
|
networkRecords,
|
|
14
|
-
|
|
14
|
+
capturedNetworkRecords,
|
|
15
15
|
capturedTitle,
|
|
16
16
|
title
|
|
17
17
|
};
|
|
@@ -3,25 +3,18 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.
|
|
6
|
+
exports.prepareOutputDir = void 0;
|
|
7
7
|
const promises_1 = __importDefault(require("node:fs/promises"));
|
|
8
8
|
const node_path_1 = __importDefault(require("node:path"));
|
|
9
9
|
const filename_1 = require("../lib/filename");
|
|
10
|
-
const
|
|
10
|
+
const prepareOutputDir = async (title, outputFlag) => {
|
|
11
11
|
const safeTitle = (0, filename_1.safeFilename)(title || "snapshot");
|
|
12
12
|
const baseDir = outputFlag ? node_path_1.default.resolve(outputFlag) : process.cwd();
|
|
13
|
-
const
|
|
14
|
-
|
|
15
|
-
const assetsDirName = `${safeTitle}_files`;
|
|
16
|
-
const resourcesDir = node_path_1.default.join(baseDir, assetsDirName);
|
|
17
|
-
await promises_1.default.mkdir(resourcesDir, { recursive: true });
|
|
13
|
+
const outputDir = node_path_1.default.join(baseDir, safeTitle);
|
|
14
|
+
await promises_1.default.mkdir(outputDir, { recursive: true });
|
|
18
15
|
return {
|
|
19
16
|
safeTitle,
|
|
20
|
-
|
|
21
|
-
outputHtmlPath,
|
|
22
|
-
outputRequestsPath,
|
|
23
|
-
assetsDirName,
|
|
24
|
-
resourcesDir
|
|
17
|
+
outputDir
|
|
25
18
|
};
|
|
26
19
|
};
|
|
27
|
-
exports.
|
|
20
|
+
exports.prepareOutputDir = prepareOutputDir;
|
|
@@ -1,12 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
3
|
exports.writeSnapshotFiles = void 0;
|
|
7
|
-
const promises_1 = __importDefault(require("node:fs/promises"));
|
|
8
4
|
const writeSnapshotFiles = async (input) => {
|
|
9
|
-
await
|
|
10
|
-
await promises_1.default.writeFile(input.outputHtmlPath, input.snapshotHtml, "utf-8");
|
|
5
|
+
await input.snapshot.toDirectory(input.outputDir);
|
|
11
6
|
};
|
|
12
7
|
exports.writeSnapshotFiles = writeSnapshotFiles;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pagepocket/cli",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.5.0",
|
|
4
4
|
"description": "CLI for capturing offline snapshots of web pages.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -14,16 +14,13 @@
|
|
|
14
14
|
"license": "ISC",
|
|
15
15
|
"dependencies": {
|
|
16
16
|
"@oclif/core": "^4.0.9",
|
|
17
|
-
"cheerio": "^1.0.0-rc.12",
|
|
18
17
|
"chalk": "^4.1.2",
|
|
19
|
-
"got": "^11.8.6",
|
|
20
18
|
"ora": "^9.0.0",
|
|
21
|
-
"
|
|
22
|
-
"@pagepocket/
|
|
19
|
+
"@pagepocket/lib": "0.5.0",
|
|
20
|
+
"@pagepocket/lighterceptor-adapter": "0.5.0"
|
|
23
21
|
},
|
|
24
22
|
"devDependencies": {
|
|
25
23
|
"@types/node": "^20.11.30",
|
|
26
|
-
"prettier": "^3.7.4",
|
|
27
24
|
"tsx": "^4.19.3",
|
|
28
25
|
"typescript": "^5.4.5"
|
|
29
26
|
},
|
|
@@ -36,8 +33,6 @@
|
|
|
36
33
|
},
|
|
37
34
|
"scripts": {
|
|
38
35
|
"build": "tsc",
|
|
39
|
-
"format": "prettier --write .",
|
|
40
|
-
"format:check": "prettier --check .",
|
|
41
36
|
"start": "node dist/index.js",
|
|
42
37
|
"test": "tsx --test specs/**/*.test.ts"
|
|
43
38
|
}
|