@pagepocket/cli 0.4.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -14
- package/dist/cli.js +18 -69
- package/dist/stages/prepare-output.js +6 -13
- package/dist/stages/write-snapshot.js +1 -6
- package/package.json +3 -6
package/README.md
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
# @pagepocket/cli
|
|
2
2
|
|
|
3
|
-
CLI for capturing offline snapshots of web pages
|
|
4
|
-
|
|
5
|
-
script so the snapshot works offline.
|
|
3
|
+
CLI for capturing offline snapshots of web pages using the PagePocket library and
|
|
4
|
+
NetworkInterceptorAdapter event streams.
|
|
6
5
|
|
|
7
6
|
## Install
|
|
8
7
|
|
|
@@ -19,18 +18,15 @@ pp https://example.com -o ./snapshots
|
|
|
19
18
|
|
|
20
19
|
## Output
|
|
21
20
|
|
|
22
|
-
Snapshots are written to the
|
|
21
|
+
Snapshots are written to a folder named after the page title (or `snapshot`) inside
|
|
22
|
+
the output directory (default: current directory). Example layout:
|
|
23
23
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
Environment variables:
|
|
31
|
-
|
|
32
|
-
- `PAGEPOCKET_FETCH_TIMEOUT_MS` (default: `60000`)
|
|
33
|
-
- `PAGEPOCKET_FETCH_HEADERS` (JSON string of extra headers)
|
|
24
|
+
```
|
|
25
|
+
<output>/<title>/index.html
|
|
26
|
+
<output>/<title>/api.json
|
|
27
|
+
<output>/<title>/<same-origin paths>
|
|
28
|
+
<output>/<title>/external_resources/<cross-origin paths>
|
|
29
|
+
```
|
|
34
30
|
|
|
35
31
|
## Development
|
|
36
32
|
|
package/dist/cli.js
CHANGED
|
@@ -3,12 +3,10 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
const node_path_1 = __importDefault(require("node:path"));
|
|
7
6
|
const core_1 = require("@oclif/core");
|
|
8
7
|
const lib_1 = require("@pagepocket/lib");
|
|
9
8
|
const lighterceptor_adapter_1 = require("@pagepocket/lighterceptor-adapter");
|
|
10
9
|
const chalk_1 = __importDefault(require("chalk"));
|
|
11
|
-
const fetch_html_1 = require("./stages/fetch-html");
|
|
12
10
|
const prepare_output_1 = require("./stages/prepare-output");
|
|
13
11
|
const write_snapshot_1 = require("./stages/write-snapshot");
|
|
14
12
|
const with_spinner_1 = require("./utils/with-spinner");
|
|
@@ -17,78 +15,29 @@ class PagepocketCommand extends core_1.Command {
|
|
|
17
15
|
const { args, flags } = await this.parse(PagepocketCommand);
|
|
18
16
|
const targetUrl = args.url;
|
|
19
17
|
const outputFlag = flags.output ? flags.output.trim() : undefined;
|
|
20
|
-
const
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
return headers;
|
|
36
|
-
}
|
|
37
|
-
catch {
|
|
38
|
-
throw new Error("Invalid PAGEPOCKET_FETCH_HEADERS JSON.");
|
|
39
|
-
}
|
|
40
|
-
})();
|
|
41
|
-
const fetched = await (0, with_spinner_1.withSpinner)(async () => (0, fetch_html_1.fetchHtml)(targetUrl, fetchTimeoutMs, headersOverride), "Fetching the target HTML");
|
|
42
|
-
const { outputPaths, snapshotData, snapshotHtml } = await (0, with_spinner_1.withSpinner)(async () => {
|
|
43
|
-
const originalCwd = process.cwd();
|
|
44
|
-
const interceptor = new lighterceptor_adapter_1.LighterceptorAdapter({ title: fetched.title });
|
|
45
|
-
const snapshotSeed = await interceptor.run(targetUrl);
|
|
46
|
-
const outputPaths = await (0, prepare_output_1.prepareOutputPaths)(snapshotSeed.title, outputFlag);
|
|
47
|
-
const shouldRestoreCwd = outputPaths.baseDir !== originalCwd;
|
|
48
|
-
try {
|
|
49
|
-
if (shouldRestoreCwd) {
|
|
50
|
-
process.chdir(outputPaths.baseDir);
|
|
51
|
-
}
|
|
52
|
-
const pagepocket = new lib_1.PagePocket(fetched.html, snapshotSeed, {
|
|
53
|
-
baseUrl: targetUrl,
|
|
54
|
-
assetsDirName: outputPaths.assetsDirName,
|
|
55
|
-
requestsPath: node_path_1.default.basename(outputPaths.outputRequestsPath)
|
|
56
|
-
});
|
|
57
|
-
const pageData = await pagepocket.put();
|
|
58
|
-
const snapshotData = {
|
|
59
|
-
...snapshotSeed,
|
|
60
|
-
title: pageData.title,
|
|
61
|
-
resources: pagepocket.resources
|
|
62
|
-
};
|
|
63
|
-
return {
|
|
64
|
-
outputPaths,
|
|
65
|
-
snapshotData,
|
|
66
|
-
snapshotHtml: pageData.content
|
|
67
|
-
};
|
|
68
|
-
}
|
|
69
|
-
finally {
|
|
70
|
-
if (shouldRestoreCwd) {
|
|
71
|
-
try {
|
|
72
|
-
process.chdir(originalCwd);
|
|
73
|
-
}
|
|
74
|
-
catch {
|
|
75
|
-
// Ignore restore errors to preserve original failure.
|
|
76
|
-
}
|
|
77
|
-
}
|
|
78
|
-
}
|
|
79
|
-
}, "Downloading resources");
|
|
18
|
+
const headers = {
|
|
19
|
+
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
|
|
20
|
+
accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
|
21
|
+
"accept-language": "en-US,en;q=0.9",
|
|
22
|
+
referer: targetUrl
|
|
23
|
+
};
|
|
24
|
+
const snapshot = await (0, with_spinner_1.withSpinner)(async () => {
|
|
25
|
+
const interceptor = new lighterceptor_adapter_1.LighterceptorAdapter({ headers });
|
|
26
|
+
const pagepocket = lib_1.PagePocket.fromURL(targetUrl);
|
|
27
|
+
return pagepocket.capture({
|
|
28
|
+
interceptor,
|
|
29
|
+
completion: { wait: async () => { } }
|
|
30
|
+
});
|
|
31
|
+
}, "Capturing snapshot");
|
|
32
|
+
const { outputDir } = await (0, with_spinner_1.withSpinner)(() => (0, prepare_output_1.prepareOutputDir)(snapshot.title ?? "snapshot", outputFlag), "Preparing output directory");
|
|
80
33
|
await (0, with_spinner_1.withSpinner)(async () => {
|
|
81
34
|
await (0, write_snapshot_1.writeSnapshotFiles)({
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
snapshotData,
|
|
85
|
-
snapshotHtml: snapshotHtml
|
|
35
|
+
outputDir,
|
|
36
|
+
snapshot
|
|
86
37
|
});
|
|
87
38
|
}, "Writing snapshot files");
|
|
88
39
|
this.log(chalk_1.default.green("All done! Snapshot created."));
|
|
89
|
-
this.log(`
|
|
90
|
-
this.log(`Requests saved to ${chalk_1.default.cyan(outputPaths.outputRequestsPath)}`);
|
|
91
|
-
this.log(`Resources saved to ${chalk_1.default.cyan(outputPaths.resourcesDir)}`);
|
|
40
|
+
this.log(`Snapshot saved to ${chalk_1.default.cyan(outputDir)}`);
|
|
92
41
|
process.exit();
|
|
93
42
|
}
|
|
94
43
|
}
|
package/dist/stages/prepare-output.js
CHANGED
|
@@ -3,25 +3,18 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.prepareOutputPaths = void 0;
|
|
6
|
+
exports.prepareOutputDir = void 0;
|
|
7
7
|
const promises_1 = __importDefault(require("node:fs/promises"));
|
|
8
8
|
const node_path_1 = __importDefault(require("node:path"));
|
|
9
9
|
const filename_1 = require("../lib/filename");
|
|
10
|
-
const prepareOutputPaths = async (title, outputFlag) => {
|
|
10
|
+
const prepareOutputDir = async (title, outputFlag) => {
|
|
11
11
|
const safeTitle = (0, filename_1.safeFilename)(title || "snapshot");
|
|
12
12
|
const baseDir = outputFlag ? node_path_1.default.resolve(outputFlag) : process.cwd();
|
|
13
|
-
const
|
|
14
|
-
|
|
15
|
-
const assetsDirName = `${safeTitle}_files`;
|
|
16
|
-
const resourcesDir = node_path_1.default.join(baseDir, assetsDirName);
|
|
17
|
-
await promises_1.default.mkdir(resourcesDir, { recursive: true });
|
|
13
|
+
const outputDir = node_path_1.default.join(baseDir, safeTitle);
|
|
14
|
+
await promises_1.default.mkdir(outputDir, { recursive: true });
|
|
18
15
|
return {
|
|
19
16
|
safeTitle,
|
|
20
|
-
|
|
21
|
-
outputHtmlPath,
|
|
22
|
-
outputRequestsPath,
|
|
23
|
-
assetsDirName,
|
|
24
|
-
resourcesDir
|
|
17
|
+
outputDir
|
|
25
18
|
};
|
|
26
19
|
};
|
|
27
|
-
exports.prepareOutputPaths = prepareOutputPaths;
|
|
20
|
+
exports.prepareOutputDir = prepareOutputDir;
|
package/dist/stages/write-snapshot.js
CHANGED
|
@@ -1,12 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
3
|
exports.writeSnapshotFiles = void 0;
|
|
7
|
-
const promises_1 = __importDefault(require("node:fs/promises"));
|
|
8
4
|
const writeSnapshotFiles = async (input) => {
|
|
9
|
-
await
|
|
10
|
-
await promises_1.default.writeFile(input.outputHtmlPath, input.snapshotHtml, "utf-8");
|
|
5
|
+
await input.snapshot.toDirectory(input.outputDir);
|
|
11
6
|
};
|
|
12
7
|
exports.writeSnapshotFiles = writeSnapshotFiles;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pagepocket/cli",
|
|
3
|
-
"version": "0.4.2",
|
|
3
|
+
"version": "0.5.0",
|
|
4
4
|
"description": "CLI for capturing offline snapshots of web pages.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -15,12 +15,9 @@
|
|
|
15
15
|
"dependencies": {
|
|
16
16
|
"@oclif/core": "^4.0.9",
|
|
17
17
|
"chalk": "^4.1.2",
|
|
18
|
-
"cheerio": "^1.0.0-rc.12",
|
|
19
|
-
"got": "^11.8.6",
|
|
20
18
|
"ora": "^9.0.0",
|
|
21
|
-
"@pagepocket/
|
|
22
|
-
"@pagepocket/
|
|
23
|
-
"@pagepocket/lighterceptor-adapter": "0.4.2"
|
|
19
|
+
"@pagepocket/lib": "0.5.0",
|
|
20
|
+
"@pagepocket/lighterceptor-adapter": "0.5.0"
|
|
24
21
|
},
|
|
25
22
|
"devDependencies": {
|
|
26
23
|
"@types/node": "^20.11.30",
|