@pagepocket/cli 0.4.1 → 0.5.0

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,8 +1,7 @@
1
1
  # @pagepocket/cli
2
2
 
3
- CLI for capturing offline snapshots of web pages. It fetches HTML, records network
4
- responses, downloads assets, rewrites links to local files, and injects a replay
5
- script so the snapshot works offline.
3
+ CLI for capturing offline snapshots of web pages using the PagePocket library and
4
+ NetworkInterceptorAdapter event streams.
6
5
 
7
6
  ## Install
8
7
 
@@ -19,18 +18,15 @@ pp https://example.com -o ./snapshots
19
18
 
20
19
  ## Output
21
20
 
22
- Snapshots are written to the current directory by default:
21
+ Snapshots are written to a folder named after the page title (or `snapshot`) inside
22
+ the output directory (default: current directory). Example layout:
23
23
 
24
- - `*.html`: offline snapshot page
25
- - `*.requests.json`: recorded requests/responses
26
- - `*_files/`: downloaded assets
27
-
28
- ## Configuration
29
-
30
- Environment variables:
31
-
32
- - `PAGEPOCKET_FETCH_TIMEOUT_MS` (default: `60000`)
33
- - `PAGEPOCKET_FETCH_HEADERS` (JSON string of extra headers)
24
+ ```
25
+ <output>/<title>/index.html
26
+ <output>/<title>/api.json
27
+ <output>/<title>/<same-origin paths>
28
+ <output>/<title>/external_resources/<cross-origin paths>
29
+ ```
34
30
 
35
31
  ## Development
36
32
 
package/dist/cli.js CHANGED
@@ -3,116 +3,41 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- const node_path_1 = __importDefault(require("node:path"));
7
6
  const core_1 = require("@oclif/core");
8
- const chalk_1 = __importDefault(require("chalk"));
9
7
  const lib_1 = require("@pagepocket/lib");
10
- const with_spinner_1 = require("./utils/with-spinner");
11
- const build_snapshot_data_1 = require("./stages/build-snapshot-data");
12
- const capture_network_1 = require("./stages/capture-network");
13
- const fetch_html_1 = require("./stages/fetch-html");
8
+ const lighterceptor_adapter_1 = require("@pagepocket/lighterceptor-adapter");
9
+ const chalk_1 = __importDefault(require("chalk"));
14
10
  const prepare_output_1 = require("./stages/prepare-output");
15
11
  const write_snapshot_1 = require("./stages/write-snapshot");
12
+ const with_spinner_1 = require("./utils/with-spinner");
16
13
  class PagepocketCommand extends core_1.Command {
17
14
  async run() {
18
15
  const { args, flags } = await this.parse(PagepocketCommand);
19
16
  const targetUrl = args.url;
20
17
  const outputFlag = flags.output ? flags.output.trim() : undefined;
21
- const fetchTimeoutMs = Number(process.env.PAGEPOCKET_FETCH_TIMEOUT_MS || "60000");
22
- const headersOverride = (() => {
23
- const raw = process.env.PAGEPOCKET_FETCH_HEADERS;
24
- if (!raw) {
25
- return undefined;
26
- }
27
- try {
28
- const parsed = JSON.parse(raw);
29
- const headers = {};
30
- for (const [key, value] of Object.entries(parsed)) {
31
- if (value === undefined || value === null) {
32
- continue;
33
- }
34
- headers[key] = String(value);
35
- }
36
- return headers;
37
- }
38
- catch {
39
- throw new Error("Invalid PAGEPOCKET_FETCH_HEADERS JSON.");
40
- }
41
- })();
42
- const fetchXhrRecords = [];
43
- const fetched = await (0, with_spinner_1.withSpinner)(async () => (0, fetch_html_1.fetchHtml)(targetUrl, fetchTimeoutMs, headersOverride), "Fetching the target HTML");
44
- const networkStage = await (async () => {
45
- try {
46
- return await (0, with_spinner_1.withSpinner)(async () => (0, capture_network_1.captureNetwork)(targetUrl, fetched.title), "Capturing network requests with lighterceptor");
47
- }
48
- catch {
49
- return {
50
- networkRecords: [],
51
- lighterceptorNetworkRecords: [],
52
- capturedTitle: undefined,
53
- title: fetched.title
54
- };
55
- }
56
- })();
57
- const outputPaths = await (0, with_spinner_1.withSpinner)(async () => (0, prepare_output_1.prepareOutputPaths)(networkStage.title, outputFlag), "Preparing output paths");
58
- const downloadStage = await (0, with_spinner_1.withSpinner)(async () => {
59
- const originalCwd = process.cwd();
60
- const shouldRestoreCwd = outputPaths.baseDir !== originalCwd;
61
- try {
62
- if (shouldRestoreCwd) {
63
- process.chdir(outputPaths.baseDir);
64
- }
65
- const seedSnapshot = {
66
- url: targetUrl,
67
- title: networkStage.title,
68
- capturedAt: new Date().toISOString(),
69
- fetchXhrRecords,
70
- networkRecords: networkStage.lighterceptorNetworkRecords,
71
- resources: []
72
- };
73
- const pagepocket = new lib_1.PagePocket(fetched.html, seedSnapshot, {
74
- assetsDirName: outputPaths.assetsDirName,
75
- baseUrl: targetUrl,
76
- requestsPath: node_path_1.default.basename(outputPaths.outputRequestsPath)
77
- });
78
- const snapshotHtml = await pagepocket.put();
79
- return {
80
- snapshotHtml,
81
- resourceMeta: pagepocket.resources,
82
- downloadedCount: pagepocket.downloadedCount,
83
- failedCount: pagepocket.failedCount
84
- };
85
- }
86
- finally {
87
- if (shouldRestoreCwd) {
88
- try {
89
- process.chdir(originalCwd);
90
- }
91
- catch {
92
- // Ignore restore errors to preserve original failure.
93
- }
94
- }
95
- }
96
- }, "Downloading resources");
97
- const snapshotData = await (0, with_spinner_1.withSpinner)(async () => (0, build_snapshot_data_1.buildSnapshotData)({
98
- targetUrl,
99
- title: networkStage.title,
100
- fetchXhrRecords,
101
- lighterceptorNetworkRecords: networkStage.lighterceptorNetworkRecords,
102
- resources: downloadStage.resourceMeta
103
- }), "Preparing snapshot HTML");
18
+ const headers = {
19
+ "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
20
+ accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
21
+ "accept-language": "en-US,en;q=0.9",
22
+ referer: targetUrl
23
+ };
24
+ const snapshot = await (0, with_spinner_1.withSpinner)(async () => {
25
+ const interceptor = new lighterceptor_adapter_1.LighterceptorAdapter({ headers });
26
+ const pagepocket = lib_1.PagePocket.fromURL(targetUrl);
27
+ return pagepocket.capture({
28
+ interceptor,
29
+ completion: { wait: async () => { } }
30
+ });
31
+ }, "Capturing snapshot");
32
+ const { outputDir } = await (0, with_spinner_1.withSpinner)(() => (0, prepare_output_1.prepareOutputDir)(snapshot.title ?? "snapshot", outputFlag), "Preparing output directory");
104
33
  await (0, with_spinner_1.withSpinner)(async () => {
105
34
  await (0, write_snapshot_1.writeSnapshotFiles)({
106
- outputRequestsPath: outputPaths.outputRequestsPath,
107
- outputHtmlPath: outputPaths.outputHtmlPath,
108
- snapshotData,
109
- snapshotHtml: downloadStage.snapshotHtml
35
+ outputDir,
36
+ snapshot
110
37
  });
111
38
  }, "Writing snapshot files");
112
39
  this.log(chalk_1.default.green("All done! Snapshot created."));
113
- this.log(`HTML saved to ${chalk_1.default.cyan(outputPaths.outputHtmlPath)}`);
114
- this.log(`Requests saved to ${chalk_1.default.cyan(outputPaths.outputRequestsPath)}`);
115
- this.log(`Resources saved to ${chalk_1.default.cyan(outputPaths.resourcesDir)}`);
40
+ this.log(`Snapshot saved to ${chalk_1.default.cyan(outputDir)}`);
116
41
  process.exit();
117
42
  }
118
43
  }
@@ -7,7 +7,7 @@ const buildSnapshotData = (input) => {
7
7
  title: input.title,
8
8
  capturedAt: new Date().toISOString(),
9
9
  fetchXhrRecords: input.fetchXhrRecords,
10
- networkRecords: input.lighterceptorNetworkRecords,
10
+ networkRecords: input.capturedNetworkRecords,
11
11
  resources: input.resources
12
12
  };
13
13
  };
@@ -1,17 +1,17 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.captureNetwork = void 0;
4
- const lighterceptor_1 = require("lighterceptor");
5
4
  const lib_1 = require("@pagepocket/lib");
5
+ const lighterceptor_1 = require("@pagepocket/lighterceptor");
6
6
  const captureNetwork = async (targetUrl, currentTitle) => {
7
7
  const result = await new lighterceptor_1.Lighterceptor(targetUrl, { recursion: true }).run();
8
- const lighterceptorNetworkRecords = (result.networkRecords ?? []);
9
- const networkRecords = (0, lib_1.mapLighterceptorRecords)(lighterceptorNetworkRecords);
8
+ const capturedNetworkRecords = (result.networkRecords ?? []);
9
+ const networkRecords = (0, lib_1.mapCapturedNetworkRecords)(capturedNetworkRecords);
10
10
  const capturedTitle = result.title;
11
11
  const title = currentTitle === "snapshot" && capturedTitle ? capturedTitle : currentTitle;
12
12
  return {
13
13
  networkRecords,
14
- lighterceptorNetworkRecords,
14
+ capturedNetworkRecords,
15
15
  capturedTitle,
16
16
  title
17
17
  };
@@ -3,25 +3,18 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.prepareOutputPaths = void 0;
6
+ exports.prepareOutputDir = void 0;
7
7
  const promises_1 = __importDefault(require("node:fs/promises"));
8
8
  const node_path_1 = __importDefault(require("node:path"));
9
9
  const filename_1 = require("../lib/filename");
10
- const prepareOutputPaths = async (title, outputFlag) => {
10
+ const prepareOutputDir = async (title, outputFlag) => {
11
11
  const safeTitle = (0, filename_1.safeFilename)(title || "snapshot");
12
12
  const baseDir = outputFlag ? node_path_1.default.resolve(outputFlag) : process.cwd();
13
- const outputHtmlPath = node_path_1.default.join(baseDir, `${safeTitle}.html`);
14
- const outputRequestsPath = node_path_1.default.join(baseDir, `${safeTitle}.requests.json`);
15
- const assetsDirName = `${safeTitle}_files`;
16
- const resourcesDir = node_path_1.default.join(baseDir, assetsDirName);
17
- await promises_1.default.mkdir(resourcesDir, { recursive: true });
13
+ const outputDir = node_path_1.default.join(baseDir, safeTitle);
14
+ await promises_1.default.mkdir(outputDir, { recursive: true });
18
15
  return {
19
16
  safeTitle,
20
- baseDir,
21
- outputHtmlPath,
22
- outputRequestsPath,
23
- assetsDirName,
24
- resourcesDir
17
+ outputDir
25
18
  };
26
19
  };
27
- exports.prepareOutputPaths = prepareOutputPaths;
20
+ exports.prepareOutputDir = prepareOutputDir;
@@ -1,12 +1,7 @@
1
1
  "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
2
  Object.defineProperty(exports, "__esModule", { value: true });
6
3
  exports.writeSnapshotFiles = void 0;
7
- const promises_1 = __importDefault(require("node:fs/promises"));
8
4
  const writeSnapshotFiles = async (input) => {
9
- await promises_1.default.writeFile(input.outputRequestsPath, JSON.stringify(input.snapshotData, null, 2), "utf-8");
10
- await promises_1.default.writeFile(input.outputHtmlPath, input.snapshotHtml, "utf-8");
5
+ await input.snapshot.toDirectory(input.outputDir);
11
6
  };
12
7
  exports.writeSnapshotFiles = writeSnapshotFiles;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pagepocket/cli",
3
- "version": "0.4.1",
3
+ "version": "0.5.0",
4
4
  "description": "CLI for capturing offline snapshots of web pages.",
5
5
  "main": "dist/index.js",
6
6
  "bin": {
@@ -14,16 +14,13 @@
14
14
  "license": "ISC",
15
15
  "dependencies": {
16
16
  "@oclif/core": "^4.0.9",
17
- "cheerio": "^1.0.0-rc.12",
18
17
  "chalk": "^4.1.2",
19
- "got": "^11.8.6",
20
18
  "ora": "^9.0.0",
21
- "lighterceptor": "npm:@pagepocket/lighterceptor@0.4.1",
22
- "@pagepocket/lib": "0.4.1"
19
+ "@pagepocket/lib": "0.5.0",
20
+ "@pagepocket/lighterceptor-adapter": "0.5.0"
23
21
  },
24
22
  "devDependencies": {
25
23
  "@types/node": "^20.11.30",
26
- "prettier": "^3.7.4",
27
24
  "tsx": "^4.19.3",
28
25
  "typescript": "^5.4.5"
29
26
  },
@@ -36,8 +33,6 @@
36
33
  },
37
34
  "scripts": {
38
35
  "build": "tsc",
39
- "format": "prettier --write .",
40
- "format:check": "prettier --check .",
41
36
  "start": "node dist/index.js",
42
37
  "test": "tsx --test specs/**/*.test.ts"
43
38
  }