@pagepocket/cli 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,8 +1,7 @@
1
1
  # @pagepocket/cli
2
2
 
3
- CLI for capturing offline snapshots of web pages. It fetches HTML, records network
4
- responses, downloads assets, rewrites links to local files, and injects a replay
5
- script so the snapshot works offline.
3
+ CLI for capturing offline snapshots of web pages using the PagePocket library and
4
+ NetworkInterceptorAdapter event streams.
6
5
 
7
6
  ## Install
8
7
 
@@ -19,18 +18,15 @@ pp https://example.com -o ./snapshots
19
18
 
20
19
  ## Output
21
20
 
22
- Snapshots are written to the current directory by default:
21
+ Snapshots are written to a folder named after the page title (or `snapshot` when the
22
+ page has no title) inside the output directory (default: current directory). Example layout:
23
23
 
24
- - `*.html`: offline snapshot page
25
- - `*.requests.json`: recorded requests/responses
26
- - `*_files/`: downloaded assets
27
-
28
- ## Configuration
29
-
30
- Environment variables:
31
-
32
- - `PAGEPOCKET_FETCH_TIMEOUT_MS` (default: `60000`)
33
- - `PAGEPOCKET_FETCH_HEADERS` (JSON string of extra headers)
24
+ ```
25
+ <output>/<title>/index.html
26
+ <output>/<title>/api.json
27
+ <output>/<title>/<same-origin paths>
28
+ <output>/<title>/external_resources/<cross-origin paths>
29
+ ```
34
30
 
35
31
  ## Development
36
32
 
package/dist/cli.js CHANGED
@@ -3,12 +3,10 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- const node_path_1 = __importDefault(require("node:path"));
7
6
  const core_1 = require("@oclif/core");
8
7
  const lib_1 = require("@pagepocket/lib");
9
8
  const lighterceptor_adapter_1 = require("@pagepocket/lighterceptor-adapter");
10
9
  const chalk_1 = __importDefault(require("chalk"));
11
- const fetch_html_1 = require("./stages/fetch-html");
12
10
  const prepare_output_1 = require("./stages/prepare-output");
13
11
  const write_snapshot_1 = require("./stages/write-snapshot");
14
12
  const with_spinner_1 = require("./utils/with-spinner");
@@ -17,78 +15,29 @@ class PagepocketCommand extends core_1.Command {
17
15
  const { args, flags } = await this.parse(PagepocketCommand);
18
16
  const targetUrl = args.url;
19
17
  const outputFlag = flags.output ? flags.output.trim() : undefined;
20
- const fetchTimeoutMs = Number(process.env.PAGEPOCKET_FETCH_TIMEOUT_MS || "60000");
21
- const headersOverride = (() => {
22
- const raw = process.env.PAGEPOCKET_FETCH_HEADERS;
23
- if (!raw) {
24
- return undefined;
25
- }
26
- try {
27
- const parsed = JSON.parse(raw);
28
- const headers = {};
29
- for (const [key, value] of Object.entries(parsed)) {
30
- if (value === undefined || value === null) {
31
- continue;
32
- }
33
- headers[key] = String(value);
34
- }
35
- return headers;
36
- }
37
- catch {
38
- throw new Error("Invalid PAGEPOCKET_FETCH_HEADERS JSON.");
39
- }
40
- })();
41
- const fetched = await (0, with_spinner_1.withSpinner)(async () => (0, fetch_html_1.fetchHtml)(targetUrl, fetchTimeoutMs, headersOverride), "Fetching the target HTML");
42
- const { outputPaths, snapshotData, snapshotHtml } = await (0, with_spinner_1.withSpinner)(async () => {
43
- const originalCwd = process.cwd();
44
- const interceptor = new lighterceptor_adapter_1.LighterceptorAdapter({ title: fetched.title });
45
- const snapshotSeed = await interceptor.run(targetUrl);
46
- const outputPaths = await (0, prepare_output_1.prepareOutputPaths)(snapshotSeed.title, outputFlag);
47
- const shouldRestoreCwd = outputPaths.baseDir !== originalCwd;
48
- try {
49
- if (shouldRestoreCwd) {
50
- process.chdir(outputPaths.baseDir);
51
- }
52
- const pagepocket = new lib_1.PagePocket(fetched.html, snapshotSeed, {
53
- baseUrl: targetUrl,
54
- assetsDirName: outputPaths.assetsDirName,
55
- requestsPath: node_path_1.default.basename(outputPaths.outputRequestsPath)
56
- });
57
- const pageData = await pagepocket.put();
58
- const snapshotData = {
59
- ...snapshotSeed,
60
- title: pageData.title,
61
- resources: pagepocket.resources
62
- };
63
- return {
64
- outputPaths,
65
- snapshotData,
66
- snapshotHtml: pageData.content
67
- };
68
- }
69
- finally {
70
- if (shouldRestoreCwd) {
71
- try {
72
- process.chdir(originalCwd);
73
- }
74
- catch {
75
- // Ignore restore errors to preserve original failure.
76
- }
77
- }
78
- }
79
- }, "Downloading resources");
18
+ const headers = {
19
+ "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
20
+ accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
21
+ "accept-language": "en-US,en;q=0.9",
22
+ referer: targetUrl
23
+ };
24
+ const snapshot = await (0, with_spinner_1.withSpinner)(async () => {
25
+ const interceptor = new lighterceptor_adapter_1.LighterceptorAdapter({ headers });
26
+ const pagepocket = lib_1.PagePocket.fromURL(targetUrl);
27
+ return pagepocket.capture({
28
+ interceptor,
29
+ completion: { wait: async () => { } }
30
+ });
31
+ }, "Capturing snapshot");
32
+ const { outputDir } = await (0, with_spinner_1.withSpinner)(() => (0, prepare_output_1.prepareOutputDir)(snapshot.title ?? "snapshot", outputFlag), "Preparing output directory");
80
33
  await (0, with_spinner_1.withSpinner)(async () => {
81
34
  await (0, write_snapshot_1.writeSnapshotFiles)({
82
- outputRequestsPath: outputPaths.outputRequestsPath,
83
- outputHtmlPath: outputPaths.outputHtmlPath,
84
- snapshotData,
85
- snapshotHtml: snapshotHtml
35
+ outputDir,
36
+ snapshot
86
37
  });
87
38
  }, "Writing snapshot files");
88
39
  this.log(chalk_1.default.green("All done! Snapshot created."));
89
- this.log(`HTML saved to ${chalk_1.default.cyan(outputPaths.outputHtmlPath)}`);
90
- this.log(`Requests saved to ${chalk_1.default.cyan(outputPaths.outputRequestsPath)}`);
91
- this.log(`Resources saved to ${chalk_1.default.cyan(outputPaths.resourcesDir)}`);
40
+ this.log(`Snapshot saved to ${chalk_1.default.cyan(outputDir)}`);
92
41
  process.exit();
93
42
  }
94
43
  }
@@ -3,25 +3,18 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.prepareOutputPaths = void 0;
6
+ exports.prepareOutputDir = void 0;
7
7
  const promises_1 = __importDefault(require("node:fs/promises"));
8
8
  const node_path_1 = __importDefault(require("node:path"));
9
9
  const filename_1 = require("../lib/filename");
10
- const prepareOutputPaths = async (title, outputFlag) => {
10
+ const prepareOutputDir = async (title, outputFlag) => {
11
11
  const safeTitle = (0, filename_1.safeFilename)(title || "snapshot");
12
12
  const baseDir = outputFlag ? node_path_1.default.resolve(outputFlag) : process.cwd();
13
- const outputHtmlPath = node_path_1.default.join(baseDir, `${safeTitle}.html`);
14
- const outputRequestsPath = node_path_1.default.join(baseDir, `${safeTitle}.requests.json`);
15
- const assetsDirName = `${safeTitle}_files`;
16
- const resourcesDir = node_path_1.default.join(baseDir, assetsDirName);
17
- await promises_1.default.mkdir(resourcesDir, { recursive: true });
13
+ const outputDir = node_path_1.default.join(baseDir, safeTitle);
14
+ await promises_1.default.mkdir(outputDir, { recursive: true });
18
15
  return {
19
16
  safeTitle,
20
- baseDir,
21
- outputHtmlPath,
22
- outputRequestsPath,
23
- assetsDirName,
24
- resourcesDir
17
+ outputDir
25
18
  };
26
19
  };
27
- exports.prepareOutputPaths = prepareOutputPaths;
20
+ exports.prepareOutputDir = prepareOutputDir;
@@ -1,12 +1,7 @@
1
1
  "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
2
  Object.defineProperty(exports, "__esModule", { value: true });
6
3
  exports.writeSnapshotFiles = void 0;
7
- const promises_1 = __importDefault(require("node:fs/promises"));
8
4
  const writeSnapshotFiles = async (input) => {
9
- await promises_1.default.writeFile(input.outputRequestsPath, JSON.stringify(input.snapshotData, null, 2), "utf-8");
10
- await promises_1.default.writeFile(input.outputHtmlPath, input.snapshotHtml, "utf-8");
5
+ await input.snapshot.toDirectory(input.outputDir);
11
6
  };
12
7
  exports.writeSnapshotFiles = writeSnapshotFiles;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pagepocket/cli",
3
- "version": "0.4.2",
3
+ "version": "0.5.0",
4
4
  "description": "CLI for capturing offline snapshots of web pages.",
5
5
  "main": "dist/index.js",
6
6
  "bin": {
@@ -15,12 +15,9 @@
15
15
  "dependencies": {
16
16
  "@oclif/core": "^4.0.9",
17
17
  "chalk": "^4.1.2",
18
- "cheerio": "^1.0.0-rc.12",
19
- "got": "^11.8.6",
20
18
  "ora": "^9.0.0",
21
- "@pagepocket/lighterceptor": "0.4.2",
22
- "@pagepocket/lib": "0.4.2",
23
- "@pagepocket/lighterceptor-adapter": "0.4.2"
19
+ "@pagepocket/lib": "0.5.0",
20
+ "@pagepocket/lighterceptor-adapter": "0.5.0"
24
21
  },
25
22
  "devDependencies": {
26
23
  "@types/node": "^20.11.30",