@pagepocket/lib 0.6.2 → 0.7.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/types.d.ts CHANGED
@@ -1,5 +1,7 @@
1
1
  import type { BodySource, NetworkInterceptorAdapter, NetworkRequestEvent, NetworkResponseEvent, ResourceType } from "@pagepocket/interceptor";
2
+ import type { Cheerio, CheerioAPI } from "cheerio";
2
3
  export type { BodySource, InterceptOptions, InterceptSession, InterceptTarget, InterceptorActions, InterceptorCapabilities, NavigateOptions, NetworkEvent, NetworkEventHandlers, NetworkInterceptorAdapter, NetworkRequestEvent, NetworkRequestFailedEvent, NetworkResponseEvent, ResourceType, TriggerAction } from "@pagepocket/interceptor";
4
+ import type { NetworkEvent } from "@pagepocket/interceptor";
3
5
  export interface PathResolver {
4
6
  resolve(input: {
5
7
  url: string;
@@ -47,20 +49,94 @@ export interface CompletionStrategy {
47
49
  }
48
50
  export interface PagePocketOptions {
49
51
  }
52
+ export type NetworkEventStream = AsyncIterable<NetworkEvent>;
50
53
  export interface CaptureOptions {
51
54
  interceptor: NetworkInterceptorAdapter;
52
55
  completion?: CompletionStrategy | CompletionStrategy[];
56
+ /**
57
+ * Network idle duration (ms) used to determine capture completion.
58
+ *
59
+ * If `completion` is not provided, PagePocket will wait until the network has
60
+ * been idle (no inflight requests) for this duration.
61
+ *
62
+ * Note: this is NOT a wall-clock timeout from capture start.
63
+ */
64
+ timeoutMs?: number;
65
+ /**
66
+ * Hard wall-clock limit (ms) for the overall capture session.
67
+ *
68
+ * When `completion` is not provided, PagePocket will stop after either:
69
+ * - network has been idle for `timeoutMs`, OR
70
+ * - `maxDurationMs` has elapsed.
71
+ */
72
+ maxDurationMs?: number;
53
73
  filter?: ResourceFilter;
54
74
  pathResolver?: PathResolver;
55
75
  contentStore?: ContentStore;
56
76
  rewriteEntry?: boolean;
57
77
  rewriteCSS?: boolean;
78
+ blacklist?: RegExp[];
79
+ /**
80
+ * Replace parts of the captured HTML (Document response body) during the HTML
81
+ * rewrite stage (Cheerio).
82
+ */
83
+ replaceElements?: ReplaceElementsConfig;
58
84
  limits?: {
59
85
  maxTotalBytes?: number;
60
86
  maxSingleResourceBytes?: number;
61
87
  maxResources?: number;
62
88
  };
63
89
  }
90
+ export type ReplaceElementsConfig = Array<ReplaceElementRule | ReplaceElementFn | ReplaceElementFnWithQuery>;
91
+ export type MatchQuery = string | {
92
+ selector?: string;
93
+ tagName?: string;
94
+ id?: string;
95
+ attrs?: Record<string, string | RegExp | true>;
96
+ };
97
+ export type ReplaceAction = {
98
+ type: "replaceWithHtml";
99
+ html: string;
100
+ } | {
101
+ type: "replaceWithElement";
102
+ tagName: string;
103
+ textContent?: string;
104
+ html?: string;
105
+ attrs?: Record<string, string | null>;
106
+ } | {
107
+ type: "renameTag";
108
+ to: string;
109
+ keepAttributes?: boolean;
110
+ keepChildren?: boolean;
111
+ } | {
112
+ type: "remove";
113
+ };
114
+ export interface ApplyOptions {
115
+ scope?: "document" | "allFrames";
116
+ limit?: number | "all";
117
+ onReplaced?: "stop" | "continue";
118
+ }
119
+ export interface ReplaceElementRule {
120
+ name?: string;
121
+ match: MatchQuery;
122
+ replace: ReplaceAction;
123
+ apply?: ApplyOptions;
124
+ }
125
+ export interface ReplaceElementContext {
126
+ $: CheerioAPI;
127
+ $el: Cheerio<any>;
128
+ url: string;
129
+ entryUrl: string;
130
+ ruleIndex: number;
131
+ matchIndex: number;
132
+ }
133
+ export type ReplaceElementFn = (ctx: ReplaceElementContext) => void | ReplaceAction | ReplaceAction[] | Promise<void | ReplaceAction | ReplaceAction[]>;
134
+ export interface ReplaceElementFnWithQuery {
135
+ name?: string;
136
+ query: string;
137
+ run: ReplaceElementFn;
138
+ apply?: ApplyOptions;
139
+ }
64
140
  export interface SnapshotFile {
65
141
  path: string;
66
142
  mimeType?: string;
@@ -84,19 +160,30 @@ export interface PageSnapshot {
84
160
  };
85
161
  content: ContentStoreHandle;
86
162
  toDirectory(outDir: string, options?: WriteFSOptions): Promise<WriteResult>;
87
- toZip(options?: ZipOptions): Promise<Uint8Array | Blob>;
163
+ toZip(options?: ZipOptions): Promise<ZipResult>;
88
164
  }
89
165
  export interface WriteFSOptions {
90
166
  clearCache?: boolean;
167
+ overwrite?: boolean;
168
+ suffix?: string;
91
169
  }
92
170
  export interface WriteResult {
93
171
  filesWritten: number;
94
172
  totalBytes: number;
173
+ outputDir?: string;
95
174
  }
96
175
  export interface ZipOptions {
97
176
  asBlob?: boolean;
98
177
  clearCache?: boolean;
178
+ overwrite?: boolean;
179
+ suffix?: string;
180
+ outputPath?: string;
181
+ }
182
+ export interface ZipWriteResult {
183
+ data: Uint8Array | Blob;
184
+ outputPath: string;
99
185
  }
186
+ export type ZipResult = Uint8Array | Blob | ZipWriteResult;
100
187
  export interface ApiRecord {
101
188
  url: string;
102
189
  method: string;
package/dist/writers.d.ts CHANGED
@@ -1,3 +1,3 @@
1
- import type { PageSnapshot, WriteFSOptions, WriteResult, ZipOptions } from "./types";
1
+ import type { PageSnapshot, WriteFSOptions, WriteResult, ZipOptions, ZipResult } from "./types";
2
2
  export declare const writeToFS: (snapshot: PageSnapshot, outDir: string, options?: WriteFSOptions) => Promise<WriteResult>;
3
- export declare const toZip: (snapshot: PageSnapshot, options?: ZipOptions) => Promise<Uint8Array | Blob>;
3
+ export declare const toZip: (snapshot: PageSnapshot, options?: ZipOptions) => Promise<ZipResult>;
package/dist/writers.js CHANGED
@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.toZip = exports.writeToFS = void 0;
4
4
  const uni_fs_1 = require("@pagepocket/uni-fs");
5
5
  const utils_1 = require("./utils");
6
+ const DEFAULT_SUFFIX_PATTERN = "_{num}";
6
7
  const normalizePath = (value) => value.replace(/\\/g, "/");
7
8
  const joinPath = (base, relative) => {
8
9
  const cleanBase = normalizePath(base).replace(/\/+$/, "");
@@ -24,6 +25,39 @@ const splitPathExtension = (value) => {
24
25
  }
25
26
  return { filename: clean, extension: "" };
26
27
  };
28
+ const trimTrailingSlash = (value) => {
29
+ const normalized = normalizePath(value);
30
+ if (normalized === "/") {
31
+ return normalized;
32
+ }
33
+ return normalized.replace(/\/+$/, "");
34
+ };
35
+ const buildSuffix = (pattern, index) => {
36
+ const template = pattern ?? DEFAULT_SUFFIX_PATTERN;
37
+ return template.includes("{num}") ? template.replace("{num}", String(index)) : `${template}${index}`;
38
+ };
39
+ const appendDirectorySuffix = (basePath, suffix) => {
40
+ return `${trimTrailingSlash(basePath)}${suffix}`;
41
+ };
42
+ const appendFileSuffix = (basePath, suffix) => {
43
+ const { filename, extension } = splitPathExtension(basePath);
44
+ if (!extension) {
45
+ return `${filename}${suffix}`;
46
+ }
47
+ return `${filename}${suffix}.${extension}`;
48
+ };
49
+ const resolveUniquePath = async (basePath, options) => {
50
+ if (options.overwrite) {
51
+ return basePath;
52
+ }
53
+ const applySuffix = options.kind === "directory" ? appendDirectorySuffix : appendFileSuffix;
54
+ for (let index = 0;; index += 1) {
55
+ const candidate = index === 0 ? basePath : applySuffix(basePath, buildSuffix(options.suffix, index));
56
+ if (!(await (0, uni_fs_1.existsPath)(candidate, ""))) {
57
+ return candidate;
58
+ }
59
+ }
60
+ };
27
61
  const streamToUint8Array = async (stream) => {
28
62
  const reader = stream.getReader();
29
63
  const chunks = [];
@@ -48,9 +82,14 @@ const streamToUint8Array = async (stream) => {
48
82
  const writeToFS = async (snapshot, outDir, options) => {
49
83
  let filesWritten = 0;
50
84
  let totalBytes = 0;
85
+ const outputDir = await resolveUniquePath(outDir, {
86
+ overwrite: options?.overwrite ?? false,
87
+ suffix: options?.suffix,
88
+ kind: "directory"
89
+ });
51
90
  for (const file of snapshot.files) {
52
91
  const relative = (0, utils_1.stripLeadingSlash)(file.path);
53
- const outputPath = joinPath(outDir, relative);
92
+ const outputPath = joinPath(outputDir, relative);
54
93
  const { filename, extension } = splitPathExtension(outputPath);
55
94
  const stream = await snapshot.content.open(file.source);
56
95
  const data = await streamToUint8Array(stream);
@@ -61,7 +100,7 @@ const writeToFS = async (snapshot, outDir, options) => {
61
100
  if (options?.clearCache ?? true) {
62
101
  await snapshot.content.dispose?.();
63
102
  }
64
- return { filesWritten, totalBytes };
103
+ return { filesWritten, totalBytes, outputDir };
65
104
  };
66
105
  exports.writeToFS = writeToFS;
67
106
  const crc32Table = (() => {
@@ -164,12 +203,25 @@ const toZip = async (snapshot, options) => {
164
203
  writeUint16(0)
165
204
  ]);
166
205
  const zipBytes = concatBytes([...localChunks, centralDirectory, endRecord]);
167
- const output = options?.asBlob && typeof Blob !== "undefined"
206
+ const outputData = options?.asBlob && typeof Blob !== "undefined"
168
207
  ? new Blob([zipBytes], { type: "application/zip" })
169
208
  : zipBytes;
209
+ if (options?.outputPath) {
210
+ const outputPath = await resolveUniquePath(options.outputPath, {
211
+ overwrite: options?.overwrite ?? false,
212
+ suffix: options?.suffix,
213
+ kind: "file"
214
+ });
215
+ const { filename, extension } = splitPathExtension(outputPath);
216
+ await (0, uni_fs_1.write)(filename, extension, outputData);
217
+ if (options?.clearCache ?? true) {
218
+ await snapshot.content.dispose?.();
219
+ }
220
+ return { data: outputData, outputPath };
221
+ }
170
222
  if (options?.clearCache ?? true) {
171
223
  await snapshot.content.dispose?.();
172
224
  }
173
- return output;
225
+ return outputData;
174
226
  };
175
227
  exports.toZip = toZip;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pagepocket/lib",
3
- "version": "0.6.2",
3
+ "version": "0.7.0",
4
4
  "description": "Library for rewriting HTML snapshots and inlining local resources.",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -12,8 +12,8 @@
12
12
  "license": "ISC",
13
13
  "dependencies": {
14
14
  "cheerio": "^1.0.0-rc.12",
15
- "@pagepocket/uni-fs": "0.6.2",
16
- "@pagepocket/interceptor": "0.6.2"
15
+ "@pagepocket/interceptor": "0.7.0",
16
+ "@pagepocket/uni-fs": "0.7.0"
17
17
  },
18
18
  "devDependencies": {
19
19
  "@playwright/test": "^1.50.1",