@pagepocket/lib 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +265 -3
- package/dist/builtin-blacklist.d.ts +3 -0
- package/dist/builtin-blacklist.js +6 -0
- package/dist/debug.d.ts +2 -0
- package/dist/debug.js +18 -0
- package/dist/hackers/index.js +6 -0
- package/dist/hackers/replay-block-text-fragment.d.ts +2 -0
- package/dist/hackers/replay-block-text-fragment.js +71 -0
- package/dist/hackers/replay-css-proxy.d.ts +2 -0
- package/dist/hackers/replay-css-proxy.js +206 -0
- package/dist/hackers/replay-dom-rewrite.js +103 -32
- package/dist/hackers/replay-history-path.d.ts +2 -0
- package/dist/hackers/replay-history-path.js +25 -0
- package/dist/index.d.ts +3 -1
- package/dist/index.js +18 -1
- package/dist/inflight-tracker.d.ts +19 -0
- package/dist/inflight-tracker.js +48 -0
- package/dist/pagepocket.d.ts +3 -1
- package/dist/pagepocket.js +150 -35
- package/dist/path-resolver.js +14 -4
- package/dist/replace-elements.d.ts +9 -0
- package/dist/replace-elements.js +258 -0
- package/dist/replay-script.js +308 -6
- package/dist/resource-proxy.d.ts +34 -0
- package/dist/resource-proxy.js +284 -0
- package/dist/rewrite-links.d.ts +8 -0
- package/dist/rewrite-links.js +122 -12
- package/dist/snapshot-builder.d.ts +2 -1
- package/dist/snapshot-builder.js +75 -2
- package/dist/types.d.ts +88 -1
- package/dist/writers.d.ts +2 -2
- package/dist/writers.js +56 -4
- package/package.json +3 -3
package/dist/types.d.ts
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import type { BodySource, NetworkInterceptorAdapter, NetworkRequestEvent, NetworkResponseEvent, ResourceType } from "@pagepocket/interceptor";
|
|
2
|
+
import type { Cheerio, CheerioAPI } from "cheerio";
|
|
2
3
|
export type { BodySource, InterceptOptions, InterceptSession, InterceptTarget, InterceptorActions, InterceptorCapabilities, NavigateOptions, NetworkEvent, NetworkEventHandlers, NetworkInterceptorAdapter, NetworkRequestEvent, NetworkRequestFailedEvent, NetworkResponseEvent, ResourceType, TriggerAction } from "@pagepocket/interceptor";
|
|
4
|
+
import type { NetworkEvent } from "@pagepocket/interceptor";
|
|
3
5
|
export interface PathResolver {
|
|
4
6
|
resolve(input: {
|
|
5
7
|
url: string;
|
|
@@ -47,20 +49,94 @@ export interface CompletionStrategy {
|
|
|
47
49
|
}
|
|
48
50
|
export interface PagePocketOptions {
|
|
49
51
|
}
|
|
52
|
+
export type NetworkEventStream = AsyncIterable<NetworkEvent>;
|
|
50
53
|
export interface CaptureOptions {
|
|
51
54
|
interceptor: NetworkInterceptorAdapter;
|
|
52
55
|
completion?: CompletionStrategy | CompletionStrategy[];
|
|
56
|
+
/**
|
|
57
|
+
* Network idle duration (ms) used to determine capture completion.
|
|
58
|
+
*
|
|
59
|
+
* If `completion` is not provided, PagePocket will wait until the network has
|
|
60
|
+
* been idle (no inflight requests) for this duration.
|
|
61
|
+
*
|
|
62
|
+
* Note: this is NOT a wall-clock timeout from capture start.
|
|
63
|
+
*/
|
|
64
|
+
timeoutMs?: number;
|
|
65
|
+
/**
|
|
66
|
+
* Hard wall-clock limit (ms) for the overall capture session.
|
|
67
|
+
*
|
|
68
|
+
* When `completion` is not provided, PagePocket will stop after either:
|
|
69
|
+
* - network has been idle for `timeoutMs`, OR
|
|
70
|
+
* - `maxDurationMs` has elapsed.
|
|
71
|
+
*/
|
|
72
|
+
maxDurationMs?: number;
|
|
53
73
|
filter?: ResourceFilter;
|
|
54
74
|
pathResolver?: PathResolver;
|
|
55
75
|
contentStore?: ContentStore;
|
|
56
76
|
rewriteEntry?: boolean;
|
|
57
77
|
rewriteCSS?: boolean;
|
|
78
|
+
blacklist?: RegExp[];
|
|
79
|
+
/**
|
|
80
|
+
* Replace parts of the captured HTML (Document response body) during the HTML
|
|
81
|
+
* rewrite stage (Cheerio).
|
|
82
|
+
*/
|
|
83
|
+
replaceElements?: ReplaceElementsConfig;
|
|
58
84
|
limits?: {
|
|
59
85
|
maxTotalBytes?: number;
|
|
60
86
|
maxSingleResourceBytes?: number;
|
|
61
87
|
maxResources?: number;
|
|
62
88
|
};
|
|
63
89
|
}
|
|
90
|
+
export type ReplaceElementsConfig = Array<ReplaceElementRule | ReplaceElementFn | ReplaceElementFnWithQuery>;
|
|
91
|
+
export type MatchQuery = string | {
|
|
92
|
+
selector?: string;
|
|
93
|
+
tagName?: string;
|
|
94
|
+
id?: string;
|
|
95
|
+
attrs?: Record<string, string | RegExp | true>;
|
|
96
|
+
};
|
|
97
|
+
export type ReplaceAction = {
|
|
98
|
+
type: "replaceWithHtml";
|
|
99
|
+
html: string;
|
|
100
|
+
} | {
|
|
101
|
+
type: "replaceWithElement";
|
|
102
|
+
tagName: string;
|
|
103
|
+
textContent?: string;
|
|
104
|
+
html?: string;
|
|
105
|
+
attrs?: Record<string, string | null>;
|
|
106
|
+
} | {
|
|
107
|
+
type: "renameTag";
|
|
108
|
+
to: string;
|
|
109
|
+
keepAttributes?: boolean;
|
|
110
|
+
keepChildren?: boolean;
|
|
111
|
+
} | {
|
|
112
|
+
type: "remove";
|
|
113
|
+
};
|
|
114
|
+
export interface ApplyOptions {
|
|
115
|
+
scope?: "document" | "allFrames";
|
|
116
|
+
limit?: number | "all";
|
|
117
|
+
onReplaced?: "stop" | "continue";
|
|
118
|
+
}
|
|
119
|
+
export interface ReplaceElementRule {
|
|
120
|
+
name?: string;
|
|
121
|
+
match: MatchQuery;
|
|
122
|
+
replace: ReplaceAction;
|
|
123
|
+
apply?: ApplyOptions;
|
|
124
|
+
}
|
|
125
|
+
export interface ReplaceElementContext {
|
|
126
|
+
$: CheerioAPI;
|
|
127
|
+
$el: Cheerio<any>;
|
|
128
|
+
url: string;
|
|
129
|
+
entryUrl: string;
|
|
130
|
+
ruleIndex: number;
|
|
131
|
+
matchIndex: number;
|
|
132
|
+
}
|
|
133
|
+
export type ReplaceElementFn = (ctx: ReplaceElementContext) => void | ReplaceAction | ReplaceAction[] | Promise<void | ReplaceAction | ReplaceAction[]>;
|
|
134
|
+
export interface ReplaceElementFnWithQuery {
|
|
135
|
+
name?: string;
|
|
136
|
+
query: string;
|
|
137
|
+
run: ReplaceElementFn;
|
|
138
|
+
apply?: ApplyOptions;
|
|
139
|
+
}
|
|
64
140
|
export interface SnapshotFile {
|
|
65
141
|
path: string;
|
|
66
142
|
mimeType?: string;
|
|
@@ -84,19 +160,30 @@ export interface PageSnapshot {
|
|
|
84
160
|
};
|
|
85
161
|
content: ContentStoreHandle;
|
|
86
162
|
toDirectory(outDir: string, options?: WriteFSOptions): Promise<WriteResult>;
|
|
87
|
-
toZip(options?: ZipOptions): Promise<Uint8Array | Blob>;
|
|
163
|
+
toZip(options?: ZipOptions): Promise<ZipResult>;
|
|
88
164
|
}
|
|
89
165
|
export interface WriteFSOptions {
|
|
90
166
|
clearCache?: boolean;
|
|
167
|
+
overwrite?: boolean;
|
|
168
|
+
suffix?: string;
|
|
91
169
|
}
|
|
92
170
|
export interface WriteResult {
|
|
93
171
|
filesWritten: number;
|
|
94
172
|
totalBytes: number;
|
|
173
|
+
outputDir?: string;
|
|
95
174
|
}
|
|
96
175
|
export interface ZipOptions {
|
|
97
176
|
asBlob?: boolean;
|
|
98
177
|
clearCache?: boolean;
|
|
178
|
+
overwrite?: boolean;
|
|
179
|
+
suffix?: string;
|
|
180
|
+
outputPath?: string;
|
|
181
|
+
}
|
|
182
|
+
export interface ZipWriteResult {
|
|
183
|
+
data: Uint8Array | Blob;
|
|
184
|
+
outputPath: string;
|
|
99
185
|
}
|
|
186
|
+
export type ZipResult = Uint8Array | Blob | ZipWriteResult;
|
|
100
187
|
export interface ApiRecord {
|
|
101
188
|
url: string;
|
|
102
189
|
method: string;
|
package/dist/writers.d.ts
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import type { PageSnapshot, WriteFSOptions, WriteResult, ZipOptions } from "./types";
|
|
1
|
+
import type { PageSnapshot, WriteFSOptions, WriteResult, ZipOptions, ZipResult } from "./types";
|
|
2
2
|
export declare const writeToFS: (snapshot: PageSnapshot, outDir: string, options?: WriteFSOptions) => Promise<WriteResult>;
|
|
3
|
-
export declare const toZip: (snapshot: PageSnapshot, options?: ZipOptions) => Promise<Uint8Array | Blob>;
|
|
3
|
+
export declare const toZip: (snapshot: PageSnapshot, options?: ZipOptions) => Promise<ZipResult>;
|
package/dist/writers.js
CHANGED
|
@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.toZip = exports.writeToFS = void 0;
|
|
4
4
|
const uni_fs_1 = require("@pagepocket/uni-fs");
|
|
5
5
|
const utils_1 = require("./utils");
|
|
6
|
+
const DEFAULT_SUFFIX_PATTERN = "_{num}";
|
|
6
7
|
const normalizePath = (value) => value.replace(/\\/g, "/");
|
|
7
8
|
const joinPath = (base, relative) => {
|
|
8
9
|
const cleanBase = normalizePath(base).replace(/\/+$/, "");
|
|
@@ -24,6 +25,39 @@ const splitPathExtension = (value) => {
|
|
|
24
25
|
}
|
|
25
26
|
return { filename: clean, extension: "" };
|
|
26
27
|
};
|
|
28
|
+
const trimTrailingSlash = (value) => {
|
|
29
|
+
const normalized = normalizePath(value);
|
|
30
|
+
if (normalized === "/") {
|
|
31
|
+
return normalized;
|
|
32
|
+
}
|
|
33
|
+
return normalized.replace(/\/+$/, "");
|
|
34
|
+
};
|
|
35
|
+
const buildSuffix = (pattern, index) => {
|
|
36
|
+
const template = pattern ?? DEFAULT_SUFFIX_PATTERN;
|
|
37
|
+
return template.includes("{num}") ? template.replace("{num}", String(index)) : `${template}${index}`;
|
|
38
|
+
};
|
|
39
|
+
const appendDirectorySuffix = (basePath, suffix) => {
|
|
40
|
+
return `${trimTrailingSlash(basePath)}${suffix}`;
|
|
41
|
+
};
|
|
42
|
+
const appendFileSuffix = (basePath, suffix) => {
|
|
43
|
+
const { filename, extension } = splitPathExtension(basePath);
|
|
44
|
+
if (!extension) {
|
|
45
|
+
return `${filename}${suffix}`;
|
|
46
|
+
}
|
|
47
|
+
return `${filename}${suffix}.${extension}`;
|
|
48
|
+
};
|
|
49
|
+
const resolveUniquePath = async (basePath, options) => {
|
|
50
|
+
if (options.overwrite) {
|
|
51
|
+
return basePath;
|
|
52
|
+
}
|
|
53
|
+
const applySuffix = options.kind === "directory" ? appendDirectorySuffix : appendFileSuffix;
|
|
54
|
+
for (let index = 0;; index += 1) {
|
|
55
|
+
const candidate = index === 0 ? basePath : applySuffix(basePath, buildSuffix(options.suffix, index));
|
|
56
|
+
if (!(await (0, uni_fs_1.existsPath)(candidate, ""))) {
|
|
57
|
+
return candidate;
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
};
|
|
27
61
|
const streamToUint8Array = async (stream) => {
|
|
28
62
|
const reader = stream.getReader();
|
|
29
63
|
const chunks = [];
|
|
@@ -48,9 +82,14 @@ const streamToUint8Array = async (stream) => {
|
|
|
48
82
|
const writeToFS = async (snapshot, outDir, options) => {
|
|
49
83
|
let filesWritten = 0;
|
|
50
84
|
let totalBytes = 0;
|
|
85
|
+
const outputDir = await resolveUniquePath(outDir, {
|
|
86
|
+
overwrite: options?.overwrite ?? false,
|
|
87
|
+
suffix: options?.suffix,
|
|
88
|
+
kind: "directory"
|
|
89
|
+
});
|
|
51
90
|
for (const file of snapshot.files) {
|
|
52
91
|
const relative = (0, utils_1.stripLeadingSlash)(file.path);
|
|
53
|
-
const outputPath = joinPath(outDir, relative);
|
|
92
|
+
const outputPath = joinPath(outputDir, relative);
|
|
54
93
|
const { filename, extension } = splitPathExtension(outputPath);
|
|
55
94
|
const stream = await snapshot.content.open(file.source);
|
|
56
95
|
const data = await streamToUint8Array(stream);
|
|
@@ -61,7 +100,7 @@ const writeToFS = async (snapshot, outDir, options) => {
|
|
|
61
100
|
if (options?.clearCache ?? true) {
|
|
62
101
|
await snapshot.content.dispose?.();
|
|
63
102
|
}
|
|
64
|
-
return { filesWritten, totalBytes };
|
|
103
|
+
return { filesWritten, totalBytes, outputDir };
|
|
65
104
|
};
|
|
66
105
|
exports.writeToFS = writeToFS;
|
|
67
106
|
const crc32Table = (() => {
|
|
@@ -164,12 +203,25 @@ const toZip = async (snapshot, options) => {
|
|
|
164
203
|
writeUint16(0)
|
|
165
204
|
]);
|
|
166
205
|
const zipBytes = concatBytes([...localChunks, centralDirectory, endRecord]);
|
|
167
|
-
const
|
|
206
|
+
const outputData = options?.asBlob && typeof Blob !== "undefined"
|
|
168
207
|
? new Blob([zipBytes], { type: "application/zip" })
|
|
169
208
|
: zipBytes;
|
|
209
|
+
if (options?.outputPath) {
|
|
210
|
+
const outputPath = await resolveUniquePath(options.outputPath, {
|
|
211
|
+
overwrite: options?.overwrite ?? false,
|
|
212
|
+
suffix: options?.suffix,
|
|
213
|
+
kind: "file"
|
|
214
|
+
});
|
|
215
|
+
const { filename, extension } = splitPathExtension(outputPath);
|
|
216
|
+
await (0, uni_fs_1.write)(filename, extension, outputData);
|
|
217
|
+
if (options?.clearCache ?? true) {
|
|
218
|
+
await snapshot.content.dispose?.();
|
|
219
|
+
}
|
|
220
|
+
return { data: outputData, outputPath };
|
|
221
|
+
}
|
|
170
222
|
if (options?.clearCache ?? true) {
|
|
171
223
|
await snapshot.content.dispose?.();
|
|
172
224
|
}
|
|
173
|
-
return
|
|
225
|
+
return outputData;
|
|
174
226
|
};
|
|
175
227
|
exports.toZip = toZip;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pagepocket/lib",
|
|
3
|
-
"version": "0.6.2",
|
|
3
|
+
"version": "0.7.0",
|
|
4
4
|
"description": "Library for rewriting HTML snapshots and inlining local resources.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -12,8 +12,8 @@
|
|
|
12
12
|
"license": "ISC",
|
|
13
13
|
"dependencies": {
|
|
14
14
|
"cheerio": "^1.0.0-rc.12",
|
|
15
|
-
"@pagepocket/
|
|
16
|
-
"@pagepocket/
|
|
15
|
+
"@pagepocket/interceptor": "0.7.0",
|
|
16
|
+
"@pagepocket/uni-fs": "0.7.0"
|
|
17
17
|
},
|
|
18
18
|
"devDependencies": {
|
|
19
19
|
"@playwright/test": "^1.50.1",
|