@pagepocket/lib 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +265 -3
- package/dist/builtin-blacklist.d.ts +3 -0
- package/dist/builtin-blacklist.js +6 -0
- package/dist/debug.d.ts +2 -0
- package/dist/debug.js +18 -0
- package/dist/hackers/index.js +6 -0
- package/dist/hackers/replay-block-text-fragment.d.ts +2 -0
- package/dist/hackers/replay-block-text-fragment.js +71 -0
- package/dist/hackers/replay-css-proxy.d.ts +2 -0
- package/dist/hackers/replay-css-proxy.js +206 -0
- package/dist/hackers/replay-dom-rewrite.js +103 -32
- package/dist/hackers/replay-history-path.d.ts +2 -0
- package/dist/hackers/replay-history-path.js +25 -0
- package/dist/index.d.ts +3 -1
- package/dist/index.js +18 -1
- package/dist/inflight-tracker.d.ts +19 -0
- package/dist/inflight-tracker.js +48 -0
- package/dist/pagepocket.d.ts +3 -1
- package/dist/pagepocket.js +150 -35
- package/dist/path-resolver.js +14 -4
- package/dist/replace-elements.d.ts +9 -0
- package/dist/replace-elements.js +258 -0
- package/dist/replay-script.js +308 -6
- package/dist/resource-proxy.d.ts +34 -0
- package/dist/resource-proxy.js +284 -0
- package/dist/rewrite-links.d.ts +8 -0
- package/dist/rewrite-links.js +122 -12
- package/dist/snapshot-builder.d.ts +2 -1
- package/dist/snapshot-builder.js +75 -2
- package/dist/types.d.ts +88 -1
- package/dist/writers.d.ts +2 -2
- package/dist/writers.js +56 -4
- package/package.json +3 -3
|
@@ -5,7 +5,9 @@ exports.replayDomRewriter = {
|
|
|
5
5
|
id: "replay-dom-rewriter",
|
|
6
6
|
stage: "replay",
|
|
7
7
|
build: () => `
|
|
8
|
-
//
|
|
8
|
+
// NOTE: When a resource is not found in the snapshot, we keep the original URL.
|
|
9
|
+
// (User choice) We still preserve a limited placeholder behavior for some tags,
|
|
10
|
+
// but only when we have a recorded response body to convert to a data URL.
|
|
9
11
|
const transparentGif = "data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///ywAAAAAAQABAAACAUwAOw==";
|
|
10
12
|
const emptyScript = "data:text/javascript,/*pagepocket-missing*/";
|
|
11
13
|
const emptyStyle = "data:text/css,/*pagepocket-missing*/";
|
|
@@ -31,9 +33,33 @@ exports.replayDomRewriter = {
|
|
|
31
33
|
}
|
|
32
34
|
};
|
|
33
35
|
|
|
36
|
+
const rewritten = new WeakMap();
|
|
37
|
+
|
|
34
38
|
// Rewrite srcset values to local files only (avoid data: URLs in srcset).
|
|
35
39
|
const rewriteSrcset = (value) => {
|
|
36
40
|
if (!value) return value;
|
|
41
|
+
|
|
42
|
+
// Substack-style image/fetch URLs include commas inside the URL token
|
|
43
|
+
// (",w_40,h_40,..."). This makes the srcset invalid and browsers will
|
|
44
|
+
// parse it into garbage candidate URLs. Prefer dropping srcset and relying
|
|
45
|
+
// on the rewritten img[src].
|
|
46
|
+
try {
|
|
47
|
+
const trimmed = String(value || "").trim();
|
|
48
|
+
const hasFetchTransform = trimmed.includes("/image/fetch/");
|
|
49
|
+
const hasEncodedUrlTail = trimmed.includes("https%3A%2F%2F");
|
|
50
|
+
const hasCommaTokens =
|
|
51
|
+
trimmed.includes(",w_") ||
|
|
52
|
+
trimmed.includes(", w_") ||
|
|
53
|
+
trimmed.includes(",h_") ||
|
|
54
|
+
trimmed.includes(", h_") ||
|
|
55
|
+
trimmed.includes(",c_") ||
|
|
56
|
+
trimmed.includes(", c_");
|
|
57
|
+
|
|
58
|
+
if (hasFetchTransform && hasEncodedUrlTail && hasCommaTokens) {
|
|
59
|
+
return "";
|
|
60
|
+
}
|
|
61
|
+
} catch {}
|
|
62
|
+
|
|
37
63
|
return value.split(",").map((part) => {
|
|
38
64
|
const trimmed = part.trim();
|
|
39
65
|
if (!trimmed) return trimmed;
|
|
@@ -46,7 +72,7 @@ exports.replayDomRewriter = {
|
|
|
46
72
|
return descriptor ? localPath + " " + descriptor : localPath;
|
|
47
73
|
}
|
|
48
74
|
return trimmed;
|
|
49
|
-
}).join(",
|
|
75
|
+
}).join(",");
|
|
50
76
|
};
|
|
51
77
|
|
|
52
78
|
// Rewrite element attributes to local files or data URLs.
|
|
@@ -56,9 +82,21 @@ exports.replayDomRewriter = {
|
|
|
56
82
|
onReady(() => rewriteElement(element));
|
|
57
83
|
return;
|
|
58
84
|
}
|
|
85
|
+
const prev = rewritten.get(element);
|
|
86
|
+
const currentSrc = element.getAttribute("src");
|
|
87
|
+
const currentHref = element.getAttribute("href");
|
|
88
|
+
const currentSrcset = element.getAttribute("srcset");
|
|
89
|
+
if (
|
|
90
|
+
prev &&
|
|
91
|
+
prev.src === currentSrc &&
|
|
92
|
+
prev.href === currentHref &&
|
|
93
|
+
prev.srcset === currentSrcset
|
|
94
|
+
) {
|
|
95
|
+
return;
|
|
96
|
+
}
|
|
59
97
|
const tag = (element.tagName || "").toLowerCase();
|
|
60
|
-
if (tag === "img" || tag === "source" || tag === "video" || tag === "audio" || tag === "script" || tag === "iframe") {
|
|
61
|
-
const src =
|
|
98
|
+
if (tag === "img" || tag === "source" || tag === "video" || tag === "audio" || tag === "script" || tag === "iframe" || tag === "object" || tag === "embed") {
|
|
99
|
+
const src = currentSrc;
|
|
62
100
|
if (src && !isLocalResource(src) && !src.startsWith("data:") && !src.startsWith("blob:")) {
|
|
63
101
|
const localPath = findLocalPath(src);
|
|
64
102
|
if (localPath) {
|
|
@@ -66,13 +104,18 @@ exports.replayDomRewriter = {
|
|
|
66
104
|
return;
|
|
67
105
|
}
|
|
68
106
|
const record = findByUrl(src);
|
|
69
|
-
|
|
70
|
-
|
|
107
|
+
if (record) {
|
|
108
|
+
element.setAttribute("src", toDataUrl(record));
|
|
109
|
+
return;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
// Keep original URL when we have no snapshot match.
|
|
113
|
+
return;
|
|
71
114
|
}
|
|
72
115
|
}
|
|
73
116
|
|
|
74
117
|
if (tag === "link") {
|
|
75
|
-
const href =
|
|
118
|
+
const href = currentHref;
|
|
76
119
|
const rel = (element.getAttribute("rel") || "").toLowerCase();
|
|
77
120
|
if (href && !isLocalResource(href) && !href.startsWith("data:") && !href.startsWith("blob:")) {
|
|
78
121
|
const localPath = findLocalPath(href);
|
|
@@ -81,15 +124,25 @@ exports.replayDomRewriter = {
|
|
|
81
124
|
return;
|
|
82
125
|
}
|
|
83
126
|
const record = findByUrl(href);
|
|
84
|
-
|
|
85
|
-
|
|
127
|
+
if (record) {
|
|
128
|
+
element.setAttribute("href", toDataUrl(record, "text/css"));
|
|
129
|
+
return;
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
return;
|
|
86
133
|
}
|
|
87
134
|
}
|
|
88
135
|
|
|
89
|
-
const srcset =
|
|
136
|
+
const srcset = currentSrcset;
|
|
90
137
|
if (srcset) {
|
|
91
138
|
element.setAttribute("srcset", rewriteSrcset(srcset));
|
|
92
139
|
}
|
|
140
|
+
|
|
141
|
+
rewritten.set(element, {
|
|
142
|
+
src: element.getAttribute("src"),
|
|
143
|
+
href: element.getAttribute("href"),
|
|
144
|
+
srcset: element.getAttribute("srcset")
|
|
145
|
+
});
|
|
93
146
|
};
|
|
94
147
|
|
|
95
148
|
// Intercept DOM attribute writes to keep resources local.
|
|
@@ -124,14 +177,9 @@ exports.replayDomRewriter = {
|
|
|
124
177
|
const dataUrl = toDataUrl(record, fallbackType);
|
|
125
178
|
return originalSetAttribute.call(this, name, dataUrl);
|
|
126
179
|
}
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
}
|
|
131
|
-
if (attr === "href") {
|
|
132
|
-
const fallback = relLower === "stylesheet" ? emptyStyle : emptyStyle;
|
|
133
|
-
return originalSetAttribute.call(this, name, fallback);
|
|
134
|
-
}
|
|
180
|
+
|
|
181
|
+
// Keep original URL on miss.
|
|
182
|
+
return originalSetAttribute.call(this, name, value);
|
|
135
183
|
}
|
|
136
184
|
return originalSetAttribute.call(this, name, value);
|
|
137
185
|
};
|
|
@@ -165,8 +213,12 @@ exports.replayDomRewriter = {
|
|
|
165
213
|
return;
|
|
166
214
|
}
|
|
167
215
|
const record = findByUrl(rawValue);
|
|
168
|
-
|
|
169
|
-
|
|
216
|
+
if (record) {
|
|
217
|
+
setter.call(this, toDataUrl(record));
|
|
218
|
+
return;
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
setter.call(this, rawValue);
|
|
170
222
|
});
|
|
171
223
|
return;
|
|
172
224
|
}
|
|
@@ -180,8 +232,12 @@ exports.replayDomRewriter = {
|
|
|
180
232
|
return;
|
|
181
233
|
}
|
|
182
234
|
const record = findByUrl(rawValue);
|
|
183
|
-
|
|
184
|
-
|
|
235
|
+
if (record) {
|
|
236
|
+
setter.call(this, toDataUrl(record));
|
|
237
|
+
return;
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
setter.call(this, rawValue);
|
|
185
241
|
});
|
|
186
242
|
|
|
187
243
|
patchProperty(HTMLScriptElement.prototype, "src", function(value, setter) {
|
|
@@ -198,8 +254,12 @@ exports.replayDomRewriter = {
|
|
|
198
254
|
return;
|
|
199
255
|
}
|
|
200
256
|
const record = findByUrl(rawValue);
|
|
201
|
-
|
|
202
|
-
|
|
257
|
+
if (record) {
|
|
258
|
+
setter.call(this, toDataUrl(record));
|
|
259
|
+
return;
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
setter.call(this, rawValue);
|
|
203
263
|
});
|
|
204
264
|
return;
|
|
205
265
|
}
|
|
@@ -213,8 +273,13 @@ exports.replayDomRewriter = {
|
|
|
213
273
|
return;
|
|
214
274
|
}
|
|
215
275
|
const record = findByUrl(rawValue);
|
|
216
|
-
|
|
217
|
-
|
|
276
|
+
if (record) {
|
|
277
|
+
setter.call(this, toDataUrl(record));
|
|
278
|
+
return;
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
// Keep original URL on miss.
|
|
282
|
+
setter.call(this, rawValue);
|
|
218
283
|
});
|
|
219
284
|
|
|
220
285
|
patchProperty(HTMLLinkElement.prototype, "href", function(value, setter) {
|
|
@@ -233,8 +298,12 @@ exports.replayDomRewriter = {
|
|
|
233
298
|
return;
|
|
234
299
|
}
|
|
235
300
|
const record = findByUrl(rawValue);
|
|
236
|
-
|
|
237
|
-
|
|
301
|
+
if (record) {
|
|
302
|
+
setter.call(this, toDataUrl(record, relLower.includes("stylesheet") ? "text/css" : undefined));
|
|
303
|
+
return;
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
setter.call(this, rawValue);
|
|
238
307
|
});
|
|
239
308
|
return;
|
|
240
309
|
}
|
|
@@ -248,8 +317,12 @@ exports.replayDomRewriter = {
|
|
|
248
317
|
return;
|
|
249
318
|
}
|
|
250
319
|
const record = findByUrl(rawValue);
|
|
251
|
-
|
|
252
|
-
|
|
320
|
+
if (record) {
|
|
321
|
+
setter.call(this, toDataUrl(record, relLower.includes("stylesheet") ? "text/css" : undefined));
|
|
322
|
+
return;
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
setter.call(this, rawValue);
|
|
253
326
|
});
|
|
254
327
|
|
|
255
328
|
patchProperty(HTMLImageElement.prototype, "srcset", function(value, setter) {
|
|
@@ -275,8 +348,6 @@ exports.replayDomRewriter = {
|
|
|
275
348
|
mutation.addedNodes.forEach((node) => {
|
|
276
349
|
if (node && node.nodeType === 1) {
|
|
277
350
|
rewriteElement(node);
|
|
278
|
-
const descendants = node.querySelectorAll ? node.querySelectorAll("img,source,video,audio,script,link,iframe") : [];
|
|
279
|
-
descendants.forEach((el) => rewriteElement(el));
|
|
280
351
|
}
|
|
281
352
|
});
|
|
282
353
|
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.replayHistoryPath = void 0;
|
|
4
|
+
/**
 * Replay-stage hack descriptor that makes the replayed page's location match
 * the path of the originally captured URL.
 *
 * `build()` returns JavaScript *source text*, not a function. The text
 * references a `baseUrl` identifier that this snippet does not define.
 * NOTE(review): presumably `baseUrl` is provided by the script the snippet is
 * embedded into — confirm against the replay-script builder.
 */
exports.replayHistoryPath = {
    // Identifier of this hack.
    id: "replay-history-path",
    // Stage label; the sibling DOM rewriter in this package uses the same value.
    stage: "replay",
    // Emits a snippet that compares pathname + search + hash of `baseUrl` with
    // the current window.location and, when they differ, swaps the URL with
    // history.replaceState (keeping history.state). Any exception thrown by the
    // snippet is swallowed by its own try/catch.
    build: () => `
  // Ensure history/location reflects the original captured URL path.
  //
  // When a snapshot is served from a static server root (e.g. http://localhost:8080/index.html),
  // SPA routers that read location.pathname will see "/".
  // For a page captured from https://foo.com/bar/foo, the correct route should be "/bar/foo".
  //
  // We patch history early and replace the current URL without causing navigation.
  try {
    const parsed = new URL(baseUrl);
    const desiredPath = parsed.pathname + (parsed.search || "") + (parsed.hash || "");
    const currentPath = window.location.pathname + window.location.search + window.location.hash;

    if (desiredPath && desiredPath !== currentPath) {
      history.replaceState(history.state, "", desiredPath);
    }
  } catch {}
`
};
|
package/dist/index.d.ts
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
export { PagePocket } from "./pagepocket";
|
|
2
|
-
export type { ApiRecord, ApiSnapshot, CaptureOptions, CompletionContext, CompletionStrategy, ContentRef, ContentStore, ContentStoreHandle, NetworkRequestEvent, NetworkRequestFailedEvent, NetworkResponseEvent, PagePocketOptions, PageSnapshot, PathResolver, ResourceFilter, SnapshotFile, WriteFSOptions, WriteResult, ZipOptions } from "./types";
|
|
2
|
+
export type { ApiRecord, ApiSnapshot, ApplyOptions, CaptureOptions, CompletionContext, CompletionStrategy, ContentRef, ContentStore, ContentStoreHandle, MatchQuery, NetworkEventStream, NetworkRequestEvent, NetworkRequestFailedEvent, NetworkResponseEvent, PagePocketOptions, PageSnapshot, PathResolver, ReplaceAction, ReplaceElementContext, ReplaceElementFn, ReplaceElementFnWithQuery, ReplaceElementRule, ReplaceElementsConfig, ResourceFilter, SnapshotFile, WriteFSOptions, WriteResult, ZipWriteResult, ZipResult, ZipOptions } from "./types";
|
|
3
3
|
export { HybridContentStore } from "./content-store";
|
|
4
4
|
export { createDefaultPathResolver, withPrefixPathResolver } from "./path-resolver";
|
|
5
5
|
export { createDefaultResourceFilter } from "./resource-filter";
|
|
6
|
+
export * from "./builtin-blacklist";
|
|
6
7
|
export { networkIdle, timeout } from "./completion";
|
|
7
8
|
export { buildReplayScript } from "./replay-script";
|
|
8
9
|
export { buildPreloadScript } from "./preload";
|
|
9
10
|
export { rewriteEntryHtml, rewriteJsText } from "./rewrite-links";
|
|
10
11
|
export { rewriteCssText } from "./css-rewrite";
|
|
11
12
|
export { writeToFS, toZip } from "./writers";
|
|
13
|
+
export { applyReplaceElements } from "./replace-elements";
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,20 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
+
// Compiler-emitted re-export helper: exposes property `k` of module `m` on
// object `o` under the name `k2` (defaults to `k`).
// An existing `this.__createBinding` is reused when present; otherwise one of
// two implementations is chosen depending on whether Object.create exists.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    // Fall back to a live getter when `m` has no own descriptor for `k`, when
    // the descriptor is an accessor but `m` is not flagged __esModule, or when
    // it is a data property that is writable or configurable.
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    // Without Object.create: plain copy of the current value (not live).
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
|
|
13
|
+
// Compiler-emitted helper behind `export * from "..."`: binds every enumerable
// key of `m` onto `exports`, skipping "default" and any name `exports` already
// owns. An existing `this.__exportStar` is reused when present.
var __exportStar = (this && this.__exportStar) || function(m, exports) {
    for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
};
|
|
2
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.toZip = exports.writeToFS = exports.rewriteCssText = exports.rewriteJsText = exports.rewriteEntryHtml = exports.buildPreloadScript = exports.buildReplayScript = exports.timeout = exports.networkIdle = exports.createDefaultResourceFilter = exports.withPrefixPathResolver = exports.createDefaultPathResolver = exports.HybridContentStore = exports.PagePocket = void 0;
|
|
17
|
+
exports.applyReplaceElements = exports.toZip = exports.writeToFS = exports.rewriteCssText = exports.rewriteJsText = exports.rewriteEntryHtml = exports.buildPreloadScript = exports.buildReplayScript = exports.timeout = exports.networkIdle = exports.createDefaultResourceFilter = exports.withPrefixPathResolver = exports.createDefaultPathResolver = exports.HybridContentStore = exports.PagePocket = void 0;
|
|
4
18
|
var pagepocket_1 = require("./pagepocket");
|
|
5
19
|
Object.defineProperty(exports, "PagePocket", { enumerable: true, get: function () { return pagepocket_1.PagePocket; } });
|
|
6
20
|
var content_store_1 = require("./content-store");
|
|
@@ -10,6 +24,7 @@ Object.defineProperty(exports, "createDefaultPathResolver", { enumerable: true,
|
|
|
10
24
|
Object.defineProperty(exports, "withPrefixPathResolver", { enumerable: true, get: function () { return path_resolver_1.withPrefixPathResolver; } });
|
|
11
25
|
var resource_filter_1 = require("./resource-filter");
|
|
12
26
|
Object.defineProperty(exports, "createDefaultResourceFilter", { enumerable: true, get: function () { return resource_filter_1.createDefaultResourceFilter; } });
|
|
27
|
+
__exportStar(require("./builtin-blacklist"), exports);
|
|
13
28
|
var completion_1 = require("./completion");
|
|
14
29
|
Object.defineProperty(exports, "networkIdle", { enumerable: true, get: function () { return completion_1.networkIdle; } });
|
|
15
30
|
Object.defineProperty(exports, "timeout", { enumerable: true, get: function () { return completion_1.timeout; } });
|
|
@@ -25,3 +40,5 @@ Object.defineProperty(exports, "rewriteCssText", { enumerable: true, get: functi
|
|
|
25
40
|
var writers_1 = require("./writers");
|
|
26
41
|
Object.defineProperty(exports, "writeToFS", { enumerable: true, get: function () { return writers_1.writeToFS; } });
|
|
27
42
|
Object.defineProperty(exports, "toZip", { enumerable: true, get: function () { return writers_1.toZip; } });
|
|
43
|
+
var replace_elements_1 = require("./replace-elements");
|
|
44
|
+
Object.defineProperty(exports, "applyReplaceElements", { enumerable: true, get: function () { return replace_elements_1.applyReplaceElements; } });
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { NetworkEvent } from "@pagepocket/interceptor";
|
|
2
|
+
/** Counters reported by InflightTracker after each event and on demand. */
type InflightStats = {
    /** Number of request ids currently tracked as started but not yet finished. */
    inflightRequests: number;
    /**
     * Timestamp of the last event that was not ignored: the event's own
     * `timestamp` when truthy, otherwise `Date.now()` at handling time.
     * Initialised to `Date.now()` when the tracker is constructed.
     */
    lastNetworkTs: number;
    /** Count of handled "request" events, including repeats of the same request id. */
    totalRequests: number;
};
|
|
7
|
+
/**
 * Keeps running network statistics (in-flight count, last activity timestamp,
 * total requests) from a stream of interceptor network events.
 */
export declare class InflightTracker {
    /** Set of request ids that have started and not yet received a response/failure. */
    private inflight;
    /** Counter kept in step with additions to and removals from `inflight`. */
    private inflightRequests;
    /** Timestamp of the most recent non-ignored event. */
    private lastNetworkTs;
    /** Number of "request" events handled so far. */
    private totalRequests;
    /** Predicate deciding whether an event is skipped entirely. */
    private shouldIgnore;
    /**
     * @param options.shouldIgnore Optional predicate; events for which it
     * returns true leave all counters untouched. Defaults to ignoring nothing.
     */
    constructor(options?: {
        shouldIgnore?: (event: NetworkEvent) => boolean;
    });
    /**
     * Applies one event to the counters and returns the resulting stats.
     * "request" events add the request id; "response" and "failed" remove it.
     */
    handleEvent(event: NetworkEvent): InflightStats;
    /** Returns the current stats as a fresh object without modifying state. */
    getStats(): InflightStats;
}
|
|
19
|
+
export {};
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.InflightTracker = void 0;
|
|
4
|
+
/**
 * Maintains running statistics about network activity: how many requests are
 * currently in flight, when the network was last active, and how many
 * requests have been seen in total.
 */
class InflightTracker {
    /**
     * @param {{ shouldIgnore?: (event: object) => boolean }} [options]
     *   `shouldIgnore` marks events that must not affect any counter.
     */
    constructor(options) {
        this.inflight = new Set();
        this.inflightRequests = 0;
        this.lastNetworkTs = Date.now();
        this.totalRequests = 0;
        this.shouldIgnore = options?.shouldIgnore ?? (() => false);
    }

    /**
     * Folds a single network event into the counters.
     *
     * @param {object} event Event carrying `type`, `requestId` and optionally `timestamp`.
     * @returns {{ inflightRequests: number, lastNetworkTs: number, totalRequests: number }}
     */
    handleEvent(event) {
        if (this.shouldIgnore(event)) {
            return this.getStats();
        }

        // Any non-ignored event counts as activity; prefer the event's own clock.
        this.lastNetworkTs = event?.timestamp ? event.timestamp : Date.now();

        switch (event?.type) {
            case "request": {
                // Every request event is counted, but an id only occupies one in-flight slot.
                this.totalRequests += 1;
                const id = event.requestId;
                if (!this.inflight.has(id)) {
                    this.inflight.add(id);
                    this.inflightRequests += 1;
                }
                break;
            }
            case "response":
            case "failed": {
                // Only ids that were actually tracked release a slot.
                const wasTracked = this.inflight.delete(event.requestId);
                if (wasTracked) {
                    this.inflightRequests = Math.max(0, this.inflightRequests - 1);
                }
                break;
            }
            default:
                break;
        }

        return this.getStats();
    }

    /**
     * @returns {{ inflightRequests: number, lastNetworkTs: number, totalRequests: number }}
     *   A fresh snapshot of the current counters.
     */
    getStats() {
        const { inflightRequests, lastNetworkTs, totalRequests } = this;
        return { inflightRequests, lastNetworkTs, totalRequests };
    }
}
|
|
48
|
+
exports.InflightTracker = InflightTracker;
|
package/dist/pagepocket.d.ts
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import type { InterceptTarget } from "@pagepocket/interceptor";
|
|
2
|
-
import type { CaptureOptions, PagePocketOptions, PageSnapshot } from "./types";
|
|
2
|
+
import type { CaptureOptions, NetworkEventStream, PagePocketOptions, PageSnapshot } from "./types";
|
|
3
3
|
/**
 * Entry point for capturing a page: wraps an intercept target plus options and
 * produces a PageSnapshot from the network events observed during capture.
 */
export declare class PagePocket {
    /** The intercept target this instance captures. */
    private target;
    /** Options given at construction (an empty object when omitted). */
    private options;
    /** Broadcast stream through which intercepted network events are re-published. */
    private eventStream;
    /** Instances are created through the static factories. */
    private constructor();
    /** Creates an instance for a URL. NOTE(review): implementation not shown here — presumably wraps the URL in a target of kind "url". */
    static fromURL(url: string, options?: PagePocketOptions): PagePocket;
    /** Creates an instance for an existing intercept target. */
    static fromTarget(target: InterceptTarget, options?: PagePocketOptions): PagePocket;
    /**
     * Returns a new subscription to the network events seen during capture.
     * The underlying stream is closed once `capture()` has stopped the session
     * and all pending event handling has settled.
     */
    interceptedRequestEvents(): NetworkEventStream;
    /**
     * Runs a capture session and resolves with the built snapshot.
     * Throws an Error when `options.interceptor` is not provided.
     */
    capture(options?: CaptureOptions): Promise<PageSnapshot>;
}
|
package/dist/pagepocket.js
CHANGED
|
@@ -3,12 +3,111 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.PagePocket = void 0;
|
|
4
4
|
const completion_1 = require("./completion");
|
|
5
5
|
const content_store_1 = require("./content-store");
|
|
6
|
+
const debug_1 = require("./debug");
|
|
7
|
+
const inflight_tracker_1 = require("./inflight-tracker");
|
|
6
8
|
const network_store_1 = require("./network-store");
|
|
7
9
|
const path_resolver_1 = require("./path-resolver");
|
|
8
10
|
const resource_filter_1 = require("./resource-filter");
|
|
9
11
|
const snapshot_builder_1 = require("./snapshot-builder");
|
|
12
|
+
/**
 * Single-consumer FIFO that bridges synchronous producers (`push`/`close`)
 * and an asynchronous consumer (`iterate`). Values pushed while nobody is
 * waiting are buffered; a waiting consumer is woken directly.
 */
class AsyncEventQueue {
    constructor() {
        // Buffered entries: { kind: "value", value } or a trailing { kind: "close" }.
        this.values = [];
        // Resolvers of consumers currently awaiting the next entry.
        this.pending = [];
        this.closed = false;
    }

    /**
     * Delivers a value to a waiting consumer, or buffers it. Ignored after close.
     * @param {*} value
     */
    push(value) {
        if (this.closed) {
            return;
        }
        const entry = { kind: "value", value };
        if (this.pending.length === 0) {
            this.values.push(entry);
            return;
        }
        const wake = this.pending.shift();
        wake?.(entry);
    }

    /**
     * Marks the queue finished. Waiting consumers are released immediately;
     * otherwise a close marker is appended after any buffered values.
     */
    close() {
        if (this.closed) {
            return;
        }
        this.closed = true;
        if (this.pending.length === 0) {
            this.values.push({ kind: "close" });
            return;
        }
        for (const wake of this.pending) {
            wake({ kind: "close" });
        }
        this.pending = [];
    }

    /**
     * Yields buffered and future values in order and completes once the queue
     * is closed and drained.
     * @returns {AsyncGenerator<*, void, void>}
     */
    async *iterate() {
        for (;;) {
            if (this.values.length === 0) {
                if (this.closed) {
                    return;
                }
                // Nothing buffered: park until push() or close() wakes us.
                const delivered = await new Promise((wake) => {
                    this.pending.push(wake);
                });
                if (delivered.kind === "close") {
                    return;
                }
                yield delivered.value;
            }
            else {
                const buffered = this.values.shift();
                if (!buffered || buffered.kind === "close") {
                    return;
                }
                yield buffered.value;
            }
        }
    }
}
|
|
66
|
+
/**
 * Fan-out hub: every subscriber gets its own AsyncEventQueue and receives all
 * values emitted after it subscribed, until the hub is closed or the
 * subscriber stops iterating.
 */
class BroadcastAsyncIterable {
    constructor() {
        // Queues of live subscribers.
        this.queues = new Set();
        this.closed = false;
    }

    /**
     * Registers a new subscriber.
     *
     * Subscribing after `close()` returns an iterable that completes
     * immediately. Its queue is deliberately NOT registered: the hub will
     * never emit again, and registering it would keep the queue referenced
     * for as long as the caller never iterates the returned generator.
     *
     * @returns {AsyncGenerator<*, void, void>} Stream of emitted values.
     */
    subscribe() {
        const queue = new AsyncEventQueue();
        if (this.closed) {
            queue.close();
        }
        else {
            this.queues.add(queue);
        }
        return (async function* (owner) {
            try {
                for await (const value of queue.iterate()) {
                    yield value;
                }
            }
            finally {
                // Runs on normal completion and on early exit (break / return()).
                owner.queues.delete(queue);
                queue.close();
            }
        })(this);
    }

    /**
     * Pushes a value to every live subscriber. No-op when there are none
     * (which includes the state after `close()`, since the set is cleared).
     * @param {*} value
     */
    emit(value) {
        if (this.queues.size === 0) {
            return;
        }
        for (const queue of this.queues) {
            queue.push(value);
        }
    }

    /** Completes every subscriber's stream and forgets them. Idempotent. */
    close() {
        if (this.closed) {
            return;
        }
        this.closed = true;
        for (const queue of this.queues) {
            queue.close();
        }
        this.queues.clear();
    }
}
|
|
10
108
|
class PagePocket {
|
|
11
109
|
constructor(target, options) {
|
|
110
|
+
this.eventStream = new BroadcastAsyncIterable();
|
|
12
111
|
this.target = target;
|
|
13
112
|
this.options = options ?? {};
|
|
14
113
|
}
|
|
@@ -18,77 +117,92 @@ class PagePocket {
|
|
|
18
117
|
static fromTarget(target, options) {
|
|
19
118
|
return new PagePocket(target, options);
|
|
20
119
|
}
|
|
120
|
+
interceptedRequestEvents() {
|
|
121
|
+
return this.eventStream.subscribe();
|
|
122
|
+
}
|
|
21
123
|
async capture(options) {
|
|
22
124
|
if (!options?.interceptor) {
|
|
23
125
|
throw new Error("CaptureOptions.interceptor is required.");
|
|
24
126
|
}
|
|
25
127
|
const contentStore = options?.contentStore ?? new content_store_1.HybridContentStore();
|
|
26
|
-
const
|
|
128
|
+
const baseFilter = options?.filter ?? (0, resource_filter_1.createDefaultResourceFilter)();
|
|
129
|
+
const blacklist = options?.blacklist ?? [];
|
|
130
|
+
const isBlacklisted = (url) => typeof url === "string" && blacklist.some((pattern) => pattern.test(url));
|
|
131
|
+
const filter = blacklist.length === 0
|
|
132
|
+
? baseFilter
|
|
133
|
+
: {
|
|
134
|
+
shouldSave(req, res) {
|
|
135
|
+
if (isBlacklisted(req.url)) {
|
|
136
|
+
(0, debug_1.debug_log)("[pagepocket] blacklist match (skip save)", req.url);
|
|
137
|
+
return false;
|
|
138
|
+
}
|
|
139
|
+
return baseFilter.shouldSave(req, res);
|
|
140
|
+
}
|
|
141
|
+
};
|
|
27
142
|
const pathResolver = options?.pathResolver ?? (0, path_resolver_1.createDefaultPathResolver)();
|
|
28
143
|
const rewriteEntry = options?.rewriteEntry ?? true;
|
|
29
144
|
const rewriteCSS = options?.rewriteCSS ?? true;
|
|
145
|
+
const replaceElements = options?.replaceElements;
|
|
30
146
|
const limits = options?.limits;
|
|
31
147
|
const completionStrategies = (0, completion_1.normalizeCompletion)(options?.completion);
|
|
32
|
-
const
|
|
148
|
+
const idleMs = options?.timeoutMs ?? 5000;
|
|
149
|
+
const maxDurationMs = options?.maxDurationMs;
|
|
150
|
+
const completion = completionStrategies.length > 0
|
|
151
|
+
? completionStrategies
|
|
152
|
+
: [(0, completion_1.networkIdle)(idleMs), ...(maxDurationMs !== undefined ? [(0, completion_1.timeout)(maxDurationMs)] : [])];
|
|
33
153
|
const store = new network_store_1.NetworkStore({
|
|
34
154
|
contentStore,
|
|
35
155
|
filter,
|
|
36
156
|
limits
|
|
37
157
|
});
|
|
38
|
-
const
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
const pendingEvents = new Set();
|
|
43
|
-
const onEvent = (event) => {
|
|
44
|
-
if (event?.timestamp) {
|
|
45
|
-
lastNetworkTs = event.timestamp;
|
|
46
|
-
}
|
|
47
|
-
else {
|
|
48
|
-
lastNetworkTs = Date.now();
|
|
49
|
-
}
|
|
50
|
-
if (event?.type === "request") {
|
|
51
|
-
totalRequests += 1;
|
|
52
|
-
if (!inflight.has(event.requestId)) {
|
|
53
|
-
inflight.add(event.requestId);
|
|
54
|
-
inflightRequests += 1;
|
|
55
|
-
}
|
|
56
|
-
}
|
|
57
|
-
if (event?.type === "response" || event?.type === "failed") {
|
|
58
|
-
if (inflight.delete(event.requestId)) {
|
|
59
|
-
inflightRequests = Math.max(0, inflightRequests - 1);
|
|
158
|
+
const inflightTracker = new inflight_tracker_1.InflightTracker({
|
|
159
|
+
shouldIgnore: (event) => {
|
|
160
|
+
if (!isBlacklisted(event.url)) {
|
|
161
|
+
return false;
|
|
60
162
|
}
|
|
163
|
+
(0, debug_1.debug_log)("[pagepocket] blacklist match (ignore inflight)", event.url);
|
|
164
|
+
return true;
|
|
61
165
|
}
|
|
166
|
+
});
|
|
167
|
+
const getStats = () => {
|
|
168
|
+
const stats = inflightTracker.getStats();
|
|
169
|
+
(0, debug_1.debug_log)("[pagepocket] stats", stats);
|
|
170
|
+
return stats;
|
|
171
|
+
};
|
|
172
|
+
const pendingEvents = new Set();
|
|
173
|
+
const onEvent = (event) => {
|
|
174
|
+
this.eventStream.emit(event);
|
|
175
|
+
const stats = inflightTracker.handleEvent(event);
|
|
176
|
+
(0, debug_1.debug_log)("[pagepocket] inflight", stats.inflightRequests, "event", event.type, event.requestId, event.url);
|
|
62
177
|
const task = store.handleEvent(event);
|
|
63
178
|
pendingEvents.add(task);
|
|
64
179
|
task.finally(() => pendingEvents.delete(task));
|
|
65
180
|
};
|
|
66
|
-
|
|
181
|
+
(0, debug_1.debug_log)("[pagepocket] session start");
|
|
182
|
+
const session = await options.interceptor.start(this.target, { onEvent }, { timeoutMs: options?.timeoutMs, maxDurationMs: options?.maxDurationMs });
|
|
183
|
+
(0, debug_1.debug_log)("[pagepocket] session started");
|
|
67
184
|
if (this.target.kind === "url" && session?.navigate) {
|
|
185
|
+
(0, debug_1.debug_log)("[pagepocket] navigate", this.target.url);
|
|
68
186
|
await session.navigate(this.target.url);
|
|
187
|
+
(0, debug_1.debug_log)("[pagepocket] navigated");
|
|
69
188
|
}
|
|
70
189
|
if (completion.length === 1) {
|
|
71
190
|
await completion[0].wait({
|
|
72
191
|
now: () => Date.now(),
|
|
73
|
-
getStats
|
|
74
|
-
inflightRequests,
|
|
75
|
-
lastNetworkTs,
|
|
76
|
-
totalRequests
|
|
77
|
-
})
|
|
192
|
+
getStats
|
|
78
193
|
});
|
|
79
194
|
}
|
|
80
195
|
else if (completion.length > 1) {
|
|
81
196
|
await Promise.race(completion.map((strategy) => strategy.wait({
|
|
82
197
|
now: () => Date.now(),
|
|
83
|
-
getStats
|
|
84
|
-
inflightRequests,
|
|
85
|
-
lastNetworkTs,
|
|
86
|
-
totalRequests
|
|
87
|
-
})
|
|
198
|
+
getStats
|
|
88
199
|
})));
|
|
89
200
|
}
|
|
201
|
+
(0, debug_1.debug_log)("[pagepocket] session stop");
|
|
90
202
|
await session.stop();
|
|
203
|
+
(0, debug_1.debug_log)("[pagepocket] session stopped");
|
|
91
204
|
await Promise.all(pendingEvents);
|
|
205
|
+
this.eventStream.close();
|
|
92
206
|
const entryUrl = this.target.kind === "url" ? this.target.url : "";
|
|
93
207
|
return (0, snapshot_builder_1.buildSnapshot)({
|
|
94
208
|
entryUrl,
|
|
@@ -99,6 +213,7 @@ class PagePocket {
|
|
|
99
213
|
pathResolver,
|
|
100
214
|
rewriteEntry,
|
|
101
215
|
rewriteCSS,
|
|
216
|
+
replaceElements,
|
|
102
217
|
warnings: store.getWarnings()
|
|
103
218
|
});
|
|
104
219
|
}
|