@pagepocket/lib 0.6.3 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +265 -3
- package/dist/builtin-blacklist.d.ts +3 -0
- package/dist/builtin-blacklist.js +6 -0
- package/dist/debug.d.ts +2 -0
- package/dist/debug.js +18 -0
- package/dist/hackers/index.js +2 -0
- package/dist/hackers/replay-css-proxy.d.ts +2 -0
- package/dist/hackers/replay-css-proxy.js +206 -0
- package/dist/hackers/replay-dom-rewrite.js +57 -26
- package/dist/index.d.ts +3 -1
- package/dist/index.js +18 -1
- package/dist/inflight-tracker.d.ts +19 -0
- package/dist/inflight-tracker.js +48 -0
- package/dist/pagepocket.d.ts +3 -1
- package/dist/pagepocket.js +150 -35
- package/dist/replace-elements.d.ts +9 -0
- package/dist/replace-elements.js +258 -0
- package/dist/replay-script.js +286 -2
- package/dist/resource-proxy.d.ts +34 -0
- package/dist/resource-proxy.js +284 -0
- package/dist/rewrite-links.d.ts +8 -0
- package/dist/rewrite-links.js +8 -0
- package/dist/snapshot-builder.d.ts +2 -1
- package/dist/snapshot-builder.js +51 -1
- package/dist/types.d.ts +88 -1
- package/dist/writers.d.ts +2 -2
- package/dist/writers.js +56 -4
- package/package.json +3 -3
|
@@ -5,7 +5,9 @@ exports.replayDomRewriter = {
|
|
|
5
5
|
id: "replay-dom-rewriter",
|
|
6
6
|
stage: "replay",
|
|
7
7
|
build: () => `
|
|
8
|
-
//
|
|
8
|
+
// NOTE: When a resource is not found in the snapshot, we keep the original URL.
|
|
9
|
+
// (User choice) We still preserve a limited placeholder behavior for some tags,
|
|
10
|
+
// but only when we have a recorded response body to convert to a data URL.
|
|
9
11
|
const transparentGif = "data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///ywAAAAAAQABAAACAUwAOw==";
|
|
10
12
|
const emptyScript = "data:text/javascript,/*pagepocket-missing*/";
|
|
11
13
|
const emptyStyle = "data:text/css,/*pagepocket-missing*/";
|
|
@@ -93,7 +95,7 @@ exports.replayDomRewriter = {
|
|
|
93
95
|
return;
|
|
94
96
|
}
|
|
95
97
|
const tag = (element.tagName || "").toLowerCase();
|
|
96
|
-
if (tag === "img" || tag === "source" || tag === "video" || tag === "audio" || tag === "script" || tag === "iframe") {
|
|
98
|
+
if (tag === "img" || tag === "source" || tag === "video" || tag === "audio" || tag === "script" || tag === "iframe" || tag === "object" || tag === "embed") {
|
|
97
99
|
const src = currentSrc;
|
|
98
100
|
if (src && !isLocalResource(src) && !src.startsWith("data:") && !src.startsWith("blob:")) {
|
|
99
101
|
const localPath = findLocalPath(src);
|
|
@@ -102,8 +104,13 @@ exports.replayDomRewriter = {
|
|
|
102
104
|
return;
|
|
103
105
|
}
|
|
104
106
|
const record = findByUrl(src);
|
|
105
|
-
|
|
106
|
-
|
|
107
|
+
if (record) {
|
|
108
|
+
element.setAttribute("src", toDataUrl(record));
|
|
109
|
+
return;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
// Keep original URL when we have no snapshot match.
|
|
113
|
+
return;
|
|
107
114
|
}
|
|
108
115
|
}
|
|
109
116
|
|
|
@@ -117,8 +124,12 @@ exports.replayDomRewriter = {
|
|
|
117
124
|
return;
|
|
118
125
|
}
|
|
119
126
|
const record = findByUrl(href);
|
|
120
|
-
|
|
121
|
-
|
|
127
|
+
if (record) {
|
|
128
|
+
element.setAttribute("href", toDataUrl(record, "text/css"));
|
|
129
|
+
return;
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
return;
|
|
122
133
|
}
|
|
123
134
|
}
|
|
124
135
|
|
|
@@ -166,14 +177,9 @@ exports.replayDomRewriter = {
|
|
|
166
177
|
const dataUrl = toDataUrl(record, fallbackType);
|
|
167
178
|
return originalSetAttribute.call(this, name, dataUrl);
|
|
168
179
|
}
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
}
|
|
173
|
-
if (attr === "href") {
|
|
174
|
-
const fallback = relLower === "stylesheet" ? emptyStyle : emptyStyle;
|
|
175
|
-
return originalSetAttribute.call(this, name, fallback);
|
|
176
|
-
}
|
|
180
|
+
|
|
181
|
+
// Keep original URL on miss.
|
|
182
|
+
return originalSetAttribute.call(this, name, value);
|
|
177
183
|
}
|
|
178
184
|
return originalSetAttribute.call(this, name, value);
|
|
179
185
|
};
|
|
@@ -207,8 +213,12 @@ exports.replayDomRewriter = {
|
|
|
207
213
|
return;
|
|
208
214
|
}
|
|
209
215
|
const record = findByUrl(rawValue);
|
|
210
|
-
|
|
211
|
-
|
|
216
|
+
if (record) {
|
|
217
|
+
setter.call(this, toDataUrl(record));
|
|
218
|
+
return;
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
setter.call(this, rawValue);
|
|
212
222
|
});
|
|
213
223
|
return;
|
|
214
224
|
}
|
|
@@ -222,8 +232,12 @@ exports.replayDomRewriter = {
|
|
|
222
232
|
return;
|
|
223
233
|
}
|
|
224
234
|
const record = findByUrl(rawValue);
|
|
225
|
-
|
|
226
|
-
|
|
235
|
+
if (record) {
|
|
236
|
+
setter.call(this, toDataUrl(record));
|
|
237
|
+
return;
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
setter.call(this, rawValue);
|
|
227
241
|
});
|
|
228
242
|
|
|
229
243
|
patchProperty(HTMLScriptElement.prototype, "src", function(value, setter) {
|
|
@@ -240,8 +254,12 @@ exports.replayDomRewriter = {
|
|
|
240
254
|
return;
|
|
241
255
|
}
|
|
242
256
|
const record = findByUrl(rawValue);
|
|
243
|
-
|
|
244
|
-
|
|
257
|
+
if (record) {
|
|
258
|
+
setter.call(this, toDataUrl(record));
|
|
259
|
+
return;
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
setter.call(this, rawValue);
|
|
245
263
|
});
|
|
246
264
|
return;
|
|
247
265
|
}
|
|
@@ -255,8 +273,13 @@ exports.replayDomRewriter = {
|
|
|
255
273
|
return;
|
|
256
274
|
}
|
|
257
275
|
const record = findByUrl(rawValue);
|
|
258
|
-
|
|
259
|
-
|
|
276
|
+
if (record) {
|
|
277
|
+
setter.call(this, toDataUrl(record));
|
|
278
|
+
return;
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
// Keep original URL on miss.
|
|
282
|
+
setter.call(this, rawValue);
|
|
260
283
|
});
|
|
261
284
|
|
|
262
285
|
patchProperty(HTMLLinkElement.prototype, "href", function(value, setter) {
|
|
@@ -275,8 +298,12 @@ exports.replayDomRewriter = {
|
|
|
275
298
|
return;
|
|
276
299
|
}
|
|
277
300
|
const record = findByUrl(rawValue);
|
|
278
|
-
|
|
279
|
-
|
|
301
|
+
if (record) {
|
|
302
|
+
setter.call(this, toDataUrl(record, relLower.includes("stylesheet") ? "text/css" : undefined));
|
|
303
|
+
return;
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
setter.call(this, rawValue);
|
|
280
307
|
});
|
|
281
308
|
return;
|
|
282
309
|
}
|
|
@@ -290,8 +317,12 @@ exports.replayDomRewriter = {
|
|
|
290
317
|
return;
|
|
291
318
|
}
|
|
292
319
|
const record = findByUrl(rawValue);
|
|
293
|
-
|
|
294
|
-
|
|
320
|
+
if (record) {
|
|
321
|
+
setter.call(this, toDataUrl(record, relLower.includes("stylesheet") ? "text/css" : undefined));
|
|
322
|
+
return;
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
setter.call(this, rawValue);
|
|
295
326
|
});
|
|
296
327
|
|
|
297
328
|
patchProperty(HTMLImageElement.prototype, "srcset", function(value, setter) {
|
package/dist/index.d.ts
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
export { PagePocket } from "./pagepocket";
|
|
2
|
-
export type { ApiRecord, ApiSnapshot, CaptureOptions, CompletionContext, CompletionStrategy, ContentRef, ContentStore, ContentStoreHandle, NetworkRequestEvent, NetworkRequestFailedEvent, NetworkResponseEvent, PagePocketOptions, PageSnapshot, PathResolver, ResourceFilter, SnapshotFile, WriteFSOptions, WriteResult, ZipOptions } from "./types";
|
|
2
|
+
export type { ApiRecord, ApiSnapshot, ApplyOptions, CaptureOptions, CompletionContext, CompletionStrategy, ContentRef, ContentStore, ContentStoreHandle, MatchQuery, NetworkEventStream, NetworkRequestEvent, NetworkRequestFailedEvent, NetworkResponseEvent, PagePocketOptions, PageSnapshot, PathResolver, ReplaceAction, ReplaceElementContext, ReplaceElementFn, ReplaceElementFnWithQuery, ReplaceElementRule, ReplaceElementsConfig, ResourceFilter, SnapshotFile, WriteFSOptions, WriteResult, ZipWriteResult, ZipResult, ZipOptions } from "./types";
|
|
3
3
|
export { HybridContentStore } from "./content-store";
|
|
4
4
|
export { createDefaultPathResolver, withPrefixPathResolver } from "./path-resolver";
|
|
5
5
|
export { createDefaultResourceFilter } from "./resource-filter";
|
|
6
|
+
export * from "./builtin-blacklist";
|
|
6
7
|
export { networkIdle, timeout } from "./completion";
|
|
7
8
|
export { buildReplayScript } from "./replay-script";
|
|
8
9
|
export { buildPreloadScript } from "./preload";
|
|
9
10
|
export { rewriteEntryHtml, rewriteJsText } from "./rewrite-links";
|
|
10
11
|
export { rewriteCssText } from "./css-rewrite";
|
|
11
12
|
export { writeToFS, toZip } from "./writers";
|
|
13
|
+
export { applyReplaceElements } from "./replace-elements";
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,20 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
+
// Compiler-emitted re-export helper: exposes property `k` of module `m` on object `o`
// under the name `k2` (which defaults to `k`). An already-installed `this.__createBinding`
// is reused when present.
// Primary path (Object.create available): m's own property descriptor is reused, except
// when it is missing, is an accessor on a module without `__esModule`, or is a writable or
// configurable data property; in those cases an enumerable getter returning `m[k]` is used
// so the binding stays live. Fallback path: a plain one-time assignment `o[k2] = m[k]`.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
// Compiler-emitted helper behind `export * from "..."`: copies every enumerable key of
// module `m` onto `exports` via __createBinding, skipping "default" and any name that
// `exports` already owns (so explicit exports win over star re-exports).
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
14
|
+
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
|
+
};
|
|
2
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.toZip = exports.writeToFS = exports.rewriteCssText = exports.rewriteJsText = exports.rewriteEntryHtml = exports.buildPreloadScript = exports.buildReplayScript = exports.timeout = exports.networkIdle = exports.createDefaultResourceFilter = exports.withPrefixPathResolver = exports.createDefaultPathResolver = exports.HybridContentStore = exports.PagePocket = void 0;
|
|
17
|
+
exports.applyReplaceElements = exports.toZip = exports.writeToFS = exports.rewriteCssText = exports.rewriteJsText = exports.rewriteEntryHtml = exports.buildPreloadScript = exports.buildReplayScript = exports.timeout = exports.networkIdle = exports.createDefaultResourceFilter = exports.withPrefixPathResolver = exports.createDefaultPathResolver = exports.HybridContentStore = exports.PagePocket = void 0;
|
|
4
18
|
var pagepocket_1 = require("./pagepocket");
|
|
5
19
|
Object.defineProperty(exports, "PagePocket", { enumerable: true, get: function () { return pagepocket_1.PagePocket; } });
|
|
6
20
|
var content_store_1 = require("./content-store");
|
|
@@ -10,6 +24,7 @@ Object.defineProperty(exports, "createDefaultPathResolver", { enumerable: true,
|
|
|
10
24
|
Object.defineProperty(exports, "withPrefixPathResolver", { enumerable: true, get: function () { return path_resolver_1.withPrefixPathResolver; } });
|
|
11
25
|
var resource_filter_1 = require("./resource-filter");
|
|
12
26
|
Object.defineProperty(exports, "createDefaultResourceFilter", { enumerable: true, get: function () { return resource_filter_1.createDefaultResourceFilter; } });
|
|
27
|
+
__exportStar(require("./builtin-blacklist"), exports);
|
|
13
28
|
var completion_1 = require("./completion");
|
|
14
29
|
Object.defineProperty(exports, "networkIdle", { enumerable: true, get: function () { return completion_1.networkIdle; } });
|
|
15
30
|
Object.defineProperty(exports, "timeout", { enumerable: true, get: function () { return completion_1.timeout; } });
|
|
@@ -25,3 +40,5 @@ Object.defineProperty(exports, "rewriteCssText", { enumerable: true, get: functi
|
|
|
25
40
|
var writers_1 = require("./writers");
|
|
26
41
|
Object.defineProperty(exports, "writeToFS", { enumerable: true, get: function () { return writers_1.writeToFS; } });
|
|
27
42
|
Object.defineProperty(exports, "toZip", { enumerable: true, get: function () { return writers_1.toZip; } });
|
|
43
|
+
var replace_elements_1 = require("./replace-elements");
|
|
44
|
+
Object.defineProperty(exports, "applyReplaceElements", { enumerable: true, get: function () { return replace_elements_1.applyReplaceElements; } });
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { NetworkEvent } from "@pagepocket/interceptor";
|
|
2
|
+
/**
 * Snapshot of the counters kept by InflightTracker, as returned by
 * `handleEvent` and `getStats`: the number of requests currently in flight,
 * the timestamp recorded for the most recent non-ignored event, and the
 * running count of "request" events.
 */
type InflightStats = {
|
|
3
|
+
inflightRequests: number;
|
|
4
|
+
lastNetworkTs: number;
|
|
5
|
+
totalRequests: number;
|
|
6
|
+
};
|
|
7
|
+
/**
 * Type declaration for the in-flight request tracker.
 *
 * The constructor accepts an optional `shouldIgnore` predicate over network
 * events. `handleEvent` takes one network event and returns the resulting
 * InflightStats; `getStats` returns the current InflightStats without
 * consuming an event. All state fields are declared private.
 */
export declare class InflightTracker {
|
|
8
|
+
private inflight;
|
|
9
|
+
private inflightRequests;
|
|
10
|
+
private lastNetworkTs;
|
|
11
|
+
private totalRequests;
|
|
12
|
+
private shouldIgnore;
|
|
13
|
+
constructor(options?: {
|
|
14
|
+
shouldIgnore?: (event: NetworkEvent) => boolean;
|
|
15
|
+
});
|
|
16
|
+
handleEvent(event: NetworkEvent): InflightStats;
|
|
17
|
+
getStats(): InflightStats;
|
|
18
|
+
}
|
|
19
|
+
export {};
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.InflightTracker = void 0;
|
|
4
|
+
/**
 * Tracks how many network requests are currently in flight, based on a stream
 * of interceptor events (`request`, `response`, `failed`).
 *
 * State kept:
 *  - `inflight`          set of request ids that were seen and not yet settled
 *  - `inflightRequests`  size of that set, kept as a plain counter
 *  - `lastNetworkTs`     timestamp of the most recent non-ignored event
 *  - `totalRequests`     number of `request` events seen (duplicates included)
 */
class InflightTracker {
    /**
     * @param {{ shouldIgnore?: (event: object) => boolean }} [options]
     *   `shouldIgnore` lets the caller exclude events from all accounting;
     *   when omitted, no event is ignored.
     */
    constructor(options) {
        this.inflight = new Set();
        this.inflightRequests = 0;
        this.lastNetworkTs = Date.now();
        this.totalRequests = 0;
        this.shouldIgnore = options?.shouldIgnore ?? (() => false);
    }

    /**
     * Folds one network event into the counters.
     *
     * @param {object} event  Event with `type`, `requestId` and an optional `timestamp`.
     * @returns {{ inflightRequests: number, lastNetworkTs: number, totalRequests: number }}
     *   The stats after the event has been applied (unchanged if the event is ignored).
     */
    handleEvent(event) {
        if (this.shouldIgnore(event)) {
            return this.getStats();
        }
        // A missing (or falsy) timestamp falls back to the wall clock, so the
        // "last activity" marker always moves forward on a counted event.
        this.lastNetworkTs = event?.timestamp ? event.timestamp : Date.now();

        const type = event?.type;
        if (type === "request") {
            // Every request event counts towards the total, but a request id
            // is only entered into the in-flight set once.
            this.totalRequests += 1;
            if (!this.inflight.has(event.requestId)) {
                this.inflight.add(event.requestId);
                this.inflightRequests += 1;
            }
        }
        else if (type === "response" || type === "failed") {
            // Only settle requests we actually saw start; unknown ids are a no-op.
            if (this.inflight.delete(event.requestId)) {
                this.inflightRequests = Math.max(0, this.inflightRequests - 1);
            }
        }
        // Single source for the stats shape: both return paths go through getStats().
        return this.getStats();
    }

    /**
     * @returns {{ inflightRequests: number, lastNetworkTs: number, totalRequests: number }}
     *   A fresh snapshot object of the current counters.
     */
    getStats() {
        return {
            inflightRequests: this.inflightRequests,
            lastNetworkTs: this.lastNetworkTs,
            totalRequests: this.totalRequests
        };
    }
}
|
|
48
|
+
exports.InflightTracker = InflightTracker;
|
package/dist/pagepocket.d.ts
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import type { InterceptTarget } from "@pagepocket/interceptor";
|
|
2
|
-
import type { CaptureOptions, PagePocketOptions, PageSnapshot } from "./types";
|
|
2
|
+
import type { CaptureOptions, NetworkEventStream, PagePocketOptions, PageSnapshot } from "./types";
|
|
3
3
|
/**
 * Type declaration for the capture entry point.
 *
 * The constructor is private; instances are obtained through the static
 * `fromURL` / `fromTarget` factories. `interceptedRequestEvents()` returns a
 * NetworkEventStream (new in this version, backed by the private
 * `eventStream` field). `capture()` resolves to a PageSnapshot.
 */
export declare class PagePocket {
|
|
4
4
|
private target;
|
|
5
5
|
private options;
|
|
6
|
+
private eventStream;
|
|
6
7
|
private constructor();
|
|
7
8
|
static fromURL(url: string, options?: PagePocketOptions): PagePocket;
|
|
8
9
|
static fromTarget(target: InterceptTarget, options?: PagePocketOptions): PagePocket;
|
|
10
|
+
interceptedRequestEvents(): NetworkEventStream;
|
|
9
11
|
capture(options?: CaptureOptions): Promise<PageSnapshot>;
|
|
10
12
|
}
|
package/dist/pagepocket.js
CHANGED
|
@@ -3,12 +3,111 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.PagePocket = void 0;
|
|
4
4
|
const completion_1 = require("./completion");
|
|
5
5
|
const content_store_1 = require("./content-store");
|
|
6
|
+
const debug_1 = require("./debug");
|
|
7
|
+
const inflight_tracker_1 = require("./inflight-tracker");
|
|
6
8
|
const network_store_1 = require("./network-store");
|
|
7
9
|
const path_resolver_1 = require("./path-resolver");
|
|
8
10
|
const resource_filter_1 = require("./resource-filter");
|
|
9
11
|
const snapshot_builder_1 = require("./snapshot-builder");
|
|
12
|
+
/**
 * Single-consumer async queue: values pushed by a producer are handed to an
 * async iterator, either immediately (when the consumer is already waiting)
 * or from an internal buffer.
 *
 * Items are stored as tagged records, `{ kind: "value", value }` or
 * `{ kind: "close" }`, so that closing is ordered after every value that was
 * pushed before it.
 */
class AsyncEventQueue {
    constructor() {
        this.values = [];   // buffered items not yet consumed
        this.pending = [];  // resolvers of consumers currently awaiting an item
        this.closed = false;
    }

    /**
     * Enqueues a value. Ignored once the queue has been closed.
     * @param {*} value
     */
    push(value) {
        if (this.closed) {
            return;
        }
        const item = { kind: "value", value };
        if (this.pending.length > 0) {
            // A consumer is already waiting: hand the value over directly.
            const resolve = this.pending.shift();
            resolve?.(item);
            return;
        }
        this.values.push(item);
    }

    /**
     * Closes the queue. Waiting consumers are released; otherwise a close
     * marker is buffered behind any values still queued. Idempotent.
     */
    close() {
        if (this.closed) {
            return;
        }
        this.closed = true;
        if (this.pending.length > 0) {
            const waiters = this.pending;
            this.pending = [];
            for (const resolve of waiters) {
                resolve({ kind: "close" });
            }
            return;
        }
        this.values.push({ kind: "close" });
    }

    /**
     * Yields buffered and future values until the queue is closed.
     * @returns {AsyncGenerator<*, void, void>}
     */
    async *iterate() {
        while (true) {
            if (this.values.length > 0) {
                const buffered = this.values.shift();
                if (!buffered || buffered.kind === "close") {
                    return;
                }
                yield buffered.value;
                continue;
            }
            if (this.closed) {
                return;
            }
            // Nothing buffered: park until push() or close() resolves us.
            const next = await new Promise((resolve) => {
                this.pending.push(resolve);
            });
            if (next.kind === "close") {
                return;
            }
            yield next.value;
        }
    }
}

/**
 * Fan-out of one event source to any number of async-iterable subscribers.
 * Each subscriber gets its own AsyncEventQueue, so a slow consumer does not
 * block the others.
 *
 * NOTE(review): a subscriber that is obtained but never iterated keeps its
 * queue registered and buffering until close() is called, because the
 * clean-up in subscribe() only runs once the generator has been started.
 */
class BroadcastAsyncIterable {
    constructor() {
        this.queues = new Set();
        this.closed = false;
    }

    /**
     * Creates a new subscription.
     *
     * Subscribing after close() returns an iterator that finishes immediately.
     * Such a queue is no longer registered in `queues`: close() has already run
     * and would never clear it, so registering it would leak the entry.
     *
     * @returns {AsyncGenerator<*, void, void>} values emitted after this call
     */
    subscribe() {
        const queue = new AsyncEventQueue();
        if (this.closed) {
            queue.close();
        }
        else {
            this.queues.add(queue);
        }
        return (async function* (owner) {
            try {
                for await (const value of queue.iterate()) {
                    yield value;
                }
            }
            finally {
                // Runs on normal completion and on early exit (break / return()).
                owner.queues.delete(queue);
                queue.close();
            }
        })(this);
    }

    /**
     * Delivers a value to every current subscriber.
     * @param {*} value
     */
    emit(value) {
        if (this.queues.size === 0) {
            return;
        }
        for (const queue of this.queues) {
            queue.push(value);
        }
    }

    /**
     * Ends every subscription and rejects further registration. Idempotent.
     */
    close() {
        if (this.closed) {
            return;
        }
        this.closed = true;
        for (const queue of this.queues) {
            queue.close();
        }
        this.queues.clear();
    }
}
|
|
10
108
|
class PagePocket {
|
|
11
109
|
constructor(target, options) {
|
|
110
|
+
this.eventStream = new BroadcastAsyncIterable();
|
|
12
111
|
this.target = target;
|
|
13
112
|
this.options = options ?? {};
|
|
14
113
|
}
|
|
@@ -18,77 +117,92 @@ class PagePocket {
|
|
|
18
117
|
static fromTarget(target, options) {
|
|
19
118
|
return new PagePocket(target, options);
|
|
20
119
|
}
|
|
120
|
+
interceptedRequestEvents() {
|
|
121
|
+
return this.eventStream.subscribe();
|
|
122
|
+
}
|
|
21
123
|
async capture(options) {
|
|
22
124
|
if (!options?.interceptor) {
|
|
23
125
|
throw new Error("CaptureOptions.interceptor is required.");
|
|
24
126
|
}
|
|
25
127
|
const contentStore = options?.contentStore ?? new content_store_1.HybridContentStore();
|
|
26
|
-
const
|
|
128
|
+
const baseFilter = options?.filter ?? (0, resource_filter_1.createDefaultResourceFilter)();
|
|
129
|
+
const blacklist = options?.blacklist ?? [];
|
|
130
|
+
const isBlacklisted = (url) => typeof url === "string" && blacklist.some((pattern) => pattern.test(url));
|
|
131
|
+
const filter = blacklist.length === 0
|
|
132
|
+
? baseFilter
|
|
133
|
+
: {
|
|
134
|
+
shouldSave(req, res) {
|
|
135
|
+
if (isBlacklisted(req.url)) {
|
|
136
|
+
(0, debug_1.debug_log)("[pagepocket] blacklist match (skip save)", req.url);
|
|
137
|
+
return false;
|
|
138
|
+
}
|
|
139
|
+
return baseFilter.shouldSave(req, res);
|
|
140
|
+
}
|
|
141
|
+
};
|
|
27
142
|
const pathResolver = options?.pathResolver ?? (0, path_resolver_1.createDefaultPathResolver)();
|
|
28
143
|
const rewriteEntry = options?.rewriteEntry ?? true;
|
|
29
144
|
const rewriteCSS = options?.rewriteCSS ?? true;
|
|
145
|
+
const replaceElements = options?.replaceElements;
|
|
30
146
|
const limits = options?.limits;
|
|
31
147
|
const completionStrategies = (0, completion_1.normalizeCompletion)(options?.completion);
|
|
32
|
-
const
|
|
148
|
+
const idleMs = options?.timeoutMs ?? 5000;
|
|
149
|
+
const maxDurationMs = options?.maxDurationMs;
|
|
150
|
+
const completion = completionStrategies.length > 0
|
|
151
|
+
? completionStrategies
|
|
152
|
+
: [(0, completion_1.networkIdle)(idleMs), ...(maxDurationMs !== undefined ? [(0, completion_1.timeout)(maxDurationMs)] : [])];
|
|
33
153
|
const store = new network_store_1.NetworkStore({
|
|
34
154
|
contentStore,
|
|
35
155
|
filter,
|
|
36
156
|
limits
|
|
37
157
|
});
|
|
38
|
-
const
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
const pendingEvents = new Set();
|
|
43
|
-
const onEvent = (event) => {
|
|
44
|
-
if (event?.timestamp) {
|
|
45
|
-
lastNetworkTs = event.timestamp;
|
|
46
|
-
}
|
|
47
|
-
else {
|
|
48
|
-
lastNetworkTs = Date.now();
|
|
49
|
-
}
|
|
50
|
-
if (event?.type === "request") {
|
|
51
|
-
totalRequests += 1;
|
|
52
|
-
if (!inflight.has(event.requestId)) {
|
|
53
|
-
inflight.add(event.requestId);
|
|
54
|
-
inflightRequests += 1;
|
|
55
|
-
}
|
|
56
|
-
}
|
|
57
|
-
if (event?.type === "response" || event?.type === "failed") {
|
|
58
|
-
if (inflight.delete(event.requestId)) {
|
|
59
|
-
inflightRequests = Math.max(0, inflightRequests - 1);
|
|
158
|
+
const inflightTracker = new inflight_tracker_1.InflightTracker({
|
|
159
|
+
shouldIgnore: (event) => {
|
|
160
|
+
if (!isBlacklisted(event.url)) {
|
|
161
|
+
return false;
|
|
60
162
|
}
|
|
163
|
+
(0, debug_1.debug_log)("[pagepocket] blacklist match (ignore inflight)", event.url);
|
|
164
|
+
return true;
|
|
61
165
|
}
|
|
166
|
+
});
|
|
167
|
+
const getStats = () => {
|
|
168
|
+
const stats = inflightTracker.getStats();
|
|
169
|
+
(0, debug_1.debug_log)("[pagepocket] stats", stats);
|
|
170
|
+
return stats;
|
|
171
|
+
};
|
|
172
|
+
const pendingEvents = new Set();
|
|
173
|
+
const onEvent = (event) => {
|
|
174
|
+
this.eventStream.emit(event);
|
|
175
|
+
const stats = inflightTracker.handleEvent(event);
|
|
176
|
+
(0, debug_1.debug_log)("[pagepocket] inflight", stats.inflightRequests, "event", event.type, event.requestId, event.url);
|
|
62
177
|
const task = store.handleEvent(event);
|
|
63
178
|
pendingEvents.add(task);
|
|
64
179
|
task.finally(() => pendingEvents.delete(task));
|
|
65
180
|
};
|
|
66
|
-
|
|
181
|
+
(0, debug_1.debug_log)("[pagepocket] session start");
|
|
182
|
+
const session = await options.interceptor.start(this.target, { onEvent }, { timeoutMs: options?.timeoutMs, maxDurationMs: options?.maxDurationMs });
|
|
183
|
+
(0, debug_1.debug_log)("[pagepocket] session started");
|
|
67
184
|
if (this.target.kind === "url" && session?.navigate) {
|
|
185
|
+
(0, debug_1.debug_log)("[pagepocket] navigate", this.target.url);
|
|
68
186
|
await session.navigate(this.target.url);
|
|
187
|
+
(0, debug_1.debug_log)("[pagepocket] navigated");
|
|
69
188
|
}
|
|
70
189
|
if (completion.length === 1) {
|
|
71
190
|
await completion[0].wait({
|
|
72
191
|
now: () => Date.now(),
|
|
73
|
-
getStats
|
|
74
|
-
inflightRequests,
|
|
75
|
-
lastNetworkTs,
|
|
76
|
-
totalRequests
|
|
77
|
-
})
|
|
192
|
+
getStats
|
|
78
193
|
});
|
|
79
194
|
}
|
|
80
195
|
else if (completion.length > 1) {
|
|
81
196
|
await Promise.race(completion.map((strategy) => strategy.wait({
|
|
82
197
|
now: () => Date.now(),
|
|
83
|
-
getStats
|
|
84
|
-
inflightRequests,
|
|
85
|
-
lastNetworkTs,
|
|
86
|
-
totalRequests
|
|
87
|
-
})
|
|
198
|
+
getStats
|
|
88
199
|
})));
|
|
89
200
|
}
|
|
201
|
+
(0, debug_1.debug_log)("[pagepocket] session stop");
|
|
90
202
|
await session.stop();
|
|
203
|
+
(0, debug_1.debug_log)("[pagepocket] session stopped");
|
|
91
204
|
await Promise.all(pendingEvents);
|
|
205
|
+
this.eventStream.close();
|
|
92
206
|
const entryUrl = this.target.kind === "url" ? this.target.url : "";
|
|
93
207
|
return (0, snapshot_builder_1.buildSnapshot)({
|
|
94
208
|
entryUrl,
|
|
@@ -99,6 +213,7 @@ class PagePocket {
|
|
|
99
213
|
pathResolver,
|
|
100
214
|
rewriteEntry,
|
|
101
215
|
rewriteCSS,
|
|
216
|
+
replaceElements,
|
|
102
217
|
warnings: store.getWarnings()
|
|
103
218
|
});
|
|
104
219
|
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { CheerioAPI } from "cheerio";
|
|
2
|
+
import type { ReplaceElementsConfig } from "./types";
|
|
3
|
+
/**
 * Declaration of `applyReplaceElements`: an async function taking a Cheerio
 * document handle (`$`), the entry URL, the URL of the document being
 * processed, an optional ReplaceElementsConfig and a flag telling whether this
 * is the entry document. It resolves with no value.
 * NOTE(review): presumably it mutates `$` in place according to the
 * configured rules; the implementation is not visible here, so confirm.
 */
export declare const applyReplaceElements: (input: {
|
|
4
|
+
$: CheerioAPI;
|
|
5
|
+
entryUrl: string;
|
|
6
|
+
url: string;
|
|
7
|
+
replaceElements?: ReplaceElementsConfig;
|
|
8
|
+
isEntryDocument: boolean;
|
|
9
|
+
}) => Promise<void>;
|