@pagepocket/lib 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,9 @@ exports.replayDomRewriter = {
5
5
  id: "replay-dom-rewriter",
6
6
  stage: "replay",
7
7
  build: () => `
8
- // Placeholder data URLs for missing resources.
8
+ // NOTE: When a resource is not found in the snapshot, we keep the original URL.
9
+ // (User choice) We still preserve a limited placeholder behavior for some tags,
10
+ // but only when we have a recorded response body to convert to a data URL.
9
11
  const transparentGif = "data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///ywAAAAAAQABAAACAUwAOw==";
10
12
  const emptyScript = "data:text/javascript,/*pagepocket-missing*/";
11
13
  const emptyStyle = "data:text/css,/*pagepocket-missing*/";
@@ -31,9 +33,33 @@ exports.replayDomRewriter = {
31
33
  }
32
34
  };
33
35
 
36
+ const rewritten = new WeakMap();
37
+
34
38
  // Rewrite srcset values to local files only (avoid data: URLs in srcset).
35
39
  const rewriteSrcset = (value) => {
36
40
  if (!value) return value;
41
+
42
+ // Substack-style image/fetch URLs include commas inside the URL token
43
+ // (",w_40,h_40,..."). This makes the srcset invalid and browsers will
44
+ // parse it into garbage candidate URLs. Prefer dropping srcset and relying
45
+ // on the rewritten img[src].
46
+ try {
47
+ const trimmed = String(value || "").trim();
48
+ const hasFetchTransform = trimmed.includes("/image/fetch/");
49
+ const hasEncodedUrlTail = trimmed.includes("https%3A%2F%2F");
50
+ const hasCommaTokens =
51
+ trimmed.includes(",w_") ||
52
+ trimmed.includes(", w_") ||
53
+ trimmed.includes(",h_") ||
54
+ trimmed.includes(", h_") ||
55
+ trimmed.includes(",c_") ||
56
+ trimmed.includes(", c_");
57
+
58
+ if (hasFetchTransform && hasEncodedUrlTail && hasCommaTokens) {
59
+ return "";
60
+ }
61
+ } catch {}
62
+
37
63
  return value.split(",").map((part) => {
38
64
  const trimmed = part.trim();
39
65
  if (!trimmed) return trimmed;
@@ -46,7 +72,7 @@ exports.replayDomRewriter = {
46
72
  return descriptor ? localPath + " " + descriptor : localPath;
47
73
  }
48
74
  return trimmed;
49
- }).join(", ");
75
+ }).join(",");
50
76
  };
51
77
 
52
78
  // Rewrite element attributes to local files or data URLs.
@@ -56,9 +82,21 @@ exports.replayDomRewriter = {
56
82
  onReady(() => rewriteElement(element));
57
83
  return;
58
84
  }
85
+ const prev = rewritten.get(element);
86
+ const currentSrc = element.getAttribute("src");
87
+ const currentHref = element.getAttribute("href");
88
+ const currentSrcset = element.getAttribute("srcset");
89
+ if (
90
+ prev &&
91
+ prev.src === currentSrc &&
92
+ prev.href === currentHref &&
93
+ prev.srcset === currentSrcset
94
+ ) {
95
+ return;
96
+ }
59
97
  const tag = (element.tagName || "").toLowerCase();
60
- if (tag === "img" || tag === "source" || tag === "video" || tag === "audio" || tag === "script" || tag === "iframe") {
61
- const src = element.getAttribute("src");
98
+ if (tag === "img" || tag === "source" || tag === "video" || tag === "audio" || tag === "script" || tag === "iframe" || tag === "object" || tag === "embed") {
99
+ const src = currentSrc;
62
100
  if (src && !isLocalResource(src) && !src.startsWith("data:") && !src.startsWith("blob:")) {
63
101
  const localPath = findLocalPath(src);
64
102
  if (localPath) {
@@ -66,13 +104,18 @@ exports.replayDomRewriter = {
66
104
  return;
67
105
  }
68
106
  const record = findByUrl(src);
69
- const fallback = tag === "script" ? emptyScript : transparentGif;
70
- element.setAttribute("src", record ? toDataUrl(record) : fallback);
107
+ if (record) {
108
+ element.setAttribute("src", toDataUrl(record));
109
+ return;
110
+ }
111
+
112
+ // Keep original URL when we have no snapshot match.
113
+ return;
71
114
  }
72
115
  }
73
116
 
74
117
  if (tag === "link") {
75
- const href = element.getAttribute("href");
118
+ const href = currentHref;
76
119
  const rel = (element.getAttribute("rel") || "").toLowerCase();
77
120
  if (href && !isLocalResource(href) && !href.startsWith("data:") && !href.startsWith("blob:")) {
78
121
  const localPath = findLocalPath(href);
@@ -81,15 +124,25 @@ exports.replayDomRewriter = {
81
124
  return;
82
125
  }
83
126
  const record = findByUrl(href);
84
- const fallback = rel === "stylesheet" ? emptyStyle : emptyStyle;
85
- element.setAttribute("href", record ? toDataUrl(record, "text/css") : fallback);
127
+ if (record) {
128
+ element.setAttribute("href", toDataUrl(record, "text/css"));
129
+ return;
130
+ }
131
+
132
+ return;
86
133
  }
87
134
  }
88
135
 
89
- const srcset = element.getAttribute("srcset");
136
+ const srcset = currentSrcset;
90
137
  if (srcset) {
91
138
  element.setAttribute("srcset", rewriteSrcset(srcset));
92
139
  }
140
+
141
+ rewritten.set(element, {
142
+ src: element.getAttribute("src"),
143
+ href: element.getAttribute("href"),
144
+ srcset: element.getAttribute("srcset")
145
+ });
93
146
  };
94
147
 
95
148
  // Intercept DOM attribute writes to keep resources local.
@@ -124,14 +177,9 @@ exports.replayDomRewriter = {
124
177
  const dataUrl = toDataUrl(record, fallbackType);
125
178
  return originalSetAttribute.call(this, name, dataUrl);
126
179
  }
127
- if (attr === "src") {
128
- const fallback = tag === "script" ? emptyScript : transparentGif;
129
- return originalSetAttribute.call(this, name, fallback);
130
- }
131
- if (attr === "href") {
132
- const fallback = relLower === "stylesheet" ? emptyStyle : emptyStyle;
133
- return originalSetAttribute.call(this, name, fallback);
134
- }
180
+
181
+ // Keep original URL on miss.
182
+ return originalSetAttribute.call(this, name, value);
135
183
  }
136
184
  return originalSetAttribute.call(this, name, value);
137
185
  };
@@ -165,8 +213,12 @@ exports.replayDomRewriter = {
165
213
  return;
166
214
  }
167
215
  const record = findByUrl(rawValue);
168
- const next = record ? toDataUrl(record) : transparentGif;
169
- setter.call(this, next);
216
+ if (record) {
217
+ setter.call(this, toDataUrl(record));
218
+ return;
219
+ }
220
+
221
+ setter.call(this, rawValue);
170
222
  });
171
223
  return;
172
224
  }
@@ -180,8 +232,12 @@ exports.replayDomRewriter = {
180
232
  return;
181
233
  }
182
234
  const record = findByUrl(rawValue);
183
- const next = record ? toDataUrl(record) : transparentGif;
184
- setter.call(this, next);
235
+ if (record) {
236
+ setter.call(this, toDataUrl(record));
237
+ return;
238
+ }
239
+
240
+ setter.call(this, rawValue);
185
241
  });
186
242
 
187
243
  patchProperty(HTMLScriptElement.prototype, "src", function(value, setter) {
@@ -198,8 +254,12 @@ exports.replayDomRewriter = {
198
254
  return;
199
255
  }
200
256
  const record = findByUrl(rawValue);
201
- const next = record ? toDataUrl(record) : emptyScript;
202
- setter.call(this, next);
257
+ if (record) {
258
+ setter.call(this, toDataUrl(record));
259
+ return;
260
+ }
261
+
262
+ setter.call(this, rawValue);
203
263
  });
204
264
  return;
205
265
  }
@@ -213,8 +273,13 @@ exports.replayDomRewriter = {
213
273
  return;
214
274
  }
215
275
  const record = findByUrl(rawValue);
216
- const next = record ? toDataUrl(record) : emptyScript;
217
- setter.call(this, next);
276
+ if (record) {
277
+ setter.call(this, toDataUrl(record));
278
+ return;
279
+ }
280
+
281
+ // Keep original URL on miss.
282
+ setter.call(this, rawValue);
218
283
  });
219
284
 
220
285
  patchProperty(HTMLLinkElement.prototype, "href", function(value, setter) {
@@ -233,8 +298,12 @@ exports.replayDomRewriter = {
233
298
  return;
234
299
  }
235
300
  const record = findByUrl(rawValue);
236
- const next = record ? toDataUrl(record, relLower.includes("stylesheet") ? "text/css" : undefined) : emptyStyle;
237
- setter.call(this, next);
301
+ if (record) {
302
+ setter.call(this, toDataUrl(record, relLower.includes("stylesheet") ? "text/css" : undefined));
303
+ return;
304
+ }
305
+
306
+ setter.call(this, rawValue);
238
307
  });
239
308
  return;
240
309
  }
@@ -248,8 +317,12 @@ exports.replayDomRewriter = {
248
317
  return;
249
318
  }
250
319
  const record = findByUrl(rawValue);
251
- const next = record ? toDataUrl(record, relLower.includes("stylesheet") ? "text/css" : undefined) : emptyStyle;
252
- setter.call(this, next);
320
+ if (record) {
321
+ setter.call(this, toDataUrl(record, relLower.includes("stylesheet") ? "text/css" : undefined));
322
+ return;
323
+ }
324
+
325
+ setter.call(this, rawValue);
253
326
  });
254
327
 
255
328
  patchProperty(HTMLImageElement.prototype, "srcset", function(value, setter) {
@@ -275,8 +348,6 @@ exports.replayDomRewriter = {
275
348
  mutation.addedNodes.forEach((node) => {
276
349
  if (node && node.nodeType === 1) {
277
350
  rewriteElement(node);
278
- const descendants = node.querySelectorAll ? node.querySelectorAll("img,source,video,audio,script,link,iframe") : [];
279
- descendants.forEach((el) => rewriteElement(el));
280
351
  }
281
352
  });
282
353
  }
@@ -0,0 +1,2 @@
1
+ import type { ScriptHacker } from "./types";
2
+ export declare const replayHistoryPath: ScriptHacker;
@@ -0,0 +1,25 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.replayHistoryPath = void 0;
4
+ exports.replayHistoryPath = {
5
+ id: "replay-history-path",
6
+ stage: "replay",
7
+ build: () => `
8
+ // Ensure history/location reflects the original captured URL path.
9
+ //
10
+ // When a snapshot is served from a static server root (e.g. http://localhost:8080/index.html),
11
+ // SPA routers that read location.pathname will see "/".
12
+ // For a page captured from https://foo.com/bar/foo, the correct route should be "/bar/foo".
13
+ //
14
+ // We patch history early and replace the current URL without causing navigation.
15
+ try {
16
+ const parsed = new URL(baseUrl);
17
+ const desiredPath = parsed.pathname + (parsed.search || "") + (parsed.hash || "");
18
+ const currentPath = window.location.pathname + window.location.search + window.location.hash;
19
+
20
+ if (desiredPath && desiredPath !== currentPath) {
21
+ history.replaceState(history.state, "", desiredPath);
22
+ }
23
+ } catch {}
24
+ `
25
+ };
package/dist/index.d.ts CHANGED
@@ -1,11 +1,13 @@
1
1
  export { PagePocket } from "./pagepocket";
2
- export type { ApiRecord, ApiSnapshot, CaptureOptions, CompletionContext, CompletionStrategy, ContentRef, ContentStore, ContentStoreHandle, NetworkRequestEvent, NetworkRequestFailedEvent, NetworkResponseEvent, PagePocketOptions, PageSnapshot, PathResolver, ResourceFilter, SnapshotFile, WriteFSOptions, WriteResult, ZipOptions } from "./types";
2
+ export type { ApiRecord, ApiSnapshot, ApplyOptions, CaptureOptions, CompletionContext, CompletionStrategy, ContentRef, ContentStore, ContentStoreHandle, MatchQuery, NetworkEventStream, NetworkRequestEvent, NetworkRequestFailedEvent, NetworkResponseEvent, PagePocketOptions, PageSnapshot, PathResolver, ReplaceAction, ReplaceElementContext, ReplaceElementFn, ReplaceElementFnWithQuery, ReplaceElementRule, ReplaceElementsConfig, ResourceFilter, SnapshotFile, WriteFSOptions, WriteResult, ZipWriteResult, ZipResult, ZipOptions } from "./types";
3
3
  export { HybridContentStore } from "./content-store";
4
4
  export { createDefaultPathResolver, withPrefixPathResolver } from "./path-resolver";
5
5
  export { createDefaultResourceFilter } from "./resource-filter";
6
+ export * from "./builtin-blacklist";
6
7
  export { networkIdle, timeout } from "./completion";
7
8
  export { buildReplayScript } from "./replay-script";
8
9
  export { buildPreloadScript } from "./preload";
9
10
  export { rewriteEntryHtml, rewriteJsText } from "./rewrite-links";
10
11
  export { rewriteCssText } from "./css-rewrite";
11
12
  export { writeToFS, toZip } from "./writers";
13
+ export { applyReplaceElements } from "./replace-elements";
package/dist/index.js CHANGED
@@ -1,6 +1,20 @@
1
1
  "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
+ };
2
16
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.toZip = exports.writeToFS = exports.rewriteCssText = exports.rewriteJsText = exports.rewriteEntryHtml = exports.buildPreloadScript = exports.buildReplayScript = exports.timeout = exports.networkIdle = exports.createDefaultResourceFilter = exports.withPrefixPathResolver = exports.createDefaultPathResolver = exports.HybridContentStore = exports.PagePocket = void 0;
17
+ exports.applyReplaceElements = exports.toZip = exports.writeToFS = exports.rewriteCssText = exports.rewriteJsText = exports.rewriteEntryHtml = exports.buildPreloadScript = exports.buildReplayScript = exports.timeout = exports.networkIdle = exports.createDefaultResourceFilter = exports.withPrefixPathResolver = exports.createDefaultPathResolver = exports.HybridContentStore = exports.PagePocket = void 0;
4
18
  var pagepocket_1 = require("./pagepocket");
5
19
  Object.defineProperty(exports, "PagePocket", { enumerable: true, get: function () { return pagepocket_1.PagePocket; } });
6
20
  var content_store_1 = require("./content-store");
@@ -10,6 +24,7 @@ Object.defineProperty(exports, "createDefaultPathResolver", { enumerable: true,
10
24
  Object.defineProperty(exports, "withPrefixPathResolver", { enumerable: true, get: function () { return path_resolver_1.withPrefixPathResolver; } });
11
25
  var resource_filter_1 = require("./resource-filter");
12
26
  Object.defineProperty(exports, "createDefaultResourceFilter", { enumerable: true, get: function () { return resource_filter_1.createDefaultResourceFilter; } });
27
+ __exportStar(require("./builtin-blacklist"), exports);
13
28
  var completion_1 = require("./completion");
14
29
  Object.defineProperty(exports, "networkIdle", { enumerable: true, get: function () { return completion_1.networkIdle; } });
15
30
  Object.defineProperty(exports, "timeout", { enumerable: true, get: function () { return completion_1.timeout; } });
@@ -25,3 +40,5 @@ Object.defineProperty(exports, "rewriteCssText", { enumerable: true, get: functi
25
40
  var writers_1 = require("./writers");
26
41
  Object.defineProperty(exports, "writeToFS", { enumerable: true, get: function () { return writers_1.writeToFS; } });
27
42
  Object.defineProperty(exports, "toZip", { enumerable: true, get: function () { return writers_1.toZip; } });
43
+ var replace_elements_1 = require("./replace-elements");
44
+ Object.defineProperty(exports, "applyReplaceElements", { enumerable: true, get: function () { return replace_elements_1.applyReplaceElements; } });
@@ -0,0 +1,19 @@
1
+ import type { NetworkEvent } from "@pagepocket/interceptor";
2
+ type InflightStats = {
3
+ inflightRequests: number;
4
+ lastNetworkTs: number;
5
+ totalRequests: number;
6
+ };
7
+ export declare class InflightTracker {
8
+ private inflight;
9
+ private inflightRequests;
10
+ private lastNetworkTs;
11
+ private totalRequests;
12
+ private shouldIgnore;
13
+ constructor(options?: {
14
+ shouldIgnore?: (event: NetworkEvent) => boolean;
15
+ });
16
+ handleEvent(event: NetworkEvent): InflightStats;
17
+ getStats(): InflightStats;
18
+ }
19
+ export {};
@@ -0,0 +1,48 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.InflightTracker = void 0;
4
+ class InflightTracker {
5
+ constructor(options) {
6
+ this.inflight = new Set();
7
+ this.inflightRequests = 0;
8
+ this.lastNetworkTs = Date.now();
9
+ this.totalRequests = 0;
10
+ this.shouldIgnore = options?.shouldIgnore ?? (() => false);
11
+ }
12
+ handleEvent(event) {
13
+ if (this.shouldIgnore(event)) {
14
+ return this.getStats();
15
+ }
16
+ if (event?.timestamp) {
17
+ this.lastNetworkTs = event.timestamp;
18
+ }
19
+ else {
20
+ this.lastNetworkTs = Date.now();
21
+ }
22
+ if (event?.type === "request") {
23
+ this.totalRequests += 1;
24
+ if (!this.inflight.has(event.requestId)) {
25
+ this.inflight.add(event.requestId);
26
+ this.inflightRequests += 1;
27
+ }
28
+ }
29
+ if (event?.type === "response" || event?.type === "failed") {
30
+ if (this.inflight.delete(event.requestId)) {
31
+ this.inflightRequests = Math.max(0, this.inflightRequests - 1);
32
+ }
33
+ }
34
+ return {
35
+ inflightRequests: this.inflightRequests,
36
+ lastNetworkTs: this.lastNetworkTs,
37
+ totalRequests: this.totalRequests
38
+ };
39
+ }
40
+ getStats() {
41
+ return {
42
+ inflightRequests: this.inflightRequests,
43
+ lastNetworkTs: this.lastNetworkTs,
44
+ totalRequests: this.totalRequests
45
+ };
46
+ }
47
+ }
48
+ exports.InflightTracker = InflightTracker;
@@ -1,10 +1,12 @@
1
1
  import type { InterceptTarget } from "@pagepocket/interceptor";
2
- import type { CaptureOptions, PagePocketOptions, PageSnapshot } from "./types";
2
+ import type { CaptureOptions, NetworkEventStream, PagePocketOptions, PageSnapshot } from "./types";
3
3
  export declare class PagePocket {
4
4
  private target;
5
5
  private options;
6
+ private eventStream;
6
7
  private constructor();
7
8
  static fromURL(url: string, options?: PagePocketOptions): PagePocket;
8
9
  static fromTarget(target: InterceptTarget, options?: PagePocketOptions): PagePocket;
10
+ interceptedRequestEvents(): NetworkEventStream;
9
11
  capture(options?: CaptureOptions): Promise<PageSnapshot>;
10
12
  }
@@ -3,12 +3,111 @@ Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.PagePocket = void 0;
4
4
  const completion_1 = require("./completion");
5
5
  const content_store_1 = require("./content-store");
6
+ const debug_1 = require("./debug");
7
+ const inflight_tracker_1 = require("./inflight-tracker");
6
8
  const network_store_1 = require("./network-store");
7
9
  const path_resolver_1 = require("./path-resolver");
8
10
  const resource_filter_1 = require("./resource-filter");
9
11
  const snapshot_builder_1 = require("./snapshot-builder");
12
+ class AsyncEventQueue {
13
+ constructor() {
14
+ this.values = [];
15
+ this.pending = [];
16
+ this.closed = false;
17
+ }
18
+ push(value) {
19
+ if (this.closed) {
20
+ return;
21
+ }
22
+ if (this.pending.length > 0) {
23
+ const resolve = this.pending.shift();
24
+ resolve?.({ kind: "value", value });
25
+ return;
26
+ }
27
+ this.values.push({ kind: "value", value });
28
+ }
29
+ close() {
30
+ if (this.closed) {
31
+ return;
32
+ }
33
+ this.closed = true;
34
+ if (this.pending.length > 0) {
35
+ for (const resolve of this.pending) {
36
+ resolve({ kind: "close" });
37
+ }
38
+ this.pending = [];
39
+ return;
40
+ }
41
+ this.values.push({ kind: "close" });
42
+ }
43
+ async *iterate() {
44
+ while (true) {
45
+ if (this.values.length > 0) {
46
+ const item = this.values.shift();
47
+ if (!item || item.kind === "close") {
48
+ return;
49
+ }
50
+ yield item.value;
51
+ continue;
52
+ }
53
+ if (this.closed) {
54
+ return;
55
+ }
56
+ const item = await new Promise((resolve) => {
57
+ this.pending.push(resolve);
58
+ });
59
+ if (item.kind === "close") {
60
+ return;
61
+ }
62
+ yield item.value;
63
+ }
64
+ }
65
+ }
66
+ class BroadcastAsyncIterable {
67
+ constructor() {
68
+ this.queues = new Set();
69
+ this.closed = false;
70
+ }
71
+ subscribe() {
72
+ const queue = new AsyncEventQueue();
73
+ if (this.closed) {
74
+ queue.close();
75
+ }
76
+ this.queues.add(queue);
77
+ return (async function* (owner) {
78
+ try {
79
+ for await (const value of queue.iterate()) {
80
+ yield value;
81
+ }
82
+ }
83
+ finally {
84
+ owner.queues.delete(queue);
85
+ queue.close();
86
+ }
87
+ })(this);
88
+ }
89
+ emit(value) {
90
+ if (this.queues.size === 0) {
91
+ return;
92
+ }
93
+ for (const queue of this.queues) {
94
+ queue.push(value);
95
+ }
96
+ }
97
+ close() {
98
+ if (this.closed) {
99
+ return;
100
+ }
101
+ this.closed = true;
102
+ for (const queue of this.queues) {
103
+ queue.close();
104
+ }
105
+ this.queues.clear();
106
+ }
107
+ }
10
108
  class PagePocket {
11
109
  constructor(target, options) {
110
+ this.eventStream = new BroadcastAsyncIterable();
12
111
  this.target = target;
13
112
  this.options = options ?? {};
14
113
  }
@@ -18,77 +117,92 @@ class PagePocket {
18
117
  static fromTarget(target, options) {
19
118
  return new PagePocket(target, options);
20
119
  }
120
+ interceptedRequestEvents() {
121
+ return this.eventStream.subscribe();
122
+ }
21
123
  async capture(options) {
22
124
  if (!options?.interceptor) {
23
125
  throw new Error("CaptureOptions.interceptor is required.");
24
126
  }
25
127
  const contentStore = options?.contentStore ?? new content_store_1.HybridContentStore();
26
- const filter = options?.filter ?? (0, resource_filter_1.createDefaultResourceFilter)();
128
+ const baseFilter = options?.filter ?? (0, resource_filter_1.createDefaultResourceFilter)();
129
+ const blacklist = options?.blacklist ?? [];
130
+ const isBlacklisted = (url) => typeof url === "string" && blacklist.some((pattern) => pattern.test(url));
131
+ const filter = blacklist.length === 0
132
+ ? baseFilter
133
+ : {
134
+ shouldSave(req, res) {
135
+ if (isBlacklisted(req.url)) {
136
+ (0, debug_1.debug_log)("[pagepocket] blacklist match (skip save)", req.url);
137
+ return false;
138
+ }
139
+ return baseFilter.shouldSave(req, res);
140
+ }
141
+ };
27
142
  const pathResolver = options?.pathResolver ?? (0, path_resolver_1.createDefaultPathResolver)();
28
143
  const rewriteEntry = options?.rewriteEntry ?? true;
29
144
  const rewriteCSS = options?.rewriteCSS ?? true;
145
+ const replaceElements = options?.replaceElements;
30
146
  const limits = options?.limits;
31
147
  const completionStrategies = (0, completion_1.normalizeCompletion)(options?.completion);
32
- const completion = completionStrategies.length > 0 ? completionStrategies : [(0, completion_1.networkIdle)(1000), (0, completion_1.timeout)(5000)];
148
+ const idleMs = options?.timeoutMs ?? 5000;
149
+ const maxDurationMs = options?.maxDurationMs;
150
+ const completion = completionStrategies.length > 0
151
+ ? completionStrategies
152
+ : [(0, completion_1.networkIdle)(idleMs), ...(maxDurationMs !== undefined ? [(0, completion_1.timeout)(maxDurationMs)] : [])];
33
153
  const store = new network_store_1.NetworkStore({
34
154
  contentStore,
35
155
  filter,
36
156
  limits
37
157
  });
38
- const inflight = new Set();
39
- let inflightRequests = 0;
40
- let lastNetworkTs = Date.now();
41
- let totalRequests = 0;
42
- const pendingEvents = new Set();
43
- const onEvent = (event) => {
44
- if (event?.timestamp) {
45
- lastNetworkTs = event.timestamp;
46
- }
47
- else {
48
- lastNetworkTs = Date.now();
49
- }
50
- if (event?.type === "request") {
51
- totalRequests += 1;
52
- if (!inflight.has(event.requestId)) {
53
- inflight.add(event.requestId);
54
- inflightRequests += 1;
55
- }
56
- }
57
- if (event?.type === "response" || event?.type === "failed") {
58
- if (inflight.delete(event.requestId)) {
59
- inflightRequests = Math.max(0, inflightRequests - 1);
158
+ const inflightTracker = new inflight_tracker_1.InflightTracker({
159
+ shouldIgnore: (event) => {
160
+ if (!isBlacklisted(event.url)) {
161
+ return false;
60
162
  }
163
+ (0, debug_1.debug_log)("[pagepocket] blacklist match (ignore inflight)", event.url);
164
+ return true;
61
165
  }
166
+ });
167
+ const getStats = () => {
168
+ const stats = inflightTracker.getStats();
169
+ (0, debug_1.debug_log)("[pagepocket] stats", stats);
170
+ return stats;
171
+ };
172
+ const pendingEvents = new Set();
173
+ const onEvent = (event) => {
174
+ this.eventStream.emit(event);
175
+ const stats = inflightTracker.handleEvent(event);
176
+ (0, debug_1.debug_log)("[pagepocket] inflight", stats.inflightRequests, "event", event.type, event.requestId, event.url);
62
177
  const task = store.handleEvent(event);
63
178
  pendingEvents.add(task);
64
179
  task.finally(() => pendingEvents.delete(task));
65
180
  };
66
- const session = await options.interceptor.start(this.target, { onEvent });
181
+ (0, debug_1.debug_log)("[pagepocket] session start");
182
+ const session = await options.interceptor.start(this.target, { onEvent }, { timeoutMs: options?.timeoutMs, maxDurationMs: options?.maxDurationMs });
183
+ (0, debug_1.debug_log)("[pagepocket] session started");
67
184
  if (this.target.kind === "url" && session?.navigate) {
185
+ (0, debug_1.debug_log)("[pagepocket] navigate", this.target.url);
68
186
  await session.navigate(this.target.url);
187
+ (0, debug_1.debug_log)("[pagepocket] navigated");
69
188
  }
70
189
  if (completion.length === 1) {
71
190
  await completion[0].wait({
72
191
  now: () => Date.now(),
73
- getStats: () => ({
74
- inflightRequests,
75
- lastNetworkTs,
76
- totalRequests
77
- })
192
+ getStats
78
193
  });
79
194
  }
80
195
  else if (completion.length > 1) {
81
196
  await Promise.race(completion.map((strategy) => strategy.wait({
82
197
  now: () => Date.now(),
83
- getStats: () => ({
84
- inflightRequests,
85
- lastNetworkTs,
86
- totalRequests
87
- })
198
+ getStats
88
199
  })));
89
200
  }
201
+ (0, debug_1.debug_log)("[pagepocket] session stop");
90
202
  await session.stop();
203
+ (0, debug_1.debug_log)("[pagepocket] session stopped");
91
204
  await Promise.all(pendingEvents);
205
+ this.eventStream.close();
92
206
  const entryUrl = this.target.kind === "url" ? this.target.url : "";
93
207
  return (0, snapshot_builder_1.buildSnapshot)({
94
208
  entryUrl,
@@ -99,6 +213,7 @@ class PagePocket {
99
213
  pathResolver,
100
214
  rewriteEntry,
101
215
  rewriteCSS,
216
+ replaceElements,
102
217
  warnings: store.getWarnings()
103
218
  });
104
219
  }