@pagepocket/lib 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,148 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.buildReplayScript = void 0;
4
+ exports.matchAPI = matchAPI;
4
5
  const hackers_1 = require("./hackers");
5
- const buildReplayScript = (requestsPath, baseUrl) => {
6
/**
 * Finds the recorded API entry that best matches a replayed request.
 *
 * Matching runs in two phases:
 *   1. Keyed lookup in `byKey` (when provided) using "METHOD absoluteUrl body"
 *      keys built from several spellings of the request URL.
 *   2. A linear scan over `records` with progressively looser URL comparison:
 *      absolute URL (hash ignored), then path + query, then path only.
 *
 * Both phases try the method/body combinations in this order: the request's
 * own method and body, the same method with no body, GET with no body, and
 * GET with the request body.
 *
 * NOTE: the replay script embeds this function's source text via
 * `matchAPI.toString()`, so it must stay a single self-contained function
 * declaration that references nothing from the enclosing module.
 *
 * @param options.records  Recorded entries; each is expected to expose `url`,
 *                         `method` and optionally `requestBody` /
 *                         `requestBodyBase64`. May be missing.
 * @param options.byKey    Optional Map-like index from lookup key to record.
 * @param options.baseUrl  Base used to resolve relative URLs.
 * @param options.method   Request method; defaults to "GET" when falsy.
 * @param options.url      Request URL (absolute or relative).
 * @param options.body     Request body; null/undefined are treated as "".
 * @returns The matching record, or `undefined` when nothing matches.
 */
function matchAPI(options) {
    const { records, byKey, baseUrl, method, url, body } = options;

    const normalizeBody = (value) => {
        if (value === undefined || value === null) {
            return "";
        }
        if (typeof value === "string") {
            return value;
        }
        try {
            return String(value);
        }
        catch {
            return "";
        }
    };
    const safeUrl = (input) => {
        try {
            return new URL(input, baseUrl);
        }
        catch {
            return null;
        }
    };
    // Unparseable input is passed through unchanged instead of throwing.
    const normalizeUrl = (input) => {
        const parsed = safeUrl(input);
        return parsed ? parsed.toString() : input;
    };
    const stripHash = (value) => {
        const hashAt = value.indexOf("#");
        return hashAt === -1 ? value : value.slice(0, hashAt);
    };
    // A lone "/" is kept so the root path never collapses to an empty string.
    const stripTrailingSlash = (value) => value.length > 1 && value.endsWith("/") ? value.slice(0, -1) : value;
    const toPathSearch = (input) => {
        const parsed = safeUrl(input);
        return parsed ? parsed.pathname + parsed.search : input;
    };
    const toPathname = (input) => {
        const parsed = safeUrl(input);
        return parsed ? parsed.pathname : input;
    };

    // Coerce once so both phases see the same string, even for a missing URL.
    const requestUrl = String(url ?? "");
    const bodyValue = normalizeBody(body);
    const methodValue = String(method || "GET").toUpperCase();

    // Candidate spellings of the request URL, in priority order.
    const absoluteUrl = normalizeUrl(requestUrl);
    const absoluteNoHash = stripHash(absoluteUrl);
    const pathSearch = toPathSearch(requestUrl);
    const pathname = toPathname(requestUrl);
    const urlVariants = [
        requestUrl,
        stripHash(requestUrl),
        stripTrailingSlash(requestUrl),
        stripTrailingSlash(stripHash(requestUrl)),
        absoluteUrl,
        absoluteNoHash,
        stripTrailingSlash(absoluteNoHash),
        pathSearch,
        stripTrailingSlash(pathSearch),
        pathname,
        stripTrailingSlash(pathname)
    ].filter(Boolean);
    // Keys always carry the normalized URL, so variants that normalize to the
    // same value would only repeat a lookup; collapse them up front.
    const lookupUrls = Array.from(new Set(urlVariants.map(normalizeUrl)));

    // A body-less GET makes every entry below identical; drop the repeats so
    // each lookup and scan runs once per distinct combination.
    const matchOrder = [
        [methodValue, bodyValue],
        [methodValue, ""],
        ["GET", ""],
        ["GET", bodyValue]
    ].filter((pair, index, all) => all.findIndex((other) => other[0] === pair[0] && other[1] === pair[1]) === index);

    // Phase 1: keyed lookup.
    if (byKey) {
        for (const [keyMethod, keyBody] of matchOrder) {
            for (const lookupUrl of lookupUrls) {
                const hit = byKey.get(keyMethod + " " + lookupUrl + " " + keyBody);
                if (hit) {
                    return hit;
                }
            }
        }
    }

    // Phase 2: scan the raw records with tiered URL comparison.
    const requestPathSearch = stripTrailingSlash(pathSearch);
    const requestPath = stripTrailingSlash(pathname);
    // 0 = absolute URL match, 1 = path + query match, 2 = path match, -1 = none.
    const matchTier = (recordUrl) => {
        if (stripHash(normalizeUrl(recordUrl)) === absoluteNoHash) {
            return 0;
        }
        if (stripTrailingSlash(toPathSearch(recordUrl)) === requestPathSearch) {
            return 1;
        }
        if (stripTrailingSlash(toPathname(recordUrl)) === requestPath) {
            return 2;
        }
        return -1;
    };
    const scanRecords = (keyMethod, keyBody) => {
        let best;
        let bestTier = 3;
        for (const record of records || []) {
            if (!record || !record.url || !record.method) {
                continue;
            }
            if (String(record.method).toUpperCase() !== keyMethod) {
                continue;
            }
            const recordBody = record.requestBody || record.requestBodyBase64 || "";
            // An empty keyBody means "any body is acceptable".
            if (keyBody && recordBody !== keyBody) {
                continue;
            }
            const tier = matchTier(String(record.url));
            if (tier === -1 || tier >= bestTier) {
                continue;
            }
            if (tier === 0) {
                // Nothing can beat an absolute URL match.
                return record;
            }
            // Keep the earliest record of the strictest tier seen so far, so a
            // path-only match never shadows a later, more exact one.
            best = record;
            bestTier = tier;
        }
        return best;
    };
    for (const [keyMethod, keyBody] of matchOrder) {
        const found = scanRecords(keyMethod, keyBody);
        if (found) {
            return found;
        }
    }
    return undefined;
}
143
+ const buildReplayScript = (apiPath, baseUrl) => {
6
144
  const basePayload = JSON.stringify(baseUrl);
7
- const requestsPayload = JSON.stringify(requestsPath);
145
+ const apiPayload = JSON.stringify(apiPath);
8
146
  const context = { stage: "replay" };
9
147
  const hackerScripts = hackers_1.replayHackers
10
148
  .map((hacker) => ` // hacker:${hacker.id}\n${hacker.build(context)}`)
@@ -12,34 +150,25 @@ const buildReplayScript = (requestsPath, baseUrl) => {
12
150
  return `
13
151
  <script>
14
152
  (function(){
15
- // Load the snapshot metadata before patching runtime APIs.
16
153
  const baseUrl = ${basePayload};
17
- const requestsUrl = ${requestsPayload};
154
+ const apiUrl = ${apiPayload};
18
155
  const __pagepocketOriginalFetch = window.fetch ? window.fetch.bind(window) : null;
19
156
 
20
- const loadSnapshot = async () => {
157
+ const loadApiSnapshot = async () => {
21
158
  try {
22
159
  if (!__pagepocketOriginalFetch) {
23
160
  throw new Error("Fetch is unavailable");
24
161
  }
25
- const response = await __pagepocketOriginalFetch(requestsUrl);
162
+ const response = await __pagepocketOriginalFetch(apiUrl);
26
163
  if (!response.ok) {
27
- throw new Error("Failed to load snapshot metadata");
164
+ throw new Error("Failed to load api.json");
28
165
  }
29
166
  return await response.json();
30
167
  } catch {
31
- return {
32
- url: baseUrl,
33
- title: "",
34
- capturedAt: "",
35
- fetchXhrRecords: [],
36
- networkRecords: [],
37
- resources: []
38
- };
168
+ return { version: "1.0", url: baseUrl, createdAt: 0, records: [] };
39
169
  }
40
170
  };
41
171
 
42
- // Soften JSON parse failures to avoid halting replay flows.
43
172
  const originalResponseJson = Response && Response.prototype && Response.prototype.json;
44
173
  if (originalResponseJson) {
45
174
  Response.prototype.json = function(...args) {
@@ -51,7 +180,6 @@ const buildReplayScript = (requestsPath, baseUrl) => {
51
180
  };
52
181
  }
53
182
 
54
- // Guard to reapply patches if overwritten later.
55
183
  const ensureReplayPatches = () => {
56
184
  try {
57
185
  if (!window.fetch.__pagepocketOriginal && typeof __pagepocketOriginalFetch === "function") {
@@ -66,134 +194,39 @@ const buildReplayScript = (requestsPath, baseUrl) => {
66
194
  };
67
195
 
68
196
  let records = [];
69
- let networkRecords = [];
70
197
  const byKey = new Map();
71
198
 
72
- const localResourceSet = new Set();
73
- const resourceUrlMap = new Map();
74
-
75
199
  const normalizeUrl = (input) => {
76
200
  try { return new URL(input, baseUrl).toString(); } catch { return input; }
77
201
  };
78
202
 
79
- let baseOrigin = "";
80
- let baseDir = "";
81
- try {
82
- const parsedBase = new URL(baseUrl);
83
- baseOrigin = parsedBase.origin;
84
- baseDir = new URL(".", parsedBase).toString().replace(/\\/$/, "");
85
- } catch {}
86
-
87
-
88
- const expandUrlVariants = (value) => {
89
- const variants = [];
90
- if (typeof value === "string") {
91
- variants.push(value);
92
- variants.push(normalizeUrl(value));
93
- if (baseOrigin && value.startsWith("/")) {
94
- variants.push(baseOrigin + value);
95
- if (baseDir) variants.push(baseDir + value);
96
- } else if (baseDir) {
97
- variants.push(baseDir + (value.startsWith("/") ? value : "/" + value));
98
- }
99
- try {
100
- const parsed = new URL(value, baseUrl);
101
- const pathWithSearch = (parsed.pathname || "") + (parsed.search || "");
102
- if (baseOrigin && parsed.origin !== baseOrigin) {
103
- variants.push(baseOrigin + pathWithSearch);
104
- if (baseDir) {
105
- const path = pathWithSearch.startsWith("/") ? pathWithSearch : "/" + pathWithSearch;
106
- variants.push(baseDir + path);
107
- }
108
- }
109
- } catch {}
110
- }
111
- return Array.from(new Set(variants.filter(Boolean)));
112
- };
113
-
114
203
  const normalizeBody = (body) => {
115
204
  if (body === undefined || body === null) return "";
116
205
  if (typeof body === "string") return body;
117
206
  try { return String(body); } catch { return ""; }
118
207
  };
119
208
 
120
- // Build a stable key so requests with identical method/url/body match the same response.
121
209
  const makeKey = (method, url, body) => method.toUpperCase() + " " + normalizeUrl(url) + " " + normalizeBody(body);
122
- const makeVariantKeys = (method, url, body) => {
123
- return expandUrlVariants(url).map((variant) => makeKey(method, variant, body));
124
- };
125
- const normalizeNetworkRecord = (record) => {
126
- if (!record || typeof record !== "object") {
127
- return record;
128
- }
129
- if (record.response && record.response.body !== undefined) {
130
- const response = record.response || {};
131
- const encoding = response.bodyEncoding || "text";
132
- return {
133
- url: record.url,
134
- method: record.method || "GET",
135
- requestBody: record.requestBody || "",
136
- status: response.status,
137
- statusText: response.statusText,
138
- responseHeaders: response.headers,
139
- responseBody: encoding === "text" ? response.body : undefined,
140
- responseBodyBase64: encoding === "base64" ? response.body : undefined,
141
- responseEncoding: encoding,
142
- error: record.error,
143
- timestamp: record.timestamp
144
- };
145
- }
146
- return record;
147
- };
210
+ const makeVariantKeys = (method, url, body) => [makeKey(method, url, body)];
211
+
212
+ const matchAPI = ${matchAPI.toString()};
148
213
 
149
214
  const primeLookups = (snapshot) => {
150
- records = snapshot.fetchXhrRecords || [];
151
- networkRecords = (snapshot.networkRecords || []).map(normalizeNetworkRecord);
215
+ records = snapshot.records || [];
152
216
  byKey.clear();
153
- localResourceSet.clear();
154
- resourceUrlMap.clear();
155
-
156
217
  for (const record of records) {
157
218
  if (!record || !record.url || !record.method) continue;
158
- const keys = makeVariantKeys(record.method, record.url, record.requestBody || "");
159
- for (const key of keys) {
160
- if (!byKey.has(key)) {
161
- byKey.set(key, record);
162
- }
163
- }
164
- }
165
-
166
- for (const record of networkRecords) {
167
- if (!record || !record.url || !record.method) continue;
168
- const keys = makeVariantKeys(record.method, record.url, record.requestBody || "");
219
+ const keys = makeVariantKeys(record.method, record.url, record.requestBody || record.requestBodyBase64 || "");
169
220
  for (const key of keys) {
170
221
  if (!byKey.has(key)) {
171
222
  byKey.set(key, record);
172
223
  }
173
224
  }
174
225
  }
175
-
176
- // Track local resource files and map original URLs to local paths.
177
- const resourceList = snapshot.resources || [];
178
- for (const item of resourceList) {
179
- if (!item || !item.localPath) continue;
180
- localResourceSet.add(item.localPath);
181
- localResourceSet.add("./" + item.localPath);
182
- localResourceSet.add("/" + item.localPath);
183
-
184
- if (item.url) {
185
- const variants = expandUrlVariants(item.url);
186
- for (const variant of variants) {
187
- resourceUrlMap.set(variant, item.localPath);
188
- }
189
- }
190
- }
191
226
  };
192
227
 
193
-
194
228
  const ready = (async () => {
195
- // Deserialize the snapshot and prepare lookup tables for offline responses.
196
- const snapshot = (await loadSnapshot()) || {};
229
+ const snapshot = (await loadApiSnapshot()) || {};
197
230
  primeLookups(snapshot);
198
231
  return snapshot;
199
232
  })();
@@ -201,80 +234,34 @@ const buildReplayScript = (requestsPath, baseUrl) => {
201
234
  const isLocalResource = (value) => {
202
235
  if (!value) return false;
203
236
  if (value.startsWith("data:") || value.startsWith("blob:")) return true;
204
- return localResourceSet.has(value);
237
+ if (value.startsWith("/")) return true;
238
+ return false;
205
239
  };
206
240
 
207
- // Lookup helpers for request records and local assets.
208
241
  const findRecord = (method, url, body) => {
209
- const variants = expandUrlVariants(url);
210
- for (const variant of variants) {
211
- const key = makeKey(method, variant, body);
212
- if (byKey.has(key)) return byKey.get(key);
213
- }
214
- for (const variant of variants) {
215
- const fallbackKey = makeKey(method, variant, "");
216
- if (byKey.has(fallbackKey)) return byKey.get(fallbackKey);
217
- }
218
- for (const variant of variants) {
219
- const getKey = makeKey("GET", variant, "");
220
- if (byKey.has(getKey)) return byKey.get(getKey);
221
- }
222
- return null;
242
+ return matchAPI({ records, byKey, baseUrl, method, url, body });
223
243
  };
224
244
 
225
245
  const findByUrl = (url) => {
226
246
  if (isLocalResource(url)) return null;
227
- const variants = expandUrlVariants(url);
228
- for (const variant of variants) {
229
- const direct = byKey.get(makeKey("GET", variant, ""));
230
- if (direct) return direct;
231
- }
232
- // Attempt a looser match: ignore querystring if needed.
233
- for (const variant of variants) {
234
- try {
235
- const withoutQuery = new URL(variant).origin + new URL(variant).pathname;
236
- const direct = byKey.get(makeKey("GET", withoutQuery, ""));
237
- if (direct) return direct;
238
- } catch {}
239
- }
240
- return null;
247
+ return matchAPI({ records, byKey, baseUrl, method: "GET", url, body: "" });
241
248
  };
242
249
 
243
- const findLocalPath = (url) => {
244
- if (!url) return null;
245
- const variants = expandUrlVariants(url);
246
- for (const variant of variants) {
247
- const hit = resourceUrlMap.get(variant);
248
- if (hit) return hit;
249
- }
250
- for (const variant of variants) {
251
- try {
252
- const withoutQuery = new URL(variant).origin + new URL(variant).pathname;
253
- const hit = resourceUrlMap.get(withoutQuery);
254
- if (hit) return hit;
255
- } catch {}
256
- }
257
- // If still not found, fallback to data URLs if present.
258
- return null;
259
- };
250
+ const findLocalPath = () => null;
260
251
 
261
- // Safe property injection for emulating XHR state transitions.
262
252
  const defineProp = (obj, key, value) => {
263
253
  try {
264
254
  Object.defineProperty(obj, key, { value, configurable: true });
265
255
  } catch {}
266
256
  };
267
257
 
268
- // Base64 helpers for binary payloads.
269
258
  const decodeBase64 = (input) => {
270
259
  try {
271
260
  const binary = atob(input || "");
272
261
  const bytes = new Uint8Array(binary.length);
273
-
274
262
  Array.from(binary).forEach((char, index) => {
275
263
  bytes[index] = char.charCodeAt(0);
276
264
  });
277
-
278
265
  return bytes;
279
266
  } catch {
280
267
  return new Uint8Array();
@@ -295,7 +282,6 @@ const buildReplayScript = (requestsPath, baseUrl) => {
295
282
  }
296
283
  };
297
284
 
298
- // Resolve a content type from recorded response headers.
299
285
  const getContentType = (record) => {
300
286
  const headers = record.responseHeaders || {};
301
287
  for (const key in headers) {
@@ -306,21 +292,18 @@ const buildReplayScript = (requestsPath, baseUrl) => {
306
292
  return "application/octet-stream";
307
293
  };
308
294
 
309
- // Turn a recorded response into a data URL for inline usage.
310
295
  const toDataUrl = (record, fallbackType) => {
311
296
  if (!record) return "";
312
297
  const contentType = getContentType(record) || fallbackType || "application/octet-stream";
313
298
  if (record.responseEncoding === "base64" && record.responseBodyBase64) {
314
299
  return "data:" + contentType + ";base64," + record.responseBodyBase64;
315
300
  }
316
-
317
301
  if (record.responseBody) {
318
302
  return "data:" + contentType + ";base64," + textToBase64(record.responseBody);
319
303
  }
320
304
  return "data:" + (fallbackType || "application/octet-stream") + ",";
321
305
  };
322
306
 
323
- // Build a real Response object from the recorded payload.
324
307
  const responseFromRecord = (record) => {
325
308
  const headers = new Headers(record.responseHeaders || {});
326
309
  if (record.responseEncoding === "base64" && record.responseBodyBase64) {
@@ -0,0 +1,2 @@
1
+ import type { ResourceFilter } from "./types";
2
+ export declare const createDefaultResourceFilter: () => ResourceFilter;
@@ -0,0 +1,34 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.createDefaultResourceFilter = void 0;
4
/** Resource types the default filter is willing to persist. */
const DEFAULT_ALLOWED = new Set([
    "document",
    "stylesheet",
    "script",
    "image",
    "font",
    "media"
]);
/**
 * Matches URLs whose scheme points at inline or non-fetchable content.
 * URL schemes are case-insensitive, and leading whitespace is tolerated, so
 * "JavaScript:" or " DATA:" are recognised as well. A regex is used rather
 * than lower-casing the whole string because data: URLs can be very large.
 */
const SKIPPABLE_SCHEME = /^\s*(?:data|blob|mailto|tel|javascript):/i;
/**
 * Tells whether a request URL should never be saved.
 * A non-string URL is treated as skippable: there is nothing to fetch or store.
 */
const isSkippableUrl = (url) => typeof url !== "string" || SKIPPABLE_SCHEME.test(url);
/**
 * Builds the default resource filter.
 *
 * `shouldSave(req, res)` returns false for:
 *   - URLs with a skippable scheme (data:, blob:, mailto:, tel:, javascript:),
 *   - requests whose resourceType is "fetch" or "xhr",
 *   - responses with status >= 400,
 *   - any resourceType outside DEFAULT_ALLOWED.
 * Requests without a resourceType that pass the other checks are saved.
 *
 * @returns An object exposing `shouldSave(req, res)`.
 */
const createDefaultResourceFilter = () => ({
    shouldSave(req, res) {
        if (isSkippableUrl(req.url)) {
            return false;
        }
        const resourceType = req.resourceType;
        if (resourceType === "fetch" || resourceType === "xhr") {
            return false;
        }
        if (res && res.status >= 400) {
            return false;
        }
        if (resourceType) {
            return DEFAULT_ALLOWED.has(resourceType);
        }
        return true;
    }
});
34
+ exports.createDefaultResourceFilter = createDefaultResourceFilter;
@@ -1,15 +1,13 @@
1
- import type { CheerioAPI } from "cheerio";
2
- import type { DownloadedResource } from "./download-resources";
3
- import { type ResourceReference, type SrcsetReference } from "./resources";
4
- import type { NetworkRecord } from "./types";
5
- type RewriteLinksInput = {
6
- $: CheerioAPI;
7
- resourceUrls: ResourceReference[];
8
- srcsetItems: SrcsetReference[];
9
- baseUrl: string;
10
- assetsDirName: string;
11
- resourceMap: Map<string, DownloadedResource>;
12
- networkRecords: NetworkRecord[];
13
- };
14
- export declare const rewriteLinks: (input: RewriteLinksInput) => Promise<void>;
1
+ type UrlResolver = (absoluteUrl: string) => string | null;
2
+ export declare const rewriteJsText: (source: string, resolve: UrlResolver, baseUrl: string) => Promise<string>;
3
+ export declare const rewriteEntryHtml: (input: {
4
+ html: string;
5
+ entryUrl: string;
6
+ apiPath: string;
7
+ resolve: UrlResolver;
8
+ rewriteLinks?: boolean;
9
+ }) => Promise<{
10
+ html: string;
11
+ title?: string;
12
+ }>;
15
13
  export {};