@pagepocket/lib 0.6.1 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/hackers/index.js +4 -0
- package/dist/hackers/replay-block-text-fragment.d.ts +2 -0
- package/dist/hackers/replay-block-text-fragment.js +71 -0
- package/dist/hackers/replay-dom-rewrite.js +46 -6
- package/dist/hackers/replay-history-path.d.ts +2 -0
- package/dist/hackers/replay-history-path.js +25 -0
- package/dist/path-resolver.js +14 -4
- package/dist/replay-script.js +22 -4
- package/dist/rewrite-links.js +114 -12
- package/dist/snapshot-builder.js +24 -1
- package/package.json +3 -3
package/dist/hackers/index.js
CHANGED
|
@@ -4,14 +4,18 @@ exports.replayHackers = exports.preloadHackers = void 0;
|
|
|
4
4
|
const preload_fetch_1 = require("./preload-fetch");
|
|
5
5
|
const preload_xhr_1 = require("./preload-xhr");
|
|
6
6
|
const replay_beacon_1 = require("./replay-beacon");
|
|
7
|
+
const replay_block_text_fragment_1 = require("./replay-block-text-fragment");
|
|
7
8
|
const replay_dom_rewrite_1 = require("./replay-dom-rewrite");
|
|
8
9
|
const replay_eventsource_1 = require("./replay-eventsource");
|
|
9
10
|
const replay_fetch_1 = require("./replay-fetch");
|
|
11
|
+
const replay_history_path_1 = require("./replay-history-path");
|
|
10
12
|
const replay_svg_image_1 = require("./replay-svg-image");
|
|
11
13
|
const replay_websocket_1 = require("./replay-websocket");
|
|
12
14
|
const replay_xhr_1 = require("./replay-xhr");
|
|
13
15
|
exports.preloadHackers = [preload_fetch_1.preloadFetchRecorder, preload_xhr_1.preloadXhrRecorder];
|
|
14
16
|
exports.replayHackers = [
|
|
17
|
+
replay_block_text_fragment_1.replayBlockTextFragment,
|
|
18
|
+
replay_history_path_1.replayHistoryPath,
|
|
15
19
|
replay_fetch_1.replayFetchResponder,
|
|
16
20
|
replay_xhr_1.replayXhrResponder,
|
|
17
21
|
replay_dom_rewrite_1.replayDomRewriter,
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.replayBlockTextFragment = void 0;
|
|
4
|
+
exports.replayBlockTextFragment = {
|
|
5
|
+
id: "replay-block-text-fragment",
|
|
6
|
+
stage: "replay",
|
|
7
|
+
build: () => `
|
|
8
|
+
// Block navigations that only add a Text Fragment.
|
|
9
|
+
//
|
|
10
|
+
// Some sites (e.g. Substack) call document.location.replace("#:~:text=...")
|
|
11
|
+
// to trigger the browser's Text Fragment highlighting. On large pages this
|
|
12
|
+
// can cause severe jank/freezes while the browser scans the document.
|
|
13
|
+
//
|
|
14
|
+
// We intercept location.replace/assign. If the navigation target differs
|
|
15
|
+
// only by hash and the hash starts with "#:~:text=", we update history
|
|
16
|
+
// without triggering a navigation.
|
|
17
|
+
const shouldBlockTextFragmentNavigation = (targetUrl) => {
|
|
18
|
+
try {
|
|
19
|
+
const current = new URL(window.location.href);
|
|
20
|
+
const next = new URL(String(targetUrl), current);
|
|
21
|
+
if (!next.hash || !next.hash.startsWith("#:~:text=")) {
|
|
22
|
+
return false;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
return (
|
|
26
|
+
current.origin === next.origin &&
|
|
27
|
+
current.pathname === next.pathname &&
|
|
28
|
+
current.search === next.search
|
|
29
|
+
);
|
|
30
|
+
} catch {
|
|
31
|
+
return false;
|
|
32
|
+
}
|
|
33
|
+
};
|
|
34
|
+
|
|
35
|
+
const patchLocationMethod = (name) => {
|
|
36
|
+
try {
|
|
37
|
+
const loc = window.location;
|
|
38
|
+
const original = loc && loc[name];
|
|
39
|
+
if (typeof original !== "function") {
|
|
40
|
+
return;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
const patched = function(url) {
|
|
44
|
+
try {
|
|
45
|
+
if (shouldBlockTextFragmentNavigation(url)) {
|
|
46
|
+
const current = new URL(window.location.href);
|
|
47
|
+
const next = new URL(String(url), current);
|
|
48
|
+
history.replaceState(history.state, document.title, next.hash);
|
|
49
|
+
return;
|
|
50
|
+
}
|
|
51
|
+
} catch {}
|
|
52
|
+
|
|
53
|
+
return original.call(window.location, url);
|
|
54
|
+
};
|
|
55
|
+
|
|
56
|
+
try {
|
|
57
|
+
patched.__pagepocketOriginal = original;
|
|
58
|
+
} catch {}
|
|
59
|
+
|
|
60
|
+
try {
|
|
61
|
+
Object.defineProperty(loc, name, { configurable: true, value: patched });
|
|
62
|
+
} catch {
|
|
63
|
+
// Some browsers don't allow redefining Location methods.
|
|
64
|
+
}
|
|
65
|
+
} catch {}
|
|
66
|
+
};
|
|
67
|
+
|
|
68
|
+
patchLocationMethod("replace");
|
|
69
|
+
patchLocationMethod("assign");
|
|
70
|
+
`
|
|
71
|
+
};
|
|
@@ -31,9 +31,33 @@ exports.replayDomRewriter = {
|
|
|
31
31
|
}
|
|
32
32
|
};
|
|
33
33
|
|
|
34
|
+
const rewritten = new WeakMap();
|
|
35
|
+
|
|
34
36
|
// Rewrite srcset values to local files only (avoid data: URLs in srcset).
|
|
35
37
|
const rewriteSrcset = (value) => {
|
|
36
38
|
if (!value) return value;
|
|
39
|
+
|
|
40
|
+
// Substack-style image/fetch URLs include commas inside the URL token
|
|
41
|
+
// (",w_40,h_40,..."). This makes the srcset invalid and browsers will
|
|
42
|
+
// parse it into garbage candidate URLs. Prefer dropping srcset and relying
|
|
43
|
+
// on the rewritten img[src].
|
|
44
|
+
try {
|
|
45
|
+
const trimmed = String(value || "").trim();
|
|
46
|
+
const hasFetchTransform = trimmed.includes("/image/fetch/");
|
|
47
|
+
const hasEncodedUrlTail = trimmed.includes("https%3A%2F%2F");
|
|
48
|
+
const hasCommaTokens =
|
|
49
|
+
trimmed.includes(",w_") ||
|
|
50
|
+
trimmed.includes(", w_") ||
|
|
51
|
+
trimmed.includes(",h_") ||
|
|
52
|
+
trimmed.includes(", h_") ||
|
|
53
|
+
trimmed.includes(",c_") ||
|
|
54
|
+
trimmed.includes(", c_");
|
|
55
|
+
|
|
56
|
+
if (hasFetchTransform && hasEncodedUrlTail && hasCommaTokens) {
|
|
57
|
+
return "";
|
|
58
|
+
}
|
|
59
|
+
} catch {}
|
|
60
|
+
|
|
37
61
|
return value.split(",").map((part) => {
|
|
38
62
|
const trimmed = part.trim();
|
|
39
63
|
if (!trimmed) return trimmed;
|
|
@@ -46,7 +70,7 @@ exports.replayDomRewriter = {
|
|
|
46
70
|
return descriptor ? localPath + " " + descriptor : localPath;
|
|
47
71
|
}
|
|
48
72
|
return trimmed;
|
|
49
|
-
}).join(",
|
|
73
|
+
}).join(",");
|
|
50
74
|
};
|
|
51
75
|
|
|
52
76
|
// Rewrite element attributes to local files or data URLs.
|
|
@@ -56,9 +80,21 @@ exports.replayDomRewriter = {
|
|
|
56
80
|
onReady(() => rewriteElement(element));
|
|
57
81
|
return;
|
|
58
82
|
}
|
|
83
|
+
const prev = rewritten.get(element);
|
|
84
|
+
const currentSrc = element.getAttribute("src");
|
|
85
|
+
const currentHref = element.getAttribute("href");
|
|
86
|
+
const currentSrcset = element.getAttribute("srcset");
|
|
87
|
+
if (
|
|
88
|
+
prev &&
|
|
89
|
+
prev.src === currentSrc &&
|
|
90
|
+
prev.href === currentHref &&
|
|
91
|
+
prev.srcset === currentSrcset
|
|
92
|
+
) {
|
|
93
|
+
return;
|
|
94
|
+
}
|
|
59
95
|
const tag = (element.tagName || "").toLowerCase();
|
|
60
96
|
if (tag === "img" || tag === "source" || tag === "video" || tag === "audio" || tag === "script" || tag === "iframe") {
|
|
61
|
-
const src =
|
|
97
|
+
const src = currentSrc;
|
|
62
98
|
if (src && !isLocalResource(src) && !src.startsWith("data:") && !src.startsWith("blob:")) {
|
|
63
99
|
const localPath = findLocalPath(src);
|
|
64
100
|
if (localPath) {
|
|
@@ -72,7 +108,7 @@ exports.replayDomRewriter = {
|
|
|
72
108
|
}
|
|
73
109
|
|
|
74
110
|
if (tag === "link") {
|
|
75
|
-
const href =
|
|
111
|
+
const href = currentHref;
|
|
76
112
|
const rel = (element.getAttribute("rel") || "").toLowerCase();
|
|
77
113
|
if (href && !isLocalResource(href) && !href.startsWith("data:") && !href.startsWith("blob:")) {
|
|
78
114
|
const localPath = findLocalPath(href);
|
|
@@ -86,10 +122,16 @@ exports.replayDomRewriter = {
|
|
|
86
122
|
}
|
|
87
123
|
}
|
|
88
124
|
|
|
89
|
-
const srcset =
|
|
125
|
+
const srcset = currentSrcset;
|
|
90
126
|
if (srcset) {
|
|
91
127
|
element.setAttribute("srcset", rewriteSrcset(srcset));
|
|
92
128
|
}
|
|
129
|
+
|
|
130
|
+
rewritten.set(element, {
|
|
131
|
+
src: element.getAttribute("src"),
|
|
132
|
+
href: element.getAttribute("href"),
|
|
133
|
+
srcset: element.getAttribute("srcset")
|
|
134
|
+
});
|
|
93
135
|
};
|
|
94
136
|
|
|
95
137
|
// Intercept DOM attribute writes to keep resources local.
|
|
@@ -275,8 +317,6 @@ exports.replayDomRewriter = {
|
|
|
275
317
|
mutation.addedNodes.forEach((node) => {
|
|
276
318
|
if (node && node.nodeType === 1) {
|
|
277
319
|
rewriteElement(node);
|
|
278
|
-
const descendants = node.querySelectorAll ? node.querySelectorAll("img,source,video,audio,script,link,iframe") : [];
|
|
279
|
-
descendants.forEach((el) => rewriteElement(el));
|
|
280
320
|
}
|
|
281
321
|
});
|
|
282
322
|
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.replayHistoryPath = void 0;
|
|
4
|
+
exports.replayHistoryPath = {
|
|
5
|
+
id: "replay-history-path",
|
|
6
|
+
stage: "replay",
|
|
7
|
+
build: () => `
|
|
8
|
+
// Ensure history/location reflects the original captured URL path.
|
|
9
|
+
//
|
|
10
|
+
// When a snapshot is served from a static server root (e.g. http://localhost:8080/index.html),
|
|
11
|
+
// SPA routers that read location.pathname will see "/".
|
|
12
|
+
// For a page captured from https://foo.com/bar/foo, the correct route should be "/bar/foo".
|
|
13
|
+
//
|
|
14
|
+
// We patch history early and replace the current URL without causing navigation.
|
|
15
|
+
try {
|
|
16
|
+
const parsed = new URL(baseUrl);
|
|
17
|
+
const desiredPath = parsed.pathname + (parsed.search || "") + (parsed.hash || "");
|
|
18
|
+
const currentPath = window.location.pathname + window.location.search + window.location.hash;
|
|
19
|
+
|
|
20
|
+
if (desiredPath && desiredPath !== currentPath) {
|
|
21
|
+
history.replaceState(history.state, "", desiredPath);
|
|
22
|
+
}
|
|
23
|
+
} catch {}
|
|
24
|
+
`
|
|
25
|
+
};
|
package/dist/path-resolver.js
CHANGED
|
@@ -4,12 +4,22 @@ exports.isDocumentType = exports.withPrefixPathResolver = exports.resolveCrossOr
|
|
|
4
4
|
const utils_1 = require("./utils");
|
|
5
5
|
const normalizePathname = (pathname) => {
|
|
6
6
|
const normalized = pathname || "/";
|
|
7
|
+
const hasTrailingSlash = normalized.endsWith("/");
|
|
7
8
|
const clean = (0, utils_1.sanitizePosixPath)(normalized);
|
|
8
|
-
const
|
|
9
|
-
if (
|
|
10
|
-
|
|
9
|
+
const parts = clean.split("/").filter(Boolean);
|
|
10
|
+
if (hasTrailingSlash) {
|
|
11
|
+
parts.push("index");
|
|
11
12
|
}
|
|
12
|
-
|
|
13
|
+
if (parts.length === 0) {
|
|
14
|
+
return "/index";
|
|
15
|
+
}
|
|
16
|
+
const rewritten = parts.map((part, index) => {
|
|
17
|
+
if (index < parts.length - 1 && part.includes(".")) {
|
|
18
|
+
return `${part}__pp_dir`;
|
|
19
|
+
}
|
|
20
|
+
return part;
|
|
21
|
+
});
|
|
22
|
+
return `/${rewritten.join("/")}`;
|
|
13
23
|
};
|
|
14
24
|
const withSuffix = (path, suffix) => {
|
|
15
25
|
const lastSlash = path.lastIndexOf("/");
|
package/dist/replay-script.js
CHANGED
|
@@ -242,9 +242,15 @@ const buildReplayScript = (apiPath, baseUrl) => {
|
|
|
242
242
|
return matchAPI({ records, byKey, baseUrl, method, url, body });
|
|
243
243
|
};
|
|
244
244
|
|
|
245
|
+
// Memoised GET lookups, keyed by the requested URL. Misses are stored as null
// so that repeated lookups of an unknown URL do not call matchAPI again.
const urlLookupCache = new Map();

// Find the recorded GET response for a URL.
//
// Returns the matching record, or null when the URL is a local resource or
// when no record matches. The result is always null (never undefined) on a
// miss, so the first lookup and later cached lookups agree.
const findByUrl = (url) => {
  if (isLocalResource(url)) {
    return null;
  }
  if (urlLookupCache.has(url)) {
    return urlLookupCache.get(url);
  }
  // Normalise a falsy matchAPI result to null before caching AND returning;
  // previously the first miss returned undefined while cached misses returned null.
  const record = matchAPI({ records, byKey, baseUrl, method: "GET", url, body: "" }) || null;
  urlLookupCache.set(url, record);
  return record;
};
|
|
249
255
|
|
|
250
256
|
const findLocalPath = () => null;
|
|
@@ -292,16 +298,28 @@ const buildReplayScript = (apiPath, baseUrl) => {
|
|
|
292
298
|
return "application/octet-stream";
|
|
293
299
|
};
|
|
294
300
|
|
|
301
|
+
// Cache of generated data URLs, keyed by record identity and then by the
// content-type / fallback-type pair used to build the URL.
//
// Keying on the record object (instead of url + body length) prevents two
// different records that share a URL, content type and body length from
// being served each other's data URL.
// NOTE(review): assumes records are not mutated after their first conversion.
const dataUrlCache = new WeakMap();

// Convert a recorded response into a data: URL.
//
// record       - recorded response; a falsy value yields "".
// fallbackType - MIME type used when the record has no content type, and for
//                the empty-body data URL.
//
// Returns a base64 data URL for records that carry a body, otherwise an
// empty data URL ("data:<type>,").
const toDataUrl = (record, fallbackType) => {
  if (!record) return "";
  const contentType = getContentType(record) || fallbackType || "application/octet-stream";

  // WeakMap keys must be objects; anything else is simply not cached.
  const cacheable = typeof record === "object" || typeof record === "function";
  // The empty-body branch depends on fallbackType, so it is part of the key.
  const cacheKey = contentType + "|" + (fallbackType || "");
  let perRecord = cacheable ? dataUrlCache.get(record) : undefined;
  if (perRecord && perRecord.has(cacheKey)) {
    return perRecord.get(cacheKey);
  }

  let dataUrl;
  if (record.responseEncoding === "base64" && record.responseBodyBase64) {
    dataUrl = "data:" + contentType + ";base64," + record.responseBodyBase64;
  } else if (record.responseBody) {
    dataUrl = "data:" + contentType + ";base64," + textToBase64(record.responseBody);
  } else {
    dataUrl = "data:" + (fallbackType || "application/octet-stream") + ",";
  }

  if (cacheable) {
    if (!perRecord) {
      perRecord = new Map();
      dataUrlCache.set(record, perRecord);
    }
    perRecord.set(cacheKey, dataUrl);
  }
  return dataUrl;
};
|
|
306
324
|
|
|
307
325
|
const responseFromRecord = (record) => {
|
package/dist/rewrite-links.js
CHANGED
|
@@ -59,18 +59,83 @@ const resolveUrlValue = (value, baseUrl, resolve) => {
|
|
|
59
59
|
return null;
|
|
60
60
|
}
|
|
61
61
|
};
|
|
62
|
+
/**
 * Detects srcset values that cannot be rewritten safely and should be dropped.
 *
 * Some sites (notably Substack) emit image transform URLs that contain commas
 * inside the URL itself (e.g. "/image/fetch/...,w_40,h_40,.../https%3A...").
 * In the HTML srcset grammar commas separate candidates, so such values are
 * easily parsed into garbage URLs like "https%3A%2F%2F...png". For offline
 * snapshots it is better to drop srcset entirely and rely on the
 * already-rewritten img[src].
 *
 * @param {string} value Raw srcset attribute value.
 * @returns {boolean} True when the value contains an "/image/fetch/" transform,
 *   a percent-encoded absolute URL tail, and comma-separated transform tokens.
 */
const isUnsafeSrcsetValue = (value) => {
    // Tolerate null/undefined/non-string input instead of throwing on .trim().
    const trimmed = String(value ?? "").trim();
    if (!trimmed) {
        return false;
    }
    const hasFetchTransform = trimmed.includes("/image/fetch/");
    // Percent-encoding hex digits are case-insensitive, and the embedded URL
    // may be http as well as https.
    const hasEncodedUrlTail = /https?%3A%2F%2F/i.test(trimmed);
    // Matches ",w_", ", w_", ",h_", ", h_", ",c_" and ", c_".
    const hasCommaTokens = /, ?[whc]_/.test(trimmed);
    return hasFetchTransform && hasEncodedUrlTail && hasCommaTokens;
};
|
|
86
|
+
/**
 * Tells whether a token is a srcset descriptor.
 *
 * @param {string} token Whitespace-delimited token from a srcset candidate.
 * @returns {boolean} True for density descriptors ("1x", "1.5x") and width
 *   descriptors ("320w"), case-insensitively.
 */
const isDescriptorToken = (token) => {
    const trimmed = token.trim();
    if (!trimmed) {
        return false;
    }
    return /^\d+(\.\d+)?x$/i.test(trimmed) || /^\d+w$/i.test(trimmed);
};

/**
 * Splits a srcset string into raw candidate strings.
 *
 * Following the HTML "parse a srcset attribute" algorithm, a comma only
 * separates candidates when it terminates the URL token (it is followed by
 * whitespace or the end of input) or when it appears after the URL token has
 * already ended (in the descriptor part). A comma in the middle of a
 * non-whitespace run is part of the URL, which keeps CDN transform strings
 * ("w_40,h_40") and data: URLs ("base64,AAAA") intact.
 *
 * @param {string} input Raw srcset value.
 * @returns {string[]} Trimmed, non-empty candidate strings.
 */
const splitSrcsetCandidates = (input) => {
    const whitespace = /\s/;
    const segments = [];
    let segmentStart = 0;
    let urlStarted = false; // a non-whitespace, non-separator char was seen
    let urlEnded = false; // whitespace was seen after the URL token started
    for (let index = 0; index < input.length; index += 1) {
        const char = input[index];
        if (whitespace.test(char)) {
            if (urlStarted) {
                urlEnded = true;
            }
            continue;
        }
        if (char !== ",") {
            urlStarted = true;
            continue;
        }
        const next = input[index + 1];
        const endsUrlToken = next === undefined || whitespace.test(next);
        if (!urlStarted || urlEnded || endsUrlToken) {
            segments.push(input.slice(segmentStart, index));
            segmentStart = index + 1;
            urlStarted = false;
            urlEnded = false;
        }
    }
    segments.push(input.slice(segmentStart));
    return segments.map((segment) => segment.trim()).filter(Boolean);
};

/**
 * Minimal srcset parser.
 *
 * Each candidate is "<url> [descriptor]". The descriptor is located from the
 * *end* of the candidate so that URLs containing spaces are not truncated.
 *
 * @param {string} input Raw srcset value.
 * @returns {{url: string, descriptor?: string}[]} Parsed candidates, in order.
 */
const parseSrcset = (input) => {
    return splitSrcsetCandidates(input).map((candidate) => {
        const tokens = candidate.split(/\s+/).filter(Boolean);
        const last = tokens[tokens.length - 1] ?? "";
        if (tokens.length >= 2 && isDescriptorToken(last)) {
            const url = candidate.slice(0, candidate.lastIndexOf(last)).trim();
            return { url, descriptor: last };
        }
        return { url: candidate };
    });
};

/**
 * Serialises srcset candidates back into an attribute value.
 *
 * Candidates are joined with ", " (comma + space): a bare "," between two
 * candidates is read by browsers as part of the preceding URL token whenever
 * that candidate has no descriptor. Candidates with an empty URL are dropped.
 *
 * @param {{url: string, descriptor?: string}[]} candidates Candidates to write.
 * @returns {string} srcset attribute value.
 */
const stringifySrcset = (candidates) => {
    return candidates
        .map((candidate) => {
            const url = (candidate.url ?? "").trim();
            if (!url) {
                return "";
            }
            const descriptor = (candidate.descriptor ?? "").trim();
            return descriptor ? `${url} ${descriptor}` : url;
        })
        .filter(Boolean)
        .join(", ");
};
|
|
62
129
|
const rewriteSrcsetValue = (value, baseUrl, resolve) => {
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
const resolved = resolveUrlValue(
|
|
69
|
-
|
|
70
|
-
return part;
|
|
71
|
-
return descriptor ? `${resolved} ${descriptor}` : resolved;
|
|
130
|
+
if (isUnsafeSrcsetValue(value)) {
|
|
131
|
+
return "";
|
|
132
|
+
}
|
|
133
|
+
const candidates = parseSrcset(value);
|
|
134
|
+
const rewritten = candidates.map((c) => {
|
|
135
|
+
const resolved = resolveUrlValue(c.url, baseUrl, resolve);
|
|
136
|
+
return { url: resolved ?? c.url, descriptor: c.descriptor };
|
|
72
137
|
});
|
|
73
|
-
return rewritten
|
|
138
|
+
return stringifySrcset(rewritten);
|
|
74
139
|
};
|
|
75
140
|
const rewriteMetaRefresh = (content, baseUrl, resolve) => {
|
|
76
141
|
const parts = content.split(";");
|
|
@@ -80,7 +145,13 @@ const rewriteMetaRefresh = (content, baseUrl, resolve) => {
|
|
|
80
145
|
if (urlPartIndex === -1)
|
|
81
146
|
return content;
|
|
82
147
|
const urlPart = parts[urlPartIndex];
|
|
83
|
-
|
|
148
|
+
let rawUrl = urlPart.split("=").slice(1).join("=").trim();
|
|
149
|
+
// Some pages quote the URL value (url="/next" or url='/next').
|
|
150
|
+
// Strip a single pair of surrounding quotes to improve rewrite coverage.
|
|
151
|
+
if ((rawUrl.startsWith('"') && rawUrl.endsWith('"')) ||
|
|
152
|
+
(rawUrl.startsWith("'") && rawUrl.endsWith("'"))) {
|
|
153
|
+
rawUrl = rawUrl.slice(1, -1).trim();
|
|
154
|
+
}
|
|
84
155
|
const resolved = resolveUrlValue(rawUrl, baseUrl, resolve);
|
|
85
156
|
if (!resolved)
|
|
86
157
|
return content;
|
|
@@ -89,6 +160,18 @@ const rewriteMetaRefresh = (content, baseUrl, resolve) => {
|
|
|
89
160
|
nextParts[urlPartIndex] = next;
|
|
90
161
|
return nextParts.join(";");
|
|
91
162
|
};
|
|
163
|
+
/**
 * Decides whether a <link> element's href should be rewritten to a local path.
 *
 * Only link relations that load a resource are rewritten. Navigational / SEO
 * links and connection hints (canonical, preconnect, dns-prefetch, ...) are
 * left untouched. The rel attribute is matched per whitespace-separated
 * token, so "dns-prefetch" is no longer mistaken for "prefetch".
 *
 * @param {{attr: (name: string) => (string|undefined)}} $element Wrapped
 *   element exposing attr(name); in SOURCE this is a $(element) wrapper.
 * @returns {boolean} True when the rel is missing/empty or names a
 *   resource-loading relation (stylesheet, preload, modulepreload, prefetch,
 *   or any icon variant such as "apple-touch-icon").
 */
const shouldRewriteLinkHref = ($element) => {
    const rel = ($element.attr("rel") || "").trim().toLowerCase();
    if (!rel) {
        return true;
    }
    const resourceRels = new Set(["stylesheet", "preload", "modulepreload", "prefetch"]);
    return rel
        .split(/\s+/)
        .some((token) => resourceRels.has(token) || token.includes("icon"));
};
|
|
92
175
|
const rewriteJsText = async (source, resolve, baseUrl) => {
|
|
93
176
|
const replaceSpecifier = async (specifier) => {
|
|
94
177
|
const trimmed = specifier.trim();
|
|
@@ -171,7 +254,19 @@ const rewriteEntryHtml = async (input) => {
|
|
|
171
254
|
rewriteAttr("iframe[src]", "src");
|
|
172
255
|
rewriteAttr("embed[src]", "src");
|
|
173
256
|
rewriteAttr("object[data]", "data");
|
|
174
|
-
|
|
257
|
+
$("link[href]").each((_, element) => {
|
|
258
|
+
const el = $(element);
|
|
259
|
+
if (!shouldRewriteLinkHref(el)) {
|
|
260
|
+
return;
|
|
261
|
+
}
|
|
262
|
+
const value = el.attr("href");
|
|
263
|
+
if (!value)
|
|
264
|
+
return;
|
|
265
|
+
const resolved = resolveUrlValue(value, baseUrl, resolve);
|
|
266
|
+
if (resolved) {
|
|
267
|
+
el.attr("href", resolved);
|
|
268
|
+
}
|
|
269
|
+
});
|
|
175
270
|
rewriteAttr("[poster]", "poster");
|
|
176
271
|
rewriteDataAttrs("[data-src]", "data-src");
|
|
177
272
|
rewriteDataAttrs("[data-href]", "data-href");
|
|
@@ -184,6 +279,13 @@ const rewriteEntryHtml = async (input) => {
|
|
|
184
279
|
const rewritten = rewriteSrcsetValue(value, baseUrl, resolve);
|
|
185
280
|
$(element).attr("srcset", rewritten);
|
|
186
281
|
});
|
|
282
|
+
$("link[imagesrcset]").each((_, element) => {
|
|
283
|
+
const value = $(element).attr("imagesrcset");
|
|
284
|
+
if (!value)
|
|
285
|
+
return;
|
|
286
|
+
const rewritten = rewriteSrcsetValue(value, baseUrl, resolve);
|
|
287
|
+
$(element).attr("imagesrcset", rewritten);
|
|
288
|
+
});
|
|
187
289
|
$("meta[http-equiv]").each((_, element) => {
|
|
188
290
|
const httpEquiv = ($(element).attr("http-equiv") || "").toLowerCase();
|
|
189
291
|
if (httpEquiv !== "refresh")
|
package/dist/snapshot-builder.js
CHANGED
|
@@ -6,6 +6,18 @@ const path_resolver_1 = require("./path-resolver");
|
|
|
6
6
|
const rewrite_links_1 = require("./rewrite-links");
|
|
7
7
|
const snapshot_1 = require("./snapshot");
|
|
8
8
|
const utils_1 = require("./utils");
|
|
9
|
+
const escapePercentForStaticServers = (value) => {
|
|
10
|
+
// Many static servers decode percent-encoding in the request path before
|
|
11
|
+
// resolving it to a filesystem path.
|
|
12
|
+
//
|
|
13
|
+
// Our snapshots can contain literal "%2F" sequences in filenames (e.g.
|
|
14
|
+
// Substack image URLs embedded into a path segment). When a server decodes
|
|
15
|
+
// "%2F" to "/", it changes the path structure and causes 404s.
|
|
16
|
+
//
|
|
17
|
+
// Escaping "%" to "%25" makes the request decode back to the original
|
|
18
|
+
// filename on disk.
|
|
19
|
+
return value.split("%").join("%25");
|
|
20
|
+
};
|
|
9
21
|
const streamToUint8Array = async (stream) => {
|
|
10
22
|
const reader = stream.getReader();
|
|
11
23
|
const chunks = [];
|
|
@@ -147,7 +159,18 @@ const buildSnapshot = async (input) => {
|
|
|
147
159
|
});
|
|
148
160
|
urlToPath.set(resource.request.url, path);
|
|
149
161
|
}
|
|
150
|
-
const resolve = (absoluteUrl) =>
|
|
162
|
+
const resolve = (absoluteUrl) => {
|
|
163
|
+
const resolved = urlToPath.get(absoluteUrl);
|
|
164
|
+
if (!resolved) {
|
|
165
|
+
return null;
|
|
166
|
+
}
|
|
167
|
+
// Only escape snapshot-local paths. (Defensive: resolved should always be
|
|
168
|
+
// a path, but avoid breaking any unexpected absolute URLs.)
|
|
169
|
+
if (resolved.includes("://")) {
|
|
170
|
+
return resolved;
|
|
171
|
+
}
|
|
172
|
+
return escapePercentForStaticServers(resolved);
|
|
173
|
+
};
|
|
151
174
|
const apiPath = (0, utils_1.ensureLeadingSlash)(multiDoc ? `${(0, utils_1.sanitizePosixPath)(docDir)}/api.json` : "/api.json");
|
|
152
175
|
for (const resource of group.resources) {
|
|
153
176
|
if (resource.request.resourceType === "document") {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pagepocket/lib",
|
|
3
|
-
"version": "0.6.
|
|
3
|
+
"version": "0.6.3",
|
|
4
4
|
"description": "Library for rewriting HTML snapshots and inlining local resources.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -12,8 +12,8 @@
|
|
|
12
12
|
"license": "ISC",
|
|
13
13
|
"dependencies": {
|
|
14
14
|
"cheerio": "^1.0.0-rc.12",
|
|
15
|
-
"@pagepocket/interceptor": "0.6.
|
|
16
|
-
"@pagepocket/uni-fs": "0.6.
|
|
15
|
+
"@pagepocket/interceptor": "0.6.3",
|
|
16
|
+
"@pagepocket/uni-fs": "0.6.3"
|
|
17
17
|
},
|
|
18
18
|
"devDependencies": {
|
|
19
19
|
"@playwright/test": "^1.50.1",
|