@pagepocket/lib 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -6
- package/dist/build-snapshot-from-bundle.d.ts +23 -0
- package/dist/build-snapshot-from-bundle.js +68 -0
- package/dist/builtin-blacklist.js +3 -6
- package/dist/bundle/from-network-store.d.ts +10 -0
- package/dist/bundle/from-network-store.js +26 -0
- package/dist/bundle/types.d.ts +32 -0
- package/dist/bundle/types.js +2 -0
- package/dist/capture/index.d.ts +14 -0
- package/dist/capture/index.js +86 -0
- package/dist/capture/memory-content-store.d.ts +4 -0
- package/dist/capture/memory-content-store.js +42 -0
- package/dist/capture/types.d.ts +61 -0
- package/dist/capture/types.js +2 -0
- package/dist/content-store.js +3 -8
- package/dist/content-type.d.ts +1 -1
- package/dist/content-type.js +2 -28
- package/dist/core/_impl/completion.d.ts +4 -0
- package/dist/core/_impl/completion.js +29 -0
- package/dist/core/_impl/content-store.d.ts +21 -0
- package/dist/core/_impl/content-store.js +91 -0
- package/dist/core/_impl/debug.d.ts +1 -0
- package/dist/core/_impl/debug.js +16 -0
- package/dist/core/_impl/inflight-tracker.d.ts +19 -0
- package/dist/core/_impl/inflight-tracker.js +48 -0
- package/dist/core/_impl/pagepocket.d.ts +27 -0
- package/dist/core/_impl/pagepocket.js +155 -0
- package/dist/core/capture/_impl/memory-content-store.d.ts +4 -0
- package/dist/core/capture/_impl/memory-content-store.js +42 -0
- package/dist/core/capture/_impl/types.d.ts +61 -0
- package/dist/core/capture/_impl/types.js +2 -0
- package/dist/core/capture/internal/memory-content-store.d.ts +4 -0
- package/dist/core/capture/internal/memory-content-store.js +42 -0
- package/dist/core/capture/internal/types.d.ts +61 -0
- package/dist/core/capture/internal/types.js +2 -0
- package/dist/core/capture/memory-content-store.d.ts +4 -0
- package/dist/core/capture/memory-content-store.js +38 -0
- package/dist/core/capture/types.d.ts +61 -0
- package/dist/core/capture/types.js +1 -0
- package/dist/core/completion.d.ts +4 -0
- package/dist/core/completion.js +23 -0
- package/dist/core/content-store.d.ts +21 -0
- package/dist/core/content-store.js +54 -0
- package/dist/core/debug.d.ts +1 -0
- package/dist/core/debug.js +12 -0
- package/dist/core/file-tree-merge.d.ts +2 -0
- package/dist/core/file-tree-merge.js +27 -0
- package/dist/core/file-tree.d.ts +36 -0
- package/dist/core/file-tree.js +1 -0
- package/dist/core/inflight-tracker.d.ts +19 -0
- package/dist/core/inflight-tracker.js +44 -0
- package/dist/core/internal/completion.d.ts +4 -0
- package/dist/core/internal/completion.js +29 -0
- package/dist/core/internal/content-store.d.ts +21 -0
- package/dist/core/internal/content-store.js +91 -0
- package/dist/core/internal/debug.d.ts +1 -0
- package/dist/core/internal/debug.js +16 -0
- package/dist/core/internal/inflight-tracker.d.ts +19 -0
- package/dist/core/internal/inflight-tracker.js +48 -0
- package/dist/core/internal/pagepocket.d.ts +27 -0
- package/dist/core/internal/pagepocket.js +155 -0
- package/dist/core/pagepocket.d.ts +38 -0
- package/dist/core/pagepocket.js +57 -0
- package/dist/core/plugin/_impl/context.d.ts +47 -0
- package/dist/core/plugin/_impl/context.js +142 -0
- package/dist/core/plugin/_impl/runner.d.ts +12 -0
- package/dist/core/plugin/_impl/runner.js +232 -0
- package/dist/core/plugin/_impl/types.d.ts +108 -0
- package/dist/core/plugin/_impl/types.js +2 -0
- package/dist/core/plugin/context.d.ts +47 -0
- package/dist/core/plugin/context.js +205 -0
- package/dist/core/plugin/internal/context.d.ts +47 -0
- package/dist/core/plugin/internal/context.js +142 -0
- package/dist/core/plugin/internal/runner.d.ts +12 -0
- package/dist/core/plugin/internal/runner.js +232 -0
- package/dist/core/plugin/internal/types.d.ts +108 -0
- package/dist/core/plugin/internal/types.js +2 -0
- package/dist/core/plugin/runner-utils.d.ts +9 -0
- package/dist/core/plugin/runner-utils.js +29 -0
- package/dist/core/plugin/runner.d.ts +12 -0
- package/dist/core/plugin/runner.js +118 -0
- package/dist/core/plugin/types.d.ts +117 -0
- package/dist/core/plugin/types.js +1 -0
- package/dist/core/runtime/types.d.ts +14 -0
- package/dist/core/runtime/types.js +2 -0
- package/dist/css-rewrite.js +1 -5
- package/dist/debug.d.ts +0 -1
- package/dist/debug.js +3 -5
- package/dist/files/types.d.ts +41 -0
- package/dist/files/types.js +2 -0
- package/dist/hack-html.js +20 -13
- package/dist/hackers/index.d.ts +1 -1
- package/dist/hackers/index.js +24 -27
- package/dist/hackers/preload-fetch.d.ts +1 -1
- package/dist/hackers/preload-fetch.js +1 -4
- package/dist/hackers/preload-xhr.d.ts +1 -1
- package/dist/hackers/preload-xhr.js +1 -4
- package/dist/hackers/replay-beacon.d.ts +1 -1
- package/dist/hackers/replay-beacon.js +1 -4
- package/dist/hackers/replay-block-text-fragment.d.ts +1 -1
- package/dist/hackers/replay-block-text-fragment.js +1 -4
- package/dist/hackers/replay-css-proxy.d.ts +1 -1
- package/dist/hackers/replay-css-proxy.js +9 -12
- package/dist/hackers/replay-dom-rewrite.d.ts +1 -1
- package/dist/hackers/replay-dom-rewrite.js +165 -154
- package/dist/hackers/replay-eventsource.d.ts +1 -1
- package/dist/hackers/replay-eventsource.js +1 -4
- package/dist/hackers/replay-fetch.d.ts +1 -1
- package/dist/hackers/replay-fetch.js +1 -4
- package/dist/hackers/replay-history-path.d.ts +1 -1
- package/dist/hackers/replay-history-path.js +1 -4
- package/dist/hackers/replay-svg-image.d.ts +1 -1
- package/dist/hackers/replay-svg-image.js +1 -4
- package/dist/hackers/replay-websocket.d.ts +1 -1
- package/dist/hackers/replay-websocket.js +1 -4
- package/dist/hackers/replay-xhr.d.ts +1 -1
- package/dist/hackers/replay-xhr.js +1 -4
- package/dist/hackers/types.js +1 -2
- package/dist/index.d.ts +29 -13
- package/dist/index.js +23 -44
- package/dist/kind-map.d.ts +68 -0
- package/dist/kind-map.js +58 -0
- package/dist/network-store.js +12 -1
- package/dist/pagepocket.d.ts +19 -4
- package/dist/pagepocket.js +36 -102
- package/dist/path-resolver.d.ts +1 -2
- package/dist/path-resolver.js +9 -16
- package/dist/plugin/builtins/build-snapshot-plugin.d.ts +5 -0
- package/dist/plugin/builtins/build-snapshot-plugin.js +84 -0
- package/dist/plugin/builtins/replace-elements-plugin.d.ts +8 -0
- package/dist/plugin/builtins/replace-elements-plugin.js +13 -0
- package/dist/plugin/builtins/to-directory-plugin.d.ts +7 -0
- package/dist/plugin/builtins/to-directory-plugin.js +20 -0
- package/dist/plugin/builtins/to-zip-plugin.d.ts +5 -0
- package/dist/plugin/builtins/to-zip-plugin.js +19 -0
- package/dist/plugin/context.d.ts +47 -0
- package/dist/plugin/context.js +142 -0
- package/dist/plugin/runner.d.ts +12 -0
- package/dist/plugin/runner.js +232 -0
- package/dist/plugin/types.d.ts +108 -0
- package/dist/plugin/types.js +2 -0
- package/dist/plugins/build-files-from-capture.d.ts +5 -0
- package/dist/plugins/build-files-from-capture.js +85 -0
- package/dist/plugins/build-warc.d.ts +5 -0
- package/dist/plugins/build-warc.js +225 -0
- package/dist/plugins/builtins/manifest.d.ts +2 -0
- package/dist/plugins/builtins/manifest.js +42 -0
- package/dist/plugins/builtins/snapshot-directory.d.ts +2 -0
- package/dist/plugins/builtins/snapshot-directory.js +24 -0
- package/dist/plugins/builtins/snapshot-zip.d.ts +2 -0
- package/dist/plugins/builtins/snapshot-zip.js +25 -0
- package/dist/plugins/capture-http-lighterceptor.d.ts +5 -0
- package/dist/plugins/capture-http-lighterceptor.js +85 -0
- package/dist/plugins/capture-http-puppeteer.d.ts +5 -0
- package/dist/plugins/capture-http-puppeteer.js +85 -0
- package/dist/plugins/host.d.ts +37 -0
- package/dist/plugins/host.js +105 -0
- package/dist/plugins/index.d.ts +6 -0
- package/dist/plugins/index.js +11 -0
- package/dist/plugins/ordering.d.ts +2 -0
- package/dist/plugins/ordering.js +19 -0
- package/dist/plugins/types.d.ts +51 -0
- package/dist/plugins/types.js +2 -0
- package/dist/preload.js +3 -7
- package/dist/replace-elements/actions.d.ts +5 -0
- package/dist/replace-elements/actions.js +86 -0
- package/dist/replace-elements/match.d.ts +5 -0
- package/dist/replace-elements/match.js +46 -0
- package/dist/replace-elements/normalize.d.ts +21 -0
- package/dist/replace-elements/normalize.js +50 -0
- package/dist/replace-elements.d.ts +1 -1
- package/dist/replace-elements.js +5 -185
- package/dist/replay/match-api.d.ts +10 -0
- package/dist/replay/match-api.js +162 -0
- package/dist/replay/templates/match-api-source.d.ts +1 -0
- package/dist/replay/templates/match-api-source.js +137 -0
- package/dist/replay/templates/replay-script-template.d.ts +5 -0
- package/dist/replay/templates/replay-script-template.js +337 -0
- package/dist/replay/templates/resource-proxy-script.d.ts +1 -0
- package/dist/replay/templates/resource-proxy-script.js +274 -0
- package/dist/replay-script.d.ts +3 -10
- package/dist/replay-script.js +11 -625
- package/dist/resource-filter.d.ts +1 -1
- package/dist/resource-filter.js +1 -5
- package/dist/resource-proxy/escape-percent.d.ts +1 -0
- package/dist/resource-proxy/escape-percent.js +12 -0
- package/dist/resource-proxy/multimap.d.ts +3 -0
- package/dist/resource-proxy/multimap.js +18 -0
- package/dist/resource-proxy/pathname-variants.d.ts +3 -0
- package/dist/resource-proxy/pathname-variants.js +54 -0
- package/dist/resource-proxy.d.ts +4 -2
- package/dist/resource-proxy.js +48 -117
- package/dist/resources.js +4 -42
- package/dist/rewrite-links/js-imports.d.ts +3 -0
- package/dist/rewrite-links/js-imports.js +56 -0
- package/dist/rewrite-links/link-rel.d.ts +2 -0
- package/dist/rewrite-links/link-rel.js +10 -0
- package/dist/rewrite-links/meta-refresh.d.ts +3 -0
- package/dist/rewrite-links/meta-refresh.js +22 -0
- package/dist/rewrite-links/skip.d.ts +1 -0
- package/dist/rewrite-links/skip.js +10 -0
- package/dist/rewrite-links/srcset.d.ts +3 -0
- package/dist/rewrite-links/srcset.js +63 -0
- package/dist/rewrite-links/url-resolve.d.ts +3 -0
- package/dist/rewrite-links/url-resolve.js +13 -0
- package/dist/rewrite-links.d.ts +3 -3
- package/dist/rewrite-links.js +31 -240
- package/dist/snapshot-builder/api.d.ts +3 -0
- package/dist/snapshot-builder/api.js +6 -0
- package/dist/snapshot-builder/build-snapshot.d.ts +3 -0
- package/dist/snapshot-builder/build-snapshot.js +138 -0
- package/dist/snapshot-builder/capture-index/index-capture.d.ts +13 -0
- package/dist/snapshot-builder/capture-index/index-capture.js +168 -0
- package/dist/snapshot-builder/capture-index/index.d.ts +2 -0
- package/dist/snapshot-builder/capture-index/index.js +1 -0
- package/dist/snapshot-builder/capture-index/types.d.ts +12 -0
- package/dist/snapshot-builder/capture-index/types.js +1 -0
- package/dist/snapshot-builder/capture-index.d.ts +12 -0
- package/dist/snapshot-builder/capture-index.js +173 -0
- package/dist/snapshot-builder/emit-document.d.ts +24 -0
- package/dist/snapshot-builder/emit-document.js +50 -0
- package/dist/snapshot-builder/grouping.d.ts +8 -0
- package/dist/snapshot-builder/grouping.js +87 -0
- package/dist/snapshot-builder/http.d.ts +6 -0
- package/dist/snapshot-builder/http.js +28 -0
- package/dist/snapshot-builder/index.d.ts +4 -0
- package/dist/snapshot-builder/index.js +2 -0
- package/dist/snapshot-builder/path-map.d.ts +3 -0
- package/dist/snapshot-builder/path-map.js +35 -0
- package/dist/snapshot-builder/resources-path.d.ts +23 -0
- package/dist/snapshot-builder/resources-path.js +47 -0
- package/dist/snapshot-builder/rewrite-resource.d.ts +18 -0
- package/dist/snapshot-builder/rewrite-resource.js +52 -0
- package/dist/snapshot-builder/types.d.ts +37 -0
- package/dist/snapshot-builder/types.js +2 -0
- package/dist/snapshot-builder.d.ts +12 -8
- package/dist/snapshot-builder.js +252 -27
- package/dist/types.d.ts +122 -78
- package/dist/types.js +4 -2
- package/dist/units/contracts-bridge.d.ts +76 -0
- package/dist/units/contracts-bridge.js +6 -0
- package/dist/units/index.d.ts +4 -0
- package/dist/units/index.js +2 -0
- package/dist/units/runner.d.ts +11 -0
- package/dist/units/runner.js +270 -0
- package/dist/units/types.d.ts +39 -0
- package/dist/units/types.js +1 -0
- package/dist/utils/streams.d.ts +2 -0
- package/dist/utils/streams.js +29 -0
- package/dist/utils.d.ts +35 -1
- package/dist/utils.js +107 -29
- package/dist/v3/contracts-bridge.d.ts +69 -0
- package/dist/v3/contracts-bridge.js +5 -0
- package/dist/v3/index.d.ts +4 -0
- package/dist/v3/index.js +2 -0
- package/dist/v3/runner.d.ts +20 -0
- package/dist/v3/runner.js +245 -0
- package/dist/v3/types.d.ts +39 -0
- package/dist/v3/types.js +1 -0
- package/dist/writers.js +3 -1
- package/package.json +11 -3
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
 * Record `value` under `key` in a multimap.
 *
 * An entry holds the bare value while a key has exactly one value, and is
 * promoted to an array when a second value arrives; further values are pushed
 * onto that same array in place.
 *
 * Presence is decided with `map.has`, so a falsy first value (`0`, `""`,
 * `false`) is kept and promoted instead of being overwritten by the next one.
 *
 * Note: an entry that is already an array is always treated as the value
 * list, so values that are themselves arrays cannot be distinguished from it.
 *
 * @param map   Map whose entries are a single value or an array of values.
 * @param key   Key to record the value under.
 * @param value Value to add.
 */
export const addMulti = (map, key, value) => {
  if (!map.has(key)) {
    // First value for this key: store it bare, without an array wrapper.
    map.set(key, value);
    return;
  }
  const existing = map.get(key);
  if (Array.isArray(existing)) {
    existing.push(value);
    return;
  }
  // Second value for this key: promote the entry to an array.
  map.set(key, [existing, value]);
};
|
|
13
|
+
/**
 * Normalise a multimap entry to an array.
 *
 * - `undefined` / `null` (e.g. a missing key) -> `[]`
 * - an array -> that same array instance (not a copy)
 * - anything else -> a one-element array
 *
 * Only nullish input maps to the empty array, so a stored falsy value such as
 * `0` or `""` is returned as a one-element array instead of being dropped.
 *
 * @param value Entry as returned by `Map.prototype.get` on a multimap.
 * @returns The values of the entry as an array.
 */
export const toArray = (value) => {
  // `== null` matches both undefined and null.
  if (value == null) {
    return [];
  }
  return Array.isArray(value) ? value : [value];
};
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import { stripHash, stripTrailingSlash } from "@pagepocket/shared";
|
|
2
|
+
/**
 * Loose guard used when detecting an absolute URL embedded in a pathname:
 * reports whether a path segment plausibly is a host.
 *
 * A segment qualifies when, ignoring a trailing `:port`, it is `localhost`
 * (compared case-insensitively) or contains a dot. The port is ignored so
 * that `localhost:3000` is recognised just like `localhost`.
 *
 * @param value Path segment that follows `http://` or `https://`.
 * @returns `true` when the segment looks like a hostname.
 */
const isLikelyHostname = (value) => {
  if (!value) {
    return false;
  }
  // Drop a trailing ":port" (digits only) before inspecting the name.
  const hostOnly = value.replace(/:\d*$/, "");
  if (hostOnly.toLowerCase() === "localhost") {
    return true;
  }
  return hostOnly.includes(".");
};
|
|
9
|
+
/**
 * Build an alternate pathname in which an embedded absolute URL tail
 * (`.../https://host/a.png`) is collapsed into a single
 * `encodeURIComponent`-encoded segment (`.../https%3A%2F%2Fhost%2Fa.png`).
 *
 * The first `http:` / `https:` segment that is followed by an empty segment
 * (the `//`) and a plausible hostname marks the start of the tail; everything
 * from there to the end of the pathname is encoded as one segment.
 *
 * @param pathname Pathname to inspect; falsy input is treated as "".
 * @returns The rebuilt pathname (always starting with "/"), or `null` when no
 *          embedded absolute URL is found.
 */
const encodeEmbeddedUrlTailIfPresent = (pathname) => {
  const text = String(pathname || "");
  // Cheap pre-check: an embedded URL always shows up as "/http...".
  if (!text.includes("/http")) {
    return null;
  }
  const segments = text.split("/");
  // "a/https://example.com/b.png" splits into
  // ["a", "https:", "", "example.com", "b.png"].
  const schemeAt = segments.findIndex((segment, at) => (segment === "http:" || segment === "https:") &&
    segments[at + 1] === "" &&
    isLikelyHostname(segments[at + 2] || ""));
  if (schemeAt === -1) {
    return null;
  }
  const tail = `${segments[schemeAt]}//${segments.slice(schemeAt + 2).join("/")}`;
  const head = segments.slice(0, schemeAt);
  const joined = [...head, encodeURIComponent(tail)].join("/") || "/";
  return joined.startsWith("/") ? joined : `/${joined}`;
};
|
|
33
|
+
/**
 * List the distinct, non-empty pathname spellings to try for `pathname`.
 *
 * Order of the result: the pathname itself, the pathname passed through
 * `stripTrailingSlash`, and - when an embedded absolute URL tail is detected
 * and its encoded form differs from the input - that encoded form followed by
 * its `stripTrailingSlash` result. Duplicates and falsy values are omitted.
 *
 * @param pathname Pathname to derive variants from.
 * @returns Array of unique variants in the order described above.
 */
export const makePathnameVariants = (pathname) => {
  const candidates = [pathname, stripTrailingSlash(pathname)];
  const collapsed = encodeEmbeddedUrlTailIfPresent(pathname);
  if (collapsed && collapsed !== pathname) {
    candidates.push(collapsed, stripTrailingSlash(collapsed));
  }
  // A Set keeps first-insertion order, so de-duplication preserves priority.
  return [...new Set(candidates.filter(Boolean))];
};
|
|
49
|
+
/**
 * Return the last non-empty `/`-separated segment of a path.
 *
 * Anything from the first `?` or `#` onward is ignored, so neither a query
 * string nor a fragment can leak into the basename. Trailing slashes are
 * skipped (`"/a/b/"` -> `"b"`).
 *
 * @param pathname Path, optionally followed by a query string and/or fragment.
 * @returns The final segment, or "" when the path has no non-empty segment.
 */
export const getBasename = (pathname) => {
  const clean = pathname.split(/[?#]/, 1)[0] || "";
  const parts = clean.split("/").filter(Boolean);
  return parts[parts.length - 1] || "";
};
|
|
54
|
+
/**
 * Thin wrapper that forwards a URL string to `stripHash` from
 * `@pagepocket/shared` and returns its result unchanged.
 *
 * @param url URL string to pass to `stripHash`.
 * @returns Whatever `stripHash(url)` returns.
 */
export const stripHashFromUrlString = (url) => {
  return stripHash(url);
};
|
package/dist/resource-proxy.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import type
|
|
1
|
+
import { type MultiMap } from "./resource-proxy/multimap.js";
|
|
2
|
+
import type { ResourceType } from "./types.js";
|
|
2
3
|
export type ResourcesPathSnapshotItem = {
|
|
3
4
|
url: string;
|
|
4
5
|
path: string;
|
|
@@ -16,10 +17,11 @@ type IndexedItem = ResourcesPathSnapshotItem & {
|
|
|
16
17
|
pathname: string;
|
|
17
18
|
pathnameWithSearch: string;
|
|
18
19
|
basename: string;
|
|
20
|
+
canonicalUrl: string;
|
|
19
21
|
};
|
|
20
|
-
type MultiMap<K, V> = Map<K, V | V[]>;
|
|
21
22
|
export type ResourceProxyIndex = {
|
|
22
23
|
byExactUrl: Map<string, IndexedItem>;
|
|
24
|
+
byCanonicalUrl: Map<string, IndexedItem>;
|
|
23
25
|
byPathnameWithSearch: MultiMap<string, IndexedItem>;
|
|
24
26
|
byPathname: MultiMap<string, IndexedItem>;
|
|
25
27
|
byBasename: MultiMap<string, IndexedItem>;
|
package/dist/resource-proxy.js
CHANGED
|
@@ -1,113 +1,15 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
}
|
|
14
|
-
map.set(key, [existing, value]);
|
|
15
|
-
};
|
|
16
|
-
const toArray = (value) => {
|
|
17
|
-
if (!value) {
|
|
18
|
-
return [];
|
|
19
|
-
}
|
|
20
|
-
return Array.isArray(value) ? value : [value];
|
|
21
|
-
};
|
|
22
|
-
const stripHash = (value) => {
|
|
23
|
-
const index = value.indexOf("#");
|
|
24
|
-
return index === -1 ? value : value.slice(0, index);
|
|
25
|
-
};
|
|
26
|
-
const stripTrailingSlash = (value) => {
|
|
27
|
-
if (!value || value === "/") {
|
|
28
|
-
return value;
|
|
29
|
-
}
|
|
30
|
-
return value.endsWith("/") ? value.slice(0, -1) : value;
|
|
31
|
-
};
|
|
32
|
-
const looksAlreadyEscapedForStaticServers = (value) => {
|
|
33
|
-
// Heuristic: if the path contains "%25XX" patterns, it was likely already
|
|
34
|
-
// escaped once ("%" -> "%25") to survive static-server decoding.
|
|
35
|
-
//
|
|
36
|
-
// This is intentionally conservative; double-escaping breaks lookups.
|
|
37
|
-
return /%25[0-9a-fA-F]{2}/.test(value);
|
|
38
|
-
};
|
|
39
|
-
const escapePercentForStaticServersOnce = (value) => {
|
|
40
|
-
if (!value) {
|
|
41
|
-
return value;
|
|
42
|
-
}
|
|
43
|
-
if (looksAlreadyEscapedForStaticServers(value)) {
|
|
44
|
-
return value;
|
|
45
|
-
}
|
|
46
|
-
return value.split("%").join("%25");
|
|
47
|
-
};
|
|
48
|
-
const isLikelyHostname = (value) => {
|
|
49
|
-
// Keep this loose; we only use it as a guard for embedded-URL detection.
|
|
50
|
-
if (!value)
|
|
51
|
-
return false;
|
|
52
|
-
if (value === "localhost")
|
|
53
|
-
return true;
|
|
54
|
-
return value.includes(".");
|
|
55
|
-
};
|
|
56
|
-
const encodeEmbeddedUrlTailIfPresent = (pathname) => {
|
|
57
|
-
// Some CDNs embed a full absolute URL into a single path segment using
|
|
58
|
-
// encodeURIComponent (e.g. ".../https%3A%2F%2Fexample.com%2Fa.png").
|
|
59
|
-
//
|
|
60
|
-
// Other runtimes may request the *decoded* form in-path (e.g.
|
|
61
|
-
// ".../https://example.com/a.png"), which changes path segments.
|
|
62
|
-
//
|
|
63
|
-
// To be resilient, detect an embedded absolute URL tail (http(s)://...) and
|
|
64
|
-
// produce an alternate pathname with that tail collapsed into one encoded
|
|
65
|
-
// segment.
|
|
66
|
-
const raw = String(pathname || "");
|
|
67
|
-
if (!raw.includes("/http")) {
|
|
68
|
-
return null;
|
|
69
|
-
}
|
|
70
|
-
const parts = raw.split("/");
|
|
71
|
-
for (let i = 0; i < parts.length; i += 1) {
|
|
72
|
-
const scheme = parts[i];
|
|
73
|
-
if (scheme !== "http:" && scheme !== "https:") {
|
|
74
|
-
continue;
|
|
75
|
-
}
|
|
76
|
-
// A real absolute URL in-path is typically split like:
|
|
77
|
-
// ["...", "https:", "", "example.com", "a", "b.png"]
|
|
78
|
-
const hasDoubleSlash = parts[i + 1] === "";
|
|
79
|
-
const host = parts[i + 2] || "";
|
|
80
|
-
if (!hasDoubleSlash || !isLikelyHostname(host)) {
|
|
81
|
-
continue;
|
|
82
|
-
}
|
|
83
|
-
const embedded = scheme + "//" + parts.slice(i + 2).join("/");
|
|
84
|
-
const encoded = encodeURIComponent(embedded);
|
|
85
|
-
const nextParts = parts.slice(0, i).concat(encoded);
|
|
86
|
-
const rebuilt = nextParts.join("/") || "/";
|
|
87
|
-
return rebuilt.startsWith("/") ? rebuilt : "/" + rebuilt;
|
|
88
|
-
}
|
|
89
|
-
return null;
|
|
90
|
-
};
|
|
91
|
-
const makePathnameVariants = (pathname) => {
|
|
92
|
-
const variants = new Set();
|
|
93
|
-
const push = (value) => {
|
|
94
|
-
if (!value)
|
|
95
|
-
return;
|
|
96
|
-
variants.add(value);
|
|
97
|
-
};
|
|
98
|
-
push(pathname);
|
|
99
|
-
push(stripTrailingSlash(pathname));
|
|
100
|
-
const encodedTail = encodeEmbeddedUrlTailIfPresent(pathname);
|
|
101
|
-
if (encodedTail && encodedTail !== pathname) {
|
|
102
|
-
push(encodedTail);
|
|
103
|
-
push(stripTrailingSlash(encodedTail));
|
|
104
|
-
}
|
|
105
|
-
return Array.from(variants);
|
|
106
|
-
};
|
|
107
|
-
const getBasename = (pathname) => {
|
|
108
|
-
const clean = pathname.split("?")[0] || "";
|
|
109
|
-
const parts = clean.split("/").filter(Boolean);
|
|
110
|
-
return parts[parts.length - 1] || "";
|
|
1
|
+
import { stripHash } from "@pagepocket/shared";
|
|
2
|
+
import { escapePercentForStaticServersOnce } from "./resource-proxy/escape-percent.js";
|
|
3
|
+
import { addMulti, toArray } from "./resource-proxy/multimap.js";
|
|
4
|
+
import { getBasename, makePathnameVariants } from "./resource-proxy/pathname-variants.js";
|
|
5
|
+
import { urlEquivalent } from "./utils.js";
|
|
6
|
+
const canonicalizeHttpUrlForIndex = (url) => {
|
|
7
|
+
// Keep the host and full path/search stable, but ignore http/https scheme.
|
|
8
|
+
// This is used only as a secondary exact-match key.
|
|
9
|
+
if (url.protocol === "http:" || url.protocol === "https:") {
|
|
10
|
+
return `//${url.host}${url.pathname}${url.search}`;
|
|
11
|
+
}
|
|
12
|
+
return url.toString();
|
|
111
13
|
};
|
|
112
14
|
const toUrlOrNull = (value) => {
|
|
113
15
|
try {
|
|
@@ -117,8 +19,9 @@ const toUrlOrNull = (value) => {
|
|
|
117
19
|
return null;
|
|
118
20
|
}
|
|
119
21
|
};
|
|
120
|
-
const buildResourceProxyIndex = (snapshot) => {
|
|
22
|
+
export const buildResourceProxyIndex = (snapshot) => {
|
|
121
23
|
const byExactUrl = new Map();
|
|
24
|
+
const byCanonicalUrl = new Map();
|
|
122
25
|
const byPathnameWithSearch = new Map();
|
|
123
26
|
const byPathname = new Map();
|
|
124
27
|
const byBasename = new Map();
|
|
@@ -138,11 +41,18 @@ const buildResourceProxyIndex = (snapshot) => {
|
|
|
138
41
|
parsed,
|
|
139
42
|
pathname,
|
|
140
43
|
pathnameWithSearch,
|
|
141
|
-
basename
|
|
44
|
+
basename,
|
|
45
|
+
canonicalUrl: canonicalizeHttpUrlForIndex(parsed)
|
|
142
46
|
};
|
|
143
47
|
// Prefer first-seen item for exact URL.
|
|
144
|
-
|
|
145
|
-
|
|
48
|
+
const exactKey = parsed.toString();
|
|
49
|
+
if (!byExactUrl.has(exactKey)) {
|
|
50
|
+
byExactUrl.set(exactKey, indexed);
|
|
51
|
+
}
|
|
52
|
+
// Secondary exact key: ignore http/https protocol differences.
|
|
53
|
+
const canonicalKey = indexed.canonicalUrl;
|
|
54
|
+
if (!byCanonicalUrl.has(canonicalKey)) {
|
|
55
|
+
byCanonicalUrl.set(canonicalKey, indexed);
|
|
146
56
|
}
|
|
147
57
|
addMulti(byPathnameWithSearch, pathnameWithSearch, indexed);
|
|
148
58
|
addMulti(byPathname, pathname, indexed);
|
|
@@ -152,12 +62,12 @@ const buildResourceProxyIndex = (snapshot) => {
|
|
|
152
62
|
}
|
|
153
63
|
return {
|
|
154
64
|
byExactUrl,
|
|
65
|
+
byCanonicalUrl,
|
|
155
66
|
byPathnameWithSearch,
|
|
156
67
|
byPathname,
|
|
157
68
|
byBasename
|
|
158
69
|
};
|
|
159
70
|
};
|
|
160
|
-
exports.buildResourceProxyIndex = buildResourceProxyIndex;
|
|
161
71
|
const uniqByPath = (items) => {
|
|
162
72
|
const seen = new Set();
|
|
163
73
|
const out = [];
|
|
@@ -216,7 +126,7 @@ const makeSuffixes = (pathname) => {
|
|
|
216
126
|
}
|
|
217
127
|
return out;
|
|
218
128
|
};
|
|
219
|
-
const resolveToLocalPath = (options) => {
|
|
129
|
+
export const resolveToLocalPath = (options) => {
|
|
220
130
|
const { requestUrl, baseUrl, index } = options;
|
|
221
131
|
if (!requestUrl) {
|
|
222
132
|
return undefined;
|
|
@@ -236,12 +146,31 @@ const resolveToLocalPath = (options) => {
|
|
|
236
146
|
if (exact) {
|
|
237
147
|
return escapePercentForStaticServersOnce(exact.path);
|
|
238
148
|
}
|
|
149
|
+
// Scheme-insensitive exact match for http/https.
|
|
150
|
+
// This is a safe optimization and also fixes snapshots where the runtime URL
|
|
151
|
+
// differs only by protocol from the recorded one.
|
|
152
|
+
const canonicalAbs = canonicalizeHttpUrlForIndex(abs);
|
|
153
|
+
const canonicalExact = index.byCanonicalUrl.get(canonicalAbs);
|
|
154
|
+
if (canonicalExact) {
|
|
155
|
+
return escapePercentForStaticServersOnce(canonicalExact.path);
|
|
156
|
+
}
|
|
239
157
|
const withoutHash = stripHash(absString);
|
|
240
158
|
if (withoutHash !== absString) {
|
|
241
159
|
const found = index.byExactUrl.get(withoutHash);
|
|
242
160
|
if (found) {
|
|
243
161
|
return escapePercentForStaticServersOnce(found.path);
|
|
244
162
|
}
|
|
163
|
+
try {
|
|
164
|
+
const withoutHashUrl = new URL(withoutHash);
|
|
165
|
+
const canonicalWithoutHash = canonicalizeHttpUrlForIndex(withoutHashUrl);
|
|
166
|
+
const canonicalFound = index.byCanonicalUrl.get(canonicalWithoutHash);
|
|
167
|
+
if (canonicalFound) {
|
|
168
|
+
return escapePercentForStaticServersOnce(canonicalFound.path);
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
catch {
|
|
172
|
+
// ignore
|
|
173
|
+
}
|
|
245
174
|
}
|
|
246
175
|
const pathname = abs.pathname || "/";
|
|
247
176
|
const pathnameVariants = makePathnameVariants(pathname);
|
|
@@ -258,6 +187,9 @@ const resolveToLocalPath = (options) => {
|
|
|
258
187
|
const items = toArray(index.byPathname.get(key));
|
|
259
188
|
const match = tryCandidates(items, baseUrl, 99);
|
|
260
189
|
if (match) {
|
|
190
|
+
if (!urlEquivalent(requestUrl, match.url, { baseUrl })) {
|
|
191
|
+
continue;
|
|
192
|
+
}
|
|
261
193
|
return escapePercentForStaticServersOnce(match.path);
|
|
262
194
|
}
|
|
263
195
|
}
|
|
@@ -281,4 +213,3 @@ const resolveToLocalPath = (options) => {
|
|
|
281
213
|
}
|
|
282
214
|
return undefined;
|
|
283
215
|
};
|
|
284
|
-
exports.resolveToLocalPath = resolveToLocalPath;
|
package/dist/resources.js
CHANGED
|
@@ -1,41 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
if (k2 === undefined) k2 = k;
|
|
4
|
-
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
-
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
-
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
-
}
|
|
8
|
-
Object.defineProperty(o, k2, desc);
|
|
9
|
-
}) : (function(o, m, k, k2) {
|
|
10
|
-
if (k2 === undefined) k2 = k;
|
|
11
|
-
o[k2] = m[k];
|
|
12
|
-
}));
|
|
13
|
-
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
-
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
-
}) : function(o, v) {
|
|
16
|
-
o["default"] = v;
|
|
17
|
-
});
|
|
18
|
-
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
-
var ownKeys = function(o) {
|
|
20
|
-
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
-
var ar = [];
|
|
22
|
-
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
-
return ar;
|
|
24
|
-
};
|
|
25
|
-
return ownKeys(o);
|
|
26
|
-
};
|
|
27
|
-
return function (mod) {
|
|
28
|
-
if (mod && mod.__esModule) return mod;
|
|
29
|
-
var result = {};
|
|
30
|
-
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
-
__setModuleDefault(result, mod);
|
|
32
|
-
return result;
|
|
33
|
-
};
|
|
34
|
-
})();
|
|
35
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
-
exports.extractResourceUrls = exports.toAbsoluteUrl = void 0;
|
|
37
|
-
const cheerio = __importStar(require("cheerio"));
|
|
38
|
-
const toAbsoluteUrl = (baseUrl, resourceUrl) => {
|
|
1
|
+
import * as cheerio from "cheerio";
|
|
2
|
+
export const toAbsoluteUrl = (baseUrl, resourceUrl) => {
|
|
39
3
|
try {
|
|
40
4
|
return new URL(resourceUrl, baseUrl).toString();
|
|
41
5
|
}
|
|
@@ -43,8 +7,7 @@ const toAbsoluteUrl = (baseUrl, resourceUrl) => {
|
|
|
43
7
|
return resourceUrl;
|
|
44
8
|
}
|
|
45
9
|
};
|
|
46
|
-
|
|
47
|
-
const extractResourceUrls = (html, baseUrl) => {
|
|
10
|
+
export const extractResourceUrls = (html, baseUrl) => {
|
|
48
11
|
const $ = cheerio.load(html);
|
|
49
12
|
const urls = [];
|
|
50
13
|
const collect = (selector, attr) => {
|
|
@@ -74,9 +37,8 @@ const extractResourceUrls = (html, baseUrl) => {
|
|
|
74
37
|
return {
|
|
75
38
|
attr,
|
|
76
39
|
element,
|
|
77
|
-
url:
|
|
40
|
+
url: toAbsoluteUrl(baseUrl, value)
|
|
78
41
|
};
|
|
79
42
|
});
|
|
80
43
|
return { $, resourceUrls, srcsetItems };
|
|
81
44
|
};
|
|
82
|
-
exports.extractResourceUrls = extractResourceUrls;
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { shouldSkipValue } from "./skip.js";
|
|
2
|
+
import { resolveUrlValue } from "./url-resolve.js";
|
|
3
|
+
/**
 * Rewrite every match of `pattern` in `text`, replacing only the module
 * specifier and keeping the surrounding syntax verbatim.
 *
 * `pattern` must be a global regex whose groups are: 1 = text before the
 * opening quote, 2 = the quote character, 3 = the specifier, and optionally
 * 4 = text after the closing quote.
 */
const rewriteSpecifiers = async (text, pattern, replaceSpecifier) => {
  let out = "";
  let cursor = 0;
  for (const match of text.matchAll(pattern)) {
    const start = match.index ?? 0;
    const [whole, prefix = "", quote = "", specifier = "", suffix = ""] = match;
    // Copy the untouched text between the previous match and this one.
    out += text.slice(cursor, start);
    const next = await replaceSpecifier(specifier);
    out += `${prefix}${quote}${next}${quote}${suffix}`;
    cursor = start + whole.length;
  }
  return out + text.slice(cursor);
};
/**
 * Rewrite module specifiers found in JavaScript source text.
 *
 * Three regex passes run in sequence, each over the output of the previous
 * one: `import ... from "x"`, side-effect `import "x"`, and dynamic
 * `import("x")` with a plain string literal argument. A specifier is left
 * as-is when `shouldSkipValue` reports it should be skipped or when
 * `resolveUrlValue` yields `null`/`undefined`.
 *
 * NOTE(review): matching is purely textual, so import-like text inside
 * strings or comments is rewritten as well.
 *
 * @param source  JavaScript source text.
 * @param resolve Resolver forwarded to `resolveUrlValue`.
 * @param baseUrl Base URL forwarded to `resolveUrlValue`.
 * @returns Promise of the source text with resolvable specifiers replaced.
 */
export const rewriteJsText = async (source, resolve, baseUrl) => {
  const replaceSpecifier = async (specifier) => {
    const trimmed = specifier.trim();
    if (shouldSkipValue(trimmed)) {
      return specifier;
    }
    const resolved = resolveUrlValue(trimmed, baseUrl, resolve);
    return resolved ?? specifier;
  };
  const importFromPattern = /(\bimport\s+[^'"]*?\sfrom\s+)(["'])([^"']+)\2/g;
  const importSideEffectPattern = /(\bimport\s+)(["'])([^"']+)\2/g;
  const dynamicImportPattern = /(\bimport\s*\(\s*)(["'])([^"']+)\2(\s*\))/g;
  const afterImportFrom = await rewriteSpecifiers(source, importFromPattern, replaceSpecifier);
  const afterSideEffect = await rewriteSpecifiers(afterImportFrom, importSideEffectPattern, replaceSpecifier);
  return rewriteSpecifiers(afterSideEffect, dynamicImportPattern, replaceSpecifier);
};
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
 * Decide from a `<link>` element's `rel` attribute whether its `href` should
 * be rewritten.
 *
 * A missing or blank `rel` yields `true`. Otherwise the trimmed, lower-cased
 * `rel` must contain one of the substrings `stylesheet`, `preload`,
 * `prefetch` or `icon`. The check is a substring match, so values such as
 * `apple-touch-icon` or `modulepreload` also qualify.
 *
 * @param $element Wrapped element exposing `attr(name)`.
 * @returns `true` when the link's href should be rewritten.
 */
export const shouldRewriteLinkHref = ($element) => {
  const relValue = ($element.attr("rel") || "").trim().toLowerCase();
  if (relValue === "") {
    return true;
  }
  const rewritableKinds = ["stylesheet", "preload", "prefetch", "icon"];
  return rewritableKinds.some((kind) => relValue.includes(kind));
};
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { resolveUrlValue } from "./url-resolve.js";
|
|
2
|
+
/**
 * Rewrite the URL inside a `<meta http-equiv="refresh">` content value such
 * as `"5; url=/next"`.
 *
 * The value is returned unchanged when it has no `;`, has no `url=` part, or
 * when `resolveUrlValue` returns a falsy result for the URL.
 *
 * Everything after the first `url=` belongs to the URL, so `;` characters
 * inside it (e.g. `/a;jsessionid=1`) are kept as part of the URL rather than
 * being split off. If the URL starts with a quote, it ends at the matching
 * quote (or at the end of the string when there is none).
 *
 * @param content Raw `content` attribute value.
 * @param baseUrl Base URL forwarded to `resolveUrlValue`.
 * @param resolve Resolver forwarded to `resolveUrlValue`.
 * @returns The content with the URL replaced as `url=<resolved>`, or the
 *          original content when nothing could be rewritten.
 */
export const rewriteMetaRefresh = (content, baseUrl, resolve) => {
  const parts = content.split(";");
  if (parts.length < 2) {
    return content;
  }
  const urlPartIndex = parts.findIndex((part) => part.trim().toLowerCase().startsWith("url="));
  if (urlPartIndex === -1) {
    return content;
  }
  // Re-join from the `url=` part onward so semicolons inside the URL survive.
  const urlPart = parts.slice(urlPartIndex).join(";");
  let rawUrl = urlPart.slice(urlPart.indexOf("=") + 1).trim();
  const opener = rawUrl.charAt(0);
  if (opener === '"' || opener === "'") {
    const closer = rawUrl.indexOf(opener, 1);
    rawUrl = (closer === -1 ? rawUrl.slice(1) : rawUrl.slice(1, closer)).trim();
  }
  const resolved = resolveUrlValue(rawUrl, baseUrl, resolve);
  if (!resolved) {
    return content;
  }
  return parts.slice(0, urlPartIndex).concat(`url=${resolved}`).join(";");
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Reports whether an attribute value should be left alone instead of being
 * treated as a resolvable URL.
 *
 * @param value Raw attribute value.
 * @returns `true` when the trimmed value is empty or begins with `data:`,
 * `blob:`, `mailto:`, `tel:`, `javascript:` or `#`.
 */
export declare const shouldSkipValue: (value: string) => boolean;
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
// Prefixes (lower-case) marking values that are not fetchable resources.
const SKIPPED_VALUE_PREFIXES = ["data:", "blob:", "mailto:", "tel:", "javascript:", "#"];
/**
 * Reports whether an attribute value should be left alone instead of being
 * treated as a resolvable URL.
 *
 * @param value Raw attribute value.
 * @returns `true` when the trimmed value is empty, is a same-document
 * fragment (`#...`), or uses one of the `data:`, `blob:`, `mailto:`, `tel:`
 * or `javascript:` schemes.
 */
export const shouldSkipValue = (value) => {
    const trimmed = value.trim();
    if (!trimmed) {
        return true;
    }
    // URI schemes are case-insensitive, so `JavaScript:` and `DATA:` must be
    // recognised just like their lower-case spellings.
    const lowered = trimmed.toLowerCase();
    return SKIPPED_VALUE_PREFIXES.some((prefix) => lowered.startsWith(prefix));
};
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import { resolveUrlValue } from "./url-resolve.js";
|
|
2
|
+
/**
 * Heuristic check for `srcset` values that cannot be split on commas safely.
 *
 * A value is flagged only when all three markers are present: a
 * `/image/fetch/` path segment, a percent-encoded `https://` tail
 * (`https%3A%2F%2F`), and a comma immediately (or after one space) followed
 * by a `w_`, `h_` or `c_` token.
 *
 * @param value Raw `srcset` attribute value.
 * @returns `true` when the value matches the unsafe pattern; blank input is
 * never unsafe.
 */
const isUnsafeSrcsetValue = (value) => {
    const text = value.trim();
    if (text === "") {
        return false;
    }
    const requiredMarkers = ["/image/fetch/", "https%3A%2F%2F"];
    if (!requiredMarkers.every((marker) => text.includes(marker))) {
        return false;
    }
    const commaTokens = [",w_", ", w_", ",h_", ", h_", ",c_", ", c_"];
    return commaTokens.some((token) => text.includes(token));
};
|
|
17
|
+
/**
 * Tell whether a token is a `srcset` descriptor: a pixel-density descriptor
 * such as `2x` / `1.5x`, or a width descriptor such as `640w`. Matching is
 * case-insensitive and ignores surrounding whitespace.
 *
 * @param token Whitespace-delimited token from a srcset candidate.
 * @returns `true` when the token is a density or width descriptor.
 */
const isDescriptorToken = (token) => {
    const candidate = token.trim();
    if (candidate.length === 0) {
        return false;
    }
    const densityDescriptor = /^\d+(\.\d+)?x$/i;
    const widthDescriptor = /^\d+w$/i;
    return [densityDescriptor, widthDescriptor].some((pattern) => pattern.test(candidate));
};
|
|
23
|
+
/**
 * Split a `srcset` value into candidates.
 *
 * The input is split on commas; blank pieces are dropped. For each remaining
 * piece, if it has at least two whitespace-separated tokens and the last one
 * is a descriptor, the piece is split into `{ url, descriptor }`; otherwise
 * the whole trimmed piece is returned as `{ url }`.
 *
 * @param input Raw `srcset` attribute value.
 * @returns Candidates in source order.
 */
const parseSrcset = (input) => {
    const parsed = [];
    for (const piece of input.split(",")) {
        const candidate = piece.trim();
        if (!candidate) {
            continue;
        }
        const tokens = candidate.split(/\s+/).filter(Boolean);
        const trailing = tokens.length >= 2 ? tokens[tokens.length - 1] : undefined;
        if (trailing !== undefined && isDescriptorToken(trailing)) {
            // The descriptor is the final token, so its last occurrence marks
            // where the URL portion ends.
            const cut = candidate.lastIndexOf(trailing);
            parsed.push({ url: candidate.slice(0, cut).trim(), descriptor: trailing });
        }
        else {
            parsed.push({ url: candidate });
        }
    }
    return parsed;
};
|
|
42
|
+
/**
 * Serialize srcset candidates back into an attribute value.
 *
 * Each candidate becomes `"<url>"` or `"<url> <descriptor>"` (both trimmed);
 * candidates that serialize to an empty string are dropped and the rest are
 * joined with `,`.
 *
 * @param candidates Objects with a `url` and an optional `descriptor`.
 * @returns The serialized `srcset` value.
 */
const stringifySrcset = (candidates) => {
    const rendered = [];
    for (const { url, descriptor } of candidates) {
        const cleanUrl = url.trim();
        const entry = descriptor ? `${cleanUrl} ${descriptor.trim()}` : cleanUrl;
        if (entry) {
            rendered.push(entry);
        }
    }
    return rendered.join(",");
};
|
|
53
|
+
/**
 * Rewrite every candidate URL in a `srcset` attribute value.
 *
 * Values flagged by `isUnsafeSrcsetValue` yield an empty string. Otherwise
 * each parsed candidate URL is passed through `resolveUrlValue`; when that
 * returns `null`/`undefined` the original URL is kept. Descriptors are
 * carried over as-is.
 *
 * @param value Raw `srcset` attribute value.
 * @param baseUrl Base URL used to absolutize candidate URLs.
 * @param resolve Maps an absolute URL to its replacement, or `null`.
 * @returns The rewritten `srcset` value.
 */
export const rewriteSrcsetValue = (value, baseUrl, resolve) => {
    if (isUnsafeSrcsetValue(value)) {
        return "";
    }
    const rewritten = [];
    for (const { url, descriptor } of parseSrcset(value)) {
        const mapped = resolveUrlValue(url, baseUrl, resolve);
        rewritten.push({ url: mapped ?? url, descriptor });
    }
    return stringifySrcset(rewritten);
};
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { shouldSkipValue } from "./skip.js";
|
|
2
|
+
/**
 * Absolutize a URL-like attribute value against `baseUrl` and hand it to
 * `resolve`.
 *
 * @param value Raw attribute value.
 * @param baseUrl Base URL passed to the `URL` constructor.
 * @param resolve Callback receiving the absolute URL string; its result is
 * returned unchanged.
 * @returns The result of `resolve`, or `null` when `shouldSkipValue(value)`
 * is true or when URL construction / `resolve` throws.
 */
export const resolveUrlValue = (value, baseUrl, resolve) => {
    if (!shouldSkipValue(value)) {
        try {
            return resolve(new URL(value, baseUrl).toString());
        }
        catch {
            // Unparseable URL or failing resolver: fall through to null.
        }
    }
    return null;
};
|
package/dist/rewrite-links.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
import
|
|
1
|
+
import { rewriteJsText } from "./rewrite-links/js-imports.js";
|
|
2
|
+
import type { ReplaceElementsConfig } from "./types.js";
|
|
2
3
|
type UrlResolver = (absoluteUrl: string) => string | null;
|
|
3
|
-
export
|
|
4
|
+
export { rewriteJsText };
|
|
4
5
|
export declare const rewriteEntryHtml: (input: {
|
|
5
6
|
html: string;
|
|
6
7
|
entryUrl: string;
|
|
@@ -18,4 +19,3 @@ export declare const rewriteEntryHtml: (input: {
|
|
|
18
19
|
html: string;
|
|
19
20
|
title?: string;
|
|
20
21
|
}>;
|
|
21
|
-
export {};
|