@pagepocket/lib 0.7.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. package/README.md +7 -6
  2. package/dist/build-snapshot-from-bundle.d.ts +23 -0
  3. package/dist/build-snapshot-from-bundle.js +68 -0
  4. package/dist/builtin-blacklist.js +3 -6
  5. package/dist/bundle/from-network-store.d.ts +10 -0
  6. package/dist/bundle/from-network-store.js +26 -0
  7. package/dist/bundle/types.d.ts +32 -0
  8. package/dist/bundle/types.js +2 -0
  9. package/dist/capture/index.d.ts +14 -0
  10. package/dist/capture/index.js +86 -0
  11. package/dist/capture/memory-content-store.d.ts +4 -0
  12. package/dist/capture/memory-content-store.js +42 -0
  13. package/dist/capture/types.d.ts +61 -0
  14. package/dist/capture/types.js +2 -0
  15. package/dist/content-store.js +3 -8
  16. package/dist/content-type.d.ts +1 -1
  17. package/dist/content-type.js +2 -28
  18. package/dist/core/_impl/completion.d.ts +4 -0
  19. package/dist/core/_impl/completion.js +29 -0
  20. package/dist/core/_impl/content-store.d.ts +21 -0
  21. package/dist/core/_impl/content-store.js +91 -0
  22. package/dist/core/_impl/debug.d.ts +1 -0
  23. package/dist/core/_impl/debug.js +16 -0
  24. package/dist/core/_impl/inflight-tracker.d.ts +19 -0
  25. package/dist/core/_impl/inflight-tracker.js +48 -0
  26. package/dist/core/_impl/pagepocket.d.ts +27 -0
  27. package/dist/core/_impl/pagepocket.js +155 -0
  28. package/dist/core/capture/_impl/memory-content-store.d.ts +4 -0
  29. package/dist/core/capture/_impl/memory-content-store.js +42 -0
  30. package/dist/core/capture/_impl/types.d.ts +61 -0
  31. package/dist/core/capture/_impl/types.js +2 -0
  32. package/dist/core/capture/internal/memory-content-store.d.ts +4 -0
  33. package/dist/core/capture/internal/memory-content-store.js +42 -0
  34. package/dist/core/capture/internal/types.d.ts +61 -0
  35. package/dist/core/capture/internal/types.js +2 -0
  36. package/dist/core/capture/memory-content-store.d.ts +4 -0
  37. package/dist/core/capture/memory-content-store.js +38 -0
  38. package/dist/core/capture/types.d.ts +61 -0
  39. package/dist/core/capture/types.js +1 -0
  40. package/dist/core/completion.d.ts +4 -0
  41. package/dist/core/completion.js +23 -0
  42. package/dist/core/content-store.d.ts +21 -0
  43. package/dist/core/content-store.js +54 -0
  44. package/dist/core/debug.d.ts +1 -0
  45. package/dist/core/debug.js +12 -0
  46. package/dist/core/file-tree-merge.d.ts +2 -0
  47. package/dist/core/file-tree-merge.js +27 -0
  48. package/dist/core/file-tree.d.ts +36 -0
  49. package/dist/core/file-tree.js +1 -0
  50. package/dist/core/inflight-tracker.d.ts +19 -0
  51. package/dist/core/inflight-tracker.js +44 -0
  52. package/dist/core/internal/completion.d.ts +4 -0
  53. package/dist/core/internal/completion.js +29 -0
  54. package/dist/core/internal/content-store.d.ts +21 -0
  55. package/dist/core/internal/content-store.js +91 -0
  56. package/dist/core/internal/debug.d.ts +1 -0
  57. package/dist/core/internal/debug.js +16 -0
  58. package/dist/core/internal/inflight-tracker.d.ts +19 -0
  59. package/dist/core/internal/inflight-tracker.js +48 -0
  60. package/dist/core/internal/pagepocket.d.ts +27 -0
  61. package/dist/core/internal/pagepocket.js +155 -0
  62. package/dist/core/pagepocket.d.ts +38 -0
  63. package/dist/core/pagepocket.js +57 -0
  64. package/dist/core/plugin/_impl/context.d.ts +47 -0
  65. package/dist/core/plugin/_impl/context.js +142 -0
  66. package/dist/core/plugin/_impl/runner.d.ts +12 -0
  67. package/dist/core/plugin/_impl/runner.js +232 -0
  68. package/dist/core/plugin/_impl/types.d.ts +108 -0
  69. package/dist/core/plugin/_impl/types.js +2 -0
  70. package/dist/core/plugin/context.d.ts +47 -0
  71. package/dist/core/plugin/context.js +205 -0
  72. package/dist/core/plugin/internal/context.d.ts +47 -0
  73. package/dist/core/plugin/internal/context.js +142 -0
  74. package/dist/core/plugin/internal/runner.d.ts +12 -0
  75. package/dist/core/plugin/internal/runner.js +232 -0
  76. package/dist/core/plugin/internal/types.d.ts +108 -0
  77. package/dist/core/plugin/internal/types.js +2 -0
  78. package/dist/core/plugin/runner-utils.d.ts +9 -0
  79. package/dist/core/plugin/runner-utils.js +29 -0
  80. package/dist/core/plugin/runner.d.ts +12 -0
  81. package/dist/core/plugin/runner.js +118 -0
  82. package/dist/core/plugin/types.d.ts +117 -0
  83. package/dist/core/plugin/types.js +1 -0
  84. package/dist/core/runtime/types.d.ts +14 -0
  85. package/dist/core/runtime/types.js +2 -0
  86. package/dist/css-rewrite.js +1 -5
  87. package/dist/debug.d.ts +0 -1
  88. package/dist/debug.js +3 -5
  89. package/dist/files/types.d.ts +41 -0
  90. package/dist/files/types.js +2 -0
  91. package/dist/hack-html.js +20 -13
  92. package/dist/hackers/index.d.ts +1 -1
  93. package/dist/hackers/index.js +24 -27
  94. package/dist/hackers/preload-fetch.d.ts +1 -1
  95. package/dist/hackers/preload-fetch.js +1 -4
  96. package/dist/hackers/preload-xhr.d.ts +1 -1
  97. package/dist/hackers/preload-xhr.js +1 -4
  98. package/dist/hackers/replay-beacon.d.ts +1 -1
  99. package/dist/hackers/replay-beacon.js +1 -4
  100. package/dist/hackers/replay-block-text-fragment.d.ts +1 -1
  101. package/dist/hackers/replay-block-text-fragment.js +1 -4
  102. package/dist/hackers/replay-css-proxy.d.ts +1 -1
  103. package/dist/hackers/replay-css-proxy.js +9 -12
  104. package/dist/hackers/replay-dom-rewrite.d.ts +1 -1
  105. package/dist/hackers/replay-dom-rewrite.js +165 -154
  106. package/dist/hackers/replay-eventsource.d.ts +1 -1
  107. package/dist/hackers/replay-eventsource.js +1 -4
  108. package/dist/hackers/replay-fetch.d.ts +1 -1
  109. package/dist/hackers/replay-fetch.js +1 -4
  110. package/dist/hackers/replay-history-path.d.ts +1 -1
  111. package/dist/hackers/replay-history-path.js +1 -4
  112. package/dist/hackers/replay-svg-image.d.ts +1 -1
  113. package/dist/hackers/replay-svg-image.js +1 -4
  114. package/dist/hackers/replay-websocket.d.ts +1 -1
  115. package/dist/hackers/replay-websocket.js +1 -4
  116. package/dist/hackers/replay-xhr.d.ts +1 -1
  117. package/dist/hackers/replay-xhr.js +1 -4
  118. package/dist/hackers/types.js +1 -2
  119. package/dist/index.d.ts +29 -13
  120. package/dist/index.js +23 -44
  121. package/dist/kind-map.d.ts +68 -0
  122. package/dist/kind-map.js +58 -0
  123. package/dist/network-store.js +12 -1
  124. package/dist/pagepocket.d.ts +19 -4
  125. package/dist/pagepocket.js +36 -102
  126. package/dist/path-resolver.d.ts +1 -2
  127. package/dist/path-resolver.js +9 -16
  128. package/dist/plugin/builtins/build-snapshot-plugin.d.ts +5 -0
  129. package/dist/plugin/builtins/build-snapshot-plugin.js +84 -0
  130. package/dist/plugin/builtins/replace-elements-plugin.d.ts +8 -0
  131. package/dist/plugin/builtins/replace-elements-plugin.js +13 -0
  132. package/dist/plugin/builtins/to-directory-plugin.d.ts +7 -0
  133. package/dist/plugin/builtins/to-directory-plugin.js +20 -0
  134. package/dist/plugin/builtins/to-zip-plugin.d.ts +5 -0
  135. package/dist/plugin/builtins/to-zip-plugin.js +19 -0
  136. package/dist/plugin/context.d.ts +47 -0
  137. package/dist/plugin/context.js +142 -0
  138. package/dist/plugin/runner.d.ts +12 -0
  139. package/dist/plugin/runner.js +232 -0
  140. package/dist/plugin/types.d.ts +108 -0
  141. package/dist/plugin/types.js +2 -0
  142. package/dist/plugins/build-files-from-capture.d.ts +5 -0
  143. package/dist/plugins/build-files-from-capture.js +85 -0
  144. package/dist/plugins/build-warc.d.ts +5 -0
  145. package/dist/plugins/build-warc.js +225 -0
  146. package/dist/plugins/builtins/manifest.d.ts +2 -0
  147. package/dist/plugins/builtins/manifest.js +42 -0
  148. package/dist/plugins/builtins/snapshot-directory.d.ts +2 -0
  149. package/dist/plugins/builtins/snapshot-directory.js +24 -0
  150. package/dist/plugins/builtins/snapshot-zip.d.ts +2 -0
  151. package/dist/plugins/builtins/snapshot-zip.js +25 -0
  152. package/dist/plugins/capture-http-lighterceptor.d.ts +5 -0
  153. package/dist/plugins/capture-http-lighterceptor.js +85 -0
  154. package/dist/plugins/capture-http-puppeteer.d.ts +5 -0
  155. package/dist/plugins/capture-http-puppeteer.js +85 -0
  156. package/dist/plugins/host.d.ts +37 -0
  157. package/dist/plugins/host.js +105 -0
  158. package/dist/plugins/index.d.ts +6 -0
  159. package/dist/plugins/index.js +11 -0
  160. package/dist/plugins/ordering.d.ts +2 -0
  161. package/dist/plugins/ordering.js +19 -0
  162. package/dist/plugins/types.d.ts +51 -0
  163. package/dist/plugins/types.js +2 -0
  164. package/dist/preload.js +3 -7
  165. package/dist/replace-elements/actions.d.ts +5 -0
  166. package/dist/replace-elements/actions.js +86 -0
  167. package/dist/replace-elements/match.d.ts +5 -0
  168. package/dist/replace-elements/match.js +46 -0
  169. package/dist/replace-elements/normalize.d.ts +21 -0
  170. package/dist/replace-elements/normalize.js +50 -0
  171. package/dist/replace-elements.d.ts +1 -1
  172. package/dist/replace-elements.js +5 -185
  173. package/dist/replay/match-api.d.ts +10 -0
  174. package/dist/replay/match-api.js +162 -0
  175. package/dist/replay/templates/match-api-source.d.ts +1 -0
  176. package/dist/replay/templates/match-api-source.js +137 -0
  177. package/dist/replay/templates/replay-script-template.d.ts +5 -0
  178. package/dist/replay/templates/replay-script-template.js +337 -0
  179. package/dist/replay/templates/resource-proxy-script.d.ts +1 -0
  180. package/dist/replay/templates/resource-proxy-script.js +274 -0
  181. package/dist/replay-script.d.ts +3 -10
  182. package/dist/replay-script.js +11 -625
  183. package/dist/resource-filter.d.ts +1 -1
  184. package/dist/resource-filter.js +1 -5
  185. package/dist/resource-proxy/escape-percent.d.ts +1 -0
  186. package/dist/resource-proxy/escape-percent.js +12 -0
  187. package/dist/resource-proxy/multimap.d.ts +3 -0
  188. package/dist/resource-proxy/multimap.js +18 -0
  189. package/dist/resource-proxy/pathname-variants.d.ts +3 -0
  190. package/dist/resource-proxy/pathname-variants.js +54 -0
  191. package/dist/resource-proxy.d.ts +4 -2
  192. package/dist/resource-proxy.js +48 -117
  193. package/dist/resources.js +4 -42
  194. package/dist/rewrite-links/js-imports.d.ts +3 -0
  195. package/dist/rewrite-links/js-imports.js +56 -0
  196. package/dist/rewrite-links/link-rel.d.ts +2 -0
  197. package/dist/rewrite-links/link-rel.js +10 -0
  198. package/dist/rewrite-links/meta-refresh.d.ts +3 -0
  199. package/dist/rewrite-links/meta-refresh.js +22 -0
  200. package/dist/rewrite-links/skip.d.ts +1 -0
  201. package/dist/rewrite-links/skip.js +10 -0
  202. package/dist/rewrite-links/srcset.d.ts +3 -0
  203. package/dist/rewrite-links/srcset.js +63 -0
  204. package/dist/rewrite-links/url-resolve.d.ts +3 -0
  205. package/dist/rewrite-links/url-resolve.js +13 -0
  206. package/dist/rewrite-links.d.ts +3 -3
  207. package/dist/rewrite-links.js +31 -240
  208. package/dist/snapshot-builder/api.d.ts +3 -0
  209. package/dist/snapshot-builder/api.js +6 -0
  210. package/dist/snapshot-builder/build-snapshot.d.ts +3 -0
  211. package/dist/snapshot-builder/build-snapshot.js +138 -0
  212. package/dist/snapshot-builder/capture-index/index-capture.d.ts +13 -0
  213. package/dist/snapshot-builder/capture-index/index-capture.js +168 -0
  214. package/dist/snapshot-builder/capture-index/index.d.ts +2 -0
  215. package/dist/snapshot-builder/capture-index/index.js +1 -0
  216. package/dist/snapshot-builder/capture-index/types.d.ts +12 -0
  217. package/dist/snapshot-builder/capture-index/types.js +1 -0
  218. package/dist/snapshot-builder/capture-index.d.ts +12 -0
  219. package/dist/snapshot-builder/capture-index.js +173 -0
  220. package/dist/snapshot-builder/emit-document.d.ts +24 -0
  221. package/dist/snapshot-builder/emit-document.js +50 -0
  222. package/dist/snapshot-builder/grouping.d.ts +8 -0
  223. package/dist/snapshot-builder/grouping.js +87 -0
  224. package/dist/snapshot-builder/http.d.ts +6 -0
  225. package/dist/snapshot-builder/http.js +28 -0
  226. package/dist/snapshot-builder/index.d.ts +4 -0
  227. package/dist/snapshot-builder/index.js +2 -0
  228. package/dist/snapshot-builder/path-map.d.ts +3 -0
  229. package/dist/snapshot-builder/path-map.js +35 -0
  230. package/dist/snapshot-builder/resources-path.d.ts +23 -0
  231. package/dist/snapshot-builder/resources-path.js +47 -0
  232. package/dist/snapshot-builder/rewrite-resource.d.ts +18 -0
  233. package/dist/snapshot-builder/rewrite-resource.js +52 -0
  234. package/dist/snapshot-builder/types.d.ts +37 -0
  235. package/dist/snapshot-builder/types.js +2 -0
  236. package/dist/snapshot-builder.d.ts +12 -8
  237. package/dist/snapshot-builder.js +252 -27
  238. package/dist/types.d.ts +122 -78
  239. package/dist/types.js +4 -2
  240. package/dist/units/contracts-bridge.d.ts +76 -0
  241. package/dist/units/contracts-bridge.js +6 -0
  242. package/dist/units/index.d.ts +4 -0
  243. package/dist/units/index.js +2 -0
  244. package/dist/units/runner.d.ts +11 -0
  245. package/dist/units/runner.js +270 -0
  246. package/dist/units/types.d.ts +39 -0
  247. package/dist/units/types.js +1 -0
  248. package/dist/utils/streams.d.ts +2 -0
  249. package/dist/utils/streams.js +29 -0
  250. package/dist/utils.d.ts +35 -1
  251. package/dist/utils.js +107 -29
  252. package/dist/v3/contracts-bridge.d.ts +69 -0
  253. package/dist/v3/contracts-bridge.js +5 -0
  254. package/dist/v3/index.d.ts +4 -0
  255. package/dist/v3/index.js +2 -0
  256. package/dist/v3/runner.d.ts +20 -0
  257. package/dist/v3/runner.js +245 -0
  258. package/dist/v3/types.d.ts +39 -0
  259. package/dist/v3/types.js +1 -0
  260. package/dist/writers.js +3 -1
  261. package/package.json +11 -3
@@ -0,0 +1,18 @@
1
/**
 * Adds `value` to the entries stored under `key` in a multimap.
 *
 * A slot holds either a single bare value or an array of values: the first
 * insertion stores the bare value, the second promotes the slot to a
 * two-element array, and later insertions push onto that array in place.
 *
 * Presence is decided with `Map.prototype.has` rather than truthiness, so a
 * falsy first value (`0`, `""`, `false`, `null`) is kept and accumulated
 * instead of being silently overwritten by the next insertion.
 *
 * Note: a slot cannot tell "one value that happens to be an array" apart from
 * "several values", so arrays should not be stored as values.
 *
 * @param map Multimap to mutate.
 * @param key Key to insert under.
 * @param value Value to add.
 */
export const addMulti = (map, key, value) => {
    if (!map.has(key)) {
        map.set(key, value);
        return;
    }
    const existing = map.get(key);
    if (Array.isArray(existing)) {
        existing.push(value);
        return;
    }
    map.set(key, [existing, value]);
};
13
/**
 * Normalizes a multimap slot (absent, single value, or array of values) to an
 * array.
 *
 * Only `undefined` and `null` are treated as "no entry"; other falsy values
 * such as `0`, `""` or `false` are real entries and are wrapped, not dropped.
 * An array slot is returned as-is (same reference, not a copy).
 *
 * @param value Slot content, typically the result of `map.get(key)`.
 * @returns The slot's values as an array; empty when the slot is absent.
 */
export const toArray = (value) => {
    if (value === undefined || value === null) {
        return [];
    }
    return Array.isArray(value) ? value : [value];
};
@@ -0,0 +1,3 @@
1
+ export declare const makePathnameVariants: (pathname: string) => string[];
2
+ export declare const getBasename: (pathname: string) => string;
3
+ export declare const stripHashFromUrlString: (url: string) => string;
@@ -0,0 +1,54 @@
1
+ import { stripHash, stripTrailingSlash } from "@pagepocket/shared";
2
/**
 * Loose check for "does this path segment look like a hostname?".
 *
 * Used only as a guard for embedded-URL detection, so it stays permissive:
 * `localhost` or anything containing a dot qualifies. An optional `:port`
 * suffix is ignored and the comparison is case-insensitive, so hosts such as
 * `localhost:3000` or `LOCALHOST` are recognized too.
 *
 * @param value Candidate host segment (may be empty).
 * @returns `true` when the segment plausibly names a host.
 */
const isLikelyHostname = (value) => {
    if (!value)
        return false;
    // Host names are case-insensitive and may carry a numeric port.
    const host = String(value).replace(/:\d+$/, "").toLowerCase();
    if (host === "localhost")
        return true;
    return host.includes(".");
};
9
/**
 * Collapses an absolute URL embedded in a pathname into one encoded segment.
 *
 * Some CDNs embed a full absolute URL into a single path segment using
 * `encodeURIComponent` (e.g. ".../https%3A%2F%2Fexample.com%2Fa.png"), while
 * other runtimes request the decoded form in-path
 * (".../https://example.com/a.png"), which changes the path segments. This
 * helper finds the first `http:` / `https:` segment that is followed by an
 * empty segment (the "//") and a plausible host, and returns an alternate
 * pathname in which that whole tail is a single encoded segment.
 *
 * @param pathname Request pathname; nullish/empty is treated as "".
 * @returns The alternate pathname (always starting with "/"), or `null` when
 *   no embedded absolute URL is found.
 */
const encodeEmbeddedUrlTailIfPresent = (pathname) => {
    const text = String(pathname || "");
    // Cheap pre-check: without "/http" there cannot be an in-path scheme segment.
    if (text.indexOf("/http") === -1) {
        return null;
    }
    const segments = text.split("/");
    // An in-path absolute URL splits like ["...", "https:", "", "example.com", "a.png"].
    const schemeAt = segments.findIndex((segment, at) => {
        const isScheme = segment === "http:" || segment === "https:";
        return isScheme && segments[at + 1] === "" && isLikelyHostname(segments[at + 2] || "");
    });
    if (schemeAt === -1) {
        return null;
    }
    const embeddedUrl = segments[schemeAt] + "//" + segments.slice(schemeAt + 2).join("/");
    const head = segments.slice(0, schemeAt);
    head.push(encodeURIComponent(embeddedUrl));
    const joined = head.join("/") || "/";
    return joined.startsWith("/") ? joined : "/" + joined;
};
33
/**
 * Lists the pathname spellings that should be tried when looking a request up.
 *
 * Candidates, in order: the pathname itself, the pathname passed through
 * `stripTrailingSlash`, and - when the pathname carries an embedded absolute
 * URL tail - the collapsed/encoded form plus its `stripTrailingSlash` form.
 * Empty candidates are dropped and duplicates removed, keeping first-seen order.
 *
 * @param pathname Request pathname.
 * @returns De-duplicated, non-empty pathname variants.
 */
export const makePathnameVariants = (pathname) => {
    const candidates = [pathname, stripTrailingSlash(pathname)];
    const collapsed = encodeEmbeddedUrlTailIfPresent(pathname);
    if (collapsed && collapsed !== pathname) {
        candidates.push(collapsed, stripTrailingSlash(collapsed));
    }
    // A Set preserves insertion order, so the result order matches candidate order.
    return Array.from(new Set(candidates.filter(Boolean)));
};
49
/**
 * Returns the last non-empty path segment of `pathname`.
 *
 * Anything from the first "?" onward is ignored, and empty segments (leading,
 * trailing or doubled slashes) are skipped.
 *
 * @param pathname Pathname, optionally followed by a query string.
 * @returns The final segment, or "" when there is none.
 */
export const getBasename = (pathname) => {
    const [beforeQuery = ""] = pathname.split("?");
    const segments = beforeQuery.split("/").filter((segment) => segment.length > 0);
    return segments.length > 0 ? segments[segments.length - 1] : "";
};
54
/**
 * Returns `url` without its fragment by delegating to the shared `stripHash`
 * helper (presumably everything from the first "#" is dropped - confirm
 * against `@pagepocket/shared`).
 *
 * @param url URL string.
 * @returns Whatever `stripHash` returns for `url`.
 */
export const stripHashFromUrlString = (url) => {
    return stripHash(url);
};
@@ -1,4 +1,5 @@
1
- import type { ResourceType } from "./types";
1
+ import { type MultiMap } from "./resource-proxy/multimap.js";
2
+ import type { ResourceType } from "./types.js";
2
3
  export type ResourcesPathSnapshotItem = {
3
4
  url: string;
4
5
  path: string;
@@ -16,10 +17,11 @@ type IndexedItem = ResourcesPathSnapshotItem & {
16
17
  pathname: string;
17
18
  pathnameWithSearch: string;
18
19
  basename: string;
20
+ canonicalUrl: string;
19
21
  };
20
- type MultiMap<K, V> = Map<K, V | V[]>;
21
22
  export type ResourceProxyIndex = {
22
23
  byExactUrl: Map<string, IndexedItem>;
24
+ byCanonicalUrl: Map<string, IndexedItem>;
23
25
  byPathnameWithSearch: MultiMap<string, IndexedItem>;
24
26
  byPathname: MultiMap<string, IndexedItem>;
25
27
  byBasename: MultiMap<string, IndexedItem>;
@@ -1,113 +1,15 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.resolveToLocalPath = exports.buildResourceProxyIndex = void 0;
4
- const addMulti = (map, key, value) => {
5
- const existing = map.get(key);
6
- if (!existing) {
7
- map.set(key, value);
8
- return;
9
- }
10
- if (Array.isArray(existing)) {
11
- existing.push(value);
12
- return;
13
- }
14
- map.set(key, [existing, value]);
15
- };
16
- const toArray = (value) => {
17
- if (!value) {
18
- return [];
19
- }
20
- return Array.isArray(value) ? value : [value];
21
- };
22
- const stripHash = (value) => {
23
- const index = value.indexOf("#");
24
- return index === -1 ? value : value.slice(0, index);
25
- };
26
- const stripTrailingSlash = (value) => {
27
- if (!value || value === "/") {
28
- return value;
29
- }
30
- return value.endsWith("/") ? value.slice(0, -1) : value;
31
- };
32
- const looksAlreadyEscapedForStaticServers = (value) => {
33
- // Heuristic: if the path contains "%25XX" patterns, it was likely already
34
- // escaped once ("%" -> "%25") to survive static-server decoding.
35
- //
36
- // This is intentionally conservative; double-escaping breaks lookups.
37
- return /%25[0-9a-fA-F]{2}/.test(value);
38
- };
39
- const escapePercentForStaticServersOnce = (value) => {
40
- if (!value) {
41
- return value;
42
- }
43
- if (looksAlreadyEscapedForStaticServers(value)) {
44
- return value;
45
- }
46
- return value.split("%").join("%25");
47
- };
48
- const isLikelyHostname = (value) => {
49
- // Keep this loose; we only use it as a guard for embedded-URL detection.
50
- if (!value)
51
- return false;
52
- if (value === "localhost")
53
- return true;
54
- return value.includes(".");
55
- };
56
- const encodeEmbeddedUrlTailIfPresent = (pathname) => {
57
- // Some CDNs embed a full absolute URL into a single path segment using
58
- // encodeURIComponent (e.g. ".../https%3A%2F%2Fexample.com%2Fa.png").
59
- //
60
- // Other runtimes may request the *decoded* form in-path (e.g.
61
- // ".../https://example.com/a.png"), which changes path segments.
62
- //
63
- // To be resilient, detect an embedded absolute URL tail (http(s)://...) and
64
- // produce an alternate pathname with that tail collapsed into one encoded
65
- // segment.
66
- const raw = String(pathname || "");
67
- if (!raw.includes("/http")) {
68
- return null;
69
- }
70
- const parts = raw.split("/");
71
- for (let i = 0; i < parts.length; i += 1) {
72
- const scheme = parts[i];
73
- if (scheme !== "http:" && scheme !== "https:") {
74
- continue;
75
- }
76
- // A real absolute URL in-path is typically split like:
77
- // ["...", "https:", "", "example.com", "a", "b.png"]
78
- const hasDoubleSlash = parts[i + 1] === "";
79
- const host = parts[i + 2] || "";
80
- if (!hasDoubleSlash || !isLikelyHostname(host)) {
81
- continue;
82
- }
83
- const embedded = scheme + "//" + parts.slice(i + 2).join("/");
84
- const encoded = encodeURIComponent(embedded);
85
- const nextParts = parts.slice(0, i).concat(encoded);
86
- const rebuilt = nextParts.join("/") || "/";
87
- return rebuilt.startsWith("/") ? rebuilt : "/" + rebuilt;
88
- }
89
- return null;
90
- };
91
- const makePathnameVariants = (pathname) => {
92
- const variants = new Set();
93
- const push = (value) => {
94
- if (!value)
95
- return;
96
- variants.add(value);
97
- };
98
- push(pathname);
99
- push(stripTrailingSlash(pathname));
100
- const encodedTail = encodeEmbeddedUrlTailIfPresent(pathname);
101
- if (encodedTail && encodedTail !== pathname) {
102
- push(encodedTail);
103
- push(stripTrailingSlash(encodedTail));
104
- }
105
- return Array.from(variants);
106
- };
107
- const getBasename = (pathname) => {
108
- const clean = pathname.split("?")[0] || "";
109
- const parts = clean.split("/").filter(Boolean);
110
- return parts[parts.length - 1] || "";
1
+ import { stripHash } from "@pagepocket/shared";
2
+ import { escapePercentForStaticServersOnce } from "./resource-proxy/escape-percent.js";
3
+ import { addMulti, toArray } from "./resource-proxy/multimap.js";
4
+ import { getBasename, makePathnameVariants } from "./resource-proxy/pathname-variants.js";
5
+ import { urlEquivalent } from "./utils.js";
6
+ const canonicalizeHttpUrlForIndex = (url) => {
7
+ // Keep the host and full path/search stable, but ignore http/https scheme.
8
+ // This is used only as a secondary exact-match key.
9
+ if (url.protocol === "http:" || url.protocol === "https:") {
10
+ return `//${url.host}${url.pathname}${url.search}`;
11
+ }
12
+ return url.toString();
111
13
  };
112
14
  const toUrlOrNull = (value) => {
113
15
  try {
@@ -117,8 +19,9 @@ const toUrlOrNull = (value) => {
117
19
  return null;
118
20
  }
119
21
  };
120
- const buildResourceProxyIndex = (snapshot) => {
22
+ export const buildResourceProxyIndex = (snapshot) => {
121
23
  const byExactUrl = new Map();
24
+ const byCanonicalUrl = new Map();
122
25
  const byPathnameWithSearch = new Map();
123
26
  const byPathname = new Map();
124
27
  const byBasename = new Map();
@@ -138,11 +41,18 @@ const buildResourceProxyIndex = (snapshot) => {
138
41
  parsed,
139
42
  pathname,
140
43
  pathnameWithSearch,
141
- basename
44
+ basename,
45
+ canonicalUrl: canonicalizeHttpUrlForIndex(parsed)
142
46
  };
143
47
  // Prefer first-seen item for exact URL.
144
- if (!byExactUrl.has(parsed.toString())) {
145
- byExactUrl.set(parsed.toString(), indexed);
48
+ const exactKey = parsed.toString();
49
+ if (!byExactUrl.has(exactKey)) {
50
+ byExactUrl.set(exactKey, indexed);
51
+ }
52
+ // Secondary exact key: ignore http/https protocol differences.
53
+ const canonicalKey = indexed.canonicalUrl;
54
+ if (!byCanonicalUrl.has(canonicalKey)) {
55
+ byCanonicalUrl.set(canonicalKey, indexed);
146
56
  }
147
57
  addMulti(byPathnameWithSearch, pathnameWithSearch, indexed);
148
58
  addMulti(byPathname, pathname, indexed);
@@ -152,12 +62,12 @@ const buildResourceProxyIndex = (snapshot) => {
152
62
  }
153
63
  return {
154
64
  byExactUrl,
65
+ byCanonicalUrl,
155
66
  byPathnameWithSearch,
156
67
  byPathname,
157
68
  byBasename
158
69
  };
159
70
  };
160
- exports.buildResourceProxyIndex = buildResourceProxyIndex;
161
71
  const uniqByPath = (items) => {
162
72
  const seen = new Set();
163
73
  const out = [];
@@ -216,7 +126,7 @@ const makeSuffixes = (pathname) => {
216
126
  }
217
127
  return out;
218
128
  };
219
- const resolveToLocalPath = (options) => {
129
+ export const resolveToLocalPath = (options) => {
220
130
  const { requestUrl, baseUrl, index } = options;
221
131
  if (!requestUrl) {
222
132
  return undefined;
@@ -236,12 +146,31 @@ const resolveToLocalPath = (options) => {
236
146
  if (exact) {
237
147
  return escapePercentForStaticServersOnce(exact.path);
238
148
  }
149
+ // Scheme-insensitive exact match for http/https.
150
+ // This is a safe optimization and also fixes snapshots where the runtime URL
151
+ // differs only by protocol from the recorded one.
152
+ const canonicalAbs = canonicalizeHttpUrlForIndex(abs);
153
+ const canonicalExact = index.byCanonicalUrl.get(canonicalAbs);
154
+ if (canonicalExact) {
155
+ return escapePercentForStaticServersOnce(canonicalExact.path);
156
+ }
239
157
  const withoutHash = stripHash(absString);
240
158
  if (withoutHash !== absString) {
241
159
  const found = index.byExactUrl.get(withoutHash);
242
160
  if (found) {
243
161
  return escapePercentForStaticServersOnce(found.path);
244
162
  }
163
+ try {
164
+ const withoutHashUrl = new URL(withoutHash);
165
+ const canonicalWithoutHash = canonicalizeHttpUrlForIndex(withoutHashUrl);
166
+ const canonicalFound = index.byCanonicalUrl.get(canonicalWithoutHash);
167
+ if (canonicalFound) {
168
+ return escapePercentForStaticServersOnce(canonicalFound.path);
169
+ }
170
+ }
171
+ catch {
172
+ // ignore
173
+ }
245
174
  }
246
175
  const pathname = abs.pathname || "/";
247
176
  const pathnameVariants = makePathnameVariants(pathname);
@@ -258,6 +187,9 @@ const resolveToLocalPath = (options) => {
258
187
  const items = toArray(index.byPathname.get(key));
259
188
  const match = tryCandidates(items, baseUrl, 99);
260
189
  if (match) {
190
+ if (!urlEquivalent(requestUrl, match.url, { baseUrl })) {
191
+ continue;
192
+ }
261
193
  return escapePercentForStaticServersOnce(match.path);
262
194
  }
263
195
  }
@@ -281,4 +213,3 @@ const resolveToLocalPath = (options) => {
281
213
  }
282
214
  return undefined;
283
215
  };
284
- exports.resolveToLocalPath = resolveToLocalPath;
package/dist/resources.js CHANGED
@@ -1,41 +1,5 @@
1
- "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
- Object.defineProperty(o, "default", { enumerable: true, value: v });
15
- }) : function(o, v) {
16
- o["default"] = v;
17
- });
18
- var __importStar = (this && this.__importStar) || (function () {
19
- var ownKeys = function(o) {
20
- ownKeys = Object.getOwnPropertyNames || function (o) {
21
- var ar = [];
22
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
- return ar;
24
- };
25
- return ownKeys(o);
26
- };
27
- return function (mod) {
28
- if (mod && mod.__esModule) return mod;
29
- var result = {};
30
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
- __setModuleDefault(result, mod);
32
- return result;
33
- };
34
- })();
35
- Object.defineProperty(exports, "__esModule", { value: true });
36
- exports.extractResourceUrls = exports.toAbsoluteUrl = void 0;
37
- const cheerio = __importStar(require("cheerio"));
38
- const toAbsoluteUrl = (baseUrl, resourceUrl) => {
1
+ import * as cheerio from "cheerio";
2
+ export const toAbsoluteUrl = (baseUrl, resourceUrl) => {
39
3
  try {
40
4
  return new URL(resourceUrl, baseUrl).toString();
41
5
  }
@@ -43,8 +7,7 @@ const toAbsoluteUrl = (baseUrl, resourceUrl) => {
43
7
  return resourceUrl;
44
8
  }
45
9
  };
46
- exports.toAbsoluteUrl = toAbsoluteUrl;
47
- const extractResourceUrls = (html, baseUrl) => {
10
+ export const extractResourceUrls = (html, baseUrl) => {
48
11
  const $ = cheerio.load(html);
49
12
  const urls = [];
50
13
  const collect = (selector, attr) => {
@@ -74,9 +37,8 @@ const extractResourceUrls = (html, baseUrl) => {
74
37
  return {
75
38
  attr,
76
39
  element,
77
- url: (0, exports.toAbsoluteUrl)(baseUrl, value)
40
+ url: toAbsoluteUrl(baseUrl, value)
78
41
  };
79
42
  });
80
43
  return { $, resourceUrls, srcsetItems };
81
44
  };
82
- exports.extractResourceUrls = extractResourceUrls;
@@ -0,0 +1,3 @@
1
+ type UrlResolver = (absoluteUrl: string) => string | null;
2
+ export declare const rewriteJsText: (source: string, resolve: UrlResolver, baseUrl: string) => Promise<string>;
3
+ export {};
@@ -0,0 +1,56 @@
1
+ import { shouldSkipValue } from "./skip.js";
2
+ import { resolveUrlValue } from "./url-resolve.js";
3
/**
 * Rewrites module specifiers in JavaScript source text.
 *
 * Three regex passes run in sequence, each over the output of the previous one:
 *   1. `import ... from "x"` declarations,
 *   2. side-effect imports (`import "x"`),
 *   3. dynamic imports with a literal argument (`import("x")`).
 *
 * Each specifier is trimmed; values accepted by `shouldSkipValue` are left
 * untouched, the rest go through `resolveUrlValue`. When that yields no result
 * (`null`/`undefined`) the original specifier text is kept verbatim. The
 * original quote character and surrounding text are preserved.
 *
 * This is regex-based, not a parser: import-looking text inside comments or
 * string literals is rewritten as well.
 *
 * @param source JavaScript source text.
 * @param resolve Maps an absolute URL to its replacement, or `null`.
 * @param baseUrl Base URL handed to `resolveUrlValue` for each specifier.
 * @returns Promise of the rewritten source (async is kept for API stability).
 */
export const rewriteJsText = async (source, resolve, baseUrl) => {
    const importFromPattern = /(\bimport\s+[^'"]*?\sfrom\s+)(["'])([^"']+)\2/g;
    const importSideEffectPattern = /(\bimport\s+)(["'])([^"']+)\2/g;
    const dynamicImportPattern = /(\bimport\s*\(\s*)(["'])([^"']+)\2(\s*\))/g;
    // Resolves one specifier, falling back to its original text.
    const replaceSpecifier = (specifier) => {
        const trimmed = specifier.trim();
        if (shouldSkipValue(trimmed)) {
            return specifier;
        }
        const resolved = resolveUrlValue(trimmed, baseUrl, resolve);
        return resolved ?? specifier;
    };
    // Runs one pattern over `text`. Only the dynamic-import pattern has a 4th
    // capture group; for the others that callback argument is the match offset,
    // so it must be ignored. A function replacer's return value is inserted
    // literally, so "$" in a resolved path is not interpreted.
    const rewritePass = (text, pattern, hasSuffix) => text.replace(pattern, (_match, prefix, quote, specifier, maybeSuffix) => {
        const open = quote || "";
        const suffix = hasSuffix ? maybeSuffix || "" : "";
        return `${prefix || ""}${open}${replaceSpecifier(specifier || "")}${open}${suffix}`;
    });
    const afterImportFrom = rewritePass(source, importFromPattern, false);
    const afterSideEffect = rewritePass(afterImportFrom, importSideEffectPattern, false);
    return rewritePass(afterSideEffect, dynamicImportPattern, true);
};
@@ -0,0 +1,2 @@
1
+ import * as cheerio from "cheerio";
2
+ export declare const shouldRewriteLinkHref: ($element: cheerio.Cheerio<any>) => boolean;
@@ -0,0 +1,10 @@
1
/**
 * Decides whether a `<link>` element's `href` should be rewritten.
 *
 * The `rel` attribute is trimmed and lower-cased. A missing or empty `rel`
 * means "rewrite"; otherwise the href is rewritten only when `rel` contains
 * one of the substrings "stylesheet", "preload", "prefetch" or "icon"
 * (substring match, so e.g. "modulepreload" and "apple-touch-icon" qualify).
 *
 * @param $element Wrapped element exposing `attr(name)`.
 * @returns `true` when the href should be rewritten.
 */
export const shouldRewriteLinkHref = ($element) => {
    const relValue = ($element.attr("rel") || "").trim().toLowerCase();
    if (relValue === "") {
        return true;
    }
    const rewritableKinds = ["stylesheet", "preload", "prefetch", "icon"];
    return rewritableKinds.some((kind) => relValue.includes(kind));
};
@@ -0,0 +1,3 @@
1
+ type UrlResolver = (absoluteUrl: string) => string | null;
2
+ export declare const rewriteMetaRefresh: (content: string, baseUrl: string, resolve: UrlResolver) => string;
3
+ export {};
@@ -0,0 +1,22 @@
1
+ import { resolveUrlValue } from "./url-resolve.js";
2
/**
 * Rewrites the target URL inside a meta-refresh style `content` string
 * (for example `"0; url=next.html"`).
 *
 * The string is split on `;` and the first segment that starts with `url=`
 * (case-insensitive, optional whitespace around `=`) marks the start of the
 * URL. Everything from that segment to the end of the string is treated as
 * the URL, so targets that themselves contain `;` (e.g. matrix parameters or
 * `;`-separated query strings) are no longer truncated. A URL wrapped in
 * matching single or double quotes is unwrapped before resolution.
 *
 * @param content Raw `content` attribute value.
 * @param baseUrl Base used to absolutize the extracted URL.
 * @param resolve Maps an absolute URL to its replacement, or `null`.
 * @returns The content with the URL replaced, or `content` unchanged when it
 *   has no `;`, has no `url=` segment, or the URL cannot be resolved.
 */
export const rewriteMetaRefresh = (content, baseUrl, resolve) => {
    const parts = content.split(";");
    if (parts.length < 2)
        return content;
    const urlPartIndex = parts.findIndex((part) => /^\s*url\s*=/i.test(part));
    if (urlPartIndex === -1)
        return content;
    // Re-join the tail: a ";" after "url=" belongs to the URL itself, not to
    // a further directive.
    const urlPart = parts.slice(urlPartIndex).join(";");
    let rawUrl = urlPart.slice(urlPart.indexOf("=") + 1).trim();
    if ((rawUrl.startsWith('"') && rawUrl.endsWith('"')) ||
        (rawUrl.startsWith("'") && rawUrl.endsWith("'"))) {
        rawUrl = rawUrl.slice(1, -1).trim();
    }
    const resolved = resolveUrlValue(rawUrl, baseUrl, resolve);
    if (!resolved)
        return content;
    // Keep the segments before the URL (typically the delay) untouched and
    // replace the whole URL tail with the resolved value.
    return [...parts.slice(0, urlPartIndex), `url=${resolved}`].join(";");
};
@@ -0,0 +1 @@
1
+ export declare const shouldSkipValue: (value: string) => boolean;
@@ -0,0 +1,10 @@
1
/**
 * Prefixes that mark a value as something that must not be resolved as a
 * fetchable URL: inline/opaque data, non-navigational schemes, and
 * same-document fragments.
 */
const SKIPPED_VALUE_PREFIXES = ["data:", "blob:", "mailto:", "tel:", "javascript:", "#"];

/**
 * Tells whether an attribute value should be left alone instead of being
 * resolved as a URL.
 *
 * Blank values are skipped, as are values that start with one of the
 * prefixes above. URL schemes are case-insensitive, so the comparison is done
 * on a lower-cased copy (`JavaScript:` and `DATA:` are skipped just like
 * their lower-case spellings).
 *
 * @param value Raw attribute value.
 * @returns `true` when the value must not be rewritten.
 */
export const shouldSkipValue = (value) => {
    const normalized = value.trim().toLowerCase();
    if (!normalized) {
        return true;
    }
    return SKIPPED_VALUE_PREFIXES.some((prefix) => normalized.startsWith(prefix));
};
@@ -0,0 +1,3 @@
1
+ type UrlResolver = (absoluteUrl: string) => string | null;
2
+ export declare const rewriteSrcsetValue: (value: string, baseUrl: string, resolve: UrlResolver) => string;
3
+ export {};
@@ -0,0 +1,63 @@
1
+ import { resolveUrlValue } from "./url-resolve.js";
2
/**
 * Heuristic that flags a `srcset` value as unsafe to rewrite.
 *
 * A value is flagged only when all three markers are present after trimming:
 * the path fragment `/image/fetch/`, a percent-encoded `https://`
 * (`https%3A%2F%2F`), and at least one comma-separated `w_`, `h_` or `c_`
 * token (with or without a space after the comma). Blank input is never
 * flagged.
 * NOTE(review): this presumably targets image-CDN "fetch" URLs whose embedded
 * commas confuse comma-based srcset splitting; confirm against callers.
 *
 * @param value Raw `srcset` attribute value.
 * @returns `true` when the value matches all three markers.
 */
const isUnsafeSrcsetValue = (value) => {
    const text = value.trim();
    if (text.length === 0) {
        return false;
    }
    if (!text.includes("/image/fetch/")) {
        return false;
    }
    if (!text.includes("https%3A%2F%2F")) {
        return false;
    }
    const commaTokens = [",w_", ", w_", ",h_", ", h_", ",c_", ", c_"];
    return commaTokens.some((token) => text.includes(token));
};
17
/**
 * Tells whether a token is a srcset descriptor: a pixel-density descriptor
 * such as `2x` / `1.5x`, or a width descriptor such as `480w`.
 *
 * @param token Candidate token; surrounding whitespace is ignored.
 * @returns `true` for density or width descriptors, `false` otherwise
 *   (including blank input).
 */
const isDescriptorToken = (token) => {
    const trimmed = token.trim();
    if (!trimmed)
        return false;
    return /^\d+(\.\d+)?x$/i.test(trimmed) || /^\d+w$/i.test(trimmed);
};

/** Matches a single whitespace character; hoisted so the scanner reuses it. */
const SRCSET_WHITESPACE = /\s/;

/**
 * Splits a `srcset` value into `{ url, descriptor? }` candidates.
 *
 * Candidate boundaries follow the HTML srcset scanning rules rather than a
 * plain split on `,`: a URL is a run of non-whitespace characters, so commas
 * inside a URL (data URIs, `w_100,h_100` style path segments) stay part of
 * that URL. A comma only separates candidates when it trails the URL token or
 * appears after the URL's whitespace.
 *
 * Within a candidate, the last whitespace-separated token is taken as the
 * descriptor when it is a valid descriptor and at least one token precedes
 * it; otherwise the whole candidate text is the URL.
 *
 * @param input Raw `srcset` attribute value.
 * @returns Candidates in source order; empty for blank input.
 */
const parseSrcset = (input) => {
    const candidates = [];
    const end = input.length;
    let pos = 0;
    while (pos < end) {
        // Skip separators between candidates.
        while (pos < end && (input[pos] === "," || SRCSET_WHITESPACE.test(input[pos]))) {
            pos += 1;
        }
        if (pos >= end) {
            break;
        }
        // The URL is everything up to the next whitespace, commas included.
        const start = pos;
        while (pos < end && !SRCSET_WHITESPACE.test(input[pos])) {
            pos += 1;
        }
        const urlToken = input.slice(start, pos);
        if (urlToken.endsWith(",")) {
            // A trailing comma terminates a descriptor-less candidate. The
            // token cannot be all commas because separators were skipped.
            candidates.push({ url: urlToken.replace(/,+$/, "") });
            continue;
        }
        // Descriptors run until the next comma (or the end of input).
        while (pos < end && input[pos] !== ",") {
            pos += 1;
        }
        const candidate = input.slice(start, pos).trim();
        const tokens = candidate.split(/\s+/).filter(Boolean);
        const last = tokens[tokens.length - 1] ?? "";
        if (tokens.length >= 2 && isDescriptorToken(last)) {
            const url = candidate.slice(0, candidate.lastIndexOf(last)).trim();
            candidates.push({ url, descriptor: last });
        }
        else {
            candidates.push({ url: candidate });
        }
    }
    return candidates;
};

/**
 * Serializes candidates back into a `srcset` value.
 *
 * Each candidate becomes `url` or `url descriptor` (both trimmed); candidates
 * that serialize to an empty string are dropped. Candidates are joined with
 * `", "`: the space is required so that a descriptor-less URL followed by a
 * comma is not read as one URL containing that comma.
 *
 * @param candidates Candidates shaped like the output of `parseSrcset`.
 * @returns The serialized `srcset` string.
 */
const stringifySrcset = (candidates) => {
    return candidates
        .map((c) => {
        const url = c.url.trim();
        if (!c.descriptor)
            return url;
        return `${url} ${c.descriptor.trim()}`;
    })
        .filter(Boolean)
        .join(", ");
};
53
/**
 * Rewrites every candidate URL of a `srcset` value.
 *
 * Values flagged by `isUnsafeSrcsetValue` are replaced with an empty string.
 * Otherwise the value is parsed into candidates, each URL is passed through
 * `resolveUrlValue`, and candidates whose URL does not resolve keep their
 * original URL. Descriptors are carried over unchanged.
 *
 * @param value Raw `srcset` attribute value.
 * @param baseUrl Base used to absolutize candidate URLs.
 * @param resolve Maps an absolute URL to its replacement, or `null`.
 * @returns The re-serialized `srcset`, or `""` for unsafe input.
 */
export const rewriteSrcsetValue = (value, baseUrl, resolve) => {
    if (isUnsafeSrcsetValue(value)) {
        return "";
    }
    const rewritten = [];
    for (const { url, descriptor } of parseSrcset(value)) {
        const replacement = resolveUrlValue(url, baseUrl, resolve);
        // Fall back to the original URL when nothing resolves.
        rewritten.push({ url: replacement ?? url, descriptor });
    }
    return stringifySrcset(rewritten);
};
@@ -0,0 +1,3 @@
1
+ type UrlResolver = (absoluteUrl: string) => string | null;
2
+ export declare const resolveUrlValue: (value: string, baseUrl: string, resolve: UrlResolver) => string | null;
3
+ export {};
@@ -0,0 +1,13 @@
1
+ import { shouldSkipValue } from "./skip.js";
2
/**
 * Absolutizes a URL-like value against a base and maps it through `resolve`.
 *
 * Values rejected by `shouldSkipValue` yield `null` without being parsed. Any
 * exception thrown while constructing the absolute URL or while calling
 * `resolve` is swallowed and reported as `null`.
 *
 * @param value Raw URL value (may be relative).
 * @param baseUrl Base URL used for resolution.
 * @param resolve Maps an absolute URL to its replacement, or `null`.
 * @returns Whatever `resolve` returns for the absolute URL, or `null`.
 */
export const resolveUrlValue = (value, baseUrl, resolve) => {
    if (shouldSkipValue(value)) {
        return null;
    }
    let outcome = null;
    try {
        const absoluteUrl = new URL(value, baseUrl);
        outcome = resolve(absoluteUrl.toString());
    }
    catch {
        // Unparseable URL or failing resolver: treat as "no replacement".
        outcome = null;
    }
    return outcome;
};
@@ -1,6 +1,7 @@
1
- import type { ReplaceElementsConfig } from "./types";
1
+ import { rewriteJsText } from "./rewrite-links/js-imports.js";
2
+ import type { ReplaceElementsConfig } from "./types.js";
2
3
  type UrlResolver = (absoluteUrl: string) => string | null;
3
- export declare const rewriteJsText: (source: string, resolve: UrlResolver, baseUrl: string) => Promise<string>;
4
+ export { rewriteJsText };
4
5
  export declare const rewriteEntryHtml: (input: {
5
6
  html: string;
6
7
  entryUrl: string;
@@ -18,4 +19,3 @@ export declare const rewriteEntryHtml: (input: {
18
19
  html: string;
19
20
  title?: string;
20
21
  }>;
21
- export {};