@pagepocket/lib 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. package/README.md +7 -6
  2. package/dist/build-snapshot-from-bundle.d.ts +23 -0
  3. package/dist/build-snapshot-from-bundle.js +68 -0
  4. package/dist/builtin-blacklist.js +3 -6
  5. package/dist/bundle/from-network-store.d.ts +10 -0
  6. package/dist/bundle/from-network-store.js +26 -0
  7. package/dist/bundle/types.d.ts +32 -0
  8. package/dist/bundle/types.js +2 -0
  9. package/dist/capture/index.d.ts +14 -0
  10. package/dist/capture/index.js +86 -0
  11. package/dist/capture/memory-content-store.d.ts +4 -0
  12. package/dist/capture/memory-content-store.js +42 -0
  13. package/dist/capture/types.d.ts +61 -0
  14. package/dist/capture/types.js +2 -0
  15. package/dist/content-store.js +3 -8
  16. package/dist/content-type.d.ts +1 -1
  17. package/dist/content-type.js +2 -28
  18. package/dist/core/_impl/completion.d.ts +4 -0
  19. package/dist/core/_impl/completion.js +29 -0
  20. package/dist/core/_impl/content-store.d.ts +21 -0
  21. package/dist/core/_impl/content-store.js +91 -0
  22. package/dist/core/_impl/debug.d.ts +1 -0
  23. package/dist/core/_impl/debug.js +16 -0
  24. package/dist/core/_impl/inflight-tracker.d.ts +19 -0
  25. package/dist/core/_impl/inflight-tracker.js +48 -0
  26. package/dist/core/_impl/pagepocket.d.ts +27 -0
  27. package/dist/core/_impl/pagepocket.js +155 -0
  28. package/dist/core/capture/_impl/memory-content-store.d.ts +4 -0
  29. package/dist/core/capture/_impl/memory-content-store.js +42 -0
  30. package/dist/core/capture/_impl/types.d.ts +61 -0
  31. package/dist/core/capture/_impl/types.js +2 -0
  32. package/dist/core/capture/internal/memory-content-store.d.ts +4 -0
  33. package/dist/core/capture/internal/memory-content-store.js +42 -0
  34. package/dist/core/capture/internal/types.d.ts +61 -0
  35. package/dist/core/capture/internal/types.js +2 -0
  36. package/dist/core/capture/memory-content-store.d.ts +4 -0
  37. package/dist/core/capture/memory-content-store.js +38 -0
  38. package/dist/core/capture/types.d.ts +61 -0
  39. package/dist/core/capture/types.js +1 -0
  40. package/dist/core/completion.d.ts +4 -0
  41. package/dist/core/completion.js +23 -0
  42. package/dist/core/content-store.d.ts +21 -0
  43. package/dist/core/content-store.js +54 -0
  44. package/dist/core/debug.d.ts +1 -0
  45. package/dist/core/debug.js +12 -0
  46. package/dist/core/file-tree-merge.d.ts +2 -0
  47. package/dist/core/file-tree-merge.js +27 -0
  48. package/dist/core/file-tree.d.ts +36 -0
  49. package/dist/core/file-tree.js +1 -0
  50. package/dist/core/inflight-tracker.d.ts +19 -0
  51. package/dist/core/inflight-tracker.js +44 -0
  52. package/dist/core/internal/completion.d.ts +4 -0
  53. package/dist/core/internal/completion.js +29 -0
  54. package/dist/core/internal/content-store.d.ts +21 -0
  55. package/dist/core/internal/content-store.js +91 -0
  56. package/dist/core/internal/debug.d.ts +1 -0
  57. package/dist/core/internal/debug.js +16 -0
  58. package/dist/core/internal/inflight-tracker.d.ts +19 -0
  59. package/dist/core/internal/inflight-tracker.js +48 -0
  60. package/dist/core/internal/pagepocket.d.ts +27 -0
  61. package/dist/core/internal/pagepocket.js +155 -0
  62. package/dist/core/pagepocket.d.ts +38 -0
  63. package/dist/core/pagepocket.js +57 -0
  64. package/dist/core/plugin/_impl/context.d.ts +47 -0
  65. package/dist/core/plugin/_impl/context.js +142 -0
  66. package/dist/core/plugin/_impl/runner.d.ts +12 -0
  67. package/dist/core/plugin/_impl/runner.js +232 -0
  68. package/dist/core/plugin/_impl/types.d.ts +108 -0
  69. package/dist/core/plugin/_impl/types.js +2 -0
  70. package/dist/core/plugin/context.d.ts +47 -0
  71. package/dist/core/plugin/context.js +205 -0
  72. package/dist/core/plugin/internal/context.d.ts +47 -0
  73. package/dist/core/plugin/internal/context.js +142 -0
  74. package/dist/core/plugin/internal/runner.d.ts +12 -0
  75. package/dist/core/plugin/internal/runner.js +232 -0
  76. package/dist/core/plugin/internal/types.d.ts +108 -0
  77. package/dist/core/plugin/internal/types.js +2 -0
  78. package/dist/core/plugin/runner-utils.d.ts +9 -0
  79. package/dist/core/plugin/runner-utils.js +29 -0
  80. package/dist/core/plugin/runner.d.ts +12 -0
  81. package/dist/core/plugin/runner.js +118 -0
  82. package/dist/core/plugin/types.d.ts +117 -0
  83. package/dist/core/plugin/types.js +1 -0
  84. package/dist/core/runtime/types.d.ts +14 -0
  85. package/dist/core/runtime/types.js +2 -0
  86. package/dist/css-rewrite.js +1 -5
  87. package/dist/debug.d.ts +0 -1
  88. package/dist/debug.js +3 -5
  89. package/dist/files/types.d.ts +41 -0
  90. package/dist/files/types.js +2 -0
  91. package/dist/hack-html.js +20 -13
  92. package/dist/hackers/index.d.ts +1 -1
  93. package/dist/hackers/index.js +24 -27
  94. package/dist/hackers/preload-fetch.d.ts +1 -1
  95. package/dist/hackers/preload-fetch.js +1 -4
  96. package/dist/hackers/preload-xhr.d.ts +1 -1
  97. package/dist/hackers/preload-xhr.js +1 -4
  98. package/dist/hackers/replay-beacon.d.ts +1 -1
  99. package/dist/hackers/replay-beacon.js +1 -4
  100. package/dist/hackers/replay-block-text-fragment.d.ts +1 -1
  101. package/dist/hackers/replay-block-text-fragment.js +1 -4
  102. package/dist/hackers/replay-css-proxy.d.ts +1 -1
  103. package/dist/hackers/replay-css-proxy.js +9 -12
  104. package/dist/hackers/replay-dom-rewrite.d.ts +1 -1
  105. package/dist/hackers/replay-dom-rewrite.js +165 -154
  106. package/dist/hackers/replay-eventsource.d.ts +1 -1
  107. package/dist/hackers/replay-eventsource.js +1 -4
  108. package/dist/hackers/replay-fetch.d.ts +1 -1
  109. package/dist/hackers/replay-fetch.js +1 -4
  110. package/dist/hackers/replay-history-path.d.ts +1 -1
  111. package/dist/hackers/replay-history-path.js +1 -4
  112. package/dist/hackers/replay-svg-image.d.ts +1 -1
  113. package/dist/hackers/replay-svg-image.js +1 -4
  114. package/dist/hackers/replay-websocket.d.ts +1 -1
  115. package/dist/hackers/replay-websocket.js +1 -4
  116. package/dist/hackers/replay-xhr.d.ts +1 -1
  117. package/dist/hackers/replay-xhr.js +1 -4
  118. package/dist/hackers/types.js +1 -2
  119. package/dist/index.d.ts +29 -13
  120. package/dist/index.js +23 -44
  121. package/dist/kind-map.d.ts +68 -0
  122. package/dist/kind-map.js +58 -0
  123. package/dist/network-store.js +12 -1
  124. package/dist/pagepocket.d.ts +19 -4
  125. package/dist/pagepocket.js +36 -102
  126. package/dist/path-resolver.d.ts +1 -2
  127. package/dist/path-resolver.js +9 -16
  128. package/dist/plugin/builtins/build-snapshot-plugin.d.ts +5 -0
  129. package/dist/plugin/builtins/build-snapshot-plugin.js +84 -0
  130. package/dist/plugin/builtins/replace-elements-plugin.d.ts +8 -0
  131. package/dist/plugin/builtins/replace-elements-plugin.js +13 -0
  132. package/dist/plugin/builtins/to-directory-plugin.d.ts +7 -0
  133. package/dist/plugin/builtins/to-directory-plugin.js +20 -0
  134. package/dist/plugin/builtins/to-zip-plugin.d.ts +5 -0
  135. package/dist/plugin/builtins/to-zip-plugin.js +19 -0
  136. package/dist/plugin/context.d.ts +47 -0
  137. package/dist/plugin/context.js +142 -0
  138. package/dist/plugin/runner.d.ts +12 -0
  139. package/dist/plugin/runner.js +232 -0
  140. package/dist/plugin/types.d.ts +108 -0
  141. package/dist/plugin/types.js +2 -0
  142. package/dist/plugins/build-files-from-capture.d.ts +5 -0
  143. package/dist/plugins/build-files-from-capture.js +85 -0
  144. package/dist/plugins/build-warc.d.ts +5 -0
  145. package/dist/plugins/build-warc.js +225 -0
  146. package/dist/plugins/builtins/manifest.d.ts +2 -0
  147. package/dist/plugins/builtins/manifest.js +42 -0
  148. package/dist/plugins/builtins/snapshot-directory.d.ts +2 -0
  149. package/dist/plugins/builtins/snapshot-directory.js +24 -0
  150. package/dist/plugins/builtins/snapshot-zip.d.ts +2 -0
  151. package/dist/plugins/builtins/snapshot-zip.js +25 -0
  152. package/dist/plugins/capture-http-lighterceptor.d.ts +5 -0
  153. package/dist/plugins/capture-http-lighterceptor.js +85 -0
  154. package/dist/plugins/capture-http-puppeteer.d.ts +5 -0
  155. package/dist/plugins/capture-http-puppeteer.js +85 -0
  156. package/dist/plugins/host.d.ts +37 -0
  157. package/dist/plugins/host.js +105 -0
  158. package/dist/plugins/index.d.ts +6 -0
  159. package/dist/plugins/index.js +11 -0
  160. package/dist/plugins/ordering.d.ts +2 -0
  161. package/dist/plugins/ordering.js +19 -0
  162. package/dist/plugins/types.d.ts +51 -0
  163. package/dist/plugins/types.js +2 -0
  164. package/dist/preload.js +3 -7
  165. package/dist/replace-elements/actions.d.ts +5 -0
  166. package/dist/replace-elements/actions.js +86 -0
  167. package/dist/replace-elements/match.d.ts +5 -0
  168. package/dist/replace-elements/match.js +46 -0
  169. package/dist/replace-elements/normalize.d.ts +21 -0
  170. package/dist/replace-elements/normalize.js +50 -0
  171. package/dist/replace-elements.d.ts +1 -1
  172. package/dist/replace-elements.js +5 -185
  173. package/dist/replay/match-api.d.ts +10 -0
  174. package/dist/replay/match-api.js +162 -0
  175. package/dist/replay/templates/match-api-source.d.ts +1 -0
  176. package/dist/replay/templates/match-api-source.js +137 -0
  177. package/dist/replay/templates/replay-script-template.d.ts +5 -0
  178. package/dist/replay/templates/replay-script-template.js +337 -0
  179. package/dist/replay/templates/resource-proxy-script.d.ts +1 -0
  180. package/dist/replay/templates/resource-proxy-script.js +274 -0
  181. package/dist/replay-script.d.ts +3 -10
  182. package/dist/replay-script.js +11 -625
  183. package/dist/resource-filter.d.ts +1 -1
  184. package/dist/resource-filter.js +1 -5
  185. package/dist/resource-proxy/escape-percent.d.ts +1 -0
  186. package/dist/resource-proxy/escape-percent.js +12 -0
  187. package/dist/resource-proxy/multimap.d.ts +3 -0
  188. package/dist/resource-proxy/multimap.js +18 -0
  189. package/dist/resource-proxy/pathname-variants.d.ts +3 -0
  190. package/dist/resource-proxy/pathname-variants.js +54 -0
  191. package/dist/resource-proxy.d.ts +4 -2
  192. package/dist/resource-proxy.js +48 -117
  193. package/dist/resources.js +4 -42
  194. package/dist/rewrite-links/js-imports.d.ts +3 -0
  195. package/dist/rewrite-links/js-imports.js +56 -0
  196. package/dist/rewrite-links/link-rel.d.ts +2 -0
  197. package/dist/rewrite-links/link-rel.js +10 -0
  198. package/dist/rewrite-links/meta-refresh.d.ts +3 -0
  199. package/dist/rewrite-links/meta-refresh.js +22 -0
  200. package/dist/rewrite-links/skip.d.ts +1 -0
  201. package/dist/rewrite-links/skip.js +10 -0
  202. package/dist/rewrite-links/srcset.d.ts +3 -0
  203. package/dist/rewrite-links/srcset.js +63 -0
  204. package/dist/rewrite-links/url-resolve.d.ts +3 -0
  205. package/dist/rewrite-links/url-resolve.js +13 -0
  206. package/dist/rewrite-links.d.ts +3 -3
  207. package/dist/rewrite-links.js +31 -240
  208. package/dist/snapshot-builder/api.d.ts +3 -0
  209. package/dist/snapshot-builder/api.js +6 -0
  210. package/dist/snapshot-builder/build-snapshot.d.ts +3 -0
  211. package/dist/snapshot-builder/build-snapshot.js +138 -0
  212. package/dist/snapshot-builder/capture-index/index-capture.d.ts +13 -0
  213. package/dist/snapshot-builder/capture-index/index-capture.js +168 -0
  214. package/dist/snapshot-builder/capture-index/index.d.ts +2 -0
  215. package/dist/snapshot-builder/capture-index/index.js +1 -0
  216. package/dist/snapshot-builder/capture-index/types.d.ts +12 -0
  217. package/dist/snapshot-builder/capture-index/types.js +1 -0
  218. package/dist/snapshot-builder/capture-index.d.ts +12 -0
  219. package/dist/snapshot-builder/capture-index.js +173 -0
  220. package/dist/snapshot-builder/emit-document.d.ts +24 -0
  221. package/dist/snapshot-builder/emit-document.js +50 -0
  222. package/dist/snapshot-builder/grouping.d.ts +8 -0
  223. package/dist/snapshot-builder/grouping.js +87 -0
  224. package/dist/snapshot-builder/http.d.ts +6 -0
  225. package/dist/snapshot-builder/http.js +28 -0
  226. package/dist/snapshot-builder/index.d.ts +4 -0
  227. package/dist/snapshot-builder/index.js +2 -0
  228. package/dist/snapshot-builder/path-map.d.ts +3 -0
  229. package/dist/snapshot-builder/path-map.js +35 -0
  230. package/dist/snapshot-builder/resources-path.d.ts +23 -0
  231. package/dist/snapshot-builder/resources-path.js +47 -0
  232. package/dist/snapshot-builder/rewrite-resource.d.ts +18 -0
  233. package/dist/snapshot-builder/rewrite-resource.js +52 -0
  234. package/dist/snapshot-builder/types.d.ts +37 -0
  235. package/dist/snapshot-builder/types.js +2 -0
  236. package/dist/snapshot-builder.d.ts +12 -8
  237. package/dist/snapshot-builder.js +252 -27
  238. package/dist/types.d.ts +122 -78
  239. package/dist/types.js +4 -2
  240. package/dist/units/contracts-bridge.d.ts +76 -0
  241. package/dist/units/contracts-bridge.js +6 -0
  242. package/dist/units/index.d.ts +4 -0
  243. package/dist/units/index.js +2 -0
  244. package/dist/units/runner.d.ts +11 -0
  245. package/dist/units/runner.js +270 -0
  246. package/dist/units/types.d.ts +39 -0
  247. package/dist/units/types.js +1 -0
  248. package/dist/utils/streams.d.ts +2 -0
  249. package/dist/utils/streams.js +29 -0
  250. package/dist/utils.d.ts +35 -1
  251. package/dist/utils.js +107 -29
  252. package/dist/v3/contracts-bridge.d.ts +69 -0
  253. package/dist/v3/contracts-bridge.js +5 -0
  254. package/dist/v3/index.d.ts +4 -0
  255. package/dist/v3/index.js +2 -0
  256. package/dist/v3/runner.d.ts +20 -0
  257. package/dist/v3/runner.js +245 -0
  258. package/dist/v3/types.d.ts +39 -0
  259. package/dist/v3/types.js +1 -0
  260. package/dist/writers.js +3 -1
  261. package/package.json +11 -3
@@ -1,238 +1,30 @@
1
- "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
- Object.defineProperty(o, "default", { enumerable: true, value: v });
15
- }) : function(o, v) {
16
- o["default"] = v;
17
- });
18
- var __importStar = (this && this.__importStar) || (function () {
19
- var ownKeys = function(o) {
20
- ownKeys = Object.getOwnPropertyNames || function (o) {
21
- var ar = [];
22
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
- return ar;
24
- };
25
- return ownKeys(o);
26
- };
27
- return function (mod) {
28
- if (mod && mod.__esModule) return mod;
29
- var result = {};
30
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
- __setModuleDefault(result, mod);
32
- return result;
33
- };
34
- })();
35
- Object.defineProperty(exports, "__esModule", { value: true });
36
- exports.rewriteEntryHtml = exports.rewriteJsText = void 0;
37
- const cheerio = __importStar(require("cheerio"));
38
- const css_rewrite_1 = require("./css-rewrite");
39
- const hack_html_1 = require("./hack-html");
40
- const replace_elements_1 = require("./replace-elements");
41
- const shouldSkipValue = (value) => {
42
- const trimmed = value.trim();
43
- return (!trimmed ||
44
- trimmed.startsWith("data:") ||
45
- trimmed.startsWith("blob:") ||
46
- trimmed.startsWith("mailto:") ||
47
- trimmed.startsWith("tel:") ||
48
- trimmed.startsWith("javascript:") ||
49
- trimmed.startsWith("#"));
50
- };
51
- const resolveUrlValue = (value, baseUrl, resolve) => {
52
- if (shouldSkipValue(value)) {
53
- return null;
54
- }
55
- try {
56
- const absolute = new URL(value, baseUrl).toString();
57
- return resolve(absolute);
58
- }
59
- catch {
60
- return null;
61
- }
62
- };
63
- const isUnsafeSrcsetValue = (value) => {
64
- const trimmed = value.trim();
65
- if (!trimmed) {
66
- return false;
67
- }
68
- // Some sites (notably Substack) emit image transform URLs that contain commas
69
- // inside the URL itself (e.g. "/image/fetch/...,$w_40,$h_40,.../https%3A...").
70
- //
71
- // In the HTML srcset grammar, commas separate candidates, so unescaped commas
72
- // inside a URL make the srcset invalid. Browsers will parse it into garbage
73
- // URLs like "https%3A%2F%2F...png" and try to fetch them.
74
- //
75
- // For offline snapshots, it's better to drop srcset entirely and rely on
76
- // the already-rewritten img[src].
77
- const hasFetchTransform = trimmed.includes("/image/fetch/");
78
- const hasEncodedUrlTail = trimmed.includes("https%3A%2F%2F");
79
- const hasCommaTokens = trimmed.includes(",w_") ||
80
- trimmed.includes(", w_") ||
81
- trimmed.includes(",h_") ||
82
- trimmed.includes(", h_") ||
83
- trimmed.includes(",c_") ||
84
- trimmed.includes(", c_");
85
- return hasFetchTransform && hasEncodedUrlTail && hasCommaTokens;
86
- };
87
- const isDescriptorToken = (token) => {
88
- const trimmed = token.trim();
89
- if (!trimmed)
90
- return false;
91
- // Common srcset descriptors: 1x, 2x, 320w
92
- return /^\d+(\.\d+)?x$/i.test(trimmed) || /^\d+w$/i.test(trimmed);
93
- };
94
- const parseSrcset = (input) => {
95
- // Minimal srcset parser:
96
- // - Candidates are separated by commas.
97
- // - Each candidate is "<url> [descriptor]".
98
- // - URLs may contain spaces/commas (e.g. CDN transform strings). To avoid
99
- // breaking those, we locate the descriptor from the *end* of the candidate.
100
- const rawCandidates = input
101
- .split(",")
102
- .map((c) => c.trim())
103
- .filter(Boolean);
104
- return rawCandidates.map((candidate) => {
105
- const tokens = candidate.split(/\s+/).filter(Boolean);
106
- if (tokens.length === 0) {
107
- return { url: candidate };
108
- }
109
- const last = tokens[tokens.length - 1] ?? "";
110
- if (tokens.length >= 2 && isDescriptorToken(last)) {
111
- const descriptor = last;
112
- const url = candidate.slice(0, candidate.lastIndexOf(descriptor)).trim();
113
- return { url, descriptor };
114
- }
115
- return { url: candidate };
116
- });
117
- };
118
- const stringifySrcset = (candidates) => {
119
- return (candidates
120
- .map((c) => {
121
- const url = c.url.trim();
122
- if (!c.descriptor)
123
- return url;
124
- return `${url} ${c.descriptor.trim()}`;
125
- })
126
- .filter(Boolean)
127
- // Don't introduce spaces after commas inside URL tokens.
128
- .join(","));
129
- };
130
- const rewriteSrcsetValue = (value, baseUrl, resolve) => {
131
- if (isUnsafeSrcsetValue(value)) {
132
- return "";
133
- }
134
- const candidates = parseSrcset(value);
135
- const rewritten = candidates.map((c) => {
136
- const resolved = resolveUrlValue(c.url, baseUrl, resolve);
137
- return { url: resolved ?? c.url, descriptor: c.descriptor };
138
- });
139
- return stringifySrcset(rewritten);
140
- };
141
- const rewriteMetaRefresh = (content, baseUrl, resolve) => {
142
- const parts = content.split(";");
143
- if (parts.length < 2)
144
- return content;
145
- const urlPartIndex = parts.findIndex((part) => part.trim().toLowerCase().startsWith("url="));
146
- if (urlPartIndex === -1)
147
- return content;
148
- const urlPart = parts[urlPartIndex];
149
- let rawUrl = urlPart.split("=").slice(1).join("=").trim();
150
- // Some pages quote the URL value (url="/next" or url='/next').
151
- // Strip a single pair of surrounding quotes to improve rewrite coverage.
152
- if ((rawUrl.startsWith('"') && rawUrl.endsWith('"')) ||
153
- (rawUrl.startsWith("'") && rawUrl.endsWith("'"))) {
154
- rawUrl = rawUrl.slice(1, -1).trim();
155
- }
156
- const resolved = resolveUrlValue(rawUrl, baseUrl, resolve);
157
- if (!resolved)
158
- return content;
159
- const next = `url=${resolved}`;
160
- const nextParts = parts.slice();
161
- nextParts[urlPartIndex] = next;
162
- return nextParts.join(";");
163
- };
164
- const shouldRewriteLinkHref = ($element) => {
165
- const rel = ($element.attr("rel") || "").trim().toLowerCase();
166
- if (!rel) {
167
- return true;
168
- }
169
- // Only rewrite link rels that are expected to load a resource.
170
- // Avoid rewriting navigational/SEO links like canonical, preconnect, etc.
171
- return (rel.includes("stylesheet") ||
172
- rel.includes("preload") ||
173
- rel.includes("prefetch") ||
174
- rel.includes("icon"));
175
- };
176
- const rewriteJsText = async (source, resolve, baseUrl) => {
177
- const replaceSpecifier = async (specifier) => {
178
- const trimmed = specifier.trim();
179
- if (shouldSkipValue(trimmed)) {
180
- return specifier;
181
- }
182
- const resolved = resolveUrlValue(trimmed, baseUrl, resolve);
183
- return resolved ?? specifier;
184
- };
185
- const importFromPattern = /(\bimport\s+[^'"]*?\sfrom\s+)(["'])([^"']+)\2/g;
186
- const importSideEffectPattern = /(\bimport\s+)(["'])([^"']+)\2/g;
187
- const dynamicImportPattern = /(\bimport\s*\(\s*)(["'])([^"']+)\2(\s*\))/g;
188
- let replaced = "";
189
- let lastIndex = 0;
190
- for (const match of source.matchAll(importFromPattern)) {
191
- const index = match.index ?? 0;
192
- replaced += source.slice(lastIndex, index);
193
- const prefix = match[1] || "";
194
- const quote = match[2] || "";
195
- const specifier = match[3] || "";
196
- const next = await replaceSpecifier(specifier);
197
- replaced += `${prefix}${quote}${next}${quote}`;
198
- lastIndex = index + match[0].length;
199
- }
200
- replaced += source.slice(lastIndex);
201
- let final = "";
202
- lastIndex = 0;
203
- for (const match of replaced.matchAll(importSideEffectPattern)) {
204
- const index = match.index ?? 0;
205
- final += replaced.slice(lastIndex, index);
206
- const prefix = match[1] || "";
207
- const quote = match[2] || "";
208
- const specifier = match[3] || "";
209
- const next = await replaceSpecifier(specifier);
210
- final += `${prefix}${quote}${next}${quote}`;
211
- lastIndex = index + match[0].length;
212
- }
213
- final += replaced.slice(lastIndex);
214
- let dynamicFinal = "";
215
- lastIndex = 0;
216
- for (const match of final.matchAll(dynamicImportPattern)) {
217
- const index = match.index ?? 0;
218
- dynamicFinal += final.slice(lastIndex, index);
219
- const prefix = match[1] || "";
220
- const quote = match[2] || "";
221
- const specifier = match[3] || "";
222
- const suffix = match[4] || "";
223
- const next = await replaceSpecifier(specifier);
224
- dynamicFinal += `${prefix}${quote}${next}${quote}${suffix}`;
225
- lastIndex = index + match[0].length;
226
- }
227
- dynamicFinal += final.slice(lastIndex);
228
- return dynamicFinal;
229
- };
230
- exports.rewriteJsText = rewriteJsText;
231
- const rewriteEntryHtml = async (input) => {
232
- const $ = cheerio.load(input.html);
1
+ import * as cheerio from "cheerio";
2
+ import { rewriteCssText } from "./css-rewrite.js";
3
+ import { hackHtml } from "./hack-html.js";
4
+ import { applyReplaceElements } from "./replace-elements.js";
5
+ import { rewriteJsText } from "./rewrite-links/js-imports.js";
6
+ import { shouldRewriteLinkHref } from "./rewrite-links/link-rel.js";
7
+ import { rewriteMetaRefresh } from "./rewrite-links/meta-refresh.js";
8
+ import { rewriteSrcsetValue } from "./rewrite-links/srcset.js";
9
+ import { resolveUrlValue } from "./rewrite-links/url-resolve.js";
10
+ export { rewriteJsText };
11
+ export const rewriteEntryHtml = async (input) => {
233
12
  const baseUrl = input.entryUrl;
234
13
  const resolve = input.resolve;
235
14
  const shouldRewriteLinks = input.rewriteLinks !== false;
15
+ if (input.html.includes("__pagepocketPatched") ||
16
+ input.html.includes("__pagepocketOriginalFetch")) {
17
+ const $title = cheerio.load(input.html)("title").first().text() || undefined;
18
+ return { html: input.html, title: $title };
19
+ }
20
+ const hasPreloadMarker = input.html.includes("__pagepocketPatched");
21
+ const hasReplayMarker = input.html.includes("__pagepocketOriginalFetch");
22
+ const hasReplaceElements = Array.isArray(input.replaceElements) && input.replaceElements.length > 0;
23
+ if (!shouldRewriteLinks && !hasReplaceElements && hasPreloadMarker && hasReplayMarker) {
24
+ const $title = cheerio.load(input.html)("title").first().text() || undefined;
25
+ return { html: input.html, title: $title };
26
+ }
27
+ const $ = cheerio.load(input.html);
236
28
  const rewriteAttr = (selector, attr) => {
237
29
  $(selector).each((_, element) => {
238
30
  const value = $(element).attr(attr);
@@ -304,7 +96,7 @@ const rewriteEntryHtml = async (input) => {
304
96
  const cssText = $(element).html();
305
97
  if (!cssText)
306
98
  continue;
307
- const rewritten = await (0, css_rewrite_1.rewriteCssText)({
99
+ const rewritten = await rewriteCssText({
308
100
  cssText,
309
101
  cssUrl: baseUrl,
310
102
  resolveUrl: resolve
@@ -318,7 +110,7 @@ const rewriteEntryHtml = async (input) => {
318
110
  const styleText = $(element).attr("style");
319
111
  if (!styleText)
320
112
  continue;
321
- const rewritten = await (0, css_rewrite_1.rewriteCssText)({
113
+ const rewritten = await rewriteCssText({
322
114
  cssText: styleText,
323
115
  cssUrl: baseUrl,
324
116
  resolveUrl: resolve
@@ -337,18 +129,18 @@ const rewriteEntryHtml = async (input) => {
337
129
  const original = $(element).html();
338
130
  if (!original)
339
131
  continue;
340
- const rewritten = await (0, exports.rewriteJsText)(original, resolve, baseUrl);
132
+ const rewritten = await rewriteJsText(original, resolve, baseUrl);
341
133
  if (rewritten !== original) {
342
134
  $(element).html(rewritten);
343
135
  }
344
136
  }
345
137
  }
346
- (0, hack_html_1.hackHtml)({
138
+ hackHtml({
347
139
  $,
348
- baseUrl: baseUrl,
140
+ baseUrl,
349
141
  apiPath: input.apiPath
350
142
  });
351
- await (0, replace_elements_1.applyReplaceElements)({
143
+ await applyReplaceElements({
352
144
  $,
353
145
  entryUrl: input.snapshotEntryUrl ?? baseUrl,
354
146
  url: baseUrl,
@@ -356,6 +148,5 @@ const rewriteEntryHtml = async (input) => {
356
148
  isEntryDocument: input.isEntryDocument ?? true
357
149
  });
358
150
  const title = $("title").first().text() || undefined;
359
- return { html: $.html(), title };
151
+ return { html: `${$.html()}\n`, title };
360
152
  };
361
- exports.rewriteEntryHtml = rewriteEntryHtml;
@@ -0,0 +1,3 @@
1
+ import type { ApiSnapshot } from "../types.js";
2
+ import type { ApiEntry } from "./types.js";
3
+ export declare const buildApiSnapshot: (url: string, createdAt: number, entries: ApiEntry[]) => ApiSnapshot;
@@ -0,0 +1,6 @@
1
+ export const buildApiSnapshot = (url, createdAt, entries) => ({
2
+ version: "1.0",
3
+ url,
4
+ createdAt,
5
+ records: entries.map((entry) => entry.record)
6
+ });
@@ -0,0 +1,3 @@
1
+ import type { FileTree } from "../core/file-tree.js";
2
+ import type { BuildOptions } from "./types.js";
3
+ export declare const buildSnapshot: (input: BuildOptions) => Promise<FileTree>;
@@ -0,0 +1,138 @@
1
+ import { createDefaultPathResolver, resolveCrossOrigin, withPrefixPathResolver } from "../path-resolver.js";
2
+ import { ensureLeadingSlash, sanitizePosixPath } from "../utils.js";
3
+ import { buildApiSnapshot } from "./api.js";
4
+ import { indexCapture } from "./capture-index/index.js";
5
+ import { emitDocumentFile } from "./emit-document.js";
6
+ import { groupResources } from "./grouping.js";
7
+ import { responseMimeType } from "./http.js";
8
+ import { docDirFromUrl, resolveSnapshotPath } from "./path-map.js";
9
+ import { buildResourcesPathSnapshot } from "./resources-path.js";
10
+ import { maybeRewriteScript, maybeRewriteStylesheet } from "./rewrite-resource.js";
11
+ // NOTE: helpers were extracted into snapshot-builder/* modules.
12
+ export const buildSnapshot = async (input) => {
13
+ const warnings = input.warnings;
14
+ const contentStore = input.capture.contentStore;
15
+ const indexed = await indexCapture({
16
+ capture: input.capture,
17
+ filter: input.filter,
18
+ limits: input.limits,
19
+ warnings
20
+ });
21
+ const groups = groupResources({
22
+ entryUrl: input.entryUrl,
23
+ resources: indexed.resources,
24
+ apiEntries: indexed.apiEntries,
25
+ warnings
26
+ });
27
+ const multiDoc = groups.length > 1;
28
+ const files = [];
29
+ let entryPath = "";
30
+ for (const group of groups) {
31
+ const docDir = multiDoc ? docDirFromUrl(group.url) : "";
32
+ const baseResolver = input.pathResolver ?? createDefaultPathResolver();
33
+ const resolver = multiDoc ? withPrefixPathResolver(baseResolver, docDir) : baseResolver;
34
+ const urlToPath = new Map();
35
+ for (const resource of group.resources) {
36
+ const path = resolver.resolve({
37
+ url: resource.request.url,
38
+ resourceType: resource.request.resourceType,
39
+ mimeType: resource.mimeType,
40
+ suggestedFilename: undefined,
41
+ isCrossOrigin: resolveCrossOrigin(resource.request.url, group.url),
42
+ entryUrl: group.url
43
+ });
44
+ urlToPath.set(resource.request.url, path);
45
+ }
46
+ const resolve = (absoluteUrl) => resolveSnapshotPath(urlToPath, absoluteUrl);
47
+ const apiPath = ensureLeadingSlash(multiDoc ? `${sanitizePosixPath(docDir)}/api.json` : "/api.json");
48
+ for (const resource of group.resources) {
49
+ if (resource.request.resourceType === "document") {
50
+ const path = urlToPath.get(resource.request.url) ?? "/index.html";
51
+ const { file } = await emitDocumentFile({
52
+ resource,
53
+ path,
54
+ entryUrl: input.entryUrl,
55
+ groupUrl: group.url,
56
+ apiPath,
57
+ resolve,
58
+ rewriteEntry: input.rewriteEntry,
59
+ replaceElements: input.replaceElements,
60
+ contentStore,
61
+ snapshotEntryUrl: input.entryUrl
62
+ });
63
+ files.push(file);
64
+ if (resource.request.url === input.entryUrl || !entryPath) {
65
+ entryPath = path;
66
+ }
67
+ continue;
68
+ }
69
+ const afterCss = await maybeRewriteStylesheet({
70
+ resource,
71
+ resolve,
72
+ contentStore,
73
+ rewriteCSS: input.rewriteCSS
74
+ });
75
+ const afterJs = await maybeRewriteScript({
76
+ resource: { ...resource, contentRef: afterCss.contentRef, size: afterCss.size },
77
+ resolve,
78
+ contentStore
79
+ });
80
+ const path = urlToPath.get(resource.request.url) ??
81
+ resolver.resolve({
82
+ url: resource.request.url,
83
+ resourceType: resource.request.resourceType,
84
+ mimeType: resourceMimeType(resource),
85
+ suggestedFilename: undefined,
86
+ isCrossOrigin: resolveCrossOrigin(resource.request.url, group.url),
87
+ entryUrl: group.url
88
+ });
89
+ files.push({
90
+ path,
91
+ mimeType: resourceMimeType(resource),
92
+ size: afterJs.size,
93
+ source: afterJs.contentRef,
94
+ originalUrl: resource.request.url,
95
+ resourceType: resource.request.resourceType,
96
+ headers: resource.response.headers
97
+ });
98
+ }
99
+ const apiSnapshot = buildApiSnapshot(group.url, input.createdAt, group.apiEntries);
100
+ const apiBytes = new TextEncoder().encode(`${JSON.stringify(apiSnapshot, null, 2)}\n`);
101
+ const apiRef = await contentStore.put({ kind: "buffer", data: apiBytes }, { url: apiPath, mimeType: "application/json", sizeHint: apiBytes.byteLength });
102
+ files.push({
103
+ path: apiPath,
104
+ mimeType: "application/json",
105
+ size: apiBytes.byteLength,
106
+ source: apiRef,
107
+ originalUrl: apiPath
108
+ });
109
+ }
110
+ {
111
+ const resourcesPath = buildResourcesPathSnapshot(input.createdAt, files);
112
+ const bytes = new TextEncoder().encode(`${JSON.stringify(resourcesPath, null, 2)}\n`);
113
+ files.push({
114
+ path: "/resources_path.json",
115
+ mimeType: "application/json",
116
+ size: bytes.byteLength,
117
+ source: { kind: "memory", data: bytes }
118
+ });
119
+ }
120
+ return {
121
+ root: {
122
+ kind: "directory",
123
+ path: "",
124
+ entries: files.map((file) => ({
125
+ kind: "file",
126
+ path: file.path,
127
+ source: { kind: "content-ref", ref: file.source }
128
+ }))
129
+ },
130
+ content: {
131
+ open: (ref) => contentStore.open(ref),
132
+ dispose: async () => {
133
+ await contentStore.dispose?.();
134
+ }
135
+ }
136
+ };
137
+ };
138
+ const resourceMimeType = (resource) => resource.mimeType || responseMimeType(resource.response) || undefined;
@@ -0,0 +1,13 @@
1
+ import type { CaptureArtifacts } from "../../core/capture/types.js";
2
+ import type { ResourceFilter } from "../../types.js";
3
+ import type { BuildLimits } from "../types.js";
4
+ import type { ApiEntry, StoredResource } from "./types.js";
5
/**
 * Indexes a capture into stored resources and API entries.
 *
 * @param input.capture - The capture artifacts to index.
 * @param input.filter - Resource filter consulted while indexing.
 * @param input.limits - Optional build limits.
 * @param input.warnings - Caller-supplied list of warning messages
 *   (presumably appended to while indexing; confirm against the implementation).
 * @returns A promise resolving to the indexed `resources` and `apiEntries`.
 */
+ export declare const indexCapture: (input: {
6
+ capture: CaptureArtifacts;
7
+ filter: ResourceFilter;
8
+ limits?: BuildLimits;
9
+ warnings: string[];
10
+ }) => Promise<{
11
+ resources: StoredResource[];
12
+ apiEntries: ApiEntry[];
13
+ }>;
@@ -0,0 +1,168 @@
1
+ import { streamToUint8Array } from "../../utils/streams.js";
2
+ import { bodyToTextOrBase64 } from "../../utils.js";
3
+ import { headersListToRecord, parseContentLength, responseMimeType } from "../http.js";
4
/**
 * Tells whether a request counts as an API call, i.e. its `resourceType`
 * is exactly "fetch" or "xhr". A missing request (null/undefined) is not one.
 */
const isApiResource = (request) => {
    switch (request?.resourceType) {
        case "fetch":
        case "xhr":
            return true;
        default:
            return false;
    }
};
8
/**
 * Groups network events by `requestId`.
 *
 * Each map value is an object with optional `request`, `response` and
 * `failed` slots holding the matching "http.request", "http.response" and
 * "http.failed" event. If several events of the same kind share a request
 * id, the last one wins. Events of any other type are ignored and do not
 * create a map entry. Map order follows the first relevant event seen for
 * each request id.
 */
const buildByRequestId = (events) => {
    // Event type -> name of the slot it fills. A Map (not a plain object) so
    // that odd type strings such as "constructor" never match by accident.
    const slotForType = new Map([
        ["http.request", "request"],
        ["http.response", "response"],
        ["http.failed", "failed"]
    ]);
    const grouped = new Map();
    for (const event of events) {
        const slot = slotForType.get(event.type);
        if (slot === undefined) {
            continue;
        }
        let bucket = grouped.get(event.requestId);
        if (!bucket) {
            bucket = {};
            grouped.set(event.requestId, bucket);
        }
        bucket[slot] = event;
    }
    return grouped;
};
33
/**
 * Indexes a capture into (a) resources eligible for saving and (b) API call
 * records for fetch/xhr requests.
 *
 * Events are grouped per request id with `buildByRequestId`; groups without
 * an "http.request" event are ignored. For every remaining group:
 *  - if it has an "http.failed" event, an API failure record is produced
 *    (fetch/xhr only) and the group is never saved as a resource;
 *  - otherwise, if it has an "http.response" event, an API response record is
 *    produced (fetch/xhr only), and the response becomes a stored resource
 *    when `input.filter.shouldSave` accepts it, it has a body reference, and
 *    it fits within `input.limits`.
 *
 * Resources dropped for a missing body or a limit are reported by pushing a
 * message onto `input.warnings` (the array is mutated in place).
 *
 * @param input - `capture` (events plus content store), `filter`, optional
 *   `limits`, and the `warnings` array.
 * @returns The collected `resources` and `apiEntries`.
 */
export const indexCapture = async (input) => {
    const exchanges = buildByRequestId(input.capture.events);
    const resources = [];
    const apiEntries = [];
    const recordedApiIds = new Set();
    let bytesKept = 0;

    // Opens a stored body and buffers it completely.
    const readBody = async (ref) => {
        const stream = await input.capture.contentStore.open(ref);
        return streamToUint8Array(stream);
    };

    // True the first time a request id is offered, false on any repeat.
    const claimApiSlot = (requestId) => {
        if (recordedApiIds.has(requestId)) {
            return false;
        }
        recordedApiIds.add(requestId);
        return true;
    };

    // Capture event -> normalized request (headers list becomes a record).
    const toRequest = (event) => ({
        type: "request",
        requestId: event.requestId,
        url: event.url,
        method: event.method,
        headers: headersListToRecord(event.headers),
        timestamp: event.timestamp,
        frameId: event.frameId,
        resourceType: event.resourceType,
        initiator: event.initiator
    });

    // Capture event -> normalized response; the body is referenced, not inlined.
    const toResponse = (event) => ({
        type: "response",
        requestId: event.requestId,
        url: event.url,
        status: event.status,
        statusText: event.statusText,
        headers: headersListToRecord(event.headers),
        timestamp: event.timestamp,
        mimeType: event.mimeType,
        fromDiskCache: event.fromDiskCache,
        fromServiceWorker: event.fromServiceWorker,
        body: undefined
    });

    // Capture event -> normalized failure.
    const toFailure = (event) => ({
        type: "failed",
        requestId: event.requestId,
        url: event.url,
        errorText: event.errorText,
        timestamp: event.timestamp
    });

    // Body-related fields of an API record; empty when there is no body
    // reference or the body has no bytes.
    const describeBody = async (response, bodyRef) => {
        if (!bodyRef) {
            return {};
        }
        const bytes = await readBody(bodyRef);
        if (!(bytes.byteLength > 0)) {
            return {};
        }
        const decoded = bodyToTextOrBase64(bytes, responseMimeType(response));
        return decoded.encoding === "text"
            ? { responseBody: decoded.text, responseEncoding: "text" }
            : { responseBodyBase64: decoded.base64, responseEncoding: "base64" };
    };

    const recordApiFailure = (request, failed) => {
        if (!claimApiSlot(request.requestId)) {
            return;
        }
        apiEntries.push({
            request,
            record: {
                url: request.url,
                method: request.method,
                requestHeaders: request.headers,
                error: failed.errorText,
                timestamp: failed.timestamp
            }
        });
    };

    const recordApiResponse = async (request, response, bodyRef) => {
        if (!claimApiSlot(request.requestId)) {
            return;
        }
        const record = {
            url: request.url,
            method: request.method,
            requestHeaders: request.headers,
            status: response.status,
            statusText: response.statusText,
            responseHeaders: response.headers,
            timestamp: response.timestamp
        };
        // Body fields are appended after the base fields, keeping key order.
        Object.assign(record, await describeBody(response, bodyRef));
        apiEntries.push({ request, record });
    };

    // Warning text for the first limit a resource violates, or undefined if it
    // fits. Checked in order: single-resource size, resource count, total
    // bytes. A limit that is unset or falsy (e.g. 0) is not enforced.
    const limitWarning = (url, byteLength) => {
        if (input.limits?.maxSingleResourceBytes && byteLength > input.limits.maxSingleResourceBytes) {
            return `Resource too large: ${url}`;
        }
        if (input.limits?.maxResources && resources.length >= input.limits.maxResources) {
            return `Resource limit reached at ${url}`;
        }
        if (input.limits?.maxTotalBytes && bytesKept + byteLength > input.limits.maxTotalBytes) {
            return `Total byte limit reached at ${url}`;
        }
        return undefined;
    };

    for (const exchange of exchanges.values()) {
        const requestEvent = exchange.request;
        if (!requestEvent || requestEvent.type !== "http.request") {
            continue;
        }
        const request = toRequest(requestEvent);

        // A failure takes precedence over any response for the same request.
        const failedEvent = exchange.failed;
        if (failedEvent && failedEvent.type === "http.failed") {
            const failure = toFailure(failedEvent);
            if (isApiResource(request)) {
                recordApiFailure(request, failure);
            }
            continue;
        }

        const responseEvent = exchange.response;
        if (!responseEvent || responseEvent.type !== "http.response") {
            continue;
        }
        const response = toResponse(responseEvent);
        const bodyRef = responseEvent.bodyRef;

        // API records are captured regardless of what the save filter decides.
        if (isApiResource(request)) {
            await recordApiResponse(request, response, bodyRef);
        }
        if (!input.filter.shouldSave(request, response)) {
            continue;
        }
        if (!bodyRef) {
            input.warnings.push(`Missing body for ${request.url}`);
            continue;
        }

        const bytes = await readBody(bodyRef);
        const byteLength = bytes.byteLength;
        const rejection = limitWarning(request.url, byteLength);
        if (rejection !== undefined) {
            input.warnings.push(rejection);
            continue;
        }

        bytesKept += byteLength;
        resources.push({
            request,
            response,
            contentRef: bodyRef,
            // Whatever parseContentLength yields from the response headers wins;
            // the measured byte length is the fallback when that is nullish.
            size: parseContentLength(response.headers || {}) ?? byteLength,
            mimeType: responseMimeType(response)
        });
    }
    return { resources, apiEntries };
};
@@ -0,0 +1,2 @@
1
+ export { indexCapture } from "./index-capture.js";
2
+ export type { ApiEntry, StoredResource } from "./types.js";
@@ -0,0 +1 @@
1
+ export { indexCapture } from "./index-capture.js";
@@ -0,0 +1,12 @@
1
+ import type { ApiRecord, ContentRef, NetworkRequestEvent, NetworkResponseEvent } from "../../types.js";
2
/**
 * A captured response kept as a resource.
 *
 * - `request` / `response`: the request/response event pair for the resource.
 * - `contentRef`: reference to the response body content.
 * - `size`: size recorded for the resource, as a number.
 * - `mimeType`: MIME type of the resource, when known.
 */
+ export type StoredResource = {
3
+ request: NetworkRequestEvent;
4
+ response: NetworkResponseEvent;
5
+ contentRef: ContentRef;
6
+ size: number;
7
+ mimeType?: string;
8
+ };
9
/**
 * An API record paired with the request event it belongs to.
 *
 * - `record`: the API record.
 * - `request`: the associated request event.
 */
+ export type ApiEntry = {
10
+ record: ApiRecord;
11
+ request: NetworkRequestEvent;
12
+ };
@@ -0,0 +1 @@
1
+ export {};