@pagepocket/lib 0.7.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. package/README.md +7 -6
  2. package/dist/build-snapshot-from-bundle.d.ts +23 -0
  3. package/dist/build-snapshot-from-bundle.js +68 -0
  4. package/dist/builtin-blacklist.js +3 -6
  5. package/dist/bundle/from-network-store.d.ts +10 -0
  6. package/dist/bundle/from-network-store.js +26 -0
  7. package/dist/bundle/types.d.ts +32 -0
  8. package/dist/bundle/types.js +2 -0
  9. package/dist/capture/index.d.ts +14 -0
  10. package/dist/capture/index.js +86 -0
  11. package/dist/capture/memory-content-store.d.ts +4 -0
  12. package/dist/capture/memory-content-store.js +42 -0
  13. package/dist/capture/types.d.ts +61 -0
  14. package/dist/capture/types.js +2 -0
  15. package/dist/content-store.js +3 -8
  16. package/dist/content-type.d.ts +1 -1
  17. package/dist/content-type.js +2 -28
  18. package/dist/core/_impl/completion.d.ts +4 -0
  19. package/dist/core/_impl/completion.js +29 -0
  20. package/dist/core/_impl/content-store.d.ts +21 -0
  21. package/dist/core/_impl/content-store.js +91 -0
  22. package/dist/core/_impl/debug.d.ts +1 -0
  23. package/dist/core/_impl/debug.js +16 -0
  24. package/dist/core/_impl/inflight-tracker.d.ts +19 -0
  25. package/dist/core/_impl/inflight-tracker.js +48 -0
  26. package/dist/core/_impl/pagepocket.d.ts +27 -0
  27. package/dist/core/_impl/pagepocket.js +155 -0
  28. package/dist/core/capture/_impl/memory-content-store.d.ts +4 -0
  29. package/dist/core/capture/_impl/memory-content-store.js +42 -0
  30. package/dist/core/capture/_impl/types.d.ts +61 -0
  31. package/dist/core/capture/_impl/types.js +2 -0
  32. package/dist/core/capture/internal/memory-content-store.d.ts +4 -0
  33. package/dist/core/capture/internal/memory-content-store.js +42 -0
  34. package/dist/core/capture/internal/types.d.ts +61 -0
  35. package/dist/core/capture/internal/types.js +2 -0
  36. package/dist/core/capture/memory-content-store.d.ts +4 -0
  37. package/dist/core/capture/memory-content-store.js +38 -0
  38. package/dist/core/capture/types.d.ts +61 -0
  39. package/dist/core/capture/types.js +1 -0
  40. package/dist/core/completion.d.ts +4 -0
  41. package/dist/core/completion.js +23 -0
  42. package/dist/core/content-store.d.ts +21 -0
  43. package/dist/core/content-store.js +54 -0
  44. package/dist/core/debug.d.ts +1 -0
  45. package/dist/core/debug.js +12 -0
  46. package/dist/core/file-tree-merge.d.ts +2 -0
  47. package/dist/core/file-tree-merge.js +27 -0
  48. package/dist/core/file-tree.d.ts +36 -0
  49. package/dist/core/file-tree.js +1 -0
  50. package/dist/core/inflight-tracker.d.ts +19 -0
  51. package/dist/core/inflight-tracker.js +44 -0
  52. package/dist/core/internal/completion.d.ts +4 -0
  53. package/dist/core/internal/completion.js +29 -0
  54. package/dist/core/internal/content-store.d.ts +21 -0
  55. package/dist/core/internal/content-store.js +91 -0
  56. package/dist/core/internal/debug.d.ts +1 -0
  57. package/dist/core/internal/debug.js +16 -0
  58. package/dist/core/internal/inflight-tracker.d.ts +19 -0
  59. package/dist/core/internal/inflight-tracker.js +48 -0
  60. package/dist/core/internal/pagepocket.d.ts +27 -0
  61. package/dist/core/internal/pagepocket.js +155 -0
  62. package/dist/core/pagepocket.d.ts +38 -0
  63. package/dist/core/pagepocket.js +57 -0
  64. package/dist/core/plugin/_impl/context.d.ts +47 -0
  65. package/dist/core/plugin/_impl/context.js +142 -0
  66. package/dist/core/plugin/_impl/runner.d.ts +12 -0
  67. package/dist/core/plugin/_impl/runner.js +232 -0
  68. package/dist/core/plugin/_impl/types.d.ts +108 -0
  69. package/dist/core/plugin/_impl/types.js +2 -0
  70. package/dist/core/plugin/context.d.ts +47 -0
  71. package/dist/core/plugin/context.js +205 -0
  72. package/dist/core/plugin/internal/context.d.ts +47 -0
  73. package/dist/core/plugin/internal/context.js +142 -0
  74. package/dist/core/plugin/internal/runner.d.ts +12 -0
  75. package/dist/core/plugin/internal/runner.js +232 -0
  76. package/dist/core/plugin/internal/types.d.ts +108 -0
  77. package/dist/core/plugin/internal/types.js +2 -0
  78. package/dist/core/plugin/runner-utils.d.ts +9 -0
  79. package/dist/core/plugin/runner-utils.js +29 -0
  80. package/dist/core/plugin/runner.d.ts +12 -0
  81. package/dist/core/plugin/runner.js +118 -0
  82. package/dist/core/plugin/types.d.ts +117 -0
  83. package/dist/core/plugin/types.js +1 -0
  84. package/dist/core/runtime/types.d.ts +14 -0
  85. package/dist/core/runtime/types.js +2 -0
  86. package/dist/css-rewrite.js +1 -5
  87. package/dist/debug.d.ts +0 -1
  88. package/dist/debug.js +3 -5
  89. package/dist/files/types.d.ts +41 -0
  90. package/dist/files/types.js +2 -0
  91. package/dist/hack-html.js +20 -13
  92. package/dist/hackers/index.d.ts +1 -1
  93. package/dist/hackers/index.js +24 -27
  94. package/dist/hackers/preload-fetch.d.ts +1 -1
  95. package/dist/hackers/preload-fetch.js +1 -4
  96. package/dist/hackers/preload-xhr.d.ts +1 -1
  97. package/dist/hackers/preload-xhr.js +1 -4
  98. package/dist/hackers/replay-beacon.d.ts +1 -1
  99. package/dist/hackers/replay-beacon.js +1 -4
  100. package/dist/hackers/replay-block-text-fragment.d.ts +1 -1
  101. package/dist/hackers/replay-block-text-fragment.js +1 -4
  102. package/dist/hackers/replay-css-proxy.d.ts +1 -1
  103. package/dist/hackers/replay-css-proxy.js +9 -12
  104. package/dist/hackers/replay-dom-rewrite.d.ts +1 -1
  105. package/dist/hackers/replay-dom-rewrite.js +165 -154
  106. package/dist/hackers/replay-eventsource.d.ts +1 -1
  107. package/dist/hackers/replay-eventsource.js +1 -4
  108. package/dist/hackers/replay-fetch.d.ts +1 -1
  109. package/dist/hackers/replay-fetch.js +1 -4
  110. package/dist/hackers/replay-history-path.d.ts +1 -1
  111. package/dist/hackers/replay-history-path.js +1 -4
  112. package/dist/hackers/replay-svg-image.d.ts +1 -1
  113. package/dist/hackers/replay-svg-image.js +1 -4
  114. package/dist/hackers/replay-websocket.d.ts +1 -1
  115. package/dist/hackers/replay-websocket.js +1 -4
  116. package/dist/hackers/replay-xhr.d.ts +1 -1
  117. package/dist/hackers/replay-xhr.js +1 -4
  118. package/dist/hackers/types.js +1 -2
  119. package/dist/index.d.ts +29 -13
  120. package/dist/index.js +23 -44
  121. package/dist/kind-map.d.ts +68 -0
  122. package/dist/kind-map.js +58 -0
  123. package/dist/network-store.js +12 -1
  124. package/dist/pagepocket.d.ts +19 -4
  125. package/dist/pagepocket.js +36 -102
  126. package/dist/path-resolver.d.ts +1 -2
  127. package/dist/path-resolver.js +9 -16
  128. package/dist/plugin/builtins/build-snapshot-plugin.d.ts +5 -0
  129. package/dist/plugin/builtins/build-snapshot-plugin.js +84 -0
  130. package/dist/plugin/builtins/replace-elements-plugin.d.ts +8 -0
  131. package/dist/plugin/builtins/replace-elements-plugin.js +13 -0
  132. package/dist/plugin/builtins/to-directory-plugin.d.ts +7 -0
  133. package/dist/plugin/builtins/to-directory-plugin.js +20 -0
  134. package/dist/plugin/builtins/to-zip-plugin.d.ts +5 -0
  135. package/dist/plugin/builtins/to-zip-plugin.js +19 -0
  136. package/dist/plugin/context.d.ts +47 -0
  137. package/dist/plugin/context.js +142 -0
  138. package/dist/plugin/runner.d.ts +12 -0
  139. package/dist/plugin/runner.js +232 -0
  140. package/dist/plugin/types.d.ts +108 -0
  141. package/dist/plugin/types.js +2 -0
  142. package/dist/plugins/build-files-from-capture.d.ts +5 -0
  143. package/dist/plugins/build-files-from-capture.js +85 -0
  144. package/dist/plugins/build-warc.d.ts +5 -0
  145. package/dist/plugins/build-warc.js +225 -0
  146. package/dist/plugins/builtins/manifest.d.ts +2 -0
  147. package/dist/plugins/builtins/manifest.js +42 -0
  148. package/dist/plugins/builtins/snapshot-directory.d.ts +2 -0
  149. package/dist/plugins/builtins/snapshot-directory.js +24 -0
  150. package/dist/plugins/builtins/snapshot-zip.d.ts +2 -0
  151. package/dist/plugins/builtins/snapshot-zip.js +25 -0
  152. package/dist/plugins/capture-http-lighterceptor.d.ts +5 -0
  153. package/dist/plugins/capture-http-lighterceptor.js +85 -0
  154. package/dist/plugins/capture-http-puppeteer.d.ts +5 -0
  155. package/dist/plugins/capture-http-puppeteer.js +85 -0
  156. package/dist/plugins/host.d.ts +37 -0
  157. package/dist/plugins/host.js +105 -0
  158. package/dist/plugins/index.d.ts +6 -0
  159. package/dist/plugins/index.js +11 -0
  160. package/dist/plugins/ordering.d.ts +2 -0
  161. package/dist/plugins/ordering.js +19 -0
  162. package/dist/plugins/types.d.ts +51 -0
  163. package/dist/plugins/types.js +2 -0
  164. package/dist/preload.js +3 -7
  165. package/dist/replace-elements/actions.d.ts +5 -0
  166. package/dist/replace-elements/actions.js +86 -0
  167. package/dist/replace-elements/match.d.ts +5 -0
  168. package/dist/replace-elements/match.js +46 -0
  169. package/dist/replace-elements/normalize.d.ts +21 -0
  170. package/dist/replace-elements/normalize.js +50 -0
  171. package/dist/replace-elements.d.ts +1 -1
  172. package/dist/replace-elements.js +5 -185
  173. package/dist/replay/match-api.d.ts +10 -0
  174. package/dist/replay/match-api.js +162 -0
  175. package/dist/replay/templates/match-api-source.d.ts +1 -0
  176. package/dist/replay/templates/match-api-source.js +137 -0
  177. package/dist/replay/templates/replay-script-template.d.ts +5 -0
  178. package/dist/replay/templates/replay-script-template.js +337 -0
  179. package/dist/replay/templates/resource-proxy-script.d.ts +1 -0
  180. package/dist/replay/templates/resource-proxy-script.js +274 -0
  181. package/dist/replay-script.d.ts +3 -10
  182. package/dist/replay-script.js +11 -625
  183. package/dist/resource-filter.d.ts +1 -1
  184. package/dist/resource-filter.js +1 -5
  185. package/dist/resource-proxy/escape-percent.d.ts +1 -0
  186. package/dist/resource-proxy/escape-percent.js +12 -0
  187. package/dist/resource-proxy/multimap.d.ts +3 -0
  188. package/dist/resource-proxy/multimap.js +18 -0
  189. package/dist/resource-proxy/pathname-variants.d.ts +3 -0
  190. package/dist/resource-proxy/pathname-variants.js +54 -0
  191. package/dist/resource-proxy.d.ts +4 -2
  192. package/dist/resource-proxy.js +48 -117
  193. package/dist/resources.js +4 -42
  194. package/dist/rewrite-links/js-imports.d.ts +3 -0
  195. package/dist/rewrite-links/js-imports.js +56 -0
  196. package/dist/rewrite-links/link-rel.d.ts +2 -0
  197. package/dist/rewrite-links/link-rel.js +10 -0
  198. package/dist/rewrite-links/meta-refresh.d.ts +3 -0
  199. package/dist/rewrite-links/meta-refresh.js +22 -0
  200. package/dist/rewrite-links/skip.d.ts +1 -0
  201. package/dist/rewrite-links/skip.js +10 -0
  202. package/dist/rewrite-links/srcset.d.ts +3 -0
  203. package/dist/rewrite-links/srcset.js +63 -0
  204. package/dist/rewrite-links/url-resolve.d.ts +3 -0
  205. package/dist/rewrite-links/url-resolve.js +13 -0
  206. package/dist/rewrite-links.d.ts +3 -3
  207. package/dist/rewrite-links.js +31 -240
  208. package/dist/snapshot-builder/api.d.ts +3 -0
  209. package/dist/snapshot-builder/api.js +6 -0
  210. package/dist/snapshot-builder/build-snapshot.d.ts +3 -0
  211. package/dist/snapshot-builder/build-snapshot.js +138 -0
  212. package/dist/snapshot-builder/capture-index/index-capture.d.ts +13 -0
  213. package/dist/snapshot-builder/capture-index/index-capture.js +168 -0
  214. package/dist/snapshot-builder/capture-index/index.d.ts +2 -0
  215. package/dist/snapshot-builder/capture-index/index.js +1 -0
  216. package/dist/snapshot-builder/capture-index/types.d.ts +12 -0
  217. package/dist/snapshot-builder/capture-index/types.js +1 -0
  218. package/dist/snapshot-builder/capture-index.d.ts +12 -0
  219. package/dist/snapshot-builder/capture-index.js +173 -0
  220. package/dist/snapshot-builder/emit-document.d.ts +24 -0
  221. package/dist/snapshot-builder/emit-document.js +50 -0
  222. package/dist/snapshot-builder/grouping.d.ts +8 -0
  223. package/dist/snapshot-builder/grouping.js +87 -0
  224. package/dist/snapshot-builder/http.d.ts +6 -0
  225. package/dist/snapshot-builder/http.js +28 -0
  226. package/dist/snapshot-builder/index.d.ts +4 -0
  227. package/dist/snapshot-builder/index.js +2 -0
  228. package/dist/snapshot-builder/path-map.d.ts +3 -0
  229. package/dist/snapshot-builder/path-map.js +35 -0
  230. package/dist/snapshot-builder/resources-path.d.ts +23 -0
  231. package/dist/snapshot-builder/resources-path.js +47 -0
  232. package/dist/snapshot-builder/rewrite-resource.d.ts +18 -0
  233. package/dist/snapshot-builder/rewrite-resource.js +52 -0
  234. package/dist/snapshot-builder/types.d.ts +37 -0
  235. package/dist/snapshot-builder/types.js +2 -0
  236. package/dist/snapshot-builder.d.ts +12 -8
  237. package/dist/snapshot-builder.js +252 -27
  238. package/dist/types.d.ts +122 -78
  239. package/dist/types.js +4 -2
  240. package/dist/units/contracts-bridge.d.ts +76 -0
  241. package/dist/units/contracts-bridge.js +6 -0
  242. package/dist/units/index.d.ts +4 -0
  243. package/dist/units/index.js +2 -0
  244. package/dist/units/runner.d.ts +11 -0
  245. package/dist/units/runner.js +270 -0
  246. package/dist/units/types.d.ts +39 -0
  247. package/dist/units/types.js +1 -0
  248. package/dist/utils/streams.d.ts +2 -0
  249. package/dist/utils/streams.js +29 -0
  250. package/dist/utils.d.ts +35 -1
  251. package/dist/utils.js +107 -29
  252. package/dist/v3/contracts-bridge.d.ts +69 -0
  253. package/dist/v3/contracts-bridge.js +5 -0
  254. package/dist/v3/index.d.ts +4 -0
  255. package/dist/v3/index.js +2 -0
  256. package/dist/v3/runner.d.ts +20 -0
  257. package/dist/v3/runner.js +245 -0
  258. package/dist/v3/types.d.ts +39 -0
  259. package/dist/v3/types.js +1 -0
  260. package/dist/writers.js +3 -1
  261. package/package.json +11 -3
@@ -4,7 +4,6 @@ exports.buildSnapshot = void 0;
4
4
  const css_rewrite_1 = require("./css-rewrite");
5
5
  const path_resolver_1 = require("./path-resolver");
6
6
  const rewrite_links_1 = require("./rewrite-links");
7
- const snapshot_1 = require("./snapshot");
8
7
  const utils_1 = require("./utils");
9
8
  const escapePercentForStaticServers = (value) => {
10
9
  // Many static servers decode percent-encoding in the request path before
@@ -18,6 +17,197 @@ const escapePercentForStaticServers = (value) => {
18
17
  // filename on disk.
19
18
  return value.split("%").join("%25");
20
19
  };
20
+ const isApiResource = (request) => {
21
+ const type = request?.resourceType;
22
+ return type === "fetch" || type === "xhr";
23
+ };
24
+ const headersListToRecord = (headers) => {
25
+ const out = {};
26
+ for (const h of headers) {
27
+ out[h.name] = h.value;
28
+ }
29
+ return out;
30
+ };
31
+ const getHeaderValue = (headers, name) => {
32
+ const target = name.toLowerCase();
33
+ for (const key in headers) {
34
+ if (key.toLowerCase() === target) {
35
+ return headers[key];
36
+ }
37
+ }
38
+ return undefined;
39
+ };
40
+ const parseContentLength = (headers) => {
41
+ const raw = getHeaderValue(headers, "content-length");
42
+ if (!raw) {
43
+ return undefined;
44
+ }
45
+ const parsed = Number(raw);
46
+ if (!Number.isFinite(parsed) || parsed <= 0) {
47
+ return undefined;
48
+ }
49
+ return parsed;
50
+ };
51
+ const responseMimeType = (response) => response.mimeType || getHeaderValue(response.headers || {}, "content-type");
52
+ const buildIndexFromCapture = async (input) => {
53
+ const byId = new Map();
54
+ const ensure = (requestId) => {
55
+ const existing = byId.get(requestId);
56
+ if (existing)
57
+ return existing;
58
+ const created = {};
59
+ byId.set(requestId, created);
60
+ return created;
61
+ };
62
+ for (const event of input.capture.events) {
63
+ if (event.type === "http.request") {
64
+ ensure(event.requestId).request = event;
65
+ continue;
66
+ }
67
+ if (event.type === "http.response") {
68
+ ensure(event.requestId).response = event;
69
+ continue;
70
+ }
71
+ if (event.type === "http.failed") {
72
+ ensure(event.requestId).failed = event;
73
+ }
74
+ }
75
+ const resources = [];
76
+ const apiEntries = [];
77
+ const apiSeen = new Set();
78
+ let totalBytes = 0;
79
+ const recordApiFailure = (request, failed) => {
80
+ if (apiSeen.has(request.requestId))
81
+ return;
82
+ apiSeen.add(request.requestId);
83
+ apiEntries.push({
84
+ request,
85
+ record: {
86
+ url: request.url,
87
+ method: request.method,
88
+ requestHeaders: request.headers,
89
+ error: failed.errorText,
90
+ timestamp: failed.timestamp
91
+ }
92
+ });
93
+ };
94
+ const recordApiResponse = async (request, response, bodyRef) => {
95
+ if (apiSeen.has(request.requestId))
96
+ return;
97
+ apiSeen.add(request.requestId);
98
+ const record = {
99
+ url: request.url,
100
+ method: request.method,
101
+ requestHeaders: request.headers,
102
+ status: response.status,
103
+ statusText: response.statusText,
104
+ responseHeaders: response.headers,
105
+ timestamp: response.timestamp
106
+ };
107
+ if (bodyRef) {
108
+ const stream = await input.capture.contentStore.open(bodyRef);
109
+ const bytes = await streamToUint8Array(stream);
110
+ if (bytes.byteLength > 0) {
111
+ const mimeType = responseMimeType(response);
112
+ const decoded = (0, utils_1.bodyToTextOrBase64)(bytes, mimeType);
113
+ if (decoded.encoding === "text") {
114
+ record.responseBody = decoded.text;
115
+ record.responseEncoding = "text";
116
+ }
117
+ else {
118
+ record.responseBodyBase64 = decoded.base64;
119
+ record.responseEncoding = "base64";
120
+ }
121
+ }
122
+ }
123
+ apiEntries.push({ request, record });
124
+ };
125
+ for (const record of byId.values()) {
126
+ if (!record.request || record.request.type !== "http.request") {
127
+ continue;
128
+ }
129
+ const requestEvent = record.request;
130
+ const request = {
131
+ type: "request",
132
+ requestId: requestEvent.requestId,
133
+ url: requestEvent.url,
134
+ method: requestEvent.method,
135
+ headers: headersListToRecord(requestEvent.headers),
136
+ timestamp: requestEvent.timestamp,
137
+ frameId: requestEvent.frameId,
138
+ resourceType: requestEvent.resourceType,
139
+ initiator: requestEvent.initiator
140
+ };
141
+ if (record.failed && record.failed.type === "http.failed") {
142
+ const failedEvent = record.failed;
143
+ const failed = {
144
+ type: "failed",
145
+ requestId: failedEvent.requestId,
146
+ url: failedEvent.url,
147
+ errorText: failedEvent.errorText,
148
+ timestamp: failedEvent.timestamp
149
+ };
150
+ if (isApiResource(request)) {
151
+ recordApiFailure(request, failed);
152
+ }
153
+ continue;
154
+ }
155
+ if (!record.response || record.response.type !== "http.response") {
156
+ continue;
157
+ }
158
+ const responseEvent = record.response;
159
+ const response = {
160
+ type: "response",
161
+ requestId: responseEvent.requestId,
162
+ url: responseEvent.url,
163
+ status: responseEvent.status,
164
+ statusText: responseEvent.statusText,
165
+ headers: headersListToRecord(responseEvent.headers),
166
+ timestamp: responseEvent.timestamp,
167
+ mimeType: responseEvent.mimeType,
168
+ fromDiskCache: responseEvent.fromDiskCache,
169
+ fromServiceWorker: responseEvent.fromServiceWorker,
170
+ body: undefined
171
+ };
172
+ const bodyRef = responseEvent.bodyRef;
173
+ const isApi = isApiResource(request);
174
+ if (isApi) {
175
+ await recordApiResponse(request, response, bodyRef);
176
+ }
177
+ const shouldSave = input.filter.shouldSave(request, response);
178
+ if (!shouldSave) {
179
+ continue;
180
+ }
181
+ if (!bodyRef) {
182
+ input.warnings.push(`Missing body for ${request.url}`);
183
+ continue;
184
+ }
185
+ const stream = await input.capture.contentStore.open(bodyRef);
186
+ const bytes = await streamToUint8Array(stream);
187
+ const byteLength = bytes.byteLength;
188
+ if (input.limits?.maxSingleResourceBytes && byteLength > input.limits.maxSingleResourceBytes) {
189
+ input.warnings.push(`Resource too large: ${request.url}`);
190
+ continue;
191
+ }
192
+ if (input.limits?.maxResources && resources.length >= input.limits.maxResources) {
193
+ input.warnings.push(`Resource limit reached at ${request.url}`);
194
+ continue;
195
+ }
196
+ if (input.limits?.maxTotalBytes && totalBytes + byteLength > input.limits.maxTotalBytes) {
197
+ input.warnings.push(`Total byte limit reached at ${request.url}`);
198
+ continue;
199
+ }
200
+ totalBytes += byteLength;
201
+ resources.push({
202
+ request,
203
+ response,
204
+ contentRef: bodyRef,
205
+ size: parseContentLength(response.headers || {}) ?? byteLength,
206
+ mimeType: responseMimeType(response)
207
+ });
208
+ }
209
+ return { resources, apiEntries };
210
+ };
21
211
  const streamToUint8Array = async (stream) => {
22
212
  const reader = stream.getReader();
23
213
  const chunks = [];
@@ -161,6 +351,17 @@ const buildResourcesPathSnapshot = (createdAt, files) => {
161
351
  size: file.size
162
352
  });
163
353
  }
354
+ items.sort((left, right) => {
355
+ if (left.path < right.path)
356
+ return -1;
357
+ if (left.path > right.path)
358
+ return 1;
359
+ if (left.url < right.url)
360
+ return -1;
361
+ if (left.url > right.url)
362
+ return 1;
363
+ return 0;
364
+ });
164
365
  return {
165
366
  version: "1.0",
166
367
  createdAt,
@@ -169,10 +370,19 @@ const buildResourcesPathSnapshot = (createdAt, files) => {
169
370
  };
170
371
  const buildSnapshot = async (input) => {
171
372
  const warnings = input.warnings;
373
+ const shouldRewriteLinks = input.rewriteEntry !== false;
374
+ const contentStore = input.capture.contentStore;
375
+ const indexed = await buildIndexFromCapture({
376
+ capture: input.capture,
377
+ filter: input.filter,
378
+ entryUrl: input.entryUrl,
379
+ limits: input.limits,
380
+ warnings
381
+ });
172
382
  const groups = groupResources({
173
383
  entryUrl: input.entryUrl,
174
- resources: input.resources,
175
- apiEntries: input.apiEntries,
384
+ resources: indexed.resources,
385
+ apiEntries: indexed.apiEntries,
176
386
  warnings
177
387
  });
178
388
  const multiDoc = groups.length > 1;
@@ -211,9 +421,26 @@ const buildSnapshot = async (input) => {
211
421
  for (const resource of group.resources) {
212
422
  if (resource.request.resourceType === "document") {
213
423
  const path = urlToPath.get(resource.request.url) ?? "/index.html";
214
- const stream = await input.contentStore.open(resource.contentRef);
424
+ const stream = await contentStore.open(resource.contentRef);
215
425
  const bytes = await streamToUint8Array(stream);
216
- const decoded = (0, utils_1.decodeUtf8)(bytes) ?? "";
426
+ const decoded = (0, utils_1.decodeUtf8)(bytes);
427
+ if (decoded === null) {
428
+ // Be defensive: if the document body can't be decoded, don't attempt rewrite.
429
+ // Still emit a file so snapshot layout stays consistent.
430
+ files.push({
431
+ path,
432
+ mimeType: resource.mimeType ?? "text/html",
433
+ size: bytes.byteLength,
434
+ source: resource.contentRef,
435
+ originalUrl: resource.request.url,
436
+ resourceType: resource.request.resourceType,
437
+ headers: resource.response.headers
438
+ });
439
+ if (resource.request.url === input.entryUrl || !entryPath) {
440
+ entryPath = path;
441
+ }
442
+ continue;
443
+ }
217
444
  let html = decoded;
218
445
  const rewritten = await (0, rewrite_links_1.rewriteEntryHtml)({
219
446
  html,
@@ -230,7 +457,7 @@ const buildSnapshot = async (input) => {
230
457
  title = rewritten.title;
231
458
  }
232
459
  const encoded = new TextEncoder().encode(html);
233
- const contentRef = await input.contentStore.put({ kind: "buffer", data: encoded }, { url: resource.request.url, mimeType: resource.mimeType, sizeHint: encoded.byteLength });
460
+ const contentRef = await contentStore.put({ kind: "buffer", data: encoded }, { url: resource.request.url, mimeType: resource.mimeType, sizeHint: encoded.byteLength });
234
461
  files.push({
235
462
  path,
236
463
  mimeType: resource.mimeType ?? "text/html",
@@ -248,7 +475,7 @@ const buildSnapshot = async (input) => {
248
475
  let contentRef = resource.contentRef;
249
476
  let size = resource.size;
250
477
  if (resource.request.resourceType === "stylesheet" && input.rewriteCSS) {
251
- const stream = await input.contentStore.open(resource.contentRef);
478
+ const stream = await contentStore.open(resource.contentRef);
252
479
  const bytes = await streamToUint8Array(stream);
253
480
  const decoded = (0, utils_1.decodeUtf8)(bytes);
254
481
  if (decoded !== null) {
@@ -259,7 +486,7 @@ const buildSnapshot = async (input) => {
259
486
  });
260
487
  if (rewritten !== decoded) {
261
488
  const encoded = new TextEncoder().encode(rewritten);
262
- contentRef = await input.contentStore.put({ kind: "buffer", data: encoded }, {
489
+ contentRef = await contentStore.put({ kind: "buffer", data: encoded }, {
263
490
  url: resource.request.url,
264
491
  mimeType: resource.mimeType,
265
492
  sizeHint: encoded.byteLength
@@ -269,14 +496,14 @@ const buildSnapshot = async (input) => {
269
496
  }
270
497
  }
271
498
  if (resource.request.resourceType === "script") {
272
- const stream = await input.contentStore.open(contentRef);
499
+ const stream = await contentStore.open(contentRef);
273
500
  const bytes = await streamToUint8Array(stream);
274
501
  const decoded = (0, utils_1.decodeUtf8)(bytes);
275
502
  if (decoded !== null) {
276
503
  const rewritten = await (0, rewrite_links_1.rewriteJsText)(decoded, resolve, resource.request.url);
277
504
  if (rewritten !== decoded) {
278
505
  const encoded = new TextEncoder().encode(rewritten);
279
- contentRef = await input.contentStore.put({ kind: "buffer", data: encoded }, {
506
+ contentRef = await contentStore.put({ kind: "buffer", data: encoded }, {
280
507
  url: resource.request.url,
281
508
  mimeType: resource.mimeType,
282
509
  sizeHint: encoded.byteLength
@@ -305,8 +532,8 @@ const buildSnapshot = async (input) => {
305
532
  });
306
533
  }
307
534
  const apiSnapshot = buildApiSnapshot(group.url, input.createdAt, group.apiEntries);
308
- const apiBytes = new TextEncoder().encode(JSON.stringify(apiSnapshot, null, 2));
309
- const apiRef = await input.contentStore.put({ kind: "buffer", data: apiBytes }, { url: apiPath, mimeType: "application/json", sizeHint: apiBytes.byteLength });
535
+ const apiBytes = new TextEncoder().encode(`${JSON.stringify(apiSnapshot, null, 2)}\n`);
536
+ const apiRef = await contentStore.put({ kind: "buffer", data: apiBytes }, { url: apiPath, mimeType: "application/json", sizeHint: apiBytes.byteLength });
310
537
  files.push({
311
538
  path: apiPath,
312
539
  mimeType: "application/json",
@@ -317,7 +544,7 @@ const buildSnapshot = async (input) => {
317
544
  }
318
545
  {
319
546
  const resourcesPath = buildResourcesPathSnapshot(input.createdAt, files);
320
- const bytes = new TextEncoder().encode(JSON.stringify(resourcesPath, null, 2));
547
+ const bytes = new TextEncoder().encode(`${JSON.stringify(resourcesPath, null, 2)}\n`);
321
548
  // Snapshot-local artifact. It intentionally has no originalUrl.
322
549
  files.push({
323
550
  path: "/resources_path.json",
@@ -329,24 +556,22 @@ const buildSnapshot = async (input) => {
329
556
  const totalBytes = files.reduce((sum, file) => sum + (file.size ?? 0), 0);
330
557
  const totalFiles = files.length;
331
558
  const snapshotUrl = input.entryUrl || groups[0]?.url || "";
332
- return (0, snapshot_1.createPageSnapshot)({
333
- version: "1.0",
334
- createdAt: input.createdAt,
335
- url: snapshotUrl,
336
- title,
337
- entry: entryPath || "/index.html",
338
- files,
339
- meta: {
340
- totalBytes,
341
- totalFiles,
342
- warnings: warnings.length ? warnings : undefined
559
+ return {
560
+ root: {
561
+ kind: "directory",
562
+ path: "",
563
+ entries: files.map((file) => ({
564
+ kind: "file",
565
+ path: file.path,
566
+ source: { kind: "content-ref", ref: file.source }
567
+ }))
343
568
  },
344
569
  content: {
345
- open: (ref) => input.contentStore.open(ref),
570
+ open: (ref) => contentStore.open(ref),
346
571
  dispose: async () => {
347
- await input.contentStore.dispose?.();
572
+ await contentStore.dispose?.();
348
573
  }
349
574
  }
350
- });
575
+ };
351
576
  };
352
577
  exports.buildSnapshot = buildSnapshot;
package/dist/types.d.ts CHANGED
@@ -1,7 +1,121 @@
1
- import type { BodySource, NetworkInterceptorAdapter, NetworkRequestEvent, NetworkResponseEvent, ResourceType } from "@pagepocket/interceptor";
1
+ /**
2
+ * Network types used by the plugin runner and capture plugins.
3
+ *
4
+ * NOTE: This repo previously sourced these from a dedicated `@pagepocket/interceptor` package.
5
+ * That package has been removed; the contracts live in `@pagepocket/lib` now.
6
+ */
2
7
  import type { Cheerio, CheerioAPI } from "cheerio";
3
- export type { BodySource, InterceptOptions, InterceptSession, InterceptTarget, InterceptorActions, InterceptorCapabilities, NavigateOptions, NetworkEvent, NetworkEventHandlers, NetworkInterceptorAdapter, NetworkRequestEvent, NetworkRequestFailedEvent, NetworkResponseEvent, ResourceType, TriggerAction } from "@pagepocket/interceptor";
4
- import type { NetworkEvent } from "@pagepocket/interceptor";
8
+ export type ResourceType = "document" | "stylesheet" | "script" | "image" | "font" | "media" | "xhr" | "fetch" | "other" | (string & {});
9
+ export type BodySource = {
10
+ kind: "buffer";
11
+ data: Uint8Array;
12
+ } | {
13
+ kind: "stream";
14
+ stream: ReadableStream<Uint8Array>;
15
+ } | {
16
+ kind: "late";
17
+ read: () => Promise<Uint8Array>;
18
+ };
19
+ export interface NetworkRequestEvent {
20
+ type: "request";
21
+ requestId: string;
22
+ url: string;
23
+ method: string;
24
+ headers: Record<string, string>;
25
+ frameId?: string;
26
+ resourceType?: ResourceType;
27
+ initiator?: {
28
+ type?: string;
29
+ url?: string;
30
+ };
31
+ timestamp: number;
32
+ }
33
+ export interface NetworkResponseEvent {
34
+ type: "response";
35
+ requestId: string;
36
+ url: string;
37
+ status: number;
38
+ statusText?: string;
39
+ headers: Record<string, string>;
40
+ mimeType?: string;
41
+ fromDiskCache?: boolean;
42
+ fromServiceWorker?: boolean;
43
+ timestamp: number;
44
+ body?: BodySource;
45
+ }
46
+ export interface NetworkRequestFailedEvent {
47
+ type: "failed";
48
+ requestId: string;
49
+ url: string;
50
+ errorText: string;
51
+ timestamp: number;
52
+ }
53
+ export type NetworkEvent = NetworkRequestEvent | NetworkResponseEvent | NetworkRequestFailedEvent;
54
+ export interface NetworkEventHandlers {
55
+ onEvent(event: NetworkEvent): void;
56
+ onError?(error: Error): void;
57
+ onLog?(msg: string, meta?: unknown): void;
58
+ }
59
+ export interface InterceptorCapabilities {
60
+ canGetResponseBody: boolean;
61
+ canStreamResponseBody: boolean;
62
+ canGetRequestBody: boolean;
63
+ providesResourceType: boolean;
64
+ /** Adapter can provide a primary-document HTML milestone. */
65
+ canWaitForHtml: boolean;
66
+ /** Adapter implements a meaningful staged capture boundary. */
67
+ supportsStagedCapture: boolean;
68
+ }
69
+ export type InterceptTarget = {
70
+ kind: "url";
71
+ url: string;
72
+ } | {
73
+ kind: "puppeteer-page";
74
+ page: unknown;
75
+ } | {
76
+ kind: "cdp-tab";
77
+ tabId: number;
78
+ } | {
79
+ kind: "html";
80
+ htmlString: string;
81
+ baseUrl: string;
82
+ url?: string;
83
+ };
84
+ export type InterceptOptions = Record<string, unknown>;
85
+ export type NavigateOptions = Record<string, unknown>;
86
+ export interface InterceptSession {
87
+ navigate?(url: string, options?: NavigateOptions): Promise<void>;
88
+ waitForHtml(): Promise<{
89
+ htmlString: string;
90
+ baseUrl: string;
91
+ url?: string;
92
+ contentType?: string;
93
+ }>;
94
+ /**
95
+ * Returns a snapshot of the current DOM HTML.
96
+ *
97
+ * This is intended to be called after capture completion (e.g. after network
98
+ * idle) when the page has finished client-side rendering.
99
+ */
100
+ getDomHtml?(): Promise<{
101
+ htmlString: string;
102
+ baseUrl: string;
103
+ url?: string;
104
+ contentType?: string;
105
+ }>;
106
+ startCapture(): Promise<void>;
107
+ stop(): Promise<void>;
108
+ }
109
+ export type InterceptorActions = {
110
+ triggerActions?: TriggerAction[];
111
+ timeoutMs?: number;
112
+ maxDurationMs?: number;
113
+ };
114
+ export interface NetworkInterceptorAdapter {
115
+ readonly name: string;
116
+ readonly capabilities: InterceptorCapabilities;
117
+ start(target: InterceptTarget, handlers: NetworkEventHandlers, options?: InterceptOptions & InterceptorActions): Promise<InterceptSession>;
118
+ }
5
119
  export interface PathResolver {
6
120
  resolve(input: {
7
121
  url: string;
@@ -50,43 +164,11 @@ export interface CompletionStrategy {
50
164
  export interface PagePocketOptions {
51
165
  }
52
166
  export type NetworkEventStream = AsyncIterable<NetworkEvent>;
53
- export interface CaptureOptions {
54
- interceptor: NetworkInterceptorAdapter;
55
- completion?: CompletionStrategy | CompletionStrategy[];
56
- /**
57
- * Network idle duration (ms) used to determine capture completion.
58
- *
59
- * If `completion` is not provided, PagePocket will wait until the network has
60
- * been idle (no inflight requests) for this duration.
61
- *
62
- * Note: this is NOT a wall-clock timeout from capture start.
63
- */
64
- timeoutMs?: number;
65
- /**
66
- * Hard wall-clock limit (ms) for the overall capture session.
67
- *
68
- * When `completion` is not provided, PagePocket will stop after either:
69
- * - network has been idle for `timeoutMs`, OR
70
- * - `maxDurationMs` has elapsed.
71
- */
72
- maxDurationMs?: number;
73
- filter?: ResourceFilter;
74
- pathResolver?: PathResolver;
75
- contentStore?: ContentStore;
76
- rewriteEntry?: boolean;
77
- rewriteCSS?: boolean;
78
- blacklist?: RegExp[];
79
- /**
80
- * Replace parts of the captured HTML (Document response body) during the HTML
81
- * rewrite stage (Cheerio).
82
- */
83
- replaceElements?: ReplaceElementsConfig;
84
- limits?: {
85
- maxTotalBytes?: number;
86
- maxSingleResourceBytes?: number;
87
- maxResources?: number;
88
- };
89
- }
167
+ export declare const TriggerActionValues: {
168
+ readonly HOVER: "HOVER";
169
+ readonly SCROLL_TO_END: "SCROLL_TO_END";
170
+ };
171
+ export type TriggerAction = (typeof TriggerActionValues)[keyof typeof TriggerActionValues];
90
172
  export type ReplaceElementsConfig = Array<ReplaceElementRule | ReplaceElementFn | ReplaceElementFnWithQuery>;
91
173
  export type MatchQuery = string | {
92
174
  selector?: string;
@@ -146,44 +228,6 @@ export interface SnapshotFile {
146
228
  resourceType?: ResourceType;
147
229
  headers?: Record<string, string>;
148
230
  }
149
- export interface PageSnapshot {
150
- version: "1.0";
151
- createdAt: number;
152
- url: string;
153
- title?: string;
154
- entry: string;
155
- files: SnapshotFile[];
156
- meta?: {
157
- totalBytes?: number;
158
- totalFiles?: number;
159
- warnings?: string[];
160
- };
161
- content: ContentStoreHandle;
162
- toDirectory(outDir: string, options?: WriteFSOptions): Promise<WriteResult>;
163
- toZip(options?: ZipOptions): Promise<ZipResult>;
164
- }
165
- export interface WriteFSOptions {
166
- clearCache?: boolean;
167
- overwrite?: boolean;
168
- suffix?: string;
169
- }
170
- export interface WriteResult {
171
- filesWritten: number;
172
- totalBytes: number;
173
- outputDir?: string;
174
- }
175
- export interface ZipOptions {
176
- asBlob?: boolean;
177
- clearCache?: boolean;
178
- overwrite?: boolean;
179
- suffix?: string;
180
- outputPath?: string;
181
- }
182
- export interface ZipWriteResult {
183
- data: Uint8Array | Blob;
184
- outputPath: string;
185
- }
186
- export type ZipResult = Uint8Array | Blob | ZipWriteResult;
187
231
  export interface ApiRecord {
188
232
  url: string;
189
233
  method: string;
package/dist/types.js CHANGED
@@ -1,2 +1,4 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
1
+ export const TriggerActionValues = {
2
+ HOVER: "HOVER",
3
+ SCROLL_TO_END: "SCROLL_TO_END"
4
+ };
@@ -0,0 +1,76 @@
1
+ import type { ChannelToken, ReplaceElementsConfig } from "@pagepocket/contracts";
2
+ export type CaptureResult = {
3
+ kind: "raw";
4
+ outputDir: string;
5
+ meta?: unknown;
6
+ } | {
7
+ kind: "zip";
8
+ zip: {
9
+ data: Uint8Array | Blob;
10
+ outputPath: string;
11
+ };
12
+ meta?: unknown;
13
+ } | {
14
+ kind: "html";
15
+ html: string;
16
+ meta?: unknown;
17
+ } | {
18
+ kind: "text";
19
+ text: string;
20
+ meta?: unknown;
21
+ } | {
22
+ kind: "custom";
23
+ value: unknown;
24
+ };
25
+ export type DeferredHandle = Promise<unknown>;
26
+ export type UnitValue = Record<string, unknown>;
27
+ export type UnitContext<T = UnitValue> = {
28
+ value: T;
29
+ };
30
+ export type UnitContributeContext<TValue = UnitValue, TSetupValue = unknown> = UnitContext<TValue> & {
31
+ setupValue: TSetupValue | undefined;
32
+ };
33
+ export declare const TERMINAL_RESULT_KEY: "__pagepocketResult";
34
+ export type UnitPatch = UnitValue & {
35
+ [TERMINAL_RESULT_KEY]?: CaptureResult;
36
+ };
37
+ export interface ElementPatchRegistry {
38
+ contribute(source: {
39
+ type: "unit" | "plugin";
40
+ name: string;
41
+ }, rules: ReplaceElementsConfig): void;
42
+ contributeLazy(source: {
43
+ type: "unit" | "plugin";
44
+ name: string;
45
+ }, build: () => ReplaceElementsConfig | Promise<ReplaceElementsConfig>): void;
46
+ compile(): Promise<ReplaceElementsConfig>;
47
+ }
48
+ export interface UnitRuntime {
49
+ readonly entry: import("./types.js").EntryInfo;
50
+ readonly options: import("./types.js").CaptureOptions;
51
+ readonly pocketOptions: import("./types.js").PagePocketOptions;
52
+ publish<T>(t: ChannelToken<T>, value: T): void;
53
+ subscribe<T>(t: ChannelToken<T>): AsyncIterable<T>;
54
+ hasPublisher(t: ChannelToken<unknown>): boolean;
55
+ readonly elements: ElementPatchRegistry;
56
+ defer(promise: DeferredHandle): void;
57
+ }
58
+ export interface PluginHost {
59
+ readonly entry: import("./types.js").EntryInfo;
60
+ readonly options: import("./types.js").CaptureOptions;
61
+ subscribe<T>(t: ChannelToken<T>): AsyncIterable<T>;
62
+ hasPublisher(t: ChannelToken<unknown>): boolean;
63
+ readonly elements: ElementPatchRegistry;
64
+ defer(promise: DeferredHandle): void;
65
+ }
66
+ export declare abstract class Unit {
67
+ abstract readonly id: string;
68
+ abstract readonly kind: string;
69
+ abstract run(ctx: UnitContext, rt: UnitRuntime): Promise<void | UnitPatch>;
70
+ merge(returnValue: UnitPatch, pluginContributedValue?: UnitPatch): UnitPatch;
71
+ }
72
+ export interface Plugin<TSetupValue = unknown> {
73
+ readonly name: string;
74
+ setup(host: PluginHost): TSetupValue | void | Promise<TSetupValue | void>;
75
+ contribute?(ctx: UnitContributeContext<UnitValue, TSetupValue>, rt: UnitRuntime): Promise<UnitPatch>;
76
+ }
@@ -0,0 +1,6 @@
1
+ export const TERMINAL_RESULT_KEY = "__pagepocketResult";
2
+ export class Unit {
3
+ merge(returnValue, pluginContributedValue = {}) {
4
+ return { ...returnValue, ...pluginContributedValue };
5
+ }
6
+ }
@@ -0,0 +1,4 @@
1
+ export type { CaptureOptions, EntryInfo } from "./types.js";
2
+ export type { CaptureResult, Plugin, PluginHost, UnitRuntime } from "./contracts-bridge.js";
3
+ export { Unit } from "./contracts-bridge.js";
4
+ export { runCapture } from "./runner.js";
@@ -0,0 +1,2 @@
1
+ export { Unit } from "./contracts-bridge.js";
2
+ export { runCapture } from "./runner.js";