@pagepocket/lib 0.7.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. package/README.md +7 -6
  2. package/dist/build-snapshot-from-bundle.d.ts +23 -0
  3. package/dist/build-snapshot-from-bundle.js +68 -0
  4. package/dist/builtin-blacklist.js +3 -6
  5. package/dist/bundle/from-network-store.d.ts +10 -0
  6. package/dist/bundle/from-network-store.js +26 -0
  7. package/dist/bundle/types.d.ts +32 -0
  8. package/dist/bundle/types.js +2 -0
  9. package/dist/capture/index.d.ts +14 -0
  10. package/dist/capture/index.js +86 -0
  11. package/dist/capture/memory-content-store.d.ts +4 -0
  12. package/dist/capture/memory-content-store.js +42 -0
  13. package/dist/capture/types.d.ts +61 -0
  14. package/dist/capture/types.js +2 -0
  15. package/dist/content-store.js +3 -8
  16. package/dist/content-type.d.ts +1 -1
  17. package/dist/content-type.js +2 -28
  18. package/dist/core/_impl/completion.d.ts +4 -0
  19. package/dist/core/_impl/completion.js +29 -0
  20. package/dist/core/_impl/content-store.d.ts +21 -0
  21. package/dist/core/_impl/content-store.js +91 -0
  22. package/dist/core/_impl/debug.d.ts +1 -0
  23. package/dist/core/_impl/debug.js +16 -0
  24. package/dist/core/_impl/inflight-tracker.d.ts +19 -0
  25. package/dist/core/_impl/inflight-tracker.js +48 -0
  26. package/dist/core/_impl/pagepocket.d.ts +27 -0
  27. package/dist/core/_impl/pagepocket.js +155 -0
  28. package/dist/core/capture/_impl/memory-content-store.d.ts +4 -0
  29. package/dist/core/capture/_impl/memory-content-store.js +42 -0
  30. package/dist/core/capture/_impl/types.d.ts +61 -0
  31. package/dist/core/capture/_impl/types.js +2 -0
  32. package/dist/core/capture/internal/memory-content-store.d.ts +4 -0
  33. package/dist/core/capture/internal/memory-content-store.js +42 -0
  34. package/dist/core/capture/internal/types.d.ts +61 -0
  35. package/dist/core/capture/internal/types.js +2 -0
  36. package/dist/core/capture/memory-content-store.d.ts +4 -0
  37. package/dist/core/capture/memory-content-store.js +38 -0
  38. package/dist/core/capture/types.d.ts +61 -0
  39. package/dist/core/capture/types.js +1 -0
  40. package/dist/core/completion.d.ts +4 -0
  41. package/dist/core/completion.js +23 -0
  42. package/dist/core/content-store.d.ts +21 -0
  43. package/dist/core/content-store.js +54 -0
  44. package/dist/core/debug.d.ts +1 -0
  45. package/dist/core/debug.js +12 -0
  46. package/dist/core/file-tree-merge.d.ts +2 -0
  47. package/dist/core/file-tree-merge.js +27 -0
  48. package/dist/core/file-tree.d.ts +36 -0
  49. package/dist/core/file-tree.js +1 -0
  50. package/dist/core/inflight-tracker.d.ts +19 -0
  51. package/dist/core/inflight-tracker.js +44 -0
  52. package/dist/core/internal/completion.d.ts +4 -0
  53. package/dist/core/internal/completion.js +29 -0
  54. package/dist/core/internal/content-store.d.ts +21 -0
  55. package/dist/core/internal/content-store.js +91 -0
  56. package/dist/core/internal/debug.d.ts +1 -0
  57. package/dist/core/internal/debug.js +16 -0
  58. package/dist/core/internal/inflight-tracker.d.ts +19 -0
  59. package/dist/core/internal/inflight-tracker.js +48 -0
  60. package/dist/core/internal/pagepocket.d.ts +27 -0
  61. package/dist/core/internal/pagepocket.js +155 -0
  62. package/dist/core/pagepocket.d.ts +38 -0
  63. package/dist/core/pagepocket.js +57 -0
  64. package/dist/core/plugin/_impl/context.d.ts +47 -0
  65. package/dist/core/plugin/_impl/context.js +142 -0
  66. package/dist/core/plugin/_impl/runner.d.ts +12 -0
  67. package/dist/core/plugin/_impl/runner.js +232 -0
  68. package/dist/core/plugin/_impl/types.d.ts +108 -0
  69. package/dist/core/plugin/_impl/types.js +2 -0
  70. package/dist/core/plugin/context.d.ts +47 -0
  71. package/dist/core/plugin/context.js +205 -0
  72. package/dist/core/plugin/internal/context.d.ts +47 -0
  73. package/dist/core/plugin/internal/context.js +142 -0
  74. package/dist/core/plugin/internal/runner.d.ts +12 -0
  75. package/dist/core/plugin/internal/runner.js +232 -0
  76. package/dist/core/plugin/internal/types.d.ts +108 -0
  77. package/dist/core/plugin/internal/types.js +2 -0
  78. package/dist/core/plugin/runner-utils.d.ts +9 -0
  79. package/dist/core/plugin/runner-utils.js +29 -0
  80. package/dist/core/plugin/runner.d.ts +12 -0
  81. package/dist/core/plugin/runner.js +118 -0
  82. package/dist/core/plugin/types.d.ts +117 -0
  83. package/dist/core/plugin/types.js +1 -0
  84. package/dist/core/runtime/types.d.ts +14 -0
  85. package/dist/core/runtime/types.js +2 -0
  86. package/dist/css-rewrite.js +1 -5
  87. package/dist/debug.d.ts +0 -1
  88. package/dist/debug.js +3 -5
  89. package/dist/files/types.d.ts +41 -0
  90. package/dist/files/types.js +2 -0
  91. package/dist/hack-html.js +20 -13
  92. package/dist/hackers/index.d.ts +1 -1
  93. package/dist/hackers/index.js +24 -27
  94. package/dist/hackers/preload-fetch.d.ts +1 -1
  95. package/dist/hackers/preload-fetch.js +1 -4
  96. package/dist/hackers/preload-xhr.d.ts +1 -1
  97. package/dist/hackers/preload-xhr.js +1 -4
  98. package/dist/hackers/replay-beacon.d.ts +1 -1
  99. package/dist/hackers/replay-beacon.js +1 -4
  100. package/dist/hackers/replay-block-text-fragment.d.ts +1 -1
  101. package/dist/hackers/replay-block-text-fragment.js +1 -4
  102. package/dist/hackers/replay-css-proxy.d.ts +1 -1
  103. package/dist/hackers/replay-css-proxy.js +9 -12
  104. package/dist/hackers/replay-dom-rewrite.d.ts +1 -1
  105. package/dist/hackers/replay-dom-rewrite.js +165 -154
  106. package/dist/hackers/replay-eventsource.d.ts +1 -1
  107. package/dist/hackers/replay-eventsource.js +1 -4
  108. package/dist/hackers/replay-fetch.d.ts +1 -1
  109. package/dist/hackers/replay-fetch.js +1 -4
  110. package/dist/hackers/replay-history-path.d.ts +1 -1
  111. package/dist/hackers/replay-history-path.js +1 -4
  112. package/dist/hackers/replay-svg-image.d.ts +1 -1
  113. package/dist/hackers/replay-svg-image.js +1 -4
  114. package/dist/hackers/replay-websocket.d.ts +1 -1
  115. package/dist/hackers/replay-websocket.js +1 -4
  116. package/dist/hackers/replay-xhr.d.ts +1 -1
  117. package/dist/hackers/replay-xhr.js +1 -4
  118. package/dist/hackers/types.js +1 -2
  119. package/dist/index.d.ts +29 -13
  120. package/dist/index.js +23 -44
  121. package/dist/kind-map.d.ts +68 -0
  122. package/dist/kind-map.js +58 -0
  123. package/dist/network-store.js +12 -1
  124. package/dist/pagepocket.d.ts +19 -4
  125. package/dist/pagepocket.js +36 -102
  126. package/dist/path-resolver.d.ts +1 -2
  127. package/dist/path-resolver.js +9 -16
  128. package/dist/plugin/builtins/build-snapshot-plugin.d.ts +5 -0
  129. package/dist/plugin/builtins/build-snapshot-plugin.js +84 -0
  130. package/dist/plugin/builtins/replace-elements-plugin.d.ts +8 -0
  131. package/dist/plugin/builtins/replace-elements-plugin.js +13 -0
  132. package/dist/plugin/builtins/to-directory-plugin.d.ts +7 -0
  133. package/dist/plugin/builtins/to-directory-plugin.js +20 -0
  134. package/dist/plugin/builtins/to-zip-plugin.d.ts +5 -0
  135. package/dist/plugin/builtins/to-zip-plugin.js +19 -0
  136. package/dist/plugin/context.d.ts +47 -0
  137. package/dist/plugin/context.js +142 -0
  138. package/dist/plugin/runner.d.ts +12 -0
  139. package/dist/plugin/runner.js +232 -0
  140. package/dist/plugin/types.d.ts +108 -0
  141. package/dist/plugin/types.js +2 -0
  142. package/dist/plugins/build-files-from-capture.d.ts +5 -0
  143. package/dist/plugins/build-files-from-capture.js +85 -0
  144. package/dist/plugins/build-warc.d.ts +5 -0
  145. package/dist/plugins/build-warc.js +225 -0
  146. package/dist/plugins/builtins/manifest.d.ts +2 -0
  147. package/dist/plugins/builtins/manifest.js +42 -0
  148. package/dist/plugins/builtins/snapshot-directory.d.ts +2 -0
  149. package/dist/plugins/builtins/snapshot-directory.js +24 -0
  150. package/dist/plugins/builtins/snapshot-zip.d.ts +2 -0
  151. package/dist/plugins/builtins/snapshot-zip.js +25 -0
  152. package/dist/plugins/capture-http-lighterceptor.d.ts +5 -0
  153. package/dist/plugins/capture-http-lighterceptor.js +85 -0
  154. package/dist/plugins/capture-http-puppeteer.d.ts +5 -0
  155. package/dist/plugins/capture-http-puppeteer.js +85 -0
  156. package/dist/plugins/host.d.ts +37 -0
  157. package/dist/plugins/host.js +105 -0
  158. package/dist/plugins/index.d.ts +6 -0
  159. package/dist/plugins/index.js +11 -0
  160. package/dist/plugins/ordering.d.ts +2 -0
  161. package/dist/plugins/ordering.js +19 -0
  162. package/dist/plugins/types.d.ts +51 -0
  163. package/dist/plugins/types.js +2 -0
  164. package/dist/preload.js +3 -7
  165. package/dist/replace-elements/actions.d.ts +5 -0
  166. package/dist/replace-elements/actions.js +86 -0
  167. package/dist/replace-elements/match.d.ts +5 -0
  168. package/dist/replace-elements/match.js +46 -0
  169. package/dist/replace-elements/normalize.d.ts +21 -0
  170. package/dist/replace-elements/normalize.js +50 -0
  171. package/dist/replace-elements.d.ts +1 -1
  172. package/dist/replace-elements.js +5 -185
  173. package/dist/replay/match-api.d.ts +10 -0
  174. package/dist/replay/match-api.js +162 -0
  175. package/dist/replay/templates/match-api-source.d.ts +1 -0
  176. package/dist/replay/templates/match-api-source.js +137 -0
  177. package/dist/replay/templates/replay-script-template.d.ts +5 -0
  178. package/dist/replay/templates/replay-script-template.js +337 -0
  179. package/dist/replay/templates/resource-proxy-script.d.ts +1 -0
  180. package/dist/replay/templates/resource-proxy-script.js +274 -0
  181. package/dist/replay-script.d.ts +3 -10
  182. package/dist/replay-script.js +11 -625
  183. package/dist/resource-filter.d.ts +1 -1
  184. package/dist/resource-filter.js +1 -5
  185. package/dist/resource-proxy/escape-percent.d.ts +1 -0
  186. package/dist/resource-proxy/escape-percent.js +12 -0
  187. package/dist/resource-proxy/multimap.d.ts +3 -0
  188. package/dist/resource-proxy/multimap.js +18 -0
  189. package/dist/resource-proxy/pathname-variants.d.ts +3 -0
  190. package/dist/resource-proxy/pathname-variants.js +54 -0
  191. package/dist/resource-proxy.d.ts +4 -2
  192. package/dist/resource-proxy.js +48 -117
  193. package/dist/resources.js +4 -42
  194. package/dist/rewrite-links/js-imports.d.ts +3 -0
  195. package/dist/rewrite-links/js-imports.js +56 -0
  196. package/dist/rewrite-links/link-rel.d.ts +2 -0
  197. package/dist/rewrite-links/link-rel.js +10 -0
  198. package/dist/rewrite-links/meta-refresh.d.ts +3 -0
  199. package/dist/rewrite-links/meta-refresh.js +22 -0
  200. package/dist/rewrite-links/skip.d.ts +1 -0
  201. package/dist/rewrite-links/skip.js +10 -0
  202. package/dist/rewrite-links/srcset.d.ts +3 -0
  203. package/dist/rewrite-links/srcset.js +63 -0
  204. package/dist/rewrite-links/url-resolve.d.ts +3 -0
  205. package/dist/rewrite-links/url-resolve.js +13 -0
  206. package/dist/rewrite-links.d.ts +3 -3
  207. package/dist/rewrite-links.js +31 -240
  208. package/dist/snapshot-builder/api.d.ts +3 -0
  209. package/dist/snapshot-builder/api.js +6 -0
  210. package/dist/snapshot-builder/build-snapshot.d.ts +3 -0
  211. package/dist/snapshot-builder/build-snapshot.js +138 -0
  212. package/dist/snapshot-builder/capture-index/index-capture.d.ts +13 -0
  213. package/dist/snapshot-builder/capture-index/index-capture.js +168 -0
  214. package/dist/snapshot-builder/capture-index/index.d.ts +2 -0
  215. package/dist/snapshot-builder/capture-index/index.js +1 -0
  216. package/dist/snapshot-builder/capture-index/types.d.ts +12 -0
  217. package/dist/snapshot-builder/capture-index/types.js +1 -0
  218. package/dist/snapshot-builder/capture-index.d.ts +12 -0
  219. package/dist/snapshot-builder/capture-index.js +173 -0
  220. package/dist/snapshot-builder/emit-document.d.ts +24 -0
  221. package/dist/snapshot-builder/emit-document.js +50 -0
  222. package/dist/snapshot-builder/grouping.d.ts +8 -0
  223. package/dist/snapshot-builder/grouping.js +87 -0
  224. package/dist/snapshot-builder/http.d.ts +6 -0
  225. package/dist/snapshot-builder/http.js +28 -0
  226. package/dist/snapshot-builder/index.d.ts +4 -0
  227. package/dist/snapshot-builder/index.js +2 -0
  228. package/dist/snapshot-builder/path-map.d.ts +3 -0
  229. package/dist/snapshot-builder/path-map.js +35 -0
  230. package/dist/snapshot-builder/resources-path.d.ts +23 -0
  231. package/dist/snapshot-builder/resources-path.js +47 -0
  232. package/dist/snapshot-builder/rewrite-resource.d.ts +18 -0
  233. package/dist/snapshot-builder/rewrite-resource.js +52 -0
  234. package/dist/snapshot-builder/types.d.ts +37 -0
  235. package/dist/snapshot-builder/types.js +2 -0
  236. package/dist/snapshot-builder.d.ts +12 -8
  237. package/dist/snapshot-builder.js +252 -27
  238. package/dist/types.d.ts +122 -78
  239. package/dist/types.js +4 -2
  240. package/dist/units/contracts-bridge.d.ts +76 -0
  241. package/dist/units/contracts-bridge.js +6 -0
  242. package/dist/units/index.d.ts +4 -0
  243. package/dist/units/index.js +2 -0
  244. package/dist/units/runner.d.ts +11 -0
  245. package/dist/units/runner.js +270 -0
  246. package/dist/units/types.d.ts +39 -0
  247. package/dist/units/types.js +1 -0
  248. package/dist/utils/streams.d.ts +2 -0
  249. package/dist/utils/streams.js +29 -0
  250. package/dist/utils.d.ts +35 -1
  251. package/dist/utils.js +107 -29
  252. package/dist/v3/contracts-bridge.d.ts +69 -0
  253. package/dist/v3/contracts-bridge.js +5 -0
  254. package/dist/v3/index.d.ts +4 -0
  255. package/dist/v3/index.js +2 -0
  256. package/dist/v3/runner.d.ts +20 -0
  257. package/dist/v3/runner.js +245 -0
  258. package/dist/v3/types.d.ts +39 -0
  259. package/dist/v3/types.js +1 -0
  260. package/dist/writers.js +3 -1
  261. package/package.json +11 -3
@@ -0,0 +1,225 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.BuildWarcPlugin = void 0;
4
/**
 * Return a canonical copy of a header list.
 *
 * Header names are lower-cased and the entries are ordered by name using
 * plain `<` / `>` string comparison. The input array is left untouched.
 *
 * @param headers list of `{ name, value }` header entries
 * @returns a new, sorted list of `{ name, value }` entries
 */
const normalizeHeaders = (headers) => {
    const byName = (left, right) => (left.name < right.name ? -1 : left.name > right.name ? 1 : 0);
    return headers
        .map((header) => ({ name: header.name.toLowerCase(), value: header.value }))
        .sort(byName);
};
15
/**
 * Serialize a header list as HTTP header lines, each terminated by CRLF.
 *
 * An empty list produces an empty string. Callers append the blank line
 * that ends the header section themselves, so emitting a lone CRLF here
 * would create a second blank line and push a stray CRLF into the body.
 *
 * @param headers list of `{ name, value }` header entries
 * @returns `"name: value\r\n"` repeated for every entry
 */
const headersToText = (headers) => headers.map((h) => `${h.name}: ${h.value}\r\n`).join("");
16
/**
 * Derive the request target (path plus query string) from a URL string.
 *
 * @param url URL string to convert
 * @returns path and query of the URL, or the input unchanged when it
 *          cannot be parsed by `URL`
 */
const urlToRequestTarget = (url) => {
    let target;
    try {
        const { pathname, search } = new URL(url);
        target = `${pathname || "/"}${search || ""}`;
    }
    catch {
        // Unparseable input is passed through as-is.
        target = url;
    }
    return target;
};
27
+ const statusTextFromStatus = (status) => {
28
+ // Small stable mapping; unknown -> empty.
29
+ switch (status) {
30
+ case 200:
31
+ return "OK";
32
+ case 201:
33
+ return "Created";
34
+ case 204:
35
+ return "No Content";
36
+ case 301:
37
+ return "Moved Permanently";
38
+ case 302:
39
+ return "Found";
40
+ case 304:
41
+ return "Not Modified";
42
+ case 400:
43
+ return "Bad Request";
44
+ case 401:
45
+ return "Unauthorized";
46
+ case 403:
47
+ return "Forbidden";
48
+ case 404:
49
+ return "Not Found";
50
+ case 500:
51
+ return "Internal Server Error";
52
+ default:
53
+ return "";
54
+ }
55
+ };
56
+ const streamToUint8Array = async (stream) => {
57
+ const reader = stream.getReader();
58
+ const chunks = [];
59
+ let total = 0;
60
+ while (true) {
61
+ const result = await reader.read();
62
+ if (result.done)
63
+ break;
64
+ if (result.value) {
65
+ chunks.push(result.value);
66
+ total += result.value.byteLength;
67
+ }
68
+ }
69
+ const output = new Uint8Array(total);
70
+ let offset = 0;
71
+ for (const chunk of chunks) {
72
+ output.set(chunk, offset);
73
+ offset += chunk.byteLength;
74
+ }
75
+ return output;
76
+ };
77
/**
 * Encode a string with `TextEncoder` (UTF-8).
 *
 * @param text string to encode
 * @returns the encoded bytes
 */
const encodeText = (text) => {
    const encoder = new TextEncoder();
    return encoder.encode(text);
};
78
+ const concatBytes = (chunks) => {
79
+ const total = chunks.reduce((sum, chunk) => sum + chunk.byteLength, 0);
80
+ const output = new Uint8Array(total);
81
+ let offset = 0;
82
+ for (const chunk of chunks) {
83
+ output.set(chunk, offset);
84
+ offset += chunk.byteLength;
85
+ }
86
+ return output;
87
+ };
88
/**
 * Format a timestamp as an ISO 8601 UTC string via `Date#toISOString`.
 *
 * @param timestamp value accepted by the `Date` constructor
 * @returns the ISO 8601 representation, e.g. `1970-01-01T00:00:00.000Z`
 */
const toWarcDate = (timestamp) => {
    const moment = new Date(timestamp);
    return moment.toISOString();
};
89
/**
 * Generate a record identifier of the form `<urn:uuid:...>`.
 *
 * Best-effort UUIDv4: uses `crypto.getRandomValues` when the global
 * exposes it and falls back to `Math.random` otherwise.
 *
 * @returns the identifier wrapped in angle brackets
 */
const randomWarcRecordId = () => {
    const raw = new Uint8Array(16);
    const cryptoApi = globalThis.crypto;
    if (cryptoApi?.getRandomValues) {
        cryptoApi.getRandomValues(raw);
    }
    else {
        raw.forEach((_, index) => {
            raw[index] = Math.floor(Math.random() * 256);
        });
    }
    // Stamp the version (4) and variant (10xx) bits.
    raw[6] = (raw[6] & 0x0f) | 0x40;
    raw[8] = (raw[8] & 0x3f) | 0x80;
    const digits = Array.from(raw, (byte) => byte.toString(16).padStart(2, "0")).join("");
    const groups = [
        digits.slice(0, 8),
        digits.slice(8, 12),
        digits.slice(12, 16),
        digits.slice(16, 20),
        digits.slice(20)
    ];
    return `<urn:uuid:${groups.join("-")}>`;
};
109
/**
 * Render a record of header fields as `name: value` lines joined by CRLF,
 * followed by the CRLF CRLF that ends the header block.
 *
 * @param headers object mapping field names to values
 * @returns the serialized header block
 */
const makeWarcHeaders = (headers) => {
    const rendered = [];
    for (const [field, content] of Object.entries(headers)) {
        rendered.push(`${field}: ${content}`);
    }
    return rendered.join("\r\n") + "\r\n\r\n";
};
115
/**
 * Serialize one WARC record.
 *
 * Layout follows the WARC 1.1 grammar
 * (`version CRLF` + named fields + `CRLF` + block + `CRLF CRLF`):
 * the version is written as the bare first line `WARC/1.1`, not as a named
 * field, and the record is terminated by two CRLFs.
 *
 * @param input record description: `warcType`, `recordId`, `date`,
 *        `contentType`, `payload` (bytes), optional `targetUri` and
 *        optional `extraWarcHeaders` (merged last, so they may override)
 * @returns the complete record as bytes
 */
const makeWarcRecord = (input) => {
    const namedFields = {
        "WARC-Type": input.warcType,
        "WARC-Record-ID": input.recordId,
        "WARC-Date": input.date,
        ...(input.targetUri ? { "WARC-Target-URI": input.targetUri } : {}),
        "Content-Type": input.contentType,
        "Content-Length": String(input.payload.byteLength),
        ...(input.extraWarcHeaders ?? {})
    };
    // makeWarcHeaders already appends the blank line that closes the header.
    const headerBytes = encodeText(`WARC/1.1\r\n${makeWarcHeaders(namedFields)}`);
    // A record ends with two CRLFs after the content block.
    const endBytes = encodeText("\r\n\r\n");
    return concatBytes([headerBytes, input.payload, endBytes]);
};
130
+ const groupByRequestId = (events) => {
131
+ const requests = new Map();
132
+ const responses = new Map();
133
+ for (const e of events) {
134
+ if (e.type === "http.request") {
135
+ requests.set(e.requestId, e);
136
+ continue;
137
+ }
138
+ if (e.type === "http.response") {
139
+ responses.set(e.requestId, e);
140
+ }
141
+ }
142
+ return { requests, responses };
143
+ };
144
/**
 * Plugin that turns the captured HTTP traffic into a single WARC file.
 *
 * On finalize it reads `ctx.capture`, writes one `warcinfo` record followed
 * by a `request` and a `response` record for every request id that has both
 * events, and stores the result as `capture.warc` in `ctx.files`.
 */
class BuildWarcPlugin {
    constructor() {
        this.name = "plugin:build-warc";
    }
    /**
     * Register the finalize hook on the plugin context.
     *
     * The hook throws when `ctx.capture` is missing.
     */
    apply(ctx) {
        ctx.onFinalize(async () => {
            const capture = ctx.capture;
            if (!capture) {
                throw new Error("BuildWarcPlugin requires ctx.capture");
            }
            const { requests, responses } = groupByRequestId(capture.events);
            const warcinfoPayload = encodeText([
                "software: pagepocket",
                `format: WARC File Format 1.1 (best-effort)`,
                `pagepocket:headersNormalized=true`,
                `pagepocket:bodyMode=decoded`,
                `pagepocket:capabilities=${JSON.stringify(capture.capabilities)}`
            ].join("\n") + "\n");
            const records = [];
            records.push(makeWarcRecord({
                warcType: "warcinfo",
                date: toWarcDate(Date.now()),
                recordId: randomWarcRecordId(),
                contentType: "application/warc-fields",
                payload: warcinfoPayload
            }));
            for (const [requestId, reqEvent] of requests.entries()) {
                if (reqEvent.type !== "http.request") {
                    continue;
                }
                // Requests without a matching response are not written.
                const resEvent = responses.get(requestId);
                if (!resEvent || resEvent.type !== "http.response") {
                    continue;
                }
                const reqHeaders = normalizeHeaders(reqEvent.headers);
                const requestTarget = urlToRequestTarget(reqEvent.url);
                const requestStartLine = `${reqEvent.method} ${requestTarget} HTTP/1.1\r\n`;
                const requestHttpBytes = encodeText(requestStartLine + headersToText(reqHeaders) + "\r\n");
                const resHeaders = normalizeHeaders(resEvent.headers);
                // `||` rather than `??`: an empty reason phrase should also
                // fall back to the built-in table.
                const statusText = resEvent.statusText || statusTextFromStatus(resEvent.status);
                const responseStartLine = `HTTP/1.1 ${resEvent.status} ${statusText}\r\n`;
                const responseHeadersBytes = encodeText(responseStartLine + headersToText(resHeaders) + "\r\n");
                const bodyBytes = resEvent.bodyRef
                    ? await streamToUint8Array(await capture.contentStore.open(resEvent.bodyRef))
                    : new Uint8Array();
                const responseHttpBytes = concatBytes([responseHeadersBytes, bodyBytes]);
                records.push(makeWarcRecord({
                    warcType: "request",
                    targetUri: reqEvent.url,
                    date: toWarcDate(reqEvent.timestamp),
                    recordId: randomWarcRecordId(),
                    contentType: "application/http; msgtype=request",
                    payload: requestHttpBytes
                }));
                records.push(makeWarcRecord({
                    warcType: "response",
                    targetUri: resEvent.url,
                    date: toWarcDate(resEvent.timestamp),
                    recordId: randomWarcRecordId(),
                    contentType: "application/http; msgtype=response",
                    payload: responseHttpBytes
                }));
            }
            const warcBytes = concatBytes(records);
            const files = {
                root: {
                    kind: "directory",
                    path: "",
                    entries: [
                        {
                            kind: "file",
                            path: "capture.warc",
                            source: { kind: "bytes", data: warcBytes }
                        }
                    ]
                }
            };
            ctx.files = files;
        });
    }
}
225
+ exports.BuildWarcPlugin = BuildWarcPlugin;
@@ -0,0 +1,2 @@
1
+ import type { PagePocketPlugin } from "../types";
2
/**
 * Factory for the built-in manifest plugin ("built-in:manifest").
 * Its single "manifest" output writes `manifest.json` through `ctx.sink`.
 */
+ export declare const manifestPlugin: () => PagePocketPlugin;
@@ -0,0 +1,42 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.manifestPlugin = void 0;
4
+ const manifestFromContext = (ctx) => {
5
+ const config = ctx.config;
6
+ return {
7
+ version: "1.0",
8
+ createdAt: ctx.bundle.createdAt,
9
+ entryUrl: ctx.bundle.entryUrl,
10
+ finalUrl: ctx.bundle.finalUrl,
11
+ emit: config.emit ?? [],
12
+ skip: config.skip,
13
+ plugins: config.plugins ?? [],
14
+ bundle: {
15
+ totals: ctx.bundle.totals,
16
+ warnings: ctx.bundle.warnings
17
+ },
18
+ snapshot: config.snapshot
19
+ ? {
20
+ built: config.snapshot.built,
21
+ options: config.snapshot.options,
22
+ meta: ctx.snapshot?.meta
23
+ }
24
+ : undefined
25
+ };
26
+ };
27
/**
 * Create the built-in manifest plugin.
 *
 * @returns a plugin named "built-in:manifest" with one "manifest" output
 *          (requires "bundle") that writes `manifest.json` via `ctx.sink`
 */
const manifestPlugin = () => {
    const manifestOutput = {
        id: "manifest",
        requires: "bundle",
        async write(ctx) {
            await ctx.sink.writeJson("manifest.json", manifestFromContext(ctx));
        }
    };
    return { name: "built-in:manifest", outputs: [manifestOutput] };
};
42
+ exports.manifestPlugin = manifestPlugin;
@@ -0,0 +1,2 @@
1
+ import type { PagePocketPlugin } from "../types";
2
/**
 * Factory for the built-in "built-in:snapshot-directory" plugin.
 * Its "directory" output calls `ctx.snapshot.toDirectory(ctx.outputDir)`
 * and throws when either `ctx.snapshot` or `ctx.outputDir` is missing.
 */
+ export declare const snapshotDirectoryPlugin: () => PagePocketPlugin;
@@ -0,0 +1,24 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.snapshotDirectoryPlugin = void 0;
4
+ const snapshotDirectoryPlugin = () => {
5
+ return {
6
+ name: "built-in:snapshot-directory",
7
+ outputs: [
8
+ {
9
+ id: "directory",
10
+ requires: "snapshot",
11
+ async write(ctx) {
12
+ if (!ctx.snapshot) {
13
+ throw new Error("directory output requires ctx.snapshot");
14
+ }
15
+ if (!ctx.outputDir) {
16
+ throw new Error("directory output requires ctx.outputDir");
17
+ }
18
+ await ctx.snapshot.toDirectory(ctx.outputDir);
19
+ }
20
+ }
21
+ ]
22
+ };
23
+ };
24
+ exports.snapshotDirectoryPlugin = snapshotDirectoryPlugin;
@@ -0,0 +1,2 @@
1
+ import type { PagePocketPlugin } from "../types";
2
/**
 * Factory for the built-in "built-in:snapshot-zip" plugin.
 * Its "zip" output writes the result of `ctx.snapshot.toZip()` as
 * `snapshot.zip` through `ctx.sink` and throws when `ctx.snapshot` is
 * missing or the zip result is not a Uint8Array.
 */
+ export declare const snapshotZipPlugin: () => PagePocketPlugin;
@@ -0,0 +1,25 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.snapshotZipPlugin = void 0;
4
+ const snapshotZipPlugin = () => {
5
+ return {
6
+ name: "built-in:snapshot-zip",
7
+ outputs: [
8
+ {
9
+ id: "zip",
10
+ requires: "snapshot",
11
+ async write(ctx) {
12
+ if (!ctx.snapshot) {
13
+ throw new Error("zip output requires ctx.snapshot");
14
+ }
15
+ const zipped = await ctx.snapshot.toZip();
16
+ if (!(zipped instanceof Uint8Array)) {
17
+ throw new Error("zip output expects snapshot.toZip() to return Uint8Array in node");
18
+ }
19
+ await ctx.sink.writeFile("snapshot.zip", zipped, "application/zip");
20
+ }
21
+ }
22
+ ]
23
+ };
24
+ };
25
+ exports.snapshotZipPlugin = snapshotZipPlugin;
@@ -0,0 +1,5 @@
1
+ import type { PagePocketContext, PagePocketPlugin } from "../plugin/types";
2
/**
 * Plugin that records network events into `ctx.capture`.
 * `apply` installs the capture object on the context and subscribes to
 * network events through `ctx.onNetworkEvent`.
 */
+ export declare class CaptureHttpLighterceptorPlugin implements PagePocketPlugin {
3
+ readonly name = "plugin:capture-http-lighterceptor";
4
+ apply(ctx: PagePocketContext): void;
5
+ }
@@ -0,0 +1,85 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.CaptureHttpLighterceptorPlugin = void 0;
4
+ const memory_content_store_1 = require("../capture/memory-content-store");
5
/**
 * Convert a header record (`{ name: value }`) into a list of
 * `{ name, value }` entries in key order.
 *
 * @param headers header record, or a falsy value
 * @returns the entry list; empty when `headers` is falsy
 */
const headersRecordToList = (headers) => headers
    ? Object.keys(headers).map((name) => ({ name, value: headers[name] }))
    : [];
15
/**
 * Capture plugin that mirrors network events into `ctx.capture`.
 *
 * Response bodies are stored in an in-memory content store created per
 * `apply` call; events reference them through `bodyRef`.
 */
class CaptureHttpLighterceptorPlugin {
    constructor() {
        this.name = "plugin:capture-http-lighterceptor";
    }
    /**
     * Install `ctx.capture` and subscribe to network events.
     *
     * "request" and "failed" events are recorded as-is; every other event
     * is recorded as an "http.response".
     */
    apply(ctx) {
        const store = (0, memory_content_store_1.createMemoryContentStore)("capture-http-lighterceptor");
        const recorded = [];
        const capabilities = {
            requestHeaders: "approx",
            responseHeaders: "approx",
            requestBodies: false,
            responseBodies: "decoded",
            httpVersion: false,
            remoteIp: false,
            headerOrderPreserved: false
        };
        ctx.capture = { events: recorded, contentStore: store, capabilities };
        ctx.onNetworkEvent(async (event) => {
            switch (event.type) {
                case "request":
                    recorded.push({
                        type: "http.request",
                        requestId: event.requestId,
                        url: event.url,
                        method: event.method,
                        headers: headersRecordToList(event.headers),
                        timestamp: event.timestamp,
                        frameId: event.frameId,
                        resourceType: event.resourceType,
                        initiator: event.initiator
                    });
                    return;
                case "failed":
                    recorded.push({
                        type: "http.failed",
                        requestId: event.requestId,
                        url: event.url,
                        errorText: event.errorText,
                        timestamp: event.timestamp
                    });
                    return;
                default:
                    break;
            }
            // Store the body first so the event can carry its reference.
            let bodyRef = undefined;
            if (event.body) {
                bodyRef = await store.put(event.body, {
                    url: event.url,
                    mimeType: event.mimeType,
                    sizeHint: undefined
                });
            }
            recorded.push({
                type: "http.response",
                requestId: event.requestId,
                url: event.url,
                status: event.status,
                statusText: event.statusText,
                headers: headersRecordToList(event.headers),
                timestamp: event.timestamp,
                mimeType: event.mimeType,
                fromDiskCache: event.fromDiskCache,
                fromServiceWorker: event.fromServiceWorker,
                bodyRef,
                bodySize: undefined
            });
        });
    }
}
85
+ exports.CaptureHttpLighterceptorPlugin = CaptureHttpLighterceptorPlugin;
@@ -0,0 +1,5 @@
1
+ import type { PagePocketContext, PagePocketPlugin } from "../plugin/types";
2
/**
 * Plugin that records network events into `ctx.capture`.
 * `apply` installs the capture object on the context and subscribes to
 * network events through `ctx.onNetworkEvent`.
 */
+ export declare class CaptureHttpPuppeteerPlugin implements PagePocketPlugin {
3
+ readonly name = "plugin:capture-http-puppeteer";
4
+ apply(ctx: PagePocketContext): void;
5
+ }
@@ -0,0 +1,85 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.CaptureHttpPuppeteerPlugin = void 0;
4
+ const memory_content_store_1 = require("../capture/memory-content-store");
5
/**
 * Convert a header record (`{ name: value }`) into a list of
 * `{ name, value }` entries in key order.
 *
 * @param headers header record, or a falsy value
 * @returns the entry list; empty when `headers` is falsy
 */
const headersRecordToList = (headers) => headers
    ? Object.keys(headers).map((name) => ({ name, value: headers[name] }))
    : [];
15
/**
 * Capture plugin that mirrors network events into `ctx.capture`.
 *
 * Response bodies are stored in an in-memory content store created per
 * `apply` call; events reference them through `bodyRef`.
 */
class CaptureHttpPuppeteerPlugin {
    constructor() {
        this.name = "plugin:capture-http-puppeteer";
    }
    /**
     * Install `ctx.capture` and subscribe to network events.
     *
     * "request" and "failed" events are recorded as-is; every other event
     * is recorded as an "http.response".
     */
    apply(ctx) {
        const store = (0, memory_content_store_1.createMemoryContentStore)("capture-http-puppeteer");
        const recorded = [];
        const capabilities = {
            requestHeaders: "approx",
            responseHeaders: "approx",
            requestBodies: false,
            responseBodies: "decoded",
            httpVersion: false,
            remoteIp: false,
            headerOrderPreserved: false
        };
        ctx.capture = { events: recorded, contentStore: store, capabilities };
        ctx.onNetworkEvent(async (event) => {
            switch (event.type) {
                case "request":
                    recorded.push({
                        type: "http.request",
                        requestId: event.requestId,
                        url: event.url,
                        method: event.method,
                        headers: headersRecordToList(event.headers),
                        timestamp: event.timestamp,
                        frameId: event.frameId,
                        resourceType: event.resourceType,
                        initiator: event.initiator
                    });
                    return;
                case "failed":
                    recorded.push({
                        type: "http.failed",
                        requestId: event.requestId,
                        url: event.url,
                        errorText: event.errorText,
                        timestamp: event.timestamp
                    });
                    return;
                default:
                    break;
            }
            // Store the body first so the event can carry its reference.
            let bodyRef = undefined;
            if (event.body) {
                bodyRef = await store.put(event.body, {
                    url: event.url,
                    mimeType: event.mimeType,
                    sizeHint: undefined
                });
            }
            recorded.push({
                type: "http.response",
                requestId: event.requestId,
                url: event.url,
                status: event.status,
                statusText: event.statusText,
                headers: headersRecordToList(event.headers),
                timestamp: event.timestamp,
                mimeType: event.mimeType,
                fromDiskCache: event.fromDiskCache,
                fromServiceWorker: event.fromServiceWorker,
                bodyRef,
                bodySize: undefined
            });
        });
    }
}
85
+ exports.CaptureHttpPuppeteerPlugin = CaptureHttpPuppeteerPlugin;
@@ -0,0 +1,37 @@
1
+ import type { ReplaceElementsConfig } from "../types";
2
+ import type { OutputWriter, PagePocketPlugin, PluginContext } from "./types";
3
/** Which plugin outputs to produce: the requested IDs, IDs to skip, and the failure policy. */
+ export type OutputPlan = {
4
+ /** Output IDs requested by the user (multiple allowed). */
5
+ emit: string[];
6
+ /** Output IDs to skip even if present in emit. */
7
+ skip?: string[];
8
+ /** Abort on first plugin failure. Default false. */
9
+ strict?: boolean;
10
+ };
11
/** One output writer selected by `resolveOutputs`, together with the name of the plugin that provides it. */
+ export type ResolvedOutput = {
12
+ id: string;
13
+ requires: "bundle" | "snapshot";
14
+ write: OutputWriter["write"];
15
+ /** For diagnostics / ordering */
16
+ pluginName: string;
17
+ };
18
/**
 * Contract of the object returned by `createPluginHost`: access to the
 * registered plugins, init/shutdown hooks, output resolution and
 * collection of replace-elements configuration.
 */
+ export interface PluginHost {
19
+ plugins(): PagePocketPlugin[];
20
+ init(ctx: PluginContext): Promise<void>;
21
+ /**
22
+ * Resolve all outputs by id and filter by emit/skip.
23
+ * MUST throw on:
24
+ * - duplicate output IDs
25
+ * - unknown emit IDs
26
+ */
27
+ resolveOutputs(plan: OutputPlan): {
28
+ outputs: ResolvedOutput[];
29
+ needsSnapshot: boolean;
30
+ };
31
+ /** Merge replaceElements from plugins in deterministic order. */
32
+ collectReplaceElements(ctx: PluginContext): Promise<ReplaceElementsConfig | undefined>;
33
+ shutdown(ctx: PluginContext): Promise<void>;
34
+ }
35
/** Build a `PluginHost` from the given list of plugins. */
+ export declare const createPluginHost: (input: {
36
+ plugins: PagePocketPlugin[];
37
+ }) => PluginHost;
@@ -0,0 +1,105 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.createPluginHost = void 0;
4
+ const ordering_1 = require("./ordering");
5
/**
 * Flatten every plugin's `outputs` into a single list of `{ plugin, writer }`
 * pairs. Plugins without `outputs` contribute nothing. Pairs follow the order
 * of `plugins`, then each plugin's own `outputs` order.
 */
const collectAllOutputs = (plugins) => Array.from(plugins).flatMap((owner) =>
    // Array.from walks the iterator, exactly like the for...of it replaces.
    Array.from(owner.outputs ?? [], (writer) => ({ plugin: owner, writer })));
15
/**
 * Render a bullet list of every output the plugins provide, one per line,
 * sorted by output ID: `- <id> (<requires>) [<plugin name>]`.
 * When an ID is provided more than once, the last provider wins. A missing
 * `requires` or plugin name is shown as "?".
 */
const formatAvailableOutputs = (plugins) => {
    const metaById = new Map();
    for (const owner of plugins) {
        for (const writer of owner.outputs ?? []) {
            metaById.set(writer.id, { requires: writer.requires, pluginName: owner.name });
        }
    }
    const lines = [];
    for (const id of [...metaById.keys()].sort()) {
        const { requires, pluginName } = metaById.get(id);
        lines.push(`- ${id} (${requires ?? "?"}) [${pluginName ?? "?"}]`);
    }
    return lines.join("\n");
};
28
/**
 * Create a plugin host over `input.plugins`.
 *
 * The plugins are ordered once, up front, with `sortPluginsDeterministically`;
 * every host operation below walks that ordered list.
 *
 * @param input - Object whose `plugins` array lists the plugins to manage.
 * @returns A host exposing `plugins`, `init`, `resolveOutputs`,
 *   `collectReplaceElements` and `shutdown`.
 */
const createPluginHost = (input) => {
    const ordered = (0, ordering_1.sortPluginsDeterministically)(input.plugins);
    /**
     * Index all outputs by ID (first provider wins).
     * @throws Error listing the offending IDs when any ID is provided more than once.
     */
    const outputsById = () => {
        const map = new Map();
        const duplicates = new Set();
        for (const { plugin, writer } of collectAllOutputs(ordered)) {
            if (map.has(writer.id)) {
                duplicates.add(writer.id);
                continue;
            }
            map.set(writer.id, { plugin, writer });
        }
        if (duplicates.size > 0) {
            const unique = Array.from(duplicates).sort();
            throw new Error(`Duplicate output IDs: ${unique.join(", ")}`);
        }
        return map;
    };
    return {
        /** Return a copy of the ordered plugin list, so callers cannot reorder the host's own list. */
        plugins() {
            return ordered.slice();
        },
        /** Await each plugin's optional `hooks.init`, one at a time, in plugin order. */
        async init(ctx) {
            for (const plugin of ordered) {
                await plugin.hooks?.init?.(ctx);
            }
        },
        /**
         * Select the outputs named in `plan.emit`, minus those in `plan.skip`.
         *
         * @returns `outputs` in deterministic execution order and `needsSnapshot`,
         *   which is true when any selected output requires "snapshot".
         * @throws Error on duplicate output IDs or on emit IDs no plugin provides.
         */
        resolveOutputs(plan) {
            // Sets give constant-time membership tests and collapse repeated IDs.
            const emit = new Set(plan.emit ?? []);
            const skip = new Set(plan.skip ?? []);
            const all = outputsById();
            // Each unknown ID is reported once, matching the duplicate-ID error above.
            const unknown = Array.from(emit)
                .filter((id) => !all.has(id))
                .sort();
            if (unknown.length > 0) {
                const available = formatAvailableOutputs(ordered);
                throw new Error(`Unknown output IDs: ${unknown.join(", ")}\n\nAvailable outputs:\n${available}`);
            }
            const resolved = [];
            let needsSnapshot = false;
            // Deterministic execution order: plugin order; within plugin outputs array order.
            for (const plugin of ordered) {
                for (const writer of plugin.outputs ?? []) {
                    if (!emit.has(writer.id) || skip.has(writer.id)) {
                        continue;
                    }
                    if (writer.requires === "snapshot") {
                        needsSnapshot = true;
                    }
                    resolved.push({
                        id: writer.id,
                        requires: writer.requires,
                        // Call through the writer so a `write` that relies on `this`
                        // still sees its own writer after being copied onto this record.
                        write: (...args) => writer.write(...args),
                        pluginName: plugin.name
                    });
                }
            }
            return { outputs: resolved, needsSnapshot };
        },
        /**
         * Concatenate the arrays returned by each plugin's optional
         * `replaceElements(ctx)`, in plugin order.
         *
         * @returns The merged array, or `undefined` when nothing was contributed.
         */
        async collectReplaceElements(ctx) {
            const merged = [];
            for (const plugin of ordered) {
                if (!plugin.replaceElements) {
                    continue;
                }
                const contributed = await plugin.replaceElements(ctx);
                if (contributed && contributed.length > 0) {
                    merged.push(...contributed);
                }
            }
            return merged.length ? merged : undefined;
        },
        /**
         * Await each plugin's optional `hooks.shutdown`, one at a time, in plugin order.
         * NOTE(review): a rejection stops the loop, so later plugins are not shut
         * down — confirm whether callers rely on that.
         */
        async shutdown(ctx) {
            for (const plugin of ordered) {
                await plugin.hooks?.shutdown?.(ctx);
            }
        }
    };
};
105
+ exports.createPluginHost = createPluginHost;
@@ -0,0 +1,6 @@
1
+ export type { ArtifactSink, LogLevel, OutputRequirement, OutputWriter, PagePocketPlugin, PluginContext, PluginOrder, RunContext } from "./types";
2
+ export type { OutputPlan, ResolvedOutput, PluginHost } from "./host";
3
+ export { createPluginHost } from "./host";
4
+ export { manifestPlugin } from "./builtins/manifest";
5
+ export { snapshotDirectoryPlugin } from "./builtins/snapshot-directory";
6
+ export { snapshotZipPlugin } from "./builtins/snapshot-zip";
@@ -0,0 +1,11 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.snapshotZipPlugin = exports.snapshotDirectoryPlugin = exports.manifestPlugin = exports.createPluginHost = void 0;
4
+ var host_1 = require("./host");
5
+ Object.defineProperty(exports, "createPluginHost", { enumerable: true, get: function () { return host_1.createPluginHost; } });
6
+ var manifest_1 = require("./builtins/manifest");
7
+ Object.defineProperty(exports, "manifestPlugin", { enumerable: true, get: function () { return manifest_1.manifestPlugin; } });
8
+ var snapshot_directory_1 = require("./builtins/snapshot-directory");
9
+ Object.defineProperty(exports, "snapshotDirectoryPlugin", { enumerable: true, get: function () { return snapshot_directory_1.snapshotDirectoryPlugin; } });
10
+ var snapshot_zip_1 = require("./builtins/snapshot-zip");
11
+ Object.defineProperty(exports, "snapshotZipPlugin", { enumerable: true, get: function () { return snapshot_zip_1.snapshotZipPlugin; } });
@@ -0,0 +1,2 @@
1
+ import type { PagePocketPlugin } from "./types";
2
/**
 * Order plugins deterministically.
 *
 * NOTE(review): the ordering criteria, and whether the input array is copied
 * or sorted in place, are defined by the implementation — confirm there.
 *
 * @param plugins - The plugins to order.
 * @returns The plugins in their deterministic order.
 */
export declare const sortPluginsDeterministically: (plugins: Array<PagePocketPlugin>) => Array<PagePocketPlugin>;