@pagepocket/lib 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261)
  1. package/README.md +7 -6
  2. package/dist/build-snapshot-from-bundle.d.ts +23 -0
  3. package/dist/build-snapshot-from-bundle.js +68 -0
  4. package/dist/builtin-blacklist.js +3 -6
  5. package/dist/bundle/from-network-store.d.ts +10 -0
  6. package/dist/bundle/from-network-store.js +26 -0
  7. package/dist/bundle/types.d.ts +32 -0
  8. package/dist/bundle/types.js +2 -0
  9. package/dist/capture/index.d.ts +14 -0
  10. package/dist/capture/index.js +86 -0
  11. package/dist/capture/memory-content-store.d.ts +4 -0
  12. package/dist/capture/memory-content-store.js +42 -0
  13. package/dist/capture/types.d.ts +61 -0
  14. package/dist/capture/types.js +2 -0
  15. package/dist/content-store.js +3 -8
  16. package/dist/content-type.d.ts +1 -1
  17. package/dist/content-type.js +2 -28
  18. package/dist/core/_impl/completion.d.ts +4 -0
  19. package/dist/core/_impl/completion.js +29 -0
  20. package/dist/core/_impl/content-store.d.ts +21 -0
  21. package/dist/core/_impl/content-store.js +91 -0
  22. package/dist/core/_impl/debug.d.ts +1 -0
  23. package/dist/core/_impl/debug.js +16 -0
  24. package/dist/core/_impl/inflight-tracker.d.ts +19 -0
  25. package/dist/core/_impl/inflight-tracker.js +48 -0
  26. package/dist/core/_impl/pagepocket.d.ts +27 -0
  27. package/dist/core/_impl/pagepocket.js +155 -0
  28. package/dist/core/capture/_impl/memory-content-store.d.ts +4 -0
  29. package/dist/core/capture/_impl/memory-content-store.js +42 -0
  30. package/dist/core/capture/_impl/types.d.ts +61 -0
  31. package/dist/core/capture/_impl/types.js +2 -0
  32. package/dist/core/capture/internal/memory-content-store.d.ts +4 -0
  33. package/dist/core/capture/internal/memory-content-store.js +42 -0
  34. package/dist/core/capture/internal/types.d.ts +61 -0
  35. package/dist/core/capture/internal/types.js +2 -0
  36. package/dist/core/capture/memory-content-store.d.ts +4 -0
  37. package/dist/core/capture/memory-content-store.js +38 -0
  38. package/dist/core/capture/types.d.ts +61 -0
  39. package/dist/core/capture/types.js +1 -0
  40. package/dist/core/completion.d.ts +4 -0
  41. package/dist/core/completion.js +23 -0
  42. package/dist/core/content-store.d.ts +21 -0
  43. package/dist/core/content-store.js +54 -0
  44. package/dist/core/debug.d.ts +1 -0
  45. package/dist/core/debug.js +12 -0
  46. package/dist/core/file-tree-merge.d.ts +2 -0
  47. package/dist/core/file-tree-merge.js +27 -0
  48. package/dist/core/file-tree.d.ts +36 -0
  49. package/dist/core/file-tree.js +1 -0
  50. package/dist/core/inflight-tracker.d.ts +19 -0
  51. package/dist/core/inflight-tracker.js +44 -0
  52. package/dist/core/internal/completion.d.ts +4 -0
  53. package/dist/core/internal/completion.js +29 -0
  54. package/dist/core/internal/content-store.d.ts +21 -0
  55. package/dist/core/internal/content-store.js +91 -0
  56. package/dist/core/internal/debug.d.ts +1 -0
  57. package/dist/core/internal/debug.js +16 -0
  58. package/dist/core/internal/inflight-tracker.d.ts +19 -0
  59. package/dist/core/internal/inflight-tracker.js +48 -0
  60. package/dist/core/internal/pagepocket.d.ts +27 -0
  61. package/dist/core/internal/pagepocket.js +155 -0
  62. package/dist/core/pagepocket.d.ts +38 -0
  63. package/dist/core/pagepocket.js +57 -0
  64. package/dist/core/plugin/_impl/context.d.ts +47 -0
  65. package/dist/core/plugin/_impl/context.js +142 -0
  66. package/dist/core/plugin/_impl/runner.d.ts +12 -0
  67. package/dist/core/plugin/_impl/runner.js +232 -0
  68. package/dist/core/plugin/_impl/types.d.ts +108 -0
  69. package/dist/core/plugin/_impl/types.js +2 -0
  70. package/dist/core/plugin/context.d.ts +47 -0
  71. package/dist/core/plugin/context.js +205 -0
  72. package/dist/core/plugin/internal/context.d.ts +47 -0
  73. package/dist/core/plugin/internal/context.js +142 -0
  74. package/dist/core/plugin/internal/runner.d.ts +12 -0
  75. package/dist/core/plugin/internal/runner.js +232 -0
  76. package/dist/core/plugin/internal/types.d.ts +108 -0
  77. package/dist/core/plugin/internal/types.js +2 -0
  78. package/dist/core/plugin/runner-utils.d.ts +9 -0
  79. package/dist/core/plugin/runner-utils.js +29 -0
  80. package/dist/core/plugin/runner.d.ts +12 -0
  81. package/dist/core/plugin/runner.js +118 -0
  82. package/dist/core/plugin/types.d.ts +117 -0
  83. package/dist/core/plugin/types.js +1 -0
  84. package/dist/core/runtime/types.d.ts +14 -0
  85. package/dist/core/runtime/types.js +2 -0
  86. package/dist/css-rewrite.js +1 -5
  87. package/dist/debug.d.ts +0 -1
  88. package/dist/debug.js +3 -5
  89. package/dist/files/types.d.ts +41 -0
  90. package/dist/files/types.js +2 -0
  91. package/dist/hack-html.js +20 -13
  92. package/dist/hackers/index.d.ts +1 -1
  93. package/dist/hackers/index.js +24 -27
  94. package/dist/hackers/preload-fetch.d.ts +1 -1
  95. package/dist/hackers/preload-fetch.js +1 -4
  96. package/dist/hackers/preload-xhr.d.ts +1 -1
  97. package/dist/hackers/preload-xhr.js +1 -4
  98. package/dist/hackers/replay-beacon.d.ts +1 -1
  99. package/dist/hackers/replay-beacon.js +1 -4
  100. package/dist/hackers/replay-block-text-fragment.d.ts +1 -1
  101. package/dist/hackers/replay-block-text-fragment.js +1 -4
  102. package/dist/hackers/replay-css-proxy.d.ts +1 -1
  103. package/dist/hackers/replay-css-proxy.js +9 -12
  104. package/dist/hackers/replay-dom-rewrite.d.ts +1 -1
  105. package/dist/hackers/replay-dom-rewrite.js +165 -154
  106. package/dist/hackers/replay-eventsource.d.ts +1 -1
  107. package/dist/hackers/replay-eventsource.js +1 -4
  108. package/dist/hackers/replay-fetch.d.ts +1 -1
  109. package/dist/hackers/replay-fetch.js +1 -4
  110. package/dist/hackers/replay-history-path.d.ts +1 -1
  111. package/dist/hackers/replay-history-path.js +1 -4
  112. package/dist/hackers/replay-svg-image.d.ts +1 -1
  113. package/dist/hackers/replay-svg-image.js +1 -4
  114. package/dist/hackers/replay-websocket.d.ts +1 -1
  115. package/dist/hackers/replay-websocket.js +1 -4
  116. package/dist/hackers/replay-xhr.d.ts +1 -1
  117. package/dist/hackers/replay-xhr.js +1 -4
  118. package/dist/hackers/types.js +1 -2
  119. package/dist/index.d.ts +29 -13
  120. package/dist/index.js +23 -44
  121. package/dist/kind-map.d.ts +68 -0
  122. package/dist/kind-map.js +58 -0
  123. package/dist/network-store.js +12 -1
  124. package/dist/pagepocket.d.ts +19 -4
  125. package/dist/pagepocket.js +36 -102
  126. package/dist/path-resolver.d.ts +1 -2
  127. package/dist/path-resolver.js +9 -16
  128. package/dist/plugin/builtins/build-snapshot-plugin.d.ts +5 -0
  129. package/dist/plugin/builtins/build-snapshot-plugin.js +84 -0
  130. package/dist/plugin/builtins/replace-elements-plugin.d.ts +8 -0
  131. package/dist/plugin/builtins/replace-elements-plugin.js +13 -0
  132. package/dist/plugin/builtins/to-directory-plugin.d.ts +7 -0
  133. package/dist/plugin/builtins/to-directory-plugin.js +20 -0
  134. package/dist/plugin/builtins/to-zip-plugin.d.ts +5 -0
  135. package/dist/plugin/builtins/to-zip-plugin.js +19 -0
  136. package/dist/plugin/context.d.ts +47 -0
  137. package/dist/plugin/context.js +142 -0
  138. package/dist/plugin/runner.d.ts +12 -0
  139. package/dist/plugin/runner.js +232 -0
  140. package/dist/plugin/types.d.ts +108 -0
  141. package/dist/plugin/types.js +2 -0
  142. package/dist/plugins/build-files-from-capture.d.ts +5 -0
  143. package/dist/plugins/build-files-from-capture.js +85 -0
  144. package/dist/plugins/build-warc.d.ts +5 -0
  145. package/dist/plugins/build-warc.js +225 -0
  146. package/dist/plugins/builtins/manifest.d.ts +2 -0
  147. package/dist/plugins/builtins/manifest.js +42 -0
  148. package/dist/plugins/builtins/snapshot-directory.d.ts +2 -0
  149. package/dist/plugins/builtins/snapshot-directory.js +24 -0
  150. package/dist/plugins/builtins/snapshot-zip.d.ts +2 -0
  151. package/dist/plugins/builtins/snapshot-zip.js +25 -0
  152. package/dist/plugins/capture-http-lighterceptor.d.ts +5 -0
  153. package/dist/plugins/capture-http-lighterceptor.js +85 -0
  154. package/dist/plugins/capture-http-puppeteer.d.ts +5 -0
  155. package/dist/plugins/capture-http-puppeteer.js +85 -0
  156. package/dist/plugins/host.d.ts +37 -0
  157. package/dist/plugins/host.js +105 -0
  158. package/dist/plugins/index.d.ts +6 -0
  159. package/dist/plugins/index.js +11 -0
  160. package/dist/plugins/ordering.d.ts +2 -0
  161. package/dist/plugins/ordering.js +19 -0
  162. package/dist/plugins/types.d.ts +51 -0
  163. package/dist/plugins/types.js +2 -0
  164. package/dist/preload.js +3 -7
  165. package/dist/replace-elements/actions.d.ts +5 -0
  166. package/dist/replace-elements/actions.js +86 -0
  167. package/dist/replace-elements/match.d.ts +5 -0
  168. package/dist/replace-elements/match.js +46 -0
  169. package/dist/replace-elements/normalize.d.ts +21 -0
  170. package/dist/replace-elements/normalize.js +50 -0
  171. package/dist/replace-elements.d.ts +1 -1
  172. package/dist/replace-elements.js +5 -185
  173. package/dist/replay/match-api.d.ts +10 -0
  174. package/dist/replay/match-api.js +162 -0
  175. package/dist/replay/templates/match-api-source.d.ts +1 -0
  176. package/dist/replay/templates/match-api-source.js +137 -0
  177. package/dist/replay/templates/replay-script-template.d.ts +5 -0
  178. package/dist/replay/templates/replay-script-template.js +337 -0
  179. package/dist/replay/templates/resource-proxy-script.d.ts +1 -0
  180. package/dist/replay/templates/resource-proxy-script.js +274 -0
  181. package/dist/replay-script.d.ts +3 -10
  182. package/dist/replay-script.js +11 -625
  183. package/dist/resource-filter.d.ts +1 -1
  184. package/dist/resource-filter.js +1 -5
  185. package/dist/resource-proxy/escape-percent.d.ts +1 -0
  186. package/dist/resource-proxy/escape-percent.js +12 -0
  187. package/dist/resource-proxy/multimap.d.ts +3 -0
  188. package/dist/resource-proxy/multimap.js +18 -0
  189. package/dist/resource-proxy/pathname-variants.d.ts +3 -0
  190. package/dist/resource-proxy/pathname-variants.js +54 -0
  191. package/dist/resource-proxy.d.ts +4 -2
  192. package/dist/resource-proxy.js +48 -117
  193. package/dist/resources.js +4 -42
  194. package/dist/rewrite-links/js-imports.d.ts +3 -0
  195. package/dist/rewrite-links/js-imports.js +56 -0
  196. package/dist/rewrite-links/link-rel.d.ts +2 -0
  197. package/dist/rewrite-links/link-rel.js +10 -0
  198. package/dist/rewrite-links/meta-refresh.d.ts +3 -0
  199. package/dist/rewrite-links/meta-refresh.js +22 -0
  200. package/dist/rewrite-links/skip.d.ts +1 -0
  201. package/dist/rewrite-links/skip.js +10 -0
  202. package/dist/rewrite-links/srcset.d.ts +3 -0
  203. package/dist/rewrite-links/srcset.js +63 -0
  204. package/dist/rewrite-links/url-resolve.d.ts +3 -0
  205. package/dist/rewrite-links/url-resolve.js +13 -0
  206. package/dist/rewrite-links.d.ts +3 -3
  207. package/dist/rewrite-links.js +31 -240
  208. package/dist/snapshot-builder/api.d.ts +3 -0
  209. package/dist/snapshot-builder/api.js +6 -0
  210. package/dist/snapshot-builder/build-snapshot.d.ts +3 -0
  211. package/dist/snapshot-builder/build-snapshot.js +138 -0
  212. package/dist/snapshot-builder/capture-index/index-capture.d.ts +13 -0
  213. package/dist/snapshot-builder/capture-index/index-capture.js +168 -0
  214. package/dist/snapshot-builder/capture-index/index.d.ts +2 -0
  215. package/dist/snapshot-builder/capture-index/index.js +1 -0
  216. package/dist/snapshot-builder/capture-index/types.d.ts +12 -0
  217. package/dist/snapshot-builder/capture-index/types.js +1 -0
  218. package/dist/snapshot-builder/capture-index.d.ts +12 -0
  219. package/dist/snapshot-builder/capture-index.js +173 -0
  220. package/dist/snapshot-builder/emit-document.d.ts +24 -0
  221. package/dist/snapshot-builder/emit-document.js +50 -0
  222. package/dist/snapshot-builder/grouping.d.ts +8 -0
  223. package/dist/snapshot-builder/grouping.js +87 -0
  224. package/dist/snapshot-builder/http.d.ts +6 -0
  225. package/dist/snapshot-builder/http.js +28 -0
  226. package/dist/snapshot-builder/index.d.ts +4 -0
  227. package/dist/snapshot-builder/index.js +2 -0
  228. package/dist/snapshot-builder/path-map.d.ts +3 -0
  229. package/dist/snapshot-builder/path-map.js +35 -0
  230. package/dist/snapshot-builder/resources-path.d.ts +23 -0
  231. package/dist/snapshot-builder/resources-path.js +47 -0
  232. package/dist/snapshot-builder/rewrite-resource.d.ts +18 -0
  233. package/dist/snapshot-builder/rewrite-resource.js +52 -0
  234. package/dist/snapshot-builder/types.d.ts +37 -0
  235. package/dist/snapshot-builder/types.js +2 -0
  236. package/dist/snapshot-builder.d.ts +12 -8
  237. package/dist/snapshot-builder.js +252 -27
  238. package/dist/types.d.ts +122 -78
  239. package/dist/types.js +4 -2
  240. package/dist/units/contracts-bridge.d.ts +76 -0
  241. package/dist/units/contracts-bridge.js +6 -0
  242. package/dist/units/index.d.ts +4 -0
  243. package/dist/units/index.js +2 -0
  244. package/dist/units/runner.d.ts +11 -0
  245. package/dist/units/runner.js +270 -0
  246. package/dist/units/types.d.ts +39 -0
  247. package/dist/units/types.js +1 -0
  248. package/dist/utils/streams.d.ts +2 -0
  249. package/dist/utils/streams.js +29 -0
  250. package/dist/utils.d.ts +35 -1
  251. package/dist/utils.js +107 -29
  252. package/dist/v3/contracts-bridge.d.ts +69 -0
  253. package/dist/v3/contracts-bridge.js +5 -0
  254. package/dist/v3/index.d.ts +4 -0
  255. package/dist/v3/index.js +2 -0
  256. package/dist/v3/runner.d.ts +20 -0
  257. package/dist/v3/runner.js +245 -0
  258. package/dist/v3/types.d.ts +39 -0
  259. package/dist/v3/types.js +1 -0
  260. package/dist/writers.js +3 -1
  261. package/package.json +11 -3
@@ -0,0 +1,11 @@
1
+ import type { ChannelToken } from "@pagepocket/contracts";
2
+ import { type CaptureResult, type Plugin, type Unit } from "./contracts-bridge.js";
3
+ import type { CaptureOptions, EntryInfo, PagePocketOptions } from "./types.js";
4
+ export declare const runCapture: (input: {
5
+ entry: EntryInfo;
6
+ pocketOptions: PagePocketOptions;
7
+ options: CaptureOptions;
8
+ units: Unit[];
9
+ plugins?: Plugin[];
10
+ declaredChannels?: ChannelToken<unknown>[];
11
+ }) => Promise<CaptureResult>;
@@ -0,0 +1,270 @@
1
+ import { TERMINAL_RESULT_KEY } from "./contracts-bridge.js";
2
+ const emptyAsyncIterable = () => ({
3
+ [Symbol.asyncIterator]() {
4
+ return {
5
+ next: async () => ({ value: undefined, done: true })
6
+ };
7
+ }
8
+ });
9
+ class AsyncQueue {
10
+ constructor() {
11
+ this.values = [];
12
+ this.pending = [];
13
+ this.done = false;
14
+ }
15
+ push(value) {
16
+ if (this.done) {
17
+ return;
18
+ }
19
+ const resolve = this.pending.shift();
20
+ if (resolve) {
21
+ resolve({ value, done: false });
22
+ return;
23
+ }
24
+ this.values.push(value);
25
+ }
26
+ close() {
27
+ if (this.done) {
28
+ return;
29
+ }
30
+ this.done = true;
31
+ for (const resolve of this.pending) {
32
+ resolve({ value: undefined, done: true });
33
+ }
34
+ this.pending = [];
35
+ }
36
+ async *iterate() {
37
+ while (true) {
38
+ if (this.values.length > 0) {
39
+ const v = this.values.shift();
40
+ if (v === undefined) {
41
+ continue;
42
+ }
43
+ yield v;
44
+ continue;
45
+ }
46
+ if (this.done) {
47
+ return;
48
+ }
49
+ const next = await new Promise((resolve) => {
50
+ this.pending.push(resolve);
51
+ });
52
+ if (next.done) {
53
+ return;
54
+ }
55
+ yield next.value;
56
+ }
57
+ }
58
+ }
59
+ class DeferredTracker {
60
+ constructor() {
61
+ this.promises = [];
62
+ }
63
+ add(p) {
64
+ this.promises.push(p);
65
+ }
66
+ async awaitAll() {
67
+ const ps = this.promises;
68
+ this.promises = [];
69
+ await Promise.allSettled(ps);
70
+ }
71
+ }
72
+ class ElementPatchRegistryImpl {
73
+ constructor() {
74
+ this.items = [];
75
+ }
76
+ contribute(_source, rules) {
77
+ this.items.push({ rules });
78
+ }
79
+ contributeLazy(_source, build) {
80
+ this.items.push({ build });
81
+ }
82
+ async compile() {
83
+ const out = [];
84
+ for (const item of this.items) {
85
+ if (item.rules) {
86
+ out.push(...item.rules);
87
+ continue;
88
+ }
89
+ if (item.build) {
90
+ const rules = await item.build();
91
+ out.push(...rules);
92
+ }
93
+ }
94
+ return out;
95
+ }
96
+ }
97
+ class RuntimeImpl {
98
+ constructor(input) {
99
+ this.channels = new Map();
100
+ this.deferred = new DeferredTracker();
101
+ this.elements = new ElementPatchRegistryImpl();
102
+ this.entry = input.entry;
103
+ this.options = input.options;
104
+ this.pocketOptions = input.pocketOptions;
105
+ }
106
+ publish(t, value) {
107
+ const state = this.channels.get(t.id);
108
+ if (!state || state.closed) {
109
+ // Channel was never declared or subscribed to, or is already closed: drop the value.
110
+ return;
111
+ }
112
+ state.hasPublisher = true;
113
+ for (const sub of state.subs) {
114
+ sub.push(value);
115
+ }
116
+ }
117
+ subscribe(t) {
118
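+ // subscribe must never throw: a channel that was not declared is created on demand.
119
+ // The subscriber is registered immediately so that it receives every value published from now on.
120
+ // If nothing is ever published, the iterator simply completes when the channel is closed; an already-closed channel yields an empty iterable.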
121
+ if (!this.channels.has(t.id)) {
122
+ this.channels.set(t.id, { hasPublisher: false, subs: new Set(), closed: false });
123
+ }
124
+ const state = this.channels.get(t.id);
125
+ if (!state || state.closed) {
126
+ return emptyAsyncIterable();
127
+ }
128
+ const q = new AsyncQueue();
129
+ const sub = {
130
+ push: (v) => q.push(v),
131
+ close: () => q.close()
132
+ };
133
+ state.subs.add(sub);
134
+ const owner = this;
135
+ return (async function* () {
136
+ try {
137
+ for await (const v of q.iterate()) {
138
+ yield v;
139
+ }
140
+ }
141
+ finally {
142
+ // Best-effort cleanup: unregister this subscriber and close its queue once iteration ends or is abandoned.
143
+ const s = owner.channels.get(t.id);
144
+ s?.subs.delete(sub);
145
+ q.close();
146
+ }
147
+ })();
148
+ }
149
+ hasPublisher(t) {
150
+ return this.channels.get(t.id)?.hasPublisher === true;
151
+ }
152
+ defer(promise) {
153
+ this.deferred.add(promise);
154
+ }
155
+ _ensureChannel(t) {
156
+ if (this.channels.has(t.id)) {
157
+ return;
158
+ }
159
+ this.channels.set(t.id, { hasPublisher: false, subs: new Set(), closed: false });
160
+ }
161
+ async _closeAllChannels() {
162
+ for (const state of this.channels.values()) {
163
+ if (state.closed) {
164
+ continue;
165
+ }
166
+ state.closed = true;
167
+ for (const sub of state.subs) {
168
+ sub.close();
169
+ }
170
+ state.subs.clear();
171
+ }
172
+ }
173
+ async _awaitDeferred() {
174
+ await this.deferred.awaitAll();
175
+ }
176
+ }
177
+ export const runCapture = async (input) => {
178
+ const rt = new RuntimeImpl({
179
+ entry: input.entry,
180
+ options: input.options,
181
+ pocketOptions: input.pocketOptions
182
+ });
183
+ for (const ch of input.declaredChannels ?? []) {
184
+ rt._ensureChannel(ch);
185
+ }
186
+ const pluginHost = {
187
+ entry: rt.entry,
188
+ options: rt.options,
189
+ subscribe: (t) => rt.subscribe(t),
190
+ hasPublisher: (t) => rt.hasPublisher(t),
191
+ elements: rt.elements,
192
+ defer: (p) => rt.defer(p)
193
+ };
194
+ const pluginSetupValues = new Map();
195
+ for (const plugin of input.plugins ?? []) {
196
+ const v = await plugin.setup(pluginHost);
197
+ if (typeof v !== "undefined") {
198
+ pluginSetupValues.set(plugin, v);
199
+ }
200
+ }
201
+ const mergePatch = (ctx, patch) => {
202
+ const next = { value: {} };
203
+ for (const [k, v] of Object.entries(patch)) {
204
+ if (k === TERMINAL_RESULT_KEY) {
205
+ continue;
206
+ }
207
+ next.value[k] = v;
208
+ }
209
+ return next;
210
+ };
211
+ let ctx = { value: {} };
212
+ let result;
213
+ try {
214
+ for (const unit of input.units) {
215
+ if (result) {
216
+ break;
217
+ }
218
+ const baseCtx = ctx;
219
+ const boundPlugins = (input.plugins ?? []).filter((plugin) => {
220
+ const unitId = plugin.constructor?.unitId;
221
+ return unitId === unit.id;
222
+ });
223
+ let pluginContributedValue = {};
224
+ if (boundPlugins.length > 0) {
225
+ const pluginResults = await Promise.allSettled(boundPlugins
226
+ .filter((p) => typeof p.contribute === "function")
227
+ .map(async (p) => p.contribute({ value: ctx.value, setupValue: pluginSetupValues.get(p) }, rt)));
228
+ for (const r of pluginResults) {
229
+ if (r.status !== "fulfilled") {
230
+ continue;
231
+ }
232
+ const patch = r.value;
233
+ if (!patch || typeof patch !== "object") {
234
+ continue;
235
+ }
236
+ const patchValue = patch;
237
+ pluginContributedValue = {
238
+ ...pluginContributedValue,
239
+ ...mergePatch({ value: {} }, patchValue).value
240
+ };
241
+ const terminal = patchValue[TERMINAL_RESULT_KEY];
242
+ if (terminal) {
243
+ result = terminal;
244
+ }
245
+ }
246
+ }
247
+ if (result) {
248
+ break;
249
+ }
250
+ const out = (await unit.run(baseCtx, rt)) ?? {};
251
+ const unitReturnValue = out && typeof out === "object" ? out : {};
252
+ const merged = unit.merge(unitReturnValue, pluginContributedValue);
253
+ const mergedPatch = merged && typeof merged === "object" ? merged : {};
254
+ ctx = { value: mergePatch({ value: {} }, mergedPatch).value };
255
+ const terminal = mergedPatch[TERMINAL_RESULT_KEY];
256
+ if (terminal) {
257
+ result = terminal;
258
+ break;
259
+ }
260
+ }
261
+ }
262
+ finally {
263
+ await rt._closeAllChannels();
264
+ await rt._awaitDeferred();
265
+ }
266
+ if (!result) {
267
+ throw new Error("No terminal result. Ensure a terminal unit returns a CaptureResult or calls finish().");
268
+ }
269
+ return result;
270
+ };
@@ -0,0 +1,39 @@
1
+ import type { CompletionStrategy, ContentStore, PathResolver, ResourceFilter } from "../types.js";
2
+ export type EntryInfo = {
3
+ kind: "url";
4
+ url: string;
5
+ } | {
6
+ kind: "puppeteer-page";
7
+ page: unknown;
8
+ url?: string;
9
+ } | {
10
+ kind: "cdp-tab";
11
+ tabId: number;
12
+ url?: string;
13
+ } | {
14
+ kind: "html-string";
15
+ htmlString: string;
16
+ baseUrl: string;
17
+ url?: string;
18
+ } | {
19
+ kind: "document";
20
+ baseUrl: string;
21
+ url?: string;
22
+ };
23
+ export interface CaptureOptions {
24
+ completion?: CompletionStrategy | CompletionStrategy[];
25
+ timeoutMs?: number;
26
+ maxDurationMs?: number;
27
+ filter?: ResourceFilter;
28
+ blacklist?: RegExp[];
29
+ pathResolver?: PathResolver;
30
+ contentStore?: ContentStore;
31
+ rewriteEntry?: boolean;
32
+ rewriteCSS?: boolean;
33
+ limits?: {
34
+ maxTotalBytes?: number;
35
+ maxSingleResourceBytes?: number;
36
+ maxResources?: number;
37
+ };
38
+ }
39
+ export type PagePocketOptions = import("../types.js").PagePocketOptions;
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,2 @@
1
+ export declare const streamToUint8Array: (stream: ReadableStream<Uint8Array>) => Promise<Uint8Array<ArrayBuffer>>;
2
+ export declare const streamFromUint8Array: (bytes: Uint8Array) => ReadableStream<Uint8Array>;
@@ -0,0 +1,29 @@
1
+ export const streamToUint8Array = async (stream) => {
2
+ const reader = stream.getReader();
3
+ const chunks = [];
4
+ let total = 0;
5
+ while (true) {
6
+ const result = await reader.read();
7
+ if (result.done) {
8
+ break;
9
+ }
10
+ const value = result.value;
11
+ if (value) {
12
+ chunks.push(value);
13
+ total += value.byteLength;
14
+ }
15
+ }
16
+ const output = new Uint8Array(total);
17
+ let offset = 0;
18
+ for (const chunk of chunks) {
19
+ output.set(chunk, offset);
20
+ offset += chunk.byteLength;
21
+ }
22
+ return output;
23
+ };
24
+ export const streamFromUint8Array = (bytes) => new ReadableStream({
25
+ start(controller) {
26
+ controller.enqueue(bytes);
27
+ controller.close();
28
+ }
29
+ });
package/dist/utils.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import type { BodySource } from "./types";
1
+ import type { BodySource } from "./types.js";
2
2
  export declare const sleep: (ms: number) => Promise<unknown>;
3
3
  export declare const hashString: (value: string) => string;
4
4
  export declare const stripLeadingSlash: (value: string) => string;
@@ -17,3 +17,37 @@ export declare const bodyToTextOrBase64: (bytes: Uint8Array, mimeType?: string)
17
17
  base64: string;
18
18
  text?: undefined;
19
19
  };
20
+ type UrlEquivalentOptions = {
21
+ /**
22
+ * Base URL used to resolve relative URLs.
23
+ *
24
+ * If omitted, both inputs must already be absolute URLs; otherwise they are treated as not equivalent.
25
+ */
26
+ baseUrl?: string;
27
+ /**
28
+ * Ignore URL hash (fragment) when comparing.
29
+ *
30
+ * Default: true
31
+ */
32
+ ignoreHash?: boolean;
33
+ /**
34
+ * Treat default ports as equivalent (http:80, https:443).
35
+ *
36
+ * Default: true
37
+ */
38
+ ignoreDefaultPort?: boolean;
39
+ /**
40
+ * Normalize trailing slashes in pathname when comparing.
41
+ *
42
+ * Default: true
43
+ */
44
+ normalizeTrailingSlash?: boolean;
45
+ };
46
+ /**
47
+ * Compare two URLs for semantic equivalence under conservative normalization. http and https are treated as the same scheme; data: and blob: URLs, and inputs that cannot be parsed, are never equivalent.
48
+ *
49
+ * This is intended to be shared by both static resource mapping (resources_path.json)
50
+ * and API matching (api.json), so it returns only a boolean and avoids lossy heuristics.
51
+ */
52
+ export declare const urlEquivalent: (a: string, b: string, options?: UrlEquivalentOptions) => boolean;
53
+ export {};
package/dist/utils.js CHANGED
@@ -1,12 +1,8 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.bodyToTextOrBase64 = exports.toUint8Array = exports.isUtf8Text = exports.decodeUtf8 = exports.bytesToBase64 = exports.sanitizePosixPath = exports.ensureLeadingSlash = exports.stripLeadingSlash = exports.hashString = exports.sleep = void 0;
4
- const content_type_1 = require("./content-type");
1
+ import { isTextResponse } from "./content-type.js";
5
2
  const FNV_OFFSET = 0x811c9dc5;
6
3
  const FNV_PRIME = 0x01000193;
7
- const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
8
- exports.sleep = sleep;
9
- const hashString = (value) => {
4
+ export const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
5
+ export const hashString = (value) => {
10
6
  let hash = FNV_OFFSET;
11
7
  for (let i = 0; i < value.length; i += 1) {
12
8
  hash ^= value.charCodeAt(i);
@@ -14,12 +10,9 @@ const hashString = (value) => {
14
10
  }
15
11
  return hash.toString(16).padStart(8, "0");
16
12
  };
17
- exports.hashString = hashString;
18
- const stripLeadingSlash = (value) => value.replace(/^\/+/, "");
19
- exports.stripLeadingSlash = stripLeadingSlash;
20
- const ensureLeadingSlash = (value) => (value.startsWith("/") ? value : `/${value}`);
21
- exports.ensureLeadingSlash = ensureLeadingSlash;
22
- const sanitizePosixPath = (value) => {
13
+ export const stripLeadingSlash = (value) => value.replace(/^\/+/, "");
14
+ export const ensureLeadingSlash = (value) => (value.startsWith("/") ? value : `/${value}`);
15
+ export const sanitizePosixPath = (value) => {
23
16
  const parts = value.split("/").filter(Boolean);
24
17
  const clean = [];
25
18
  for (const part of parts) {
@@ -31,11 +24,10 @@ const sanitizePosixPath = (value) => {
31
24
  }
32
25
  return clean.join("/");
33
26
  };
34
- exports.sanitizePosixPath = sanitizePosixPath;
35
27
  const getGlobalBuffer = () => {
36
28
  return globalThis.Buffer;
37
29
  };
38
- const bytesToBase64 = (bytes) => {
30
+ export const bytesToBase64 = (bytes) => {
39
31
  const BufferCtor = getGlobalBuffer();
40
32
  if (BufferCtor) {
41
33
  return BufferCtor.from(bytes).toString("base64");
@@ -48,8 +40,7 @@ const bytesToBase64 = (bytes) => {
48
40
  }
49
41
  return btoa(binary);
50
42
  };
51
- exports.bytesToBase64 = bytesToBase64;
52
- const decodeUtf8 = (bytes) => {
43
+ export const decodeUtf8 = (bytes) => {
53
44
  try {
54
45
  const decoder = new TextDecoder("utf-8", { fatal: true });
55
46
  return decoder.decode(bytes);
@@ -58,19 +49,17 @@ const decodeUtf8 = (bytes) => {
58
49
  return null;
59
50
  }
60
51
  };
61
- exports.decodeUtf8 = decodeUtf8;
62
- const isUtf8Text = (bytes, mimeType) => {
63
- const decoded = (0, exports.decodeUtf8)(bytes);
52
+ export const isUtf8Text = (bytes, mimeType) => {
53
+ const decoded = decodeUtf8(bytes);
64
54
  if (decoded === null) {
65
55
  return false;
66
56
  }
67
57
  if (mimeType) {
68
- return (0, content_type_1.isTextResponse)(mimeType);
58
+ return isTextResponse(mimeType);
69
59
  }
70
60
  return true;
71
61
  };
72
- exports.isUtf8Text = isUtf8Text;
73
- const toUint8Array = async (body) => {
62
+ export const toUint8Array = async (body) => {
74
63
  if (body.kind === "buffer") {
75
64
  return body.data;
76
65
  }
@@ -98,12 +87,101 @@ const toUint8Array = async (body) => {
98
87
  }
99
88
  return output;
100
89
  };
101
- exports.toUint8Array = toUint8Array;
102
- const bodyToTextOrBase64 = (bytes, mimeType) => {
103
- if ((0, exports.isUtf8Text)(bytes, mimeType)) {
104
- const text = (0, exports.decodeUtf8)(bytes) ?? "";
90
+ // To wrap bytes in a ReadableStream, prefer streamFromUint8Array from "./utils/streams.js".
91
+ export const bodyToTextOrBase64 = (bytes, mimeType) => {
92
+ if (isUtf8Text(bytes, mimeType)) {
93
+ const text = decodeUtf8(bytes) ?? "";
105
94
  return { encoding: "text", text };
106
95
  }
107
- return { encoding: "base64", base64: (0, exports.bytesToBase64)(bytes) };
96
+ return { encoding: "base64", base64: bytesToBase64(bytes) };
97
+ };
98
+ const toUrlOrNull = (input, baseUrl) => {
99
+ try {
100
+ if (baseUrl) {
101
+ return new URL(input, baseUrl);
102
+ }
103
+ return new URL(input);
104
+ }
105
+ catch {
106
+ return null;
107
+ }
108
+ };
109
+ const stripTrailingSlash = (value) => value.replace(/\/+$/, "");
110
+ const normalizePortForCompare = (url, ignoreDefaultPort) => {
111
+ if (!ignoreDefaultPort) {
112
+ return url.port;
113
+ }
114
+ const protocol = url.protocol;
115
+ const port = url.port;
116
+ if (!port) {
117
+ return "";
118
+ }
119
+ if (protocol === "http:" && port === "80") {
120
+ return "";
121
+ }
122
+ if (protocol === "https:" && port === "443") {
123
+ return "";
124
+ }
125
+ return port;
126
+ };
127
+ const areProtocolsEquivalent = (a, b) => {
128
+ if (a === b) {
129
+ return true;
130
+ }
131
+ // Treat http/https as equivalent for matching.
132
+ if ((a === "http:" || a === "https:") && (b === "http:" || b === "https:")) {
133
+ return true;
134
+ }
135
+ return false;
136
+ };
137
+ /**
138
+ * Compare two URLs for semantic equivalence under conservative normalization. http and https are treated as the same scheme; data: and blob: URLs, and inputs that cannot be parsed, are never equivalent.
139
+ *
140
+ * This is intended to be shared by both static resource mapping (resources_path.json)
141
+ * and API matching (api.json), so it returns only a boolean and avoids lossy heuristics.
142
+ */
143
+ export const urlEquivalent = (a, b, options = {}) => {
144
+ const { baseUrl, ignoreHash = true, ignoreDefaultPort = true, normalizeTrailingSlash: shouldNormalizeTrailingSlash = true } = options;
145
+ const left = toUrlOrNull(String(a), baseUrl);
146
+ const right = toUrlOrNull(String(b), baseUrl);
147
+ if (!left || !right) {
148
+ return false;
149
+ }
150
+ // Exclude non-network schemes from equivalence checks.
151
+ if (left.protocol === "data:" || left.protocol === "blob:") {
152
+ return false;
153
+ }
154
+ if (right.protocol === "data:" || right.protocol === "blob:") {
155
+ return false;
156
+ }
157
+ if (!areProtocolsEquivalent(left.protocol, right.protocol)) {
158
+ return false;
159
+ }
160
+ // Compare host and port (with optional default-port normalization).
161
+ const leftHost = left.hostname;
162
+ const rightHost = right.hostname;
163
+ if (leftHost !== rightHost) {
164
+ return false;
165
+ }
166
+ const leftPort = normalizePortForCompare(left, ignoreDefaultPort);
167
+ const rightPort = normalizePortForCompare(right, ignoreDefaultPort);
168
+ if (leftPort !== rightPort) {
169
+ return false;
170
+ }
171
+ const leftPath = shouldNormalizeTrailingSlash ? stripTrailingSlash(left.pathname) : left.pathname;
172
+ const rightPath = shouldNormalizeTrailingSlash
173
+ ? stripTrailingSlash(right.pathname)
174
+ : right.pathname;
175
+ if (leftPath !== rightPath) {
176
+ return false;
177
+ }
178
+ if (left.search !== right.search) {
179
+ return false;
180
+ }
181
+ if (!ignoreHash) {
182
+ if (left.hash !== right.hash) {
183
+ return false;
184
+ }
185
+ }
186
+ return true;
108
187
  };
109
- exports.bodyToTextOrBase64 = bodyToTextOrBase64;
@@ -0,0 +1,69 @@
1
+ import type { ArtifactToken, ChannelToken, ReplaceElementsConfig } from "@pagepocket/contracts";
2
/**
 * Outcome of a capture, discriminated by `kind`.
 *
 * - `"raw"`: carries `outputDir`.
 * - `"zip"`: carries the archive payload (`Uint8Array` or `Blob`) and an `outputPath`.
 * - `"html"` / `"text"`: carry the produced string.
 * - `"custom"`: carries an arbitrary `value`; unlike the other variants it has no `meta` field.
 */
export type CaptureResult =
    | { kind: "raw"; outputDir: string; meta?: unknown }
    | { kind: "zip"; zip: { data: Uint8Array | Blob; outputPath: string }; meta?: unknown }
    | { kind: "html"; html: string; meta?: unknown }
    | { kind: "text"; text: string; meta?: unknown }
    | { kind: "custom"; value: unknown };
25
/** A promise handed to `defer()`; its resolved value is not typed (`unknown`). */
export type DeferredHandle =
    Promise<unknown>;
26
/**
 * Registry that accepts element-replacement rules from named units or plugins.
 */
export interface ElementPatchRegistry {
    /**
     * Contribute a ready-made rule set.
     * @param source - Who contributes: a `"unit"` or a `"plugin"`, plus its name.
     * @param rules - The rules to contribute.
     */
    contribute(
        source: { type: "unit" | "plugin"; name: string },
        rules: ReplaceElementsConfig
    ): void;

    /**
     * Contribute rules through a builder that may be synchronous or asynchronous.
     * @param source - Who contributes: a `"unit"` or a `"plugin"`, plus its name.
     * @param build - Produces the rules, directly or via a promise.
     */
    contributeLazy(
        source: { type: "unit" | "plugin"; name: string },
        build: () => ReplaceElementsConfig | Promise<ReplaceElementsConfig>
    ): void;

    /**
     * Resolve to a single `ReplaceElementsConfig`.
     * NOTE(review): presumably the combination of all contributions — confirm against the implementation.
     */
    compile(): Promise<ReplaceElementsConfig>;
}
37
/**
 * Runtime surface handed to a `Unit`: read-only capture context, artifact
 * storage keyed by `ArtifactToken`, channels keyed by `ChannelToken`, the
 * element patch registry, and completion / deferral hooks.
 */
export interface UnitRuntime {
    // --- read-only context ---
    readonly entry: import("./types.js").EntryInfo;
    readonly options: import("./types.js").CaptureOptions;
    readonly pocketOptions: import("./types.js").PagePocketOptions;

    // --- artifacts ---
    /** Store `value` under the artifact token `t`. */
    provide<T>(t: ArtifactToken<T>, value: T): void;
    /** Look up the artifact for `t`; the result may be `undefined`. */
    get<T>(t: ArtifactToken<T>): T | undefined;
    /**
     * Look up the artifact for `t`; the return type excludes `undefined`.
     * NOTE(review): presumably throws when the artifact is missing — confirm.
     */
    require<T>(t: ArtifactToken<T>): T;

    // --- channels ---
    /** Publish `value` on channel `t`. */
    publish<T>(t: ChannelToken<T>, value: T): void;
    /** Obtain an async iterable of values for channel `t`. */
    subscribe<T>(t: ChannelToken<T>): AsyncIterable<T>;
    /** Report whether channel `t` has a publisher. */
    hasPublisher(t: ChannelToken<unknown>): boolean;

    // --- element patches ---
    readonly elements: ElementPatchRegistry;

    // --- completion ---
    /** Hand over the capture result. */
    finish(result: CaptureResult): void;
    /** Register a promise with the runtime. */
    defer(promise: DeferredHandle): void;
}
51
/**
 * Surface handed to a `Plugin` in `setup()`. Compared with `UnitRuntime` it
 * exposes no artifact access, no `publish`, no `finish` and no `pocketOptions`.
 */
export interface PluginHost {
    // --- read-only context ---
    readonly entry: import("./types.js").EntryInfo;
    readonly options: import("./types.js").CaptureOptions;

    // --- channels (consume only) ---
    /** Obtain an async iterable of values for channel `t`. */
    subscribe<T>(t: ChannelToken<T>): AsyncIterable<T>;
    /** Report whether channel `t` has a publisher. */
    hasPublisher(t: ChannelToken<unknown>): boolean;

    // --- element patches ---
    readonly elements: ElementPatchRegistry;

    /** Register a promise with the host. */
    defer(promise: DeferredHandle): void;
}
59
/**
 * Base class for capture units. Subclasses supply `id`, `kind` and `run()`;
 * the runtime is handed over through `attach()` and is available to
 * subclasses as the protected `rt` field.
 */
export declare abstract class Unit {
    /** Runtime received via `attach()`. */
    protected rt: UnitRuntime;

    abstract readonly id: string;
    abstract readonly kind: string;

    /** Receive the runtime for this unit. */
    attach(rt: UnitRuntime): void;

    /** Execute the unit; may resolve to a `CaptureResult` or to nothing. */
    abstract run(): Promise<void | CaptureResult>;
}
66
/** A named extension that is given a `PluginHost` during setup. */
export interface Plugin {
    /** Plugin name. */
    readonly name: string;

    /** Set the plugin up against `host`; may complete synchronously or asynchronously. */
    setup(host: PluginHost): void | Promise<void>;
}
@@ -0,0 +1,5 @@
1
/** Runtime base class for capture units; only stores the attached runtime. */
export class Unit {
    /**
     * Remember the runtime on this instance as `rt`.
     * @param rt - Runtime object to keep.
     */
    attach(rt) {
        Object.assign(this, { rt });
    }
}
@@ -0,0 +1,4 @@
1
+ export type { CaptureOptions, EntryInfo } from "./types.js";
2
+ export type { CaptureResult, Plugin, PluginHost, UnitRuntime } from "./contracts-bridge.js";
3
+ export { Unit } from "./contracts-bridge.js";
4
+ export { runCaptureV3 as runCapture } from "./runner.js";
@@ -0,0 +1,2 @@
1
+ export { Unit } from "./contracts-bridge.js";
2
+ export { runCaptureV3 as runCapture } from "./runner.js";
@@ -0,0 +1,20 @@
1
+ import type { ChannelToken } from "@pagepocket/contracts";
2
+ import type { Plugin, Unit } from "./contracts-bridge.js";
3
+ import { type CaptureResult } from "./contracts-bridge.js";
4
+ import type { CaptureOptions, EntryInfo, PagePocketOptions } from "./types.js";
5
/**
 * Run a capture and resolve to its `CaptureResult`.
 *
 * @param input - The entry, pocket options, capture options and the units to
 *   run, with optional plugins and optional declared channels.
 * @returns A promise for the capture result.
 */
export declare const runCaptureV3: (
    input: {
        entry: EntryInfo;
        pocketOptions: PagePocketOptions;
        options: CaptureOptions;
        units: Unit[];
        plugins?: Plugin[];
        declaredChannels?: ChannelToken<unknown>[];
    }
) => Promise<CaptureResult>;
13
/**
 * Declared with exactly the same signature as `runCaptureV3`: takes the entry,
 * pocket options, capture options, units, optional plugins and optional
 * declared channels, and resolves to a `CaptureResult`.
 */
export declare const runCapture: typeof runCaptureV3;