@pagepocket/lib 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. package/README.md +7 -6
  2. package/dist/build-snapshot-from-bundle.d.ts +23 -0
  3. package/dist/build-snapshot-from-bundle.js +68 -0
  4. package/dist/builtin-blacklist.js +3 -6
  5. package/dist/bundle/from-network-store.d.ts +10 -0
  6. package/dist/bundle/from-network-store.js +26 -0
  7. package/dist/bundle/types.d.ts +32 -0
  8. package/dist/bundle/types.js +2 -0
  9. package/dist/capture/index.d.ts +14 -0
  10. package/dist/capture/index.js +86 -0
  11. package/dist/capture/memory-content-store.d.ts +4 -0
  12. package/dist/capture/memory-content-store.js +42 -0
  13. package/dist/capture/types.d.ts +61 -0
  14. package/dist/capture/types.js +2 -0
  15. package/dist/content-store.js +3 -8
  16. package/dist/content-type.d.ts +1 -1
  17. package/dist/content-type.js +2 -28
  18. package/dist/core/_impl/completion.d.ts +4 -0
  19. package/dist/core/_impl/completion.js +29 -0
  20. package/dist/core/_impl/content-store.d.ts +21 -0
  21. package/dist/core/_impl/content-store.js +91 -0
  22. package/dist/core/_impl/debug.d.ts +1 -0
  23. package/dist/core/_impl/debug.js +16 -0
  24. package/dist/core/_impl/inflight-tracker.d.ts +19 -0
  25. package/dist/core/_impl/inflight-tracker.js +48 -0
  26. package/dist/core/_impl/pagepocket.d.ts +27 -0
  27. package/dist/core/_impl/pagepocket.js +155 -0
  28. package/dist/core/capture/_impl/memory-content-store.d.ts +4 -0
  29. package/dist/core/capture/_impl/memory-content-store.js +42 -0
  30. package/dist/core/capture/_impl/types.d.ts +61 -0
  31. package/dist/core/capture/_impl/types.js +2 -0
  32. package/dist/core/capture/internal/memory-content-store.d.ts +4 -0
  33. package/dist/core/capture/internal/memory-content-store.js +42 -0
  34. package/dist/core/capture/internal/types.d.ts +61 -0
  35. package/dist/core/capture/internal/types.js +2 -0
  36. package/dist/core/capture/memory-content-store.d.ts +4 -0
  37. package/dist/core/capture/memory-content-store.js +38 -0
  38. package/dist/core/capture/types.d.ts +61 -0
  39. package/dist/core/capture/types.js +1 -0
  40. package/dist/core/completion.d.ts +4 -0
  41. package/dist/core/completion.js +23 -0
  42. package/dist/core/content-store.d.ts +21 -0
  43. package/dist/core/content-store.js +54 -0
  44. package/dist/core/debug.d.ts +1 -0
  45. package/dist/core/debug.js +12 -0
  46. package/dist/core/file-tree-merge.d.ts +2 -0
  47. package/dist/core/file-tree-merge.js +27 -0
  48. package/dist/core/file-tree.d.ts +36 -0
  49. package/dist/core/file-tree.js +1 -0
  50. package/dist/core/inflight-tracker.d.ts +19 -0
  51. package/dist/core/inflight-tracker.js +44 -0
  52. package/dist/core/internal/completion.d.ts +4 -0
  53. package/dist/core/internal/completion.js +29 -0
  54. package/dist/core/internal/content-store.d.ts +21 -0
  55. package/dist/core/internal/content-store.js +91 -0
  56. package/dist/core/internal/debug.d.ts +1 -0
  57. package/dist/core/internal/debug.js +16 -0
  58. package/dist/core/internal/inflight-tracker.d.ts +19 -0
  59. package/dist/core/internal/inflight-tracker.js +48 -0
  60. package/dist/core/internal/pagepocket.d.ts +27 -0
  61. package/dist/core/internal/pagepocket.js +155 -0
  62. package/dist/core/pagepocket.d.ts +38 -0
  63. package/dist/core/pagepocket.js +57 -0
  64. package/dist/core/plugin/_impl/context.d.ts +47 -0
  65. package/dist/core/plugin/_impl/context.js +142 -0
  66. package/dist/core/plugin/_impl/runner.d.ts +12 -0
  67. package/dist/core/plugin/_impl/runner.js +232 -0
  68. package/dist/core/plugin/_impl/types.d.ts +108 -0
  69. package/dist/core/plugin/_impl/types.js +2 -0
  70. package/dist/core/plugin/context.d.ts +47 -0
  71. package/dist/core/plugin/context.js +205 -0
  72. package/dist/core/plugin/internal/context.d.ts +47 -0
  73. package/dist/core/plugin/internal/context.js +142 -0
  74. package/dist/core/plugin/internal/runner.d.ts +12 -0
  75. package/dist/core/plugin/internal/runner.js +232 -0
  76. package/dist/core/plugin/internal/types.d.ts +108 -0
  77. package/dist/core/plugin/internal/types.js +2 -0
  78. package/dist/core/plugin/runner-utils.d.ts +9 -0
  79. package/dist/core/plugin/runner-utils.js +29 -0
  80. package/dist/core/plugin/runner.d.ts +12 -0
  81. package/dist/core/plugin/runner.js +118 -0
  82. package/dist/core/plugin/types.d.ts +117 -0
  83. package/dist/core/plugin/types.js +1 -0
  84. package/dist/core/runtime/types.d.ts +14 -0
  85. package/dist/core/runtime/types.js +2 -0
  86. package/dist/css-rewrite.js +1 -5
  87. package/dist/debug.d.ts +0 -1
  88. package/dist/debug.js +3 -5
  89. package/dist/files/types.d.ts +41 -0
  90. package/dist/files/types.js +2 -0
  91. package/dist/hack-html.js +20 -13
  92. package/dist/hackers/index.d.ts +1 -1
  93. package/dist/hackers/index.js +24 -27
  94. package/dist/hackers/preload-fetch.d.ts +1 -1
  95. package/dist/hackers/preload-fetch.js +1 -4
  96. package/dist/hackers/preload-xhr.d.ts +1 -1
  97. package/dist/hackers/preload-xhr.js +1 -4
  98. package/dist/hackers/replay-beacon.d.ts +1 -1
  99. package/dist/hackers/replay-beacon.js +1 -4
  100. package/dist/hackers/replay-block-text-fragment.d.ts +1 -1
  101. package/dist/hackers/replay-block-text-fragment.js +1 -4
  102. package/dist/hackers/replay-css-proxy.d.ts +1 -1
  103. package/dist/hackers/replay-css-proxy.js +9 -12
  104. package/dist/hackers/replay-dom-rewrite.d.ts +1 -1
  105. package/dist/hackers/replay-dom-rewrite.js +165 -154
  106. package/dist/hackers/replay-eventsource.d.ts +1 -1
  107. package/dist/hackers/replay-eventsource.js +1 -4
  108. package/dist/hackers/replay-fetch.d.ts +1 -1
  109. package/dist/hackers/replay-fetch.js +1 -4
  110. package/dist/hackers/replay-history-path.d.ts +1 -1
  111. package/dist/hackers/replay-history-path.js +1 -4
  112. package/dist/hackers/replay-svg-image.d.ts +1 -1
  113. package/dist/hackers/replay-svg-image.js +1 -4
  114. package/dist/hackers/replay-websocket.d.ts +1 -1
  115. package/dist/hackers/replay-websocket.js +1 -4
  116. package/dist/hackers/replay-xhr.d.ts +1 -1
  117. package/dist/hackers/replay-xhr.js +1 -4
  118. package/dist/hackers/types.js +1 -2
  119. package/dist/index.d.ts +29 -13
  120. package/dist/index.js +23 -44
  121. package/dist/kind-map.d.ts +68 -0
  122. package/dist/kind-map.js +58 -0
  123. package/dist/network-store.js +12 -1
  124. package/dist/pagepocket.d.ts +19 -4
  125. package/dist/pagepocket.js +36 -102
  126. package/dist/path-resolver.d.ts +1 -2
  127. package/dist/path-resolver.js +9 -16
  128. package/dist/plugin/builtins/build-snapshot-plugin.d.ts +5 -0
  129. package/dist/plugin/builtins/build-snapshot-plugin.js +84 -0
  130. package/dist/plugin/builtins/replace-elements-plugin.d.ts +8 -0
  131. package/dist/plugin/builtins/replace-elements-plugin.js +13 -0
  132. package/dist/plugin/builtins/to-directory-plugin.d.ts +7 -0
  133. package/dist/plugin/builtins/to-directory-plugin.js +20 -0
  134. package/dist/plugin/builtins/to-zip-plugin.d.ts +5 -0
  135. package/dist/plugin/builtins/to-zip-plugin.js +19 -0
  136. package/dist/plugin/context.d.ts +47 -0
  137. package/dist/plugin/context.js +142 -0
  138. package/dist/plugin/runner.d.ts +12 -0
  139. package/dist/plugin/runner.js +232 -0
  140. package/dist/plugin/types.d.ts +108 -0
  141. package/dist/plugin/types.js +2 -0
  142. package/dist/plugins/build-files-from-capture.d.ts +5 -0
  143. package/dist/plugins/build-files-from-capture.js +85 -0
  144. package/dist/plugins/build-warc.d.ts +5 -0
  145. package/dist/plugins/build-warc.js +225 -0
  146. package/dist/plugins/builtins/manifest.d.ts +2 -0
  147. package/dist/plugins/builtins/manifest.js +42 -0
  148. package/dist/plugins/builtins/snapshot-directory.d.ts +2 -0
  149. package/dist/plugins/builtins/snapshot-directory.js +24 -0
  150. package/dist/plugins/builtins/snapshot-zip.d.ts +2 -0
  151. package/dist/plugins/builtins/snapshot-zip.js +25 -0
  152. package/dist/plugins/capture-http-lighterceptor.d.ts +5 -0
  153. package/dist/plugins/capture-http-lighterceptor.js +85 -0
  154. package/dist/plugins/capture-http-puppeteer.d.ts +5 -0
  155. package/dist/plugins/capture-http-puppeteer.js +85 -0
  156. package/dist/plugins/host.d.ts +37 -0
  157. package/dist/plugins/host.js +105 -0
  158. package/dist/plugins/index.d.ts +6 -0
  159. package/dist/plugins/index.js +11 -0
  160. package/dist/plugins/ordering.d.ts +2 -0
  161. package/dist/plugins/ordering.js +19 -0
  162. package/dist/plugins/types.d.ts +51 -0
  163. package/dist/plugins/types.js +2 -0
  164. package/dist/preload.js +3 -7
  165. package/dist/replace-elements/actions.d.ts +5 -0
  166. package/dist/replace-elements/actions.js +86 -0
  167. package/dist/replace-elements/match.d.ts +5 -0
  168. package/dist/replace-elements/match.js +46 -0
  169. package/dist/replace-elements/normalize.d.ts +21 -0
  170. package/dist/replace-elements/normalize.js +50 -0
  171. package/dist/replace-elements.d.ts +1 -1
  172. package/dist/replace-elements.js +5 -185
  173. package/dist/replay/match-api.d.ts +10 -0
  174. package/dist/replay/match-api.js +162 -0
  175. package/dist/replay/templates/match-api-source.d.ts +1 -0
  176. package/dist/replay/templates/match-api-source.js +137 -0
  177. package/dist/replay/templates/replay-script-template.d.ts +5 -0
  178. package/dist/replay/templates/replay-script-template.js +337 -0
  179. package/dist/replay/templates/resource-proxy-script.d.ts +1 -0
  180. package/dist/replay/templates/resource-proxy-script.js +274 -0
  181. package/dist/replay-script.d.ts +3 -10
  182. package/dist/replay-script.js +11 -625
  183. package/dist/resource-filter.d.ts +1 -1
  184. package/dist/resource-filter.js +1 -5
  185. package/dist/resource-proxy/escape-percent.d.ts +1 -0
  186. package/dist/resource-proxy/escape-percent.js +12 -0
  187. package/dist/resource-proxy/multimap.d.ts +3 -0
  188. package/dist/resource-proxy/multimap.js +18 -0
  189. package/dist/resource-proxy/pathname-variants.d.ts +3 -0
  190. package/dist/resource-proxy/pathname-variants.js +54 -0
  191. package/dist/resource-proxy.d.ts +4 -2
  192. package/dist/resource-proxy.js +48 -117
  193. package/dist/resources.js +4 -42
  194. package/dist/rewrite-links/js-imports.d.ts +3 -0
  195. package/dist/rewrite-links/js-imports.js +56 -0
  196. package/dist/rewrite-links/link-rel.d.ts +2 -0
  197. package/dist/rewrite-links/link-rel.js +10 -0
  198. package/dist/rewrite-links/meta-refresh.d.ts +3 -0
  199. package/dist/rewrite-links/meta-refresh.js +22 -0
  200. package/dist/rewrite-links/skip.d.ts +1 -0
  201. package/dist/rewrite-links/skip.js +10 -0
  202. package/dist/rewrite-links/srcset.d.ts +3 -0
  203. package/dist/rewrite-links/srcset.js +63 -0
  204. package/dist/rewrite-links/url-resolve.d.ts +3 -0
  205. package/dist/rewrite-links/url-resolve.js +13 -0
  206. package/dist/rewrite-links.d.ts +3 -3
  207. package/dist/rewrite-links.js +31 -240
  208. package/dist/snapshot-builder/api.d.ts +3 -0
  209. package/dist/snapshot-builder/api.js +6 -0
  210. package/dist/snapshot-builder/build-snapshot.d.ts +3 -0
  211. package/dist/snapshot-builder/build-snapshot.js +138 -0
  212. package/dist/snapshot-builder/capture-index/index-capture.d.ts +13 -0
  213. package/dist/snapshot-builder/capture-index/index-capture.js +168 -0
  214. package/dist/snapshot-builder/capture-index/index.d.ts +2 -0
  215. package/dist/snapshot-builder/capture-index/index.js +1 -0
  216. package/dist/snapshot-builder/capture-index/types.d.ts +12 -0
  217. package/dist/snapshot-builder/capture-index/types.js +1 -0
  218. package/dist/snapshot-builder/capture-index.d.ts +12 -0
  219. package/dist/snapshot-builder/capture-index.js +173 -0
  220. package/dist/snapshot-builder/emit-document.d.ts +24 -0
  221. package/dist/snapshot-builder/emit-document.js +50 -0
  222. package/dist/snapshot-builder/grouping.d.ts +8 -0
  223. package/dist/snapshot-builder/grouping.js +87 -0
  224. package/dist/snapshot-builder/http.d.ts +6 -0
  225. package/dist/snapshot-builder/http.js +28 -0
  226. package/dist/snapshot-builder/index.d.ts +4 -0
  227. package/dist/snapshot-builder/index.js +2 -0
  228. package/dist/snapshot-builder/path-map.d.ts +3 -0
  229. package/dist/snapshot-builder/path-map.js +35 -0
  230. package/dist/snapshot-builder/resources-path.d.ts +23 -0
  231. package/dist/snapshot-builder/resources-path.js +47 -0
  232. package/dist/snapshot-builder/rewrite-resource.d.ts +18 -0
  233. package/dist/snapshot-builder/rewrite-resource.js +52 -0
  234. package/dist/snapshot-builder/types.d.ts +37 -0
  235. package/dist/snapshot-builder/types.js +2 -0
  236. package/dist/snapshot-builder.d.ts +12 -8
  237. package/dist/snapshot-builder.js +252 -27
  238. package/dist/types.d.ts +122 -78
  239. package/dist/types.js +4 -2
  240. package/dist/units/contracts-bridge.d.ts +76 -0
  241. package/dist/units/contracts-bridge.js +6 -0
  242. package/dist/units/index.d.ts +4 -0
  243. package/dist/units/index.js +2 -0
  244. package/dist/units/runner.d.ts +11 -0
  245. package/dist/units/runner.js +270 -0
  246. package/dist/units/types.d.ts +39 -0
  247. package/dist/units/types.js +1 -0
  248. package/dist/utils/streams.d.ts +2 -0
  249. package/dist/utils/streams.js +29 -0
  250. package/dist/utils.d.ts +35 -1
  251. package/dist/utils.js +107 -29
  252. package/dist/v3/contracts-bridge.d.ts +69 -0
  253. package/dist/v3/contracts-bridge.js +5 -0
  254. package/dist/v3/index.d.ts +4 -0
  255. package/dist/v3/index.js +2 -0
  256. package/dist/v3/runner.d.ts +20 -0
  257. package/dist/v3/runner.js +245 -0
  258. package/dist/v3/types.d.ts +39 -0
  259. package/dist/v3/types.js +1 -0
  260. package/dist/writers.js +3 -1
  261. package/package.json +11 -3
@@ -0,0 +1,108 @@
1
+ import type { NetworkEvent, NetworkInterceptorAdapter } from "@pagepocket/interceptor";
2
+ import type { ContentStore, PagePocketOptions, PathResolver, ReplaceElementsConfig, ResourceFilter } from "../../../types";
3
+ export type HtmlArtifact = {
4
+ baseUrl: string;
5
+ url?: string;
6
+ contentType?: string;
7
+ htmlString: string;
8
+ };
9
+ export type EntryInfo = {
10
+ kind: "url";
11
+ url: string;
12
+ } | {
13
+ kind: "puppeteer-page";
14
+ page: unknown;
15
+ url?: string;
16
+ } | {
17
+ kind: "cdp-tab";
18
+ tabId: number;
19
+ url?: string;
20
+ } | {
21
+ kind: "html-string";
22
+ baseUrl: string;
23
+ url?: string;
24
+ } | {
25
+ kind: "document";
26
+ baseUrl: string;
27
+ url?: string;
28
+ };
29
+ export type PagePocketCaptureResult = {
30
+ kind: "raw";
31
+ outputDir: string;
32
+ meta?: unknown;
33
+ } | {
34
+ kind: "zip";
35
+ zip: {
36
+ data: Uint8Array | Blob;
37
+ outputPath: string;
38
+ };
39
+ meta?: unknown;
40
+ } | {
41
+ kind: "html";
42
+ html: string;
43
+ meta?: unknown;
44
+ } | {
45
+ kind: "text";
46
+ text: string;
47
+ meta?: unknown;
48
+ } | {
49
+ kind: "custom";
50
+ value: unknown;
51
+ };
52
+ export interface CaptureWithPluginsOptions {
53
+ /** Needed only if plugins rely on network interception. */
54
+ interceptor?: NetworkInterceptorAdapter;
55
+ /** Plugins are executed sequentially in this order. */
56
+ plugins: PagePocketPlugin[];
57
+ /** Network completion. */
58
+ completion?: import("../../../types").CompletionStrategy | import("../../../types").CompletionStrategy[];
59
+ timeoutMs?: number;
60
+ maxDurationMs?: number;
61
+ /** Snapshot-related config (used by BuildSnapshotPlugin). */
62
+ filter?: ResourceFilter;
63
+ blacklist?: RegExp[];
64
+ pathResolver?: PathResolver;
65
+ contentStore?: ContentStore;
66
+ rewriteEntry?: boolean;
67
+ rewriteCSS?: boolean;
68
+ limits?: {
69
+ maxTotalBytes?: number;
70
+ maxSingleResourceBytes?: number;
71
+ maxResources?: number;
72
+ };
73
+ }
74
+ export interface PagePocketPlugin {
75
+ readonly name: string;
76
+ enabled?: boolean;
77
+ /** Register hook handlers on the context. */
78
+ apply(ctx: PagePocketContext): void | Promise<void>;
79
+ }
80
+ export interface PagePocketContext extends Record<string, unknown> {
81
+ entry: EntryInfo;
82
+ options: CaptureWithPluginsOptions;
83
+ pocketOptions: PagePocketOptions;
84
+ /** plugin shared mutable bag */
85
+ state: Record<string, unknown>;
86
+ html?: HtmlArtifact;
87
+ whenHtml(): Promise<HtmlArtifact>;
88
+ whenHtmlHooksDone(): Promise<void>;
89
+ onInit(handler: () => void | Promise<void>): void;
90
+ onHtml(handler: (payload: {
91
+ html: HtmlArtifact;
92
+ }) => void | Promise<void>): void;
93
+ onBeforeNetwork(handler: () => void | Promise<void>): void;
94
+ onNetworkEvent(handler: (event: NetworkEvent) => void | Promise<void>): void;
95
+ onAfterNetwork(handler: () => void | Promise<void>): void;
96
+ onFinalize(handler: () => void | PagePocketCaptureResult | Promise<void | PagePocketCaptureResult>): void;
97
+ /** Allow plugins to update the HTML artifact. */
98
+ setHtml(next: HtmlArtifact): void;
99
+ /** Register replaceElements rules for snapshot build. */
100
+ addReplaceElements(rules: ReplaceElementsConfig): void;
101
+ /** optional: broadcast network events */
102
+ network?: {
103
+ subscribe(): AsyncIterable<NetworkEvent>;
104
+ };
105
+ result?: PagePocketCaptureResult;
106
+ finish(result: PagePocketCaptureResult): void;
107
+ defer(promise: Promise<unknown>): void;
108
+ }
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -0,0 +1,9 @@
1
+ import type { PagePocketContextInternal } from "./context.js";
2
+ import type { PagePocketPlugin } from "./types.js";
3
+ export declare const runHandlersSequentially: <TArgs extends unknown[]>(handlers: Array<(...args: TArgs) => void | Promise<void>>, args: TArgs, ctx: PagePocketContextInternal) => Promise<void>;
4
+ export declare const runFinalizeHandlers: (ctx: PagePocketContextInternal) => Promise<void>;
5
+ export declare const pluginLabel: (plugin: PagePocketPlugin) => {
6
+ name: string;
7
+ ctorName: string | undefined;
8
+ enabled: boolean;
9
+ };
@@ -0,0 +1,29 @@
1
+ export const runHandlersSequentially = async (handlers, args, ctx) => {
2
+ for (const handler of handlers) {
3
+ if (ctx._isFinished()) {
4
+ return;
5
+ }
6
+ await handler(...args);
7
+ }
8
+ };
9
+ export const runFinalizeHandlers = async (ctx) => {
10
+ for (const handler of ctx._hooks.finalize) {
11
+ if (ctx._isFinished()) {
12
+ return;
13
+ }
14
+ const result = await handler();
15
+ if (result) {
16
+ ctx.finish(result);
17
+ return;
18
+ }
19
+ }
20
+ };
21
+ export const pluginLabel = (plugin) => {
22
+ const ctor = plugin.constructor;
23
+ const ctorName = ctor && typeof ctor.name === "string" ? ctor.name : undefined;
24
+ return {
25
+ name: plugin.name,
26
+ ctorName,
27
+ enabled: true
28
+ };
29
+ };
@@ -0,0 +1,12 @@
1
+ import type { NetworkEvent } from "../../types.js";
2
+ import type { CaptureWithPluginsOptions, PagePocketCaptureResult } from "./types.js";
3
+ type RunnerInput = {
4
+ pocket: {
5
+ interceptedRequestEvents(): AsyncIterable<NetworkEvent>;
6
+ };
7
+ pocketOptions: import("../../types.js").PagePocketOptions;
8
+ target: import("../pagepocket.js").CaptureTarget;
9
+ options: CaptureWithPluginsOptions;
10
+ };
11
+ export declare const captureWithPlugins: (input: RunnerInput) => Promise<PagePocketCaptureResult>;
12
+ export {};
@@ -0,0 +1,118 @@
1
+ import { debugLog } from "../debug.js";
2
+ import { createPagePocketContext, resolveEnabledPlugins } from "./context.js";
3
+ import { pluginLabel, runFinalizeHandlers, runHandlersSequentially } from "./runner-utils.js";
4
+ // helpers moved to ./runner-utils
5
+ export const captureWithPlugins = async (input) => {
6
+ debugLog("[pagepocket][plugin-runner] capture start", {
7
+ targetKind: input.target.kind,
8
+ pluginCount: input.options.plugins.length
9
+ });
10
+ const plugins = input.options.plugins;
11
+ const ctx = createPagePocketContext({
12
+ entry: input.target.kind === "url"
13
+ ? { kind: "url", url: input.target.url }
14
+ : input.target.kind === "puppeteer-page"
15
+ ? { kind: "puppeteer-page", page: input.target.page }
16
+ : input.target.kind === "cdp-tab"
17
+ ? { kind: "cdp-tab", tabId: input.target.tabId }
18
+ : input.target.kind === "html"
19
+ ? { kind: "html-string", baseUrl: input.target.baseUrl, url: input.target.url }
20
+ : (() => {
21
+ throw new Error(`Unsupported target kind: ${input.target.kind}`);
22
+ })(),
23
+ options: input.options,
24
+ pocketOptions: input.pocketOptions
25
+ });
26
+ // For pure-HTML entrypoints, provide the HTML artifact immediately so plugins
27
+ // can rely on ctx.whenHtml() during init.
28
+ if (input.target.kind === "html") {
29
+ ctx.setHtml({
30
+ htmlString: input.target.htmlString,
31
+ baseUrl: input.target.baseUrl,
32
+ url: input.target.url,
33
+ contentType: "text/html"
34
+ });
35
+ }
36
+ // Allow http capture plugins to broadcast events into both:
37
+ // - ctx.onNetworkEvent hooks
38
+ // - PagePocket.interceptedRequestEvents() subscribers
39
+ ctx.emitNetworkEvent = (event) => {
40
+ void runHandlersSequentially(ctx._hooks.networkEvent, [event], ctx);
41
+ input.pocket._emitNetworkEvent?.(event);
42
+ };
43
+ debugLog("[pagepocket][plugin-runner] plugins configured", plugins.map(pluginLabel));
44
+ const allPlugins = await resolveEnabledPlugins(plugins, ctx);
45
+ debugLog("[pagepocket][plugin-runner] plugins resolved", allPlugins.map(pluginLabel));
46
+ for (const plugin of allPlugins) {
47
+ if (ctx._isFinished()) {
48
+ break;
49
+ }
50
+ debugLog("[pagepocket][plugin-runner] plugin.apply start", pluginLabel(plugin));
51
+ await plugin.apply(ctx);
52
+ debugLog("[pagepocket][plugin-runner] plugin.apply done", pluginLabel(plugin));
53
+ }
54
+ debugLog("[pagepocket][plugin-runner] phase=init start", { count: ctx._hooks.init.length });
55
+ await runHandlersSequentially(ctx._hooks.init, [], ctx);
56
+ debugLog("[pagepocket][plugin-runner] phase=init done");
57
+ debugLog("[pagepocket][plugin-runner] phase=waitForHtml start");
58
+ const html = await ctx.whenHtml();
59
+ debugLog("[pagepocket][plugin-runner] phase=waitForHtml done", {
60
+ url: html.url,
61
+ baseUrl: html.baseUrl,
62
+ contentType: html.contentType,
63
+ htmlBytes: html.htmlString.length
64
+ });
65
+ debugLog("[pagepocket][plugin-runner] phase=html start", { count: ctx._hooks.html.length });
66
+ try {
67
+ await runHandlersSequentially(ctx._hooks.html, [{ html }], ctx);
68
+ ctx._htmlHooksDoneBarrier.resolve();
69
+ }
70
+ catch (error) {
71
+ const err = error instanceof Error ? error : new Error(String(error));
72
+ ctx._htmlHooksDoneBarrier.reject(err);
73
+ throw err;
74
+ }
75
+ debugLog("[pagepocket][plugin-runner] phase=html done");
76
+ if (ctx._isFinished()) {
77
+ debugLog("[pagepocket][plugin-runner] early finish after html", {
78
+ resultKind: ctx.result?.kind
79
+ });
80
+ debugLog("[pagepocket][plugin-runner] phase=finalize start", {
81
+ count: ctx._hooks.finalize.length
82
+ });
83
+ await runFinalizeHandlers(ctx);
84
+ debugLog("[pagepocket][plugin-runner] phase=finalize done", { resultKind: ctx.result?.kind });
85
+ debugLog("[pagepocket][plugin-runner] awaiting deferred work");
86
+ await ctx._deferred.awaitAll();
87
+ if (!ctx.result) {
88
+ throw new Error("No plugin produced a terminal result.");
89
+ }
90
+ return ctx.result;
91
+ }
92
+ debugLog("[pagepocket][plugin-runner] phase=beforeNetwork start", {
93
+ count: ctx._hooks.beforeNetwork.length
94
+ });
95
+ await runHandlersSequentially(ctx._hooks.beforeNetwork, [], ctx);
96
+ debugLog("[pagepocket][plugin-runner] phase=beforeNetwork done");
97
+ // Note: completion waiting and inflight tracking are now the responsibility
98
+ // of the http capture plugins (they own interception).
99
+ debugLog("[pagepocket][plugin-runner] phase=afterNetwork start", {
100
+ count: ctx._hooks.afterNetwork.length
101
+ });
102
+ await runHandlersSequentially(ctx._hooks.afterNetwork, [], ctx);
103
+ debugLog("[pagepocket][plugin-runner] phase=afterNetwork done");
104
+ debugLog("[pagepocket][plugin-runner] phase=finalize start", {
105
+ count: ctx._hooks.finalize.length
106
+ });
107
+ await runFinalizeHandlers(ctx);
108
+ debugLog("[pagepocket][plugin-runner] phase=finalize done", { resultKind: ctx.result?.kind });
109
+ debugLog("[pagepocket][plugin-runner] awaiting deferred work");
110
+ await ctx._deferred.awaitAll();
111
+ if (!ctx.result) {
112
+ throw new Error("No plugin produced a terminal result.");
113
+ }
114
+ debugLog("[pagepocket][plugin-runner] capture done", { resultKind: ctx.result.kind });
115
+ // Close any external observers.
116
+ input.pocket._closeNetworkEventStream?.();
117
+ return ctx.result;
118
+ };
@@ -0,0 +1,117 @@
1
+ import type { NetworkEvent } from "../../types.js";
2
+ import type { ContentStore, PagePocketOptions, PathResolver, ReplaceElementsConfig, ResourceFilter } from "../../types.js";
3
+ export type CaptureTarget = import("../pagepocket.js").CaptureTarget;
4
+ export type HtmlArtifact = {
5
+ baseUrl: string;
6
+ url?: string;
7
+ contentType?: string;
8
+ htmlString: string;
9
+ };
10
+ export type EntryInfo = {
11
+ kind: "url";
12
+ url: string;
13
+ } | {
14
+ kind: "puppeteer-page";
15
+ page: unknown;
16
+ url?: string;
17
+ } | {
18
+ kind: "cdp-tab";
19
+ tabId: number;
20
+ url?: string;
21
+ } | {
22
+ kind: "html-string";
23
+ baseUrl: string;
24
+ url?: string;
25
+ } | {
26
+ kind: "document";
27
+ baseUrl: string;
28
+ url?: string;
29
+ };
30
+ export type PagePocketCaptureResult = {
31
+ kind: "raw";
32
+ outputDir: string;
33
+ meta?: unknown;
34
+ } | {
35
+ kind: "zip";
36
+ zip: {
37
+ data: Uint8Array | Blob;
38
+ outputPath: string;
39
+ };
40
+ meta?: unknown;
41
+ } | {
42
+ kind: "html";
43
+ html: string;
44
+ meta?: unknown;
45
+ } | {
46
+ kind: "text";
47
+ text: string;
48
+ meta?: unknown;
49
+ } | {
50
+ kind: "custom";
51
+ value: unknown;
52
+ };
53
+ export interface CaptureWithPluginsOptions {
54
+ /** Plugins are executed sequentially in this order. */
55
+ plugins: PagePocketPlugin[];
56
+ /**
57
+ * Break-change: Units/Plugins API (v3 runner).
58
+ *
59
+ * If `units` is provided (or `pluginsV3`), PagePocket.capture() will run the
60
+ * units runner and ignore legacy `plugins`.
61
+ */
62
+ units?: import("../../v3/contracts-bridge.js").Unit[];
63
+ /** v3 plugins (isolated, contributes replace-elements only). */
64
+ pluginsV3?: import("../../v3/contracts-bridge.js").Plugin[];
65
+ /** Network completion. */
66
+ completion?: import("../../types.js").CompletionStrategy | import("../../types.js").CompletionStrategy[];
67
+ timeoutMs?: number;
68
+ maxDurationMs?: number;
69
+ /** Snapshot-related config (used by BuildSnapshotPlugin). */
70
+ filter?: ResourceFilter;
71
+ blacklist?: RegExp[];
72
+ pathResolver?: PathResolver;
73
+ contentStore?: ContentStore;
74
+ rewriteEntry?: boolean;
75
+ rewriteCSS?: boolean;
76
+ limits?: {
77
+ maxTotalBytes?: number;
78
+ maxSingleResourceBytes?: number;
79
+ maxResources?: number;
80
+ };
81
+ }
82
+ export interface PagePocketPlugin {
83
+ readonly name: string;
84
+ /** Register hook handlers on the context. */
85
+ apply(ctx: PagePocketContext): void | Promise<void>;
86
+ }
87
+ export interface PagePocketContext extends Record<string, unknown> {
88
+ entry: EntryInfo;
89
+ options: CaptureWithPluginsOptions;
90
+ pocketOptions: PagePocketOptions;
91
+ /** plugin shared mutable bag */
92
+ state: Record<string, unknown>;
93
+ html?: HtmlArtifact;
94
+ whenHtml(): Promise<HtmlArtifact>;
95
+ whenHtmlHooksDone(): Promise<void>;
96
+ onInit(handler: () => void | Promise<void>): void;
97
+ onHtml(handler: (payload: {
98
+ html: HtmlArtifact;
99
+ }) => void | Promise<void>): void;
100
+ onBeforeNetwork(handler: () => void | Promise<void>): void;
101
+ onNetworkEvent(handler: (event: NetworkEvent) => void | Promise<void>): void;
102
+ onAfterNetwork(handler: () => void | Promise<void>): void;
103
+ onFinalize(handler: () => void | PagePocketCaptureResult | Promise<void | PagePocketCaptureResult>): void;
104
+ /** Allow plugins to update the HTML artifact. */
105
+ setHtml(next: HtmlArtifact): void;
106
+ /** Register replaceElements rules for snapshot build. */
107
+ addReplaceElements(rules: ReplaceElementsConfig): void;
108
+ /** optional: broadcast network events */
109
+ network?: {
110
+ subscribe(): AsyncIterable<NetworkEvent>;
111
+ };
112
+ /** optional: emit a network event to observers */
113
+ emitNetworkEvent?: (event: NetworkEvent) => void;
114
+ result?: PagePocketCaptureResult;
115
+ finish(result: PagePocketCaptureResult): void;
116
+ defer(promise: Promise<unknown>): void;
117
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,14 @@
1
+ import type { CaptureCapabilities, CaptureEvent } from "../capture/types";
2
+ import type { HtmlArtifact } from "../plugin/types";
3
+ export type CaptureRuntimeSession = {
4
+ /** Resolve the HTML milestone for this capture run. */
5
+ waitForHtml(): Promise<HtmlArtifact>;
6
+ /** Begin network capture. Runner calls this after beforeNetwork hooks. */
7
+ startCapture(): Promise<void>;
8
+ /** Stream of capture events produced by the runtime. */
9
+ events(): AsyncIterable<CaptureEvent>;
10
+ /** Stop the runtime and release any held resources. */
11
+ stop(): Promise<void>;
12
+ /** Optional fidelity/capabilities metadata for downstream plugins. */
13
+ capabilities?: CaptureCapabilities;
14
+ };
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -1,6 +1,3 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.rewriteCssText = void 0;
4
1
  const URL_PATTERN = /url\(\s*(['"]?)([^'")]+)\1\s*\)/g;
5
2
  const IMPORT_PATTERN = /@import\s+(?:url\()?['"]?([^'")]+)['"]?\)?/g;
6
3
  const shouldSkipValue = (value) => {
@@ -13,7 +10,7 @@ const shouldSkipValue = (value) => {
13
10
  trimmed.startsWith("javascript:") ||
14
11
  trimmed.startsWith("#"));
15
12
  };
16
- const rewriteCssText = async (input) => {
13
+ export const rewriteCssText = async (input) => {
17
14
  const { cssText, cssUrl, resolveUrl } = input;
18
15
  let updated = "";
19
16
  let lastIndex = 0;
@@ -72,4 +69,3 @@ const rewriteCssText = async (input) => {
72
69
  final += updated.slice(lastIndex);
73
70
  return final;
74
71
  };
75
- exports.rewriteCssText = rewriteCssText;
package/dist/debug.d.ts CHANGED
@@ -1,2 +1 @@
1
- export declare const debug_log: (...args: unknown[]) => void;
2
1
  export declare const debugLog: (...args: unknown[]) => void;
package/dist/debug.js CHANGED
@@ -1,18 +1,16 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.debugLog = exports.debug_log = void 0;
3
+ exports.debugLog = void 0;
4
4
  const isDebugEnabled = () => {
5
5
  const globalProcess = globalThis
6
6
  .process;
7
7
  const value = globalProcess?.env?.PAGEPOCKET_DEBUG;
8
8
  return Boolean(value);
9
9
  };
10
- const debug_log = (...args) => {
10
+ const debugLog = (...args) => {
11
11
  if (!isDebugEnabled()) {
12
12
  return;
13
13
  }
14
- // eslint-disable-next-line no-console
15
14
  console.log(...args);
16
15
  };
17
- exports.debug_log = debug_log;
18
- exports.debugLog = exports.debug_log;
16
+ exports.debugLog = debugLog;
@@ -0,0 +1,41 @@
1
+ import type { ContentRef } from "../types";
2
+ export type FileTreeSource = {
3
+ kind: "bytes";
4
+ data: Uint8Array;
5
+ } | {
6
+ kind: "text";
7
+ text: string;
8
+ } | {
9
+ kind: "content-ref";
10
+ ref: ContentRef;
11
+ };
12
+ export type FileTreeFile = {
13
+ kind: "file";
14
+ /** Path relative to the root (posix-ish, may include '/'). */
15
+ path: string;
16
+ source: FileTreeSource;
17
+ };
18
+ export type FileTreeDirectory = {
19
+ kind: "directory";
20
+ /** Path relative to the root (posix-ish, may include '/'). */
21
+ path: string;
22
+ entries: FileTreeEntry[];
23
+ };
24
+ export type FileTreeEntry = FileTreeFile | FileTreeDirectory;
25
+ /**
26
+ * Generic, plugin-agnostic filesystem tree.
27
+ *
28
+ * Capture/build plugins should populate `ctx.files` using this shape.
29
+ * Writer plugins (raw/zip) should consume it.
30
+ */
31
+ export type FileTree = {
32
+ /** Virtual root directory; its path should be "". */
33
+ root: FileTreeDirectory;
34
+ /**
35
+ * Optional content store handle used when entries reference `{ kind: "content-ref" }`.
36
+ */
37
+ content?: {
38
+ open(ref: ContentRef): Promise<ReadableStream<Uint8Array>>;
39
+ dispose?(): Promise<void>;
40
+ };
41
+ };
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
package/dist/hack-html.js CHANGED
@@ -1,20 +1,28 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.hackHtml = void 0;
4
- const preload_1 = require("./preload");
5
- const replay_script_1 = require("./replay-script");
6
- const hackHtml = (input) => {
7
- const replayScript = (0, replay_script_1.buildReplayScript)(input.apiPath, input.baseUrl);
8
- const preloadScript = `<script>${(0, preload_1.buildPreloadScript)()}</script>`;
1
+ import { buildPreloadScript } from "./preload.js";
2
+ import { buildReplayScript } from "./replay-script.js";
3
+ export const hackHtml = (input) => {
4
+ const replayScript = buildReplayScript(input.apiPath, input.baseUrl);
5
+ const preloadScript = `<script>${buildPreloadScript()}</script>`;
9
6
  const head = input.$("head");
10
7
  const root = input.$.root();
8
+ const scriptNodes = input.$("script").toArray();
9
+ const hasPreload = scriptNodes.some((node) => (input.$(node).html() || "").includes("__pagepocketPatched"));
10
+ const hasReplay = scriptNodes.some((node) => (input.$(node).html() || "").includes("__pagepocketOriginalFetch"));
11
11
  if (head.length) {
12
- head.prepend(replayScript);
13
- head.prepend(preloadScript);
12
+ if (!hasReplay) {
13
+ head.prepend(replayScript);
14
+ }
15
+ if (!hasPreload) {
16
+ head.prepend(preloadScript);
17
+ }
14
18
  }
15
19
  else {
16
- root.prepend(replayScript);
17
- root.prepend(preloadScript);
20
+ if (!hasReplay) {
21
+ root.prepend(replayScript);
22
+ }
23
+ if (!hasPreload) {
24
+ root.prepend(preloadScript);
25
+ }
18
26
  }
19
27
  if (input.faviconDataUrl) {
20
28
  const existingIcon = input.$('link[rel="icon"]');
@@ -29,4 +37,3 @@ const hackHtml = (input) => {
29
37
  }
30
38
  }
31
39
  };
32
- exports.hackHtml = hackHtml;
@@ -1,3 +1,3 @@
1
- import type { ScriptHacker } from "./types";
1
+ import type { ScriptHacker } from "./types.js";
2
2
  export declare const preloadHackers: ScriptHacker[];
3
3
  export declare const replayHackers: ScriptHacker[];
@@ -1,28 +1,25 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.replayHackers = exports.preloadHackers = void 0;
4
- const preload_fetch_1 = require("./preload-fetch");
5
- const preload_xhr_1 = require("./preload-xhr");
6
- const replay_beacon_1 = require("./replay-beacon");
7
- const replay_block_text_fragment_1 = require("./replay-block-text-fragment");
8
- const replay_css_proxy_1 = require("./replay-css-proxy");
9
- const replay_dom_rewrite_1 = require("./replay-dom-rewrite");
10
- const replay_eventsource_1 = require("./replay-eventsource");
11
- const replay_fetch_1 = require("./replay-fetch");
12
- const replay_history_path_1 = require("./replay-history-path");
13
- const replay_svg_image_1 = require("./replay-svg-image");
14
- const replay_websocket_1 = require("./replay-websocket");
15
- const replay_xhr_1 = require("./replay-xhr");
16
- exports.preloadHackers = [preload_fetch_1.preloadFetchRecorder, preload_xhr_1.preloadXhrRecorder];
17
- exports.replayHackers = [
18
- replay_block_text_fragment_1.replayBlockTextFragment,
19
- replay_history_path_1.replayHistoryPath,
20
- replay_fetch_1.replayFetchResponder,
21
- replay_xhr_1.replayXhrResponder,
22
- replay_css_proxy_1.replayCssProxy,
23
- replay_dom_rewrite_1.replayDomRewriter,
24
- replay_svg_image_1.replaySvgImageRewriter,
25
- replay_beacon_1.replayBeaconStub,
26
- replay_websocket_1.replayWebSocketStub,
27
- replay_eventsource_1.replayEventSourceStub
1
+ import { preloadFetchRecorder } from "./preload-fetch.js";
2
+ import { preloadXhrRecorder } from "./preload-xhr.js";
3
+ import { replayBeaconStub } from "./replay-beacon.js";
4
+ import { replayBlockTextFragment } from "./replay-block-text-fragment.js";
5
+ import { replayCssProxy } from "./replay-css-proxy.js";
6
+ import { replayDomRewriter } from "./replay-dom-rewrite.js";
7
+ import { replayEventSourceStub } from "./replay-eventsource.js";
8
+ import { replayFetchResponder } from "./replay-fetch.js";
9
+ import { replayHistoryPath } from "./replay-history-path.js";
10
+ import { replaySvgImageRewriter } from "./replay-svg-image.js";
11
+ import { replayWebSocketStub } from "./replay-websocket.js";
12
+ import { replayXhrResponder } from "./replay-xhr.js";
13
+ export const preloadHackers = [preloadFetchRecorder, preloadXhrRecorder];
14
+ export const replayHackers = [
15
+ replayBlockTextFragment,
16
+ replayHistoryPath,
17
+ replayFetchResponder,
18
+ replayXhrResponder,
19
+ replayCssProxy,
20
+ replayDomRewriter,
21
+ replaySvgImageRewriter,
22
+ replayBeaconStub,
23
+ replayWebSocketStub,
24
+ replayEventSourceStub
28
25
  ];
@@ -1,2 +1,2 @@
1
- import type { ScriptHacker } from "./types";
1
+ import type { ScriptHacker } from "./types.js";
2
2
  export declare const preloadFetchRecorder: ScriptHacker;
@@ -1,7 +1,4 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.preloadFetchRecorder = void 0;
4
- exports.preloadFetchRecorder = {
1
+ export const preloadFetchRecorder = {
5
2
  id: "preload-fetch-recorder",
6
3
  stage: "preload",
7
4
  build: () => `
@@ -1,2 +1,2 @@
1
- import type { ScriptHacker } from "./types";
1
+ import type { ScriptHacker } from "./types.js";
2
2
  export declare const preloadXhrRecorder: ScriptHacker;
@@ -1,7 +1,4 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.preloadXhrRecorder = void 0;
4
- exports.preloadXhrRecorder = {
1
+ export const preloadXhrRecorder = {
5
2
  id: "preload-xhr-recorder",
6
3
  stage: "preload",
7
4
  build: () => `
@@ -1,2 +1,2 @@
1
- import type { ScriptHacker } from "./types";
1
+ import type { ScriptHacker } from "./types.js";
2
2
  export declare const replayBeaconStub: ScriptHacker;