@pagepocket/lib 0.12.0 → 0.14.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/dist/core/pagepocket.d.ts +17 -0
  2. package/dist/core/pagepocket.js +25 -1
  3. package/dist/hackers/index.js +3 -1
  4. package/dist/hackers/replay-dom-rewrite/script-part-1.d.ts +1 -1
  5. package/dist/hackers/replay-dom-rewrite/script-part-1.js +40 -51
  6. package/dist/hackers/replay-dom-rewrite/script-part-2.d.ts +1 -1
  7. package/dist/hackers/replay-dom-rewrite/script-part-2.js +74 -44
  8. package/dist/hackers/replay-fetch.js +4 -0
  9. package/dist/hackers/replay-websocket.js +50 -8
  10. package/dist/hackers/replay-worker.d.ts +18 -0
  11. package/dist/hackers/replay-worker.js +242 -0
  12. package/dist/index.d.ts +1 -0
  13. package/dist/replay/match-api.js +103 -3
  14. package/dist/replay/templates/loader-template.d.ts +15 -0
  15. package/dist/replay/templates/loader-template.js +164 -0
  16. package/dist/replay/templates/match-api-source.d.ts +1 -1
  17. package/dist/replay/templates/match-api-source.js +86 -4
  18. package/dist/replay/templates/replay-script-template.part-2.js +24 -1
  19. package/dist/resource-filter.js +29 -3
  20. package/dist/snapshot-builder/build-snapshot.js +33 -3
  21. package/dist/units/apply-replace-elements-to-file-tree.d.ts +7 -0
  22. package/dist/units/apply-replace-elements-to-file-tree.js +63 -0
  23. package/dist/units/contracts-bridge.d.ts +9 -1
  24. package/dist/units/file-tree-unit.d.ts +28 -0
  25. package/dist/units/file-tree-unit.js +53 -0
  26. package/dist/units/index.d.ts +3 -0
  27. package/dist/units/index.js +2 -0
  28. package/dist/units/internal/runtime.d.ts +5 -2
  29. package/dist/units/internal/runtime.js +14 -0
  30. package/dist/units/runner.d.ts +3 -1
  31. package/dist/units/runner.js +72 -2
  32. package/dist/units/snapshot-unit.d.ts +31 -0
  33. package/dist/units/snapshot-unit.js +58 -0
  34. package/package.json +4 -4
@@ -107,12 +107,25 @@ export const matchAPISource = `function matchAPI(options) {
107
107
  const recordPathSearch = stripTrailingSlash(toPathSearch(recordUrl));
108
108
  if (inputPathSearch === recordPathSearch)
109
109
  return true;
110
- const inputPath = stripTrailingSlash(toPathname(inputUrl));
111
- const recordPath = stripTrailingSlash(toPathname(recordUrl));
112
- if (inputPath === recordPath)
113
- return true;
114
110
  return false;
115
111
  };
112
+ const normalizePathLoose = (input) => {
113
+ const parsed = safeUrl(input);
114
+ if (parsed) {
115
+ return stripTrailingSlash(parsed.pathname);
116
+ }
117
+ const rawValue = String(input ?? "");
118
+ if (!rawValue) {
119
+ return "";
120
+ }
121
+ const valueWithoutHash = rawValue.split("#")[0] ?? "";
122
+ const valueWithoutQuery = valueWithoutHash.split("?")[0] ?? "";
123
+ const withoutOrigin = valueWithoutQuery.replace(/^([a-zA-Z][a-zA-Z\\d+\\-.]*:)?\\/\\/[^/]+/, "");
124
+ if (!withoutOrigin) {
125
+ return "/";
126
+ }
127
+ return stripTrailingSlash(withoutOrigin.startsWith("/") ? withoutOrigin : "/" + withoutOrigin);
128
+ };
116
129
  const scanRecords = (keyMethod, keyBody) => {
117
130
  for (const record of records || []) {
118
131
  if (!record || !record.url || !record.method)
@@ -128,10 +141,79 @@ export const matchAPISource = `function matchAPI(options) {
128
141
  }
129
142
  return undefined;
130
143
  };
144
+ const scanRecordsIgnoreQueryAndProtocol = (keyMethod, keyBody) => {
145
+ const inputPathname = normalizePathLoose(url);
146
+ if (!inputPathname) {
147
+ return undefined;
148
+ }
149
+ const candidates = [];
150
+ for (const record of records || []) {
151
+ if (!record || !record.url || !record.method)
152
+ continue;
153
+ if (record.method.toUpperCase() !== keyMethod)
154
+ continue;
155
+ const recordPathname = normalizePathLoose(record.url);
156
+ if (!recordPathname || recordPathname !== inputPathname)
157
+ continue;
158
+ const recordBody = record.requestBody || record.requestBodyBase64 || "";
159
+ if (keyBody && recordBody !== keyBody)
160
+ continue;
161
+ candidates.push(record);
162
+ }
163
+ if (candidates.length === 0)
164
+ return undefined;
165
+ if (candidates.length === 1) {
166
+ const candidate = candidates[0];
167
+ const inputParsed = safeUrl(url);
168
+ const candidateParsed = safeUrl(candidate.url);
169
+ if (inputParsed && candidateParsed) {
170
+ const inputKeys = Array.from(inputParsed.searchParams.keys());
171
+ const candidateKeys = new Set(candidateParsed.searchParams.keys());
172
+ const inputAllContained = inputKeys.length > 0 && inputKeys.every(k => candidateKeys.has(k));
173
+ const candidateHasExtra = candidateKeys.size > inputKeys.length;
174
+ if (inputAllContained && candidateHasExtra)
175
+ return undefined;
176
+ }
177
+ return candidate;
178
+ }
179
+ const inputParsed = safeUrl(url);
180
+ const inputSearch = inputParsed ? inputParsed.search : "";
181
+ let bestRecord = undefined;
182
+ let bestScore = -Infinity;
183
+ for (const candidate of candidates) {
184
+ const candidateParsed = safeUrl(candidate.url);
185
+ const candidateSearch = candidateParsed ? candidateParsed.search : "";
186
+ if (candidateSearch === inputSearch)
187
+ return candidate;
188
+ const inputParams = inputParsed ? inputParsed.searchParams : new URLSearchParams();
189
+ const candidateParams = candidateParsed ? candidateParsed.searchParams : new URLSearchParams();
190
+ let shared = 0;
191
+ let recordOnly = 0;
192
+ for (const [key, value] of candidateParams.entries()) {
193
+ if (inputParams.get(key) === value) {
194
+ shared++;
195
+ }
196
+ else {
197
+ recordOnly++;
198
+ }
199
+ }
200
+ const score = shared - recordOnly * 2;
201
+ if (score > bestScore) {
202
+ bestScore = score;
203
+ bestRecord = candidate;
204
+ }
205
+ }
206
+ return bestRecord;
207
+ };
131
208
  for (const [keyMethod, keyBody] of matchOrder) {
132
209
  const record = scanRecords(keyMethod, keyBody);
133
210
  if (record)
134
211
  return record;
135
212
  }
213
+ for (const [keyMethod, keyBody] of matchOrder) {
214
+ const record = scanRecordsIgnoreQueryAndProtocol(keyMethod, keyBody);
215
+ if (record)
216
+ return record;
217
+ }
136
218
  return undefined;
137
219
  }`;
@@ -37,6 +37,25 @@ export const buildReplayInjectedScriptPart2 = (options) => {
37
37
  return snapshot;
38
38
  })();
39
39
 
40
+
41
+ // Synchronous fast-path: when the loader page has already set
42
+ // window.__pagepocketResourcesPath / window.__pagepocketApiSnapshot,
43
+ // build resourceIndex immediately so document.write URL rewriting
44
+ // works before the async ready promise resolves.
45
+ try {
46
+ var preloadedResources = window.__pagepocketResourcesPath;
47
+ if (preloadedResources && preloadedResources.version === "1.0" && Array.isArray(preloadedResources.items)) {
48
+ resourceIndex = __pagepocketResourceProxy.buildIndex(preloadedResources);
49
+ }
50
+ } catch (syncInitError) {}
51
+
52
+ try {
53
+ var preloadedApi = window.__pagepocketApiSnapshot;
54
+ if (preloadedApi && preloadedApi.version === "1.0" && Array.isArray(preloadedApi.records)) {
55
+ primeLookups(preloadedApi);
56
+ }
57
+ } catch (syncInitError) {}
58
+
40
59
  const isLocalResource = (value) => {
41
60
  if (!value) return false;
42
61
  if (value.startsWith("data:") || value.startsWith("blob:")) return true;
@@ -202,7 +221,11 @@ export const buildReplayInjectedScriptPart2 = (options) => {
202
221
  };
203
222
 
204
223
  const responseFromRecord = (record) => {
205
- const headers = new Headers(record.responseHeaders || {});
224
+ var headers = new Headers();
225
+ var rawHeaders = record.responseHeaders || {};
226
+ for (var key in rawHeaders) {
227
+ try { headers.append(key, rawHeaders[key]); } catch (e) {}
228
+ }
206
229
  if (record.responseEncoding === "base64" && record.responseBodyBase64) {
207
230
  const bytes = decodeBase64(record.responseBodyBase64);
208
231
  return new Response(bytes, {
@@ -4,17 +4,43 @@ const isSkippableUrl = (url) => url.startsWith("data:") ||
4
4
  url.startsWith("mailto:") ||
5
5
  url.startsWith("tel:") ||
6
6
  url.startsWith("javascript:");
7
+ /**
8
+ * Checks whether a response mimeType represents a saveable binary asset.
9
+ *
10
+ * Some SPAs (e.g. Feishu, Notion) load images and fonts via `fetch()` or
11
+ * `XMLHttpRequest` instead of `<img>` tags. Puppeteer marks those requests
12
+ * as `resourceType: "fetch"` / `"xhr"`, but the response is still an image
13
+ * or font that must be persisted for offline replay.
14
+ *
15
+ * We detect this by inspecting the response `mimeType`.
16
+ */
17
+ const isSaveableMimeType = (mimeType) => {
18
+ const lower = mimeType.toLowerCase().split(";")[0].trim();
19
+ if (lower.startsWith("image/")) {
20
+ return true;
21
+ }
22
+ if (lower.startsWith("font/")) {
23
+ return true;
24
+ }
25
+ if (lower.startsWith("audio/") || lower.startsWith("video/")) {
26
+ return true;
27
+ }
28
+ if (lower === "application/font-woff" || lower === "application/font-woff2" || lower === "application/octet-stream") {
29
+ return true;
30
+ }
31
+ return false;
32
+ };
7
33
  export const createDefaultResourceFilter = () => ({
8
34
  shouldSave(req, res) {
9
35
  if (isSkippableUrl(req.url)) {
10
36
  return false;
11
37
  }
12
- if (req.resourceType && (req.resourceType === "fetch" || req.resourceType === "xhr")) {
13
- return false;
14
- }
15
38
  if (res && res.status >= 400) {
16
39
  return false;
17
40
  }
41
+ if (req.resourceType && (req.resourceType === "fetch" || req.resourceType === "xhr")) {
42
+ return res?.mimeType ? isSaveableMimeType(res.mimeType) : false;
43
+ }
18
44
  if (req.resourceType) {
19
45
  return DEFAULT_ALLOWED.has(req.resourceType);
20
46
  }
@@ -8,6 +8,7 @@ import { responseMimeType } from "./http.js";
8
8
  import { docDirFromUrl, resolveSnapshotPath } from "./path-map.js";
9
9
  import { buildResourcesPathSnapshot } from "./resources-path.js";
10
10
  import { maybeRewriteScript, maybeRewriteStylesheet } from "./rewrite-resource.js";
11
+ import { buildLoaderHtml } from "../replay/templates/loader-template.js";
11
12
  export const buildSnapshot = async (input) => {
12
13
  const warnings = input.warnings;
13
14
  const contentStore = input.capture.contentStore;
@@ -46,10 +47,11 @@ export const buildSnapshot = async (input) => {
46
47
  const apiPath = ensureLeadingSlash(multiDoc ? `${sanitizePosixPath(docDir)}/api.json` : "/api.json");
47
48
  for (const resource of group.resources) {
48
49
  if (resource.request.resourceType === "document") {
49
- const path = urlToPath.get(resource.request.url) ?? "/index.html";
50
+ const originalPath = urlToPath.get(resource.request.url) ?? "/index.html";
51
+ const pagePath = toPagePath(originalPath);
50
52
  const { file } = await emitDocumentFile({
51
53
  resource,
52
- path,
54
+ path: pagePath,
53
55
  entryUrl: input.entryUrl,
54
56
  groupUrl: group.url,
55
57
  apiPath,
@@ -60,8 +62,23 @@ export const buildSnapshot = async (input) => {
60
62
  snapshotEntryUrl: input.entryUrl
61
63
  });
62
64
  files.push(file);
65
+ const loaderHtml = buildLoaderHtml({
66
+ pageUrl: pagePath,
67
+ apiPath,
68
+ resourcesPathUrl: "/resources_path.json"
69
+ });
70
+ const loaderBytes = new TextEncoder().encode(loaderHtml);
71
+ const loaderRef = await contentStore.put({ kind: "buffer", data: loaderBytes }, { url: originalPath, mimeType: "text/html", sizeHint: loaderBytes.byteLength });
72
+ files.push({
73
+ path: originalPath,
74
+ mimeType: "text/html",
75
+ size: loaderBytes.byteLength,
76
+ source: loaderRef,
77
+ originalUrl: resource.request.url,
78
+ resourceType: "document"
79
+ });
63
80
  if (resource.request.url === input.entryUrl || !entryPath) {
64
- entryPath = path;
81
+ entryPath = originalPath;
65
82
  }
66
83
  continue;
67
84
  }
@@ -135,3 +152,16 @@ export const buildSnapshot = async (input) => {
135
152
  };
136
153
  };
137
154
  const resourceMimeType = (resource) => resource.mimeType || responseMimeType(resource.response) || undefined;
155
+ /**
156
+ * Derive the "page" path from the original document path.
157
+ * `/index.html` → `/_page.html`, `/foo/bar.html` → `/foo/_bar.html`.
158
+ */
159
+ const toPagePath = (originalPath) => {
160
+ const lastSlash = originalPath.lastIndexOf("/");
161
+ if (lastSlash === -1) {
162
+ return `_${originalPath}`;
163
+ }
164
+ const dir = originalPath.slice(0, lastSlash + 1);
165
+ const filename = originalPath.slice(lastSlash + 1);
166
+ return `${dir}_${filename}`;
167
+ };
@@ -0,0 +1,7 @@
1
+ import type { FileTree } from "../core/file-tree.js";
2
+ import type { ReplaceElementsConfig } from "../types.js";
3
+ export declare const applyReplaceElementsToFileTree: (input: {
4
+ files: FileTree;
5
+ replaceElements: ReplaceElementsConfig;
6
+ entryUrl: string;
7
+ }) => Promise<FileTree>;
@@ -0,0 +1,63 @@
1
+ import * as cheerio from "cheerio";
2
+ import { applyReplaceElements } from "../replace-elements.js";
3
+ import { streamToUint8Array } from "../utils/streams.js";
4
+ import { decodeUtf8 } from "../utils.js";
5
+ const isHtmlFile = (path) => path.endsWith(".html") || path.endsWith(".htm");
6
+ const readFileSource = async (file, fileTree) => {
7
+ const source = file.source;
8
+ if (source.kind === "bytes") {
9
+ return source.data;
10
+ }
11
+ if (source.kind === "text") {
12
+ return new TextEncoder().encode(source.text);
13
+ }
14
+ if (source.kind === "content-ref" && fileTree.content) {
15
+ const stream = await fileTree.content.open(source.ref);
16
+ return streamToUint8Array(stream);
17
+ }
18
+ return undefined;
19
+ };
20
+ const processHtmlFile = async (file, fileTree, replaceElements, entryUrl) => {
21
+ const bytes = await readFileSource(file, fileTree);
22
+ if (!bytes) {
23
+ return file;
24
+ }
25
+ const decoded = decodeUtf8(bytes);
26
+ if (typeof decoded === "undefined") {
27
+ return file;
28
+ }
29
+ const $ = cheerio.load(decoded);
30
+ await applyReplaceElements({
31
+ $,
32
+ entryUrl,
33
+ url: entryUrl,
34
+ replaceElements,
35
+ isEntryDocument: true
36
+ });
37
+ const updatedHtml = $.html();
38
+ const updatedBytes = new TextEncoder().encode(updatedHtml);
39
+ return {
40
+ ...file,
41
+ source: { kind: "bytes", data: updatedBytes }
42
+ };
43
+ };
44
+ const processDirectory = async (dir, fileTree, replaceElements, entryUrl) => {
45
+ const updatedEntries = [];
46
+ for (const entry of dir.entries) {
47
+ if (entry.kind === "file" && isHtmlFile(entry.path)) {
48
+ updatedEntries.push(await processHtmlFile(entry, fileTree, replaceElements, entryUrl));
49
+ continue;
50
+ }
51
+ if (entry.kind === "directory") {
52
+ updatedEntries.push(await processDirectory(entry, fileTree, replaceElements, entryUrl));
53
+ continue;
54
+ }
55
+ updatedEntries.push(entry);
56
+ }
57
+ return { ...dir, entries: updatedEntries };
58
+ };
59
+ export const applyReplaceElementsToFileTree = async (input) => {
60
+ const { files, replaceElements, entryUrl } = input;
61
+ const updatedRoot = await processDirectory(files.root, files, replaceElements, entryUrl);
62
+ return { ...files, root: updatedRoot };
63
+ };
@@ -55,6 +55,14 @@ export interface UnitRuntime {
55
55
  hasPublisher(t: ChannelToken<unknown>): boolean;
56
56
  readonly elements: ElementPatchRegistry;
57
57
  defer(promise: DeferredHandle): void;
58
+ /**
59
+ * Emit a log message from the currently executing unit.
60
+ *
61
+ * The message is published on the well-known PROGRESS channel as a
62
+ * `unit:log` event so external consumers (e.g. `PagePocket.on("unit:log", …)`)
63
+ * can observe it.
64
+ */
65
+ log(message: string, data?: unknown): void;
58
66
  }
59
67
  export interface PluginHost {
60
68
  readonly entry: EntryInfo;
@@ -66,7 +74,7 @@ export interface PluginHost {
66
74
  }
67
75
  export declare abstract class Unit {
68
76
  abstract readonly id: string;
69
- abstract readonly kind: string;
77
+ abstract readonly description: string;
70
78
  abstract run(ctx: UnitContext, rt: UnitRuntime): Promise<void | UnitPatch>;
71
79
  merge(returnValue: UnitPatch, pluginContributedValue?: UnitPatch): UnitPatch;
72
80
  }
@@ -0,0 +1,28 @@
1
+ import { Unit, type UnitPatch } from "./contracts-bridge.js";
2
+ /**
3
+ * Abstract base class for units whose `run()` produces a `FileTree`.
4
+ *
5
+ * Provides a default `merge` implementation that deep-merges the `files`
6
+ * property when both the unit return value and the plugin-contributed value
7
+ * contain a valid `FileTree`. All other properties are shallow-spread
8
+ * (plugin wins on conflict), matching the base `Unit.merge` behaviour.
9
+ *
10
+ * Subclasses only need to implement `id` and `run()`.
11
+ *
12
+ * Usage:
13
+ * ```ts
14
+ * import { FileTreeUnit } from "@pagepocket/lib";
15
+ *
16
+ * export class MyUnit extends FileTreeUnit {
17
+ * readonly id = "my";
18
+ *
19
+ * async run(ctx, rt) {
20
+ * const files = buildFiles();
21
+ * return { files };
22
+ * }
23
+ * }
24
+ * ```
25
+ */
26
+ export declare abstract class FileTreeUnit extends Unit {
27
+ merge(returnValue: UnitPatch, pluginContributedValue?: UnitPatch): UnitPatch;
28
+ }
@@ -0,0 +1,53 @@
1
+ import { mergeFileTrees } from "../core/file-tree-merge.js";
2
+ import { Unit } from "./contracts-bridge.js";
3
+ /**
4
+ * Abstract base class for units whose `run()` produces a `FileTree`.
5
+ *
6
+ * Provides a default `merge` implementation that deep-merges the `files`
7
+ * property when both the unit return value and the plugin-contributed value
8
+ * contain a valid `FileTree`. All other properties are shallow-spread
9
+ * (plugin wins on conflict), matching the base `Unit.merge` behaviour.
10
+ *
11
+ * Subclasses only need to implement `id` and `run()`.
12
+ *
13
+ * Usage:
14
+ * ```ts
15
+ * import { FileTreeUnit } from "@pagepocket/lib";
16
+ *
17
+ * export class MyUnit extends FileTreeUnit {
18
+ * readonly id = "my";
19
+ *
20
+ * async run(ctx, rt) {
21
+ * const files = buildFiles();
22
+ * return { files };
23
+ * }
24
+ * }
25
+ * ```
26
+ */
27
+ export class FileTreeUnit extends Unit {
28
+ merge(returnValue, pluginContributedValue = {}) {
29
+ const mergedValue = { ...returnValue, ...pluginContributedValue };
30
+ const returnFiles = returnValue.files;
31
+ const pluginFiles = pluginContributedValue.files;
32
+ if (!isFileTree(returnFiles) || !isFileTree(pluginFiles)) {
33
+ return mergedValue;
34
+ }
35
+ return { ...mergedValue, files: mergeFileTrees(returnFiles, pluginFiles) };
36
+ }
37
+ }
38
+ const isFileTree = (value) => {
39
+ if (!value || typeof value !== "object") {
40
+ return false;
41
+ }
42
+ if (!("root" in value)) {
43
+ return false;
44
+ }
45
+ const root = value.root;
46
+ if (!root || typeof root !== "object") {
47
+ return false;
48
+ }
49
+ const rootRecord = root;
50
+ return (rootRecord.kind === "directory" &&
51
+ typeof rootRecord.path === "string" &&
52
+ Array.isArray(rootRecord.entries));
53
+ };
@@ -1,4 +1,7 @@
1
1
  export type { CaptureOptions, EntryInfo } from "./types.js";
2
2
  export type { CaptureResult, Plugin, PluginHost, UnitRuntime } from "./contracts-bridge.js";
3
3
  export { Unit } from "./contracts-bridge.js";
4
+ export { FileTreeUnit } from "./file-tree-unit.js";
5
+ export { SnapshotUnit } from "./snapshot-unit.js";
4
6
  export { runCapture } from "./runner.js";
7
+ export type { ProgressListener } from "./runner.js";
@@ -1,2 +1,4 @@
1
1
  export { Unit } from "./contracts-bridge.js";
2
+ export { FileTreeUnit } from "./file-tree-unit.js";
3
+ export { SnapshotUnit } from "./snapshot-unit.js";
2
4
  export { runCapture } from "./runner.js";
@@ -1,4 +1,4 @@
1
- import type { ChannelToken, ReplaceElementsConfig } from "@pagepocket/contracts";
1
+ import { type ChannelToken, type ReplaceElementsConfig } from "@pagepocket/contracts";
2
2
  import { type ElementPatchRegistry, type UnitContext, type UnitPatch, type UnitRuntime } from "../contracts-bridge.js";
3
3
  import type { CaptureOptions, EntryInfo, PagePocketOptions } from "../types.js";
4
4
  declare class ElementPatchRegistryImpl implements ElementPatchRegistry {
@@ -19,17 +19,20 @@ export declare class RuntimeImpl implements UnitRuntime {
19
19
  readonly pocketOptions: PagePocketOptions;
20
20
  private channels;
21
21
  private deferred;
22
+ private currentUnitId;
22
23
  readonly elements: ElementPatchRegistryImpl;
23
24
  constructor(input: {
24
25
  entry: EntryInfo;
25
26
  options: CaptureOptions;
26
27
  pocketOptions: PagePocketOptions;
27
28
  });
29
+ _setCurrentUnitId(unitId: string): void;
30
+ log(message: string, data?: unknown): void;
28
31
  publish<T>(channelToken: ChannelToken<T>, value: T): void;
29
32
  subscribe<T>(channelToken: ChannelToken<T>): AsyncIterable<T>;
30
33
  hasPublisher(channelToken: ChannelToken<unknown>): boolean;
31
34
  defer(promise: Promise<unknown>): void;
32
- _ensureChannel(channelToken: ChannelToken<unknown>): void;
35
+ _ensureChannel<T>(channelToken: ChannelToken<T>): void;
33
36
  _closeAllChannels(): Promise<void>;
34
37
  _awaitDeferred(): Promise<void>;
35
38
  }
@@ -1,3 +1,4 @@
1
+ import { PROGRESS } from "@pagepocket/contracts";
1
2
  import { TERMINAL_RESULT_KEY } from "../contracts-bridge.js";
2
3
  import { AsyncQueue, emptyAsyncIterable } from "./async-queue.js";
3
4
  import { DeferredTracker } from "./deferred-tracker.js";
@@ -30,11 +31,24 @@ export class RuntimeImpl {
30
31
  constructor(input) {
31
32
  this.channels = new Map();
32
33
  this.deferred = new DeferredTracker();
34
+ this.currentUnitId = "";
33
35
  this.elements = new ElementPatchRegistryImpl();
34
36
  this.entry = input.entry;
35
37
  this.options = input.options;
36
38
  this.pocketOptions = input.pocketOptions;
37
39
  }
40
+ _setCurrentUnitId(unitId) {
41
+ this.currentUnitId = unitId;
42
+ }
43
+ log(message, data) {
44
+ const event = {
45
+ type: "unit:log",
46
+ unitId: this.currentUnitId,
47
+ message,
48
+ ...(data !== undefined ? { data } : {})
49
+ };
50
+ this.publish(PROGRESS, event);
51
+ }
38
52
  publish(channelToken, value) {
39
53
  const state = this.channels.get(channelToken.id);
40
54
  if (!state || state.closed) {
@@ -1,6 +1,7 @@
1
- import type { ChannelToken } from "@pagepocket/contracts";
1
+ import { type ChannelToken, type ProgressEvent } from "@pagepocket/contracts";
2
2
  import { type CaptureResult, type Plugin, type Unit } from "./contracts-bridge.js";
3
3
  import type { CaptureOptions, EntryInfo, PagePocketOptions } from "./types.js";
4
+ export type ProgressListener = (event: ProgressEvent) => void;
4
5
  export declare const runCapture: (input: {
5
6
  entry: EntryInfo;
6
7
  pocketOptions: PagePocketOptions;
@@ -8,4 +9,5 @@ export declare const runCapture: (input: {
8
9
  units: Unit[];
9
10
  plugins?: Plugin[];
10
11
  declaredChannels?: ChannelToken<unknown>[];
12
+ onProgress?: ProgressListener;
11
13
  }) => Promise<CaptureResult>;
@@ -1,3 +1,5 @@
1
+ import { PROGRESS } from "@pagepocket/contracts";
2
+ import { debugLog } from "../core/debug.js";
1
3
  import { TERMINAL_RESULT_KEY } from "./contracts-bridge.js";
2
4
  import { mergePatchIntoFreshContext, RuntimeImpl } from "./internal/runtime.js";
3
5
  export const runCapture = async (input) => {
@@ -9,6 +11,16 @@ export const runCapture = async (input) => {
9
11
  for (const channel of input.declaredChannels ?? []) {
10
12
  rt._ensureChannel(channel);
11
13
  }
14
+ rt._ensureChannel(PROGRESS);
15
+ if (input.onProgress) {
16
+ const listener = input.onProgress;
17
+ const progressTask = (async () => {
18
+ for await (const event of rt.subscribe(PROGRESS)) {
19
+ listener(event);
20
+ }
21
+ })();
22
+ rt.defer(progressTask);
23
+ }
12
24
  const pluginHost = {
13
25
  entry: rt.entry,
14
26
  options: rt.options,
@@ -27,11 +39,24 @@ export const runCapture = async (input) => {
27
39
  const mergePatch = (_ctx, patch) => mergePatchIntoFreshContext(patch);
28
40
  let ctx = { value: {} };
29
41
  let result;
42
+ const unitErrors = [];
43
+ const totalUnits = input.units.length;
30
44
  try {
31
- for (const unit of input.units) {
45
+ for (let i = 0; i < input.units.length; i++) {
32
46
  if (result) {
33
47
  break;
34
48
  }
49
+ const unit = input.units[i];
50
+ const unitIndex = i;
51
+ rt._setCurrentUnitId(unit.id);
52
+ rt.publish(PROGRESS, {
53
+ type: "unit:start",
54
+ unitId: unit.id,
55
+ unitDescription: unit.description,
56
+ index: unitIndex,
57
+ total: totalUnits
58
+ });
59
+ const unitStartTime = Date.now();
35
60
  const baseCtx = ctx;
36
61
  const boundPlugins = (input.plugins ?? []).filter((plugin) => {
37
62
  const unitId = plugin.constructor?.unitId;
@@ -62,13 +87,45 @@ export const runCapture = async (input) => {
62
87
  }
63
88
  }
64
89
  if (result) {
90
+ rt.publish(PROGRESS, {
91
+ type: "unit:end",
92
+ unitId: unit.id,
93
+ unitDescription: unit.description,
94
+ index: unitIndex,
95
+ total: totalUnits,
96
+ durationMs: Date.now() - unitStartTime
97
+ });
65
98
  break;
66
99
  }
67
- const out = (await unit.run(baseCtx, rt)) ?? {};
100
+ let out;
101
+ try {
102
+ out = (await unit.run(baseCtx, rt)) ?? {};
103
+ }
104
+ catch (err) {
105
+ debugLog(`[runner] unit "${unit.id}" threw:`, err);
106
+ unitErrors.push({ unitId: unit.id, error: err });
107
+ rt.publish(PROGRESS, {
108
+ type: "unit:end",
109
+ unitId: unit.id,
110
+ unitDescription: unit.description,
111
+ index: unitIndex,
112
+ total: totalUnits,
113
+ durationMs: Date.now() - unitStartTime
114
+ });
115
+ continue;
116
+ }
68
117
  const unitReturnValue = out && typeof out === "object" ? out : {};
69
118
  const merged = unit.merge(unitReturnValue, pluginContributedValue);
70
119
  const mergedPatch = merged && typeof merged === "object" ? merged : {};
71
120
  ctx = { value: mergePatch({ value: {} }, mergedPatch).value };
121
+ rt.publish(PROGRESS, {
122
+ type: "unit:end",
123
+ unitId: unit.id,
124
+ unitDescription: unit.description,
125
+ index: unitIndex,
126
+ total: totalUnits,
127
+ durationMs: Date.now() - unitStartTime
128
+ });
72
129
  const terminal = mergedPatch[TERMINAL_RESULT_KEY];
73
130
  if (terminal) {
74
131
  result = terminal;
@@ -81,6 +138,19 @@ export const runCapture = async (input) => {
81
138
  await rt._awaitDeferred();
82
139
  }
83
140
  if (!result) {
141
+ if (unitErrors.length > 0) {
142
+ const first = unitErrors[0];
143
+ const rootCause = first.error instanceof Error ? first.error : new Error(String(first.error));
144
+ if (unitErrors.length > 1) {
145
+ const summary = unitErrors
146
+ .map((e) => `${e.unitId}: ${e.error instanceof Error ? e.error.message : String(e.error)}`)
147
+ .join("; ");
148
+ const wrapped = new Error(`Pipeline failed: ${unitErrors.length} unit(s) threw errors. ${summary}`);
149
+ wrapped.cause = rootCause;
150
+ throw wrapped;
151
+ }
152
+ throw rootCause;
153
+ }
84
154
  throw new Error("No terminal result. Ensure a terminal unit returns a CaptureResult or calls finish().");
85
155
  }
86
156
  return result;
@@ -0,0 +1,31 @@
1
+ import type { UnitContext, UnitPatch, UnitRuntime } from "./contracts-bridge.js";
2
+ import { FileTreeUnit } from "./file-tree-unit.js";
3
+ /**
4
+ * Base class for units that produce the primary snapshot FileTree.
5
+ *
6
+ * `run()` delegates to `build()` and automatically:
7
+ * - Injects `snapshotType` into the returned patch.
8
+ * - Compiles all plugin-contributed element-replacement rules
9
+ * (`rt.elements.compile()`) and applies them to every HTML file
10
+ * in the returned FileTree. Subclasses never need to call
11
+ * `rt.elements.compile()` themselves.
12
+ *
13
+ * Subclasses implement `id`, `snapshotType`, and `build()`.
14
+ *
15
+ * ```ts
16
+ * export class MySnapshotUnit extends SnapshotUnit {
17
+ * readonly id = "mySnapshot";
18
+ * readonly snapshotType = "my-type";
19
+ *
20
+ * async build(ctx, rt) {
21
+ * return { files: buildFiles(), html: ctx.value.html };
22
+ * }
23
+ * }
24
+ * ```
25
+ */
26
+ export declare abstract class SnapshotUnit extends FileTreeUnit {
27
+ /** Identifier for the kind of snapshot this unit produces (e.g. "full", "main-content"). */
28
+ abstract readonly snapshotType: string;
29
+ abstract build(ctx: UnitContext, rt: UnitRuntime): Promise<void | UnitPatch>;
30
+ run(ctx: UnitContext, rt: UnitRuntime): Promise<void | UnitPatch>;
31
+ }