@pagepocket/lib 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/pagepocket.d.ts +17 -0
- package/dist/core/pagepocket.js +25 -1
- package/dist/index.d.ts +1 -0
- package/dist/units/apply-replace-elements-to-file-tree.d.ts +7 -0
- package/dist/units/apply-replace-elements-to-file-tree.js +63 -0
- package/dist/units/contracts-bridge.d.ts +9 -1
- package/dist/units/file-tree-unit.d.ts +28 -0
- package/dist/units/file-tree-unit.js +53 -0
- package/dist/units/index.d.ts +3 -0
- package/dist/units/index.js +2 -0
- package/dist/units/internal/runtime.d.ts +5 -2
- package/dist/units/internal/runtime.js +14 -0
- package/dist/units/runner.d.ts +3 -1
- package/dist/units/runner.js +57 -2
- package/dist/units/snapshot-unit.d.ts +31 -0
- package/dist/units/snapshot-unit.js +58 -0
- package/package.json +4 -4
|
package/dist/core/pagepocket.d.ts
CHANGED
|
@@ -13,12 +13,27 @@ export type CaptureTarget = {
|
|
|
13
13
|
baseUrl: string;
|
|
14
14
|
url?: string;
|
|
15
15
|
};
|
|
16
|
+
import type { ProgressEvent } from "@pagepocket/contracts";
|
|
16
17
|
import type { PagePocketOptions } from "../types.js";
|
|
17
18
|
import type { CaptureResult as PagePocketCaptureResult, Plugin as V3Plugin, Unit as V3Unit } from "../units/contracts-bridge.js";
|
|
18
19
|
import type { CaptureOptions } from "../units/types.js";
|
|
20
|
+
export type CaptureEventMap = {
|
|
21
|
+
"unit:start": Extract<ProgressEvent, {
|
|
22
|
+
type: "unit:start";
|
|
23
|
+
}>;
|
|
24
|
+
"unit:end": Extract<ProgressEvent, {
|
|
25
|
+
type: "unit:end";
|
|
26
|
+
}>;
|
|
27
|
+
"unit:log": Extract<ProgressEvent, {
|
|
28
|
+
type: "unit:log";
|
|
29
|
+
}>;
|
|
30
|
+
};
|
|
31
|
+
type CaptureEventName = keyof CaptureEventMap;
|
|
32
|
+
type CaptureEventListener<K extends CaptureEventName> = (event: CaptureEventMap[K]) => void;
|
|
19
33
|
export declare class PagePocket {
|
|
20
34
|
private target;
|
|
21
35
|
private options;
|
|
36
|
+
private listeners;
|
|
22
37
|
private constructor();
|
|
23
38
|
static fromURL(url: string, options?: PagePocketOptions): PagePocket;
|
|
24
39
|
static fromPuppeteerPage(page: unknown, options?: PagePocketOptions): PagePocket;
|
|
@@ -32,8 +47,10 @@ export declare class PagePocket {
|
|
|
32
47
|
url?: string;
|
|
33
48
|
serialize?: (doc: unknown) => string;
|
|
34
49
|
} & PagePocketOptions): PagePocket;
|
|
50
|
+
on<K extends CaptureEventName>(event: K, listener: CaptureEventListener<K>): this;
|
|
35
51
|
capture(options: {
|
|
36
52
|
units: V3Unit[];
|
|
37
53
|
plugins?: V3Plugin[];
|
|
38
54
|
} & CaptureOptions): Promise<PagePocketCaptureResult>;
|
|
39
55
|
}
|
|
56
|
+
export {};
|
package/dist/core/pagepocket.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { runCapture } from "../units/index.js";
|
|
2
2
|
export class PagePocket {
|
|
3
3
|
constructor(target, options) {
|
|
4
|
+
this.listeners = new Map();
|
|
4
5
|
this.target = target;
|
|
5
6
|
this.options = options ?? {};
|
|
6
7
|
}
|
|
@@ -32,6 +33,16 @@ export class PagePocket {
|
|
|
32
33
|
const { baseUrl, url, serialize: _serialize, ...rest } = options;
|
|
33
34
|
return new PagePocket({ kind: "html", htmlString, baseUrl, ...(url ? { url } : {}) }, rest);
|
|
34
35
|
}
|
|
36
|
+
on(event, listener) {
|
|
37
|
+
const existing = this.listeners.get(event);
|
|
38
|
+
if (existing) {
|
|
39
|
+
existing.push(listener);
|
|
40
|
+
}
|
|
41
|
+
else {
|
|
42
|
+
this.listeners.set(event, [listener]);
|
|
43
|
+
}
|
|
44
|
+
return this;
|
|
45
|
+
}
|
|
35
46
|
async capture(options) {
|
|
36
47
|
const entry = this.target.kind === "url"
|
|
37
48
|
? { kind: "url", url: this.target.url }
|
|
@@ -45,12 +56,25 @@ export class PagePocket {
|
|
|
45
56
|
htmlString: this.target.htmlString,
|
|
46
57
|
...(this.target.url ? { url: this.target.url } : {})
|
|
47
58
|
};
|
|
59
|
+
const hasListeners = this.listeners.size > 0;
|
|
60
|
+
const onProgress = hasListeners
|
|
61
|
+
? (event) => {
|
|
62
|
+
const eventListeners = this.listeners.get(event.type);
|
|
63
|
+
if (!eventListeners) {
|
|
64
|
+
return;
|
|
65
|
+
}
|
|
66
|
+
for (const listener of eventListeners) {
|
|
67
|
+
listener(event);
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
: undefined;
|
|
48
71
|
const result = await runCapture({
|
|
49
72
|
entry,
|
|
50
73
|
pocketOptions: this.options,
|
|
51
74
|
options,
|
|
52
75
|
units: options.units,
|
|
53
|
-
plugins: options.plugins
|
|
76
|
+
plugins: options.plugins,
|
|
77
|
+
onProgress
|
|
54
78
|
});
|
|
55
79
|
return result;
|
|
56
80
|
}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
export { PagePocket } from "./core/pagepocket.js";
|
|
2
|
+
export type { CaptureEventMap } from "./core/pagepocket.js";
|
|
2
3
|
export * from "./units/index.js";
|
|
3
4
|
export type { UnitContext, UnitContributeContext, UnitPatch, UnitRuntime } from "./units/contracts-bridge.js";
|
|
4
5
|
export { TERMINAL_RESULT_KEY } from "./units/contracts-bridge.js";
|
|
package/dist/units/apply-replace-elements-to-file-tree.d.ts
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { FileTree } from "../core/file-tree.js";
|
|
2
|
+
import type { ReplaceElementsConfig } from "../types.js";
|
|
3
|
+
export declare const applyReplaceElementsToFileTree: (input: {
|
|
4
|
+
files: FileTree;
|
|
5
|
+
replaceElements: ReplaceElementsConfig;
|
|
6
|
+
entryUrl: string;
|
|
7
|
+
}) => Promise<FileTree>;
|
|
package/dist/units/apply-replace-elements-to-file-tree.js
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import * as cheerio from "cheerio";
|
|
2
|
+
import { applyReplaceElements } from "../replace-elements.js";
|
|
3
|
+
import { streamToUint8Array } from "../utils/streams.js";
|
|
4
|
+
import { decodeUtf8 } from "../utils.js";
|
|
5
|
+
/**
 * Returns true when `path` names an HTML document, judged by its file extension.
 *
 * The comparison is case-insensitive so that files such as `INDEX.HTML` or
 * `page.Htm` are also selected for element replacement.
 */
const isHtmlFile = (path) => {
    const lowered = path.toLowerCase();
    return lowered.endsWith(".html") || lowered.endsWith(".htm");
};
|
|
6
|
+
const readFileSource = async (file, fileTree) => {
|
|
7
|
+
const source = file.source;
|
|
8
|
+
if (source.kind === "bytes") {
|
|
9
|
+
return source.data;
|
|
10
|
+
}
|
|
11
|
+
if (source.kind === "text") {
|
|
12
|
+
return new TextEncoder().encode(source.text);
|
|
13
|
+
}
|
|
14
|
+
if (source.kind === "content-ref" && fileTree.content) {
|
|
15
|
+
const stream = await fileTree.content.open(source.ref);
|
|
16
|
+
return streamToUint8Array(stream);
|
|
17
|
+
}
|
|
18
|
+
return undefined;
|
|
19
|
+
};
|
|
20
|
+
const processHtmlFile = async (file, fileTree, replaceElements, entryUrl) => {
|
|
21
|
+
const bytes = await readFileSource(file, fileTree);
|
|
22
|
+
if (!bytes) {
|
|
23
|
+
return file;
|
|
24
|
+
}
|
|
25
|
+
const decoded = decodeUtf8(bytes);
|
|
26
|
+
if (typeof decoded === "undefined") {
|
|
27
|
+
return file;
|
|
28
|
+
}
|
|
29
|
+
const $ = cheerio.load(decoded);
|
|
30
|
+
await applyReplaceElements({
|
|
31
|
+
$,
|
|
32
|
+
entryUrl,
|
|
33
|
+
url: entryUrl,
|
|
34
|
+
replaceElements,
|
|
35
|
+
isEntryDocument: true
|
|
36
|
+
});
|
|
37
|
+
const updatedHtml = $.html();
|
|
38
|
+
const updatedBytes = new TextEncoder().encode(updatedHtml);
|
|
39
|
+
return {
|
|
40
|
+
...file,
|
|
41
|
+
source: { kind: "bytes", data: updatedBytes }
|
|
42
|
+
};
|
|
43
|
+
};
|
|
44
|
+
const processDirectory = async (dir, fileTree, replaceElements, entryUrl) => {
|
|
45
|
+
const updatedEntries = [];
|
|
46
|
+
for (const entry of dir.entries) {
|
|
47
|
+
if (entry.kind === "file" && isHtmlFile(entry.path)) {
|
|
48
|
+
updatedEntries.push(await processHtmlFile(entry, fileTree, replaceElements, entryUrl));
|
|
49
|
+
continue;
|
|
50
|
+
}
|
|
51
|
+
if (entry.kind === "directory") {
|
|
52
|
+
updatedEntries.push(await processDirectory(entry, fileTree, replaceElements, entryUrl));
|
|
53
|
+
continue;
|
|
54
|
+
}
|
|
55
|
+
updatedEntries.push(entry);
|
|
56
|
+
}
|
|
57
|
+
return { ...dir, entries: updatedEntries };
|
|
58
|
+
};
|
|
59
|
+
export const applyReplaceElementsToFileTree = async (input) => {
|
|
60
|
+
const { files, replaceElements, entryUrl } = input;
|
|
61
|
+
const updatedRoot = await processDirectory(files.root, files, replaceElements, entryUrl);
|
|
62
|
+
return { ...files, root: updatedRoot };
|
|
63
|
+
};
|
|
package/dist/units/contracts-bridge.d.ts
CHANGED
|
@@ -55,6 +55,14 @@ export interface UnitRuntime {
|
|
|
55
55
|
hasPublisher(t: ChannelToken<unknown>): boolean;
|
|
56
56
|
readonly elements: ElementPatchRegistry;
|
|
57
57
|
defer(promise: DeferredHandle): void;
|
|
58
|
+
/**
|
|
59
|
+
* Emit a log message from the currently executing unit.
|
|
60
|
+
*
|
|
61
|
+
* The message is published on the well-known PROGRESS channel as a
|
|
62
|
+
* `unit:log` event so external consumers (e.g. `PagePocket.on("unit:log", …)`)
|
|
63
|
+
* can observe it.
|
|
64
|
+
*/
|
|
65
|
+
log(message: string, data?: unknown): void;
|
|
58
66
|
}
|
|
59
67
|
export interface PluginHost {
|
|
60
68
|
readonly entry: EntryInfo;
|
|
@@ -66,7 +74,7 @@ export interface PluginHost {
|
|
|
66
74
|
}
|
|
67
75
|
export declare abstract class Unit {
|
|
68
76
|
abstract readonly id: string;
|
|
69
|
-
abstract readonly
|
|
77
|
+
abstract readonly description: string;
|
|
70
78
|
abstract run(ctx: UnitContext, rt: UnitRuntime): Promise<void | UnitPatch>;
|
|
71
79
|
merge(returnValue: UnitPatch, pluginContributedValue?: UnitPatch): UnitPatch;
|
|
72
80
|
}
|
|
package/dist/units/file-tree-unit.d.ts
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { Unit, type UnitPatch } from "./contracts-bridge.js";
|
|
2
|
+
/**
|
|
3
|
+
* Abstract base class for units whose `run()` produces a `FileTree`.
|
|
4
|
+
*
|
|
5
|
+
* Provides a default `merge` implementation that deep-merges the `files`
|
|
6
|
+
* property when both the unit return value and the plugin-contributed value
|
|
7
|
+
* contain a valid `FileTree`. All other properties are shallow-spread
|
|
8
|
+
* (plugin wins on conflict), matching the base `Unit.merge` behaviour.
|
|
9
|
+
*
|
|
10
|
+
* Subclasses only need to implement `id`, `description`, and `run()`.
|
|
11
|
+
*
|
|
12
|
+
* Usage:
|
|
13
|
+
* ```ts
|
|
14
|
+
* import { FileTreeUnit } from "@pagepocket/lib";
|
|
15
|
+
*
|
|
16
|
+
* export class MyUnit extends FileTreeUnit {
|
|
17
|
+
* readonly id = "my";
|
|
18
|
+
*
|
|
19
|
+
* async run(ctx, rt) {
|
|
20
|
+
* const files = buildFiles();
|
|
21
|
+
* return { files };
|
|
22
|
+
* }
|
|
23
|
+
* }
|
|
24
|
+
* ```
|
|
25
|
+
*/
|
|
26
|
+
export declare abstract class FileTreeUnit extends Unit {
|
|
27
|
+
merge(returnValue: UnitPatch, pluginContributedValue?: UnitPatch): UnitPatch;
|
|
28
|
+
}
|
|
package/dist/units/file-tree-unit.js
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import { mergeFileTrees } from "../core/file-tree-merge.js";
|
|
2
|
+
import { Unit } from "./contracts-bridge.js";
|
|
3
|
+
/**
|
|
4
|
+
* Abstract base class for units whose `run()` produces a `FileTree`.
|
|
5
|
+
*
|
|
6
|
+
* Provides a default `merge` implementation that deep-merges the `files`
|
|
7
|
+
* property when both the unit return value and the plugin-contributed value
|
|
8
|
+
* contain a valid `FileTree`. All other properties are shallow-spread
|
|
9
|
+
* (plugin wins on conflict), matching the base `Unit.merge` behaviour.
|
|
10
|
+
*
|
|
11
|
+
* Subclasses only need to implement `id`, `description`, and `run()`.
|
|
12
|
+
*
|
|
13
|
+
* Usage:
|
|
14
|
+
* ```ts
|
|
15
|
+
* import { FileTreeUnit } from "@pagepocket/lib";
|
|
16
|
+
*
|
|
17
|
+
* export class MyUnit extends FileTreeUnit {
|
|
18
|
+
* readonly id = "my";
|
|
19
|
+
*
|
|
20
|
+
* async run(ctx, rt) {
|
|
21
|
+
* const files = buildFiles();
|
|
22
|
+
* return { files };
|
|
23
|
+
* }
|
|
24
|
+
* }
|
|
25
|
+
* ```
|
|
26
|
+
*/
|
|
27
|
+
export class FileTreeUnit extends Unit {
|
|
28
|
+
merge(returnValue, pluginContributedValue = {}) {
|
|
29
|
+
const mergedValue = { ...returnValue, ...pluginContributedValue };
|
|
30
|
+
const returnFiles = returnValue.files;
|
|
31
|
+
const pluginFiles = pluginContributedValue.files;
|
|
32
|
+
if (!isFileTree(returnFiles) || !isFileTree(pluginFiles)) {
|
|
33
|
+
return mergedValue;
|
|
34
|
+
}
|
|
35
|
+
return { ...mergedValue, files: mergeFileTrees(returnFiles, pluginFiles) };
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
const isFileTree = (value) => {
|
|
39
|
+
if (!value || typeof value !== "object") {
|
|
40
|
+
return false;
|
|
41
|
+
}
|
|
42
|
+
if (!("root" in value)) {
|
|
43
|
+
return false;
|
|
44
|
+
}
|
|
45
|
+
const root = value.root;
|
|
46
|
+
if (!root || typeof root !== "object") {
|
|
47
|
+
return false;
|
|
48
|
+
}
|
|
49
|
+
const rootRecord = root;
|
|
50
|
+
return (rootRecord.kind === "directory" &&
|
|
51
|
+
typeof rootRecord.path === "string" &&
|
|
52
|
+
Array.isArray(rootRecord.entries));
|
|
53
|
+
};
|
package/dist/units/index.d.ts
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
export type { CaptureOptions, EntryInfo } from "./types.js";
|
|
2
2
|
export type { CaptureResult, Plugin, PluginHost, UnitRuntime } from "./contracts-bridge.js";
|
|
3
3
|
export { Unit } from "./contracts-bridge.js";
|
|
4
|
+
export { FileTreeUnit } from "./file-tree-unit.js";
|
|
5
|
+
export { SnapshotUnit } from "./snapshot-unit.js";
|
|
4
6
|
export { runCapture } from "./runner.js";
|
|
7
|
+
export type { ProgressListener } from "./runner.js";
|
package/dist/units/index.js
CHANGED
|
package/dist/units/internal/runtime.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type
|
|
1
|
+
import { type ChannelToken, type ReplaceElementsConfig } from "@pagepocket/contracts";
|
|
2
2
|
import { type ElementPatchRegistry, type UnitContext, type UnitPatch, type UnitRuntime } from "../contracts-bridge.js";
|
|
3
3
|
import type { CaptureOptions, EntryInfo, PagePocketOptions } from "../types.js";
|
|
4
4
|
declare class ElementPatchRegistryImpl implements ElementPatchRegistry {
|
|
@@ -19,17 +19,20 @@ export declare class RuntimeImpl implements UnitRuntime {
|
|
|
19
19
|
readonly pocketOptions: PagePocketOptions;
|
|
20
20
|
private channels;
|
|
21
21
|
private deferred;
|
|
22
|
+
private currentUnitId;
|
|
22
23
|
readonly elements: ElementPatchRegistryImpl;
|
|
23
24
|
constructor(input: {
|
|
24
25
|
entry: EntryInfo;
|
|
25
26
|
options: CaptureOptions;
|
|
26
27
|
pocketOptions: PagePocketOptions;
|
|
27
28
|
});
|
|
29
|
+
_setCurrentUnitId(unitId: string): void;
|
|
30
|
+
log(message: string, data?: unknown): void;
|
|
28
31
|
publish<T>(channelToken: ChannelToken<T>, value: T): void;
|
|
29
32
|
subscribe<T>(channelToken: ChannelToken<T>): AsyncIterable<T>;
|
|
30
33
|
hasPublisher(channelToken: ChannelToken<unknown>): boolean;
|
|
31
34
|
defer(promise: Promise<unknown>): void;
|
|
32
|
-
_ensureChannel(channelToken: ChannelToken<
|
|
35
|
+
_ensureChannel<T>(channelToken: ChannelToken<T>): void;
|
|
33
36
|
_closeAllChannels(): Promise<void>;
|
|
34
37
|
_awaitDeferred(): Promise<void>;
|
|
35
38
|
}
|
|
package/dist/units/internal/runtime.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { PROGRESS } from "@pagepocket/contracts";
|
|
1
2
|
import { TERMINAL_RESULT_KEY } from "../contracts-bridge.js";
|
|
2
3
|
import { AsyncQueue, emptyAsyncIterable } from "./async-queue.js";
|
|
3
4
|
import { DeferredTracker } from "./deferred-tracker.js";
|
|
@@ -30,11 +31,24 @@ export class RuntimeImpl {
|
|
|
30
31
|
constructor(input) {
|
|
31
32
|
this.channels = new Map();
|
|
32
33
|
this.deferred = new DeferredTracker();
|
|
34
|
+
this.currentUnitId = "";
|
|
33
35
|
this.elements = new ElementPatchRegistryImpl();
|
|
34
36
|
this.entry = input.entry;
|
|
35
37
|
this.options = input.options;
|
|
36
38
|
this.pocketOptions = input.pocketOptions;
|
|
37
39
|
}
|
|
40
|
+
_setCurrentUnitId(unitId) {
|
|
41
|
+
this.currentUnitId = unitId;
|
|
42
|
+
}
|
|
43
|
+
log(message, data) {
|
|
44
|
+
const event = {
|
|
45
|
+
type: "unit:log",
|
|
46
|
+
unitId: this.currentUnitId,
|
|
47
|
+
message,
|
|
48
|
+
...(data !== undefined ? { data } : {})
|
|
49
|
+
};
|
|
50
|
+
this.publish(PROGRESS, event);
|
|
51
|
+
}
|
|
38
52
|
publish(channelToken, value) {
|
|
39
53
|
const state = this.channels.get(channelToken.id);
|
|
40
54
|
if (!state || state.closed) {
|
package/dist/units/runner.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
import type
|
|
1
|
+
import { type ChannelToken, type ProgressEvent } from "@pagepocket/contracts";
|
|
2
2
|
import { type CaptureResult, type Plugin, type Unit } from "./contracts-bridge.js";
|
|
3
3
|
import type { CaptureOptions, EntryInfo, PagePocketOptions } from "./types.js";
|
|
4
|
+
export type ProgressListener = (event: ProgressEvent) => void;
|
|
4
5
|
export declare const runCapture: (input: {
|
|
5
6
|
entry: EntryInfo;
|
|
6
7
|
pocketOptions: PagePocketOptions;
|
|
@@ -8,4 +9,5 @@ export declare const runCapture: (input: {
|
|
|
8
9
|
units: Unit[];
|
|
9
10
|
plugins?: Plugin[];
|
|
10
11
|
declaredChannels?: ChannelToken<unknown>[];
|
|
12
|
+
onProgress?: ProgressListener;
|
|
11
13
|
}) => Promise<CaptureResult>;
|
package/dist/units/runner.js
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import { PROGRESS } from "@pagepocket/contracts";
|
|
2
|
+
import { debugLog } from "../core/debug.js";
|
|
1
3
|
import { TERMINAL_RESULT_KEY } from "./contracts-bridge.js";
|
|
2
4
|
import { mergePatchIntoFreshContext, RuntimeImpl } from "./internal/runtime.js";
|
|
3
5
|
export const runCapture = async (input) => {
|
|
@@ -9,6 +11,16 @@ export const runCapture = async (input) => {
|
|
|
9
11
|
for (const channel of input.declaredChannels ?? []) {
|
|
10
12
|
rt._ensureChannel(channel);
|
|
11
13
|
}
|
|
14
|
+
rt._ensureChannel(PROGRESS);
|
|
15
|
+
if (input.onProgress) {
|
|
16
|
+
const listener = input.onProgress;
|
|
17
|
+
const progressTask = (async () => {
|
|
18
|
+
for await (const event of rt.subscribe(PROGRESS)) {
|
|
19
|
+
listener(event);
|
|
20
|
+
}
|
|
21
|
+
})();
|
|
22
|
+
rt.defer(progressTask);
|
|
23
|
+
}
|
|
12
24
|
const pluginHost = {
|
|
13
25
|
entry: rt.entry,
|
|
14
26
|
options: rt.options,
|
|
@@ -27,11 +39,23 @@ export const runCapture = async (input) => {
|
|
|
27
39
|
const mergePatch = (_ctx, patch) => mergePatchIntoFreshContext(patch);
|
|
28
40
|
let ctx = { value: {} };
|
|
29
41
|
let result;
|
|
42
|
+
const totalUnits = input.units.length;
|
|
30
43
|
try {
|
|
31
|
-
for (
|
|
44
|
+
for (let i = 0; i < input.units.length; i++) {
|
|
32
45
|
if (result) {
|
|
33
46
|
break;
|
|
34
47
|
}
|
|
48
|
+
const unit = input.units[i];
|
|
49
|
+
const unitIndex = i;
|
|
50
|
+
rt._setCurrentUnitId(unit.id);
|
|
51
|
+
rt.publish(PROGRESS, {
|
|
52
|
+
type: "unit:start",
|
|
53
|
+
unitId: unit.id,
|
|
54
|
+
unitDescription: unit.description,
|
|
55
|
+
index: unitIndex,
|
|
56
|
+
total: totalUnits
|
|
57
|
+
});
|
|
58
|
+
const unitStartTime = Date.now();
|
|
35
59
|
const baseCtx = ctx;
|
|
36
60
|
const boundPlugins = (input.plugins ?? []).filter((plugin) => {
|
|
37
61
|
const unitId = plugin.constructor?.unitId;
|
|
@@ -62,13 +86,44 @@ export const runCapture = async (input) => {
|
|
|
62
86
|
}
|
|
63
87
|
}
|
|
64
88
|
if (result) {
|
|
89
|
+
rt.publish(PROGRESS, {
|
|
90
|
+
type: "unit:end",
|
|
91
|
+
unitId: unit.id,
|
|
92
|
+
unitDescription: unit.description,
|
|
93
|
+
index: unitIndex,
|
|
94
|
+
total: totalUnits,
|
|
95
|
+
durationMs: Date.now() - unitStartTime
|
|
96
|
+
});
|
|
65
97
|
break;
|
|
66
98
|
}
|
|
67
|
-
|
|
99
|
+
let out;
|
|
100
|
+
try {
|
|
101
|
+
out = (await unit.run(baseCtx, rt)) ?? {};
|
|
102
|
+
}
|
|
103
|
+
catch (err) {
|
|
104
|
+
debugLog(`[runner] unit "${unit.id}" threw:`, err);
|
|
105
|
+
rt.publish(PROGRESS, {
|
|
106
|
+
type: "unit:end",
|
|
107
|
+
unitId: unit.id,
|
|
108
|
+
unitDescription: unit.description,
|
|
109
|
+
index: unitIndex,
|
|
110
|
+
total: totalUnits,
|
|
111
|
+
durationMs: Date.now() - unitStartTime
|
|
112
|
+
});
|
|
113
|
+
continue;
|
|
114
|
+
}
|
|
68
115
|
const unitReturnValue = out && typeof out === "object" ? out : {};
|
|
69
116
|
const merged = unit.merge(unitReturnValue, pluginContributedValue);
|
|
70
117
|
const mergedPatch = merged && typeof merged === "object" ? merged : {};
|
|
71
118
|
ctx = { value: mergePatch({ value: {} }, mergedPatch).value };
|
|
119
|
+
rt.publish(PROGRESS, {
|
|
120
|
+
type: "unit:end",
|
|
121
|
+
unitId: unit.id,
|
|
122
|
+
unitDescription: unit.description,
|
|
123
|
+
index: unitIndex,
|
|
124
|
+
total: totalUnits,
|
|
125
|
+
durationMs: Date.now() - unitStartTime
|
|
126
|
+
});
|
|
72
127
|
const terminal = mergedPatch[TERMINAL_RESULT_KEY];
|
|
73
128
|
if (terminal) {
|
|
74
129
|
result = terminal;
|
|
package/dist/units/snapshot-unit.d.ts
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import type { UnitContext, UnitPatch, UnitRuntime } from "./contracts-bridge.js";
|
|
2
|
+
import { FileTreeUnit } from "./file-tree-unit.js";
|
|
3
|
+
/**
|
|
4
|
+
* Base class for units that produce the primary snapshot FileTree.
|
|
5
|
+
*
|
|
6
|
+
* `run()` delegates to `build()` and automatically:
|
|
7
|
+
* - Injects `snapshotType` into the returned patch.
|
|
8
|
+
* - Compiles all plugin-contributed element-replacement rules
|
|
9
|
+
* (`rt.elements.compile()`) and applies them to every HTML file
|
|
10
|
+
* in the returned FileTree. Subclasses never need to call
|
|
11
|
+
* `rt.elements.compile()` themselves.
|
|
12
|
+
*
|
|
13
|
+
* Subclasses implement `id`, `description`, `snapshotType`, and `build()`.
|
|
14
|
+
*
|
|
15
|
+
* ```ts
|
|
16
|
+
* export class MySnapshotUnit extends SnapshotUnit {
|
|
17
|
+
* readonly id = "mySnapshot";
|
|
18
|
+
* readonly snapshotType = "my-type";
|
|
19
|
+
*
|
|
20
|
+
* async build(ctx, rt) {
|
|
21
|
+
* return { files: buildFiles(), html: ctx.value.html };
|
|
22
|
+
* }
|
|
23
|
+
* }
|
|
24
|
+
* ```
|
|
25
|
+
*/
|
|
26
|
+
export declare abstract class SnapshotUnit extends FileTreeUnit {
|
|
27
|
+
/** Identifier for the kind of snapshot this unit produces (e.g. "full", "main-content"). */
|
|
28
|
+
abstract readonly snapshotType: string;
|
|
29
|
+
abstract build(ctx: UnitContext, rt: UnitRuntime): Promise<void | UnitPatch>;
|
|
30
|
+
run(ctx: UnitContext, rt: UnitRuntime): Promise<void | UnitPatch>;
|
|
31
|
+
}
|
|
package/dist/units/snapshot-unit.js
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import { applyReplaceElementsToFileTree } from "./apply-replace-elements-to-file-tree.js";
|
|
2
|
+
import { FileTreeUnit } from "./file-tree-unit.js";
|
|
3
|
+
/**
|
|
4
|
+
* Base class for units that produce the primary snapshot FileTree.
|
|
5
|
+
*
|
|
6
|
+
* `run()` delegates to `build()` and automatically:
|
|
7
|
+
* - Injects `snapshotType` into the returned patch.
|
|
8
|
+
* - Compiles all plugin-contributed element-replacement rules
|
|
9
|
+
* (`rt.elements.compile()`) and applies them to every HTML file
|
|
10
|
+
* in the returned FileTree. Subclasses never need to call
|
|
11
|
+
* `rt.elements.compile()` themselves.
|
|
12
|
+
*
|
|
13
|
+
* Subclasses implement `id`, `description`, `snapshotType`, and `build()`.
|
|
14
|
+
*
|
|
15
|
+
* ```ts
|
|
16
|
+
* export class MySnapshotUnit extends SnapshotUnit {
|
|
17
|
+
* readonly id = "mySnapshot";
|
|
18
|
+
* readonly snapshotType = "my-type";
|
|
19
|
+
*
|
|
20
|
+
* async build(ctx, rt) {
|
|
21
|
+
* return { files: buildFiles(), html: ctx.value.html };
|
|
22
|
+
* }
|
|
23
|
+
* }
|
|
24
|
+
* ```
|
|
25
|
+
*/
|
|
26
|
+
export class SnapshotUnit extends FileTreeUnit {
|
|
27
|
+
async run(ctx, rt) {
|
|
28
|
+
const patch = await this.build(ctx, rt);
|
|
29
|
+
if (!patch) {
|
|
30
|
+
return;
|
|
31
|
+
}
|
|
32
|
+
const files = patch.files;
|
|
33
|
+
const replaceElements = await rt.elements.compile();
|
|
34
|
+
if (files && replaceElements.length > 0) {
|
|
35
|
+
const entryUrl = resolveEntryUrl(rt);
|
|
36
|
+
const updatedFiles = await applyReplaceElementsToFileTree({
|
|
37
|
+
files,
|
|
38
|
+
replaceElements,
|
|
39
|
+
entryUrl
|
|
40
|
+
});
|
|
41
|
+
return { ...patch, files: updatedFiles, snapshotType: this.snapshotType };
|
|
42
|
+
}
|
|
43
|
+
return { ...patch, snapshotType: this.snapshotType };
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
const resolveEntryUrl = (rt) => {
|
|
47
|
+
const entry = rt.entry;
|
|
48
|
+
if (entry.kind === "url") {
|
|
49
|
+
return entry.url;
|
|
50
|
+
}
|
|
51
|
+
if (entry.kind === "html-string" || entry.kind === "document") {
|
|
52
|
+
return entry.url ?? entry.baseUrl;
|
|
53
|
+
}
|
|
54
|
+
if (entry.kind === "puppeteer-page" || entry.kind === "cdp-tab") {
|
|
55
|
+
return entry.url ?? "";
|
|
56
|
+
}
|
|
57
|
+
return "";
|
|
58
|
+
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pagepocket/lib",
|
|
3
|
-
"version": "0.12.0",
|
|
3
|
+
"version": "0.13.0",
|
|
4
4
|
"description": "Library for rewriting HTML snapshots and inlining local resources.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -20,9 +20,9 @@
|
|
|
20
20
|
"dependencies": {
|
|
21
21
|
"cheerio": "^1.0.0-rc.12",
|
|
22
22
|
"domhandler": "^5.0.3",
|
|
23
|
-
"@pagepocket/contracts": "0.
|
|
24
|
-
"@pagepocket/
|
|
25
|
-
"@pagepocket/
|
|
23
|
+
"@pagepocket/contracts": "0.13.0",
|
|
24
|
+
"@pagepocket/shared": "0.13.0",
|
|
25
|
+
"@pagepocket/uni-fs": "0.13.0"
|
|
26
26
|
},
|
|
27
27
|
"devDependencies": {
|
|
28
28
|
"@playwright/test": "^1.50.1",
|