@pagepocket/capture-http-cdp-unit 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ import { type PagePocketContext, type PagePocketPlugin } from "@pagepocket/lib";
2
+ import { type CdpAdapterOptions } from "./internal/cdp-adapter.js";
3
/** Options accepted by CaptureHttpCdpPlugin; an alias of the internal CdpAdapterOptions. */
+ export type CaptureHttpCdpPluginOptions = CdpAdapterOptions;
4
/**
 * PagePocket plugin that captures HTTP traffic through the internal CDP
 * adapter. The implementation only accepts entries of kind "cdp-tab" and
 * throws from its init hook for any other kind.
 */
+ export declare class CaptureHttpCdpPlugin implements PagePocketPlugin {
5
/** Fixed plugin identifier. */
+ readonly name = "plugin:capture-http-cdp";
6
/** Options stored by the constructor and handed to the internal CdpAdapter. */
+ private adapterOptions;
7
/** @param options Adapter options; the implementation falls back to `{}` when omitted. */
+ constructor(options?: CaptureHttpCdpPluginOptions);
8
/**
 * Installs `ctx.capture` (events, content store, capabilities) and registers
 * the `onInit` and `onBeforeNetwork` hooks that drive the capture session.
 */
+ apply(ctx: PagePocketContext): void;
9
+ }
@@ -0,0 +1,132 @@
1
+ import { createMemoryContentStore } from "@pagepocket/lib";
2
+ import { InflightTracker, networkIdle, normalizeCompletion, timeout } from "@pagepocket/lib";
3
+ import { CdpAdapter } from "./internal/cdp-adapter.js";
4
/**
 * Converts a header record (name -> value) into a list of `{ name, value }`
 * entries, following the record's own key order.
 *
 * @param headers Header record; may be null or undefined.
 * @returns One entry per own enumerable key, or an empty list when `headers` is falsy.
 */
const headersRecordToList = (headers) => {
    const list = [];
    if (headers) {
        for (const name of Object.keys(headers)) {
            list.push({ name, value: headers[name] });
        }
    }
    return list;
};
9
+ export class CaptureHttpCdpPlugin {
10
+ constructor(options) {
11
+ this.name = "plugin:capture-http-cdp";
12
+ this.adapterOptions = options ?? {};
13
+ }
14
+ apply(ctx) {
15
+ const contentStore = createMemoryContentStore("capture-http-cdp");
16
+ const events = [];
17
+ const capabilities = {
18
+ requestHeaders: "approx",
19
+ responseHeaders: "approx",
20
+ requestBodies: false,
21
+ responseBodies: "decoded",
22
+ httpVersion: false,
23
+ remoteIp: false,
24
+ headerOrderPreserved: false
25
+ };
26
+ ctx.capture = {
27
+ events,
28
+ contentStore,
29
+ capabilities
30
+ };
31
+ const inflightTracker = new InflightTracker();
32
+ const handleNetworkEvent = async (event) => {
33
+ inflightTracker.handleEvent(event);
34
+ ctx.emitNetworkEvent?.(event);
35
+ if (event.type === "request") {
36
+ events.push({
37
+ type: "http.request",
38
+ requestId: event.requestId,
39
+ url: event.url,
40
+ method: event.method,
41
+ headers: headersRecordToList(event.headers),
42
+ timestamp: event.timestamp,
43
+ frameId: event.frameId,
44
+ resourceType: event.resourceType,
45
+ initiator: event.initiator
46
+ });
47
+ return;
48
+ }
49
+ if (event.type === "failed") {
50
+ events.push({
51
+ type: "http.failed",
52
+ requestId: event.requestId,
53
+ url: event.url,
54
+ errorText: event.errorText,
55
+ timestamp: event.timestamp
56
+ });
57
+ return;
58
+ }
59
+ const bodyRef = event.body
60
+ ? await contentStore.put(event.body, {
61
+ url: event.url,
62
+ mimeType: event.mimeType,
63
+ sizeHint: undefined
64
+ })
65
+ : undefined;
66
+ events.push({
67
+ type: "http.response",
68
+ requestId: event.requestId,
69
+ url: event.url,
70
+ status: event.status,
71
+ statusText: event.statusText,
72
+ headers: headersRecordToList(event.headers),
73
+ timestamp: event.timestamp,
74
+ mimeType: event.mimeType,
75
+ fromDiskCache: event.fromDiskCache,
76
+ fromServiceWorker: event.fromServiceWorker,
77
+ bodyRef,
78
+ bodySize: undefined
79
+ });
80
+ };
81
+ const stateKey = "captureHttpCdp.session";
82
+ ctx.onInit(async () => {
83
+ if (ctx.entry.kind !== "cdp-tab") {
84
+ throw new Error(`CaptureHttpCdpPlugin requires entry kind "cdp-tab" (got ${String(ctx.entry.kind)})`);
85
+ }
86
+ const adapter = new CdpAdapter(this.adapterOptions);
87
+ const session = await adapter.start({ kind: "cdp-tab", tabId: ctx.entry.tabId }, {
88
+ onEvent(event) {
89
+ void handleNetworkEvent(event);
90
+ },
91
+ onError(error) {
92
+ // eslint-disable-next-line no-console
93
+ console.warn("[pagepocket][capture-http-cdp] adapter error", error);
94
+ }
95
+ }, {
96
+ timeoutMs: ctx.options.timeoutMs,
97
+ maxDurationMs: ctx.options.maxDurationMs
98
+ });
99
+ ctx.state[stateKey] = session;
100
+ if (!ctx.html) {
101
+ const html = await session.waitForHtml();
102
+ ctx.setHtml(html);
103
+ }
104
+ });
105
+ ctx.onBeforeNetwork(async () => {
106
+ const session = ctx.state[stateKey];
107
+ if (!session) {
108
+ throw new Error("CaptureHttpCdpPlugin internal error: missing session");
109
+ }
110
+ await session.startCapture();
111
+ const completionStrategies = normalizeCompletion(ctx.options.completion);
112
+ const idleMs = ctx.options.timeoutMs ?? 5000;
113
+ const maxDurationMs = ctx.options.maxDurationMs;
114
+ const completion = completionStrategies.length > 0
115
+ ? completionStrategies
116
+ : [networkIdle(idleMs), ...(maxDurationMs !== undefined ? [timeout(maxDurationMs)] : [])];
117
+ if (completion.length === 1) {
118
+ await completion[0].wait({
119
+ now: () => Date.now(),
120
+ getStats: () => inflightTracker.getStats()
121
+ });
122
+ }
123
+ else {
124
+ await Promise.race(completion.map((strategy) => strategy.wait({
125
+ now: () => Date.now(),
126
+ getStats: () => inflightTracker.getStats()
127
+ })));
128
+ }
129
+ await session.stop();
130
+ });
131
+ }
132
+ }
@@ -0,0 +1,13 @@
1
+ import { Unit, type CaptureArtifacts } from "@pagepocket/lib";
2
+ import { type CdpAdapterOptions } from "./internal/cdp-adapter.js";
3
/** Options accepted by CaptureHttpCdpUnit; an alias of the internal CdpAdapterOptions. */
+ export type CaptureHttpCdpUnitOptions = CdpAdapterOptions;
4
/**
 * Pipeline unit that captures HTTP traffic through the internal CDP adapter.
 * The implementation maps the runtime entry with a builder table that only
 * contains the "cdp-tab" kind.
 */
+ export declare class CaptureHttpCdpUnit extends Unit {
5
/** Fixed unit identifier. */
+ readonly id = "captureHttpCdp";
6
/** Fixed unit kind. */
+ readonly kind = "capture.http.cdp";
7
/** Options stored by the constructor and handed to the internal CdpAdapter. */
+ private adapterOptions;
8
/** @param options Adapter options; the implementation falls back to `{}` when omitted. */
+ constructor(options?: CaptureHttpCdpUnitOptions);
9
/**
 * Starts a CDP session for the runtime entry, captures network events until a
 * completion strategy settles, stops the session and returns the artifacts.
 *
 * @param ctx Unit context; the implementation reads `ctx.value.html` and only
 *   waits for the session's HTML when that value is nullish.
 * @param rt Unit runtime; the implementation reads `rt.entry` and `rt.options`
 *   and publishes every network event through `rt.publish`.
 * @returns The capture artifacts together with the HTML value.
 */
+ run(ctx: import("@pagepocket/lib").UnitContext, rt: import("@pagepocket/lib").UnitRuntime): Promise<{
10
+ capture: CaptureArtifacts;
11
+ html: {};
12
+ }>;
13
+ }
@@ -0,0 +1,123 @@
1
+ import { NETWORK } from "@pagepocket/contracts";
2
+ import { Unit, createMemoryContentStore, InflightTracker, mapKind, networkIdle, normalizeCompletion, throwRequiredEntryKind, timeout } from "@pagepocket/lib";
3
+ import { CdpAdapter } from "./internal/cdp-adapter.js";
4
/**
 * Turns a header record (name -> value) into an ordered list of
 * `{ name, value }` entries.
 *
 * @param headers Header record; may be null or undefined.
 * @returns The entries in key order; an empty list when `headers` is falsy.
 */
const headersRecordToList = (headers) => {
    const entries = headers ? Object.entries(headers) : [];
    return entries.map(([name, value]) => ({ name, value }));
};
9
/**
 * Entry kind -> builder for the intercept target passed to the adapter.
 * Only the "cdp-tab" kind has a builder.
 */
const targetBuilders = {
    "cdp-tab": (entry) => {
        const target = { kind: "cdp-tab", tabId: entry.tabId };
        return target;
    }
};
15
/**
 * Pipeline unit that records the HTTP traffic of a Chrome tab through the
 * internal CDP adapter and returns it as capture artifacts.
 */
export class CaptureHttpCdpUnit extends Unit {
    /**
     * @param options Options forwarded to the internal `CdpAdapter`; defaults to `{}`.
     */
    constructor(options) {
        super();
        this.id = "captureHttpCdp";
        this.kind = "capture.http.cdp";
        this.adapterOptions = options ?? {};
    }
    /**
     * Runs one capture: starts the adapter session, obtains the HTML (from
     * `ctx.value.html` or the session), captures until a completion strategy
     * settles, stops the session and returns the artifacts.
     *
     * @param ctx Unit context; only `ctx.value.html` is read.
     * @param rt Unit runtime; `rt.entry` and `rt.options` are read and every
     *   network event is published on the `NETWORK` channel.
     * @returns `{ capture, html }`, where `capture` holds the recorded events,
     *   the content store and the capability flags.
     */
    async run(ctx, rt) {
        const target = mapKind(rt.entry, targetBuilders, {
            onUnsupportedKind: throwRequiredEntryKind("CaptureHttpCdpUnit", "cdp-tab")
        });
        const contentStore = createMemoryContentStore("capture-http-cdp");
        const events = [];
        const capabilities = {
            requestHeaders: "approx",
            responseHeaders: "approx",
            requestBodies: false,
            responseBodies: "decoded",
            httpVersion: false,
            remoteIp: false,
            headerOrderPreserved: false
        };
        const inflightTracker = new InflightTracker();
        // Translates one adapter event into a capture event. Response events
        // are pushed only after their body has been written to the store.
        const handleNetworkEvent = async (event) => {
            inflightTracker.handleEvent(event);
            rt.publish(NETWORK, event);
            if (event.type === "request") {
                events.push({
                    type: "http.request",
                    requestId: event.requestId,
                    url: event.url,
                    method: event.method,
                    headers: headersRecordToList(event.headers),
                    timestamp: event.timestamp,
                    frameId: event.frameId,
                    resourceType: event.resourceType,
                    initiator: event.initiator
                });
                return;
            }
            if (event.type === "failed") {
                events.push({
                    type: "http.failed",
                    requestId: event.requestId,
                    url: event.url,
                    errorText: event.errorText,
                    timestamp: event.timestamp
                });
                return;
            }
            const bodyRef = event.body
                ? await contentStore.put(event.body, {
                    url: event.url,
                    mimeType: event.mimeType,
                    sizeHint: undefined
                })
                : undefined;
            events.push({
                type: "http.response",
                requestId: event.requestId,
                url: event.url,
                status: event.status,
                statusText: event.statusText,
                headers: headersRecordToList(event.headers),
                timestamp: event.timestamp,
                mimeType: event.mimeType,
                fromDiskCache: event.fromDiskCache,
                fromServiceWorker: event.fromServiceWorker,
                bodyRef,
                bodySize: undefined
            });
        };
        // Handlers that are still running (typically awaiting the content
        // store). They are drained before `run` returns so that the returned
        // `events` list is complete.
        const pendingHandlers = new Set();
        const dispatchNetworkEvent = (event) => {
            const pending = handleNetworkEvent(event).catch((error) => {
                // A failing handler must not become an unhandled rejection.
                console.warn("[pagepocket][capture-http-cdp] failed to record network event", error);
            });
            pendingHandlers.add(pending);
            void pending.finally(() => pendingHandlers.delete(pending));
        };
        const drainPendingHandlers = async () => {
            // Loop: new events may arrive while earlier ones are being awaited.
            while (pendingHandlers.size > 0) {
                await Promise.all([...pendingHandlers]);
            }
        };
        const capture = { events, contentStore, capabilities };
        const adapter = new CdpAdapter(this.adapterOptions);
        const session = await adapter.start(target, {
            onEvent: (event) => {
                dispatchNetworkEvent(event);
            },
            onError: (error) => {
                console.warn("[pagepocket][capture-http-cdp] adapter error", error);
            }
        }, {
            timeoutMs: rt.options.timeoutMs,
            maxDurationMs: rt.options.maxDurationMs
        });
        let html;
        try {
            html = ctx.value.html ?? (await session.waitForHtml());
            await session.startCapture();
            const completionStrategies = normalizeCompletion(rt.options.completion);
            const idleMs = rt.options.timeoutMs ?? 5000;
            const maxDurationMs = rt.options.maxDurationMs;
            const completion = completionStrategies.length > 0
                ? completionStrategies
                : [networkIdle(idleMs), ...(maxDurationMs !== undefined ? [timeout(maxDurationMs)] : [])];
            // The first strategy to settle ends the capture; racing a single
            // strategy is the same as awaiting it.
            await Promise.race(completion.map((strategy) => strategy.wait({
                now: () => Date.now(),
                getStats: () => inflightTracker.getStats()
            })));
        }
        finally {
            // Always release the session, even when a step above throws, then
            // wait for handlers that are still storing response bodies.
            try {
                await session.stop();
            }
            finally {
                await drainPendingHandlers();
            }
        }
        return { capture, html };
    }
}
@@ -0,0 +1,2 @@
1
+ export { CaptureHttpCdpUnit } from "./capture-http-cdp-unit.js";
2
+ export type { CaptureHttpCdpUnitOptions } from "./capture-http-cdp-unit.js";
package/dist/index.js ADDED
@@ -0,0 +1 @@
1
+ export { CaptureHttpCdpUnit } from "./capture-http-cdp-unit.js";
@@ -0,0 +1,29 @@
1
+ import type { InterceptSession, InterceptTarget, NetworkEventHandlers, NetworkInterceptorAdapter, TriggerAction } from "@pagepocket/lib";
2
+ import type { CdpClient } from "./types.js";
3
/** Options understood by the internal CdpAdapter. */
+ export type CdpAdapterOptions = {
4
/** Protocol version handed to the client; the adapter uses "1.3" when this is nullish. */
+ protocolVersion?: string;
5
/**
 * Optional factory for the CDP client. When it is absent, or resolves to a
 * nullish value, the adapter creates a chrome.debugger based client instead.
 * NOTE(review): the parameter is declared optional here, but the adapter
 * always passes `{ tabId, protocolVersion }`.
 */
+ clientFactory?: (options?: {
6
+ tabId: number;
7
+ protocolVersion: string;
8
+ }) => Promise<CdpClient> | CdpClient;
9
/** Trigger actions forwarded unchanged to the CDP session. */
+ triggerActions?: TriggerAction[];
10
+ };
11
+ /**
12
+ * Internal adapter for the capture plugin.
13
+ *
14
+ * Note: this is intentionally NOT published as a separate package.
15
+ */
16
+ export declare class CdpAdapter implements NetworkInterceptorAdapter {
17
/** Fixed adapter name. */
+ readonly name = "cdp";
18
/**
 * Capability flags. The implementation sets them to constants: response
 * bodies, resource types, HTML waiting and staged capture are `true`;
 * response-body streaming and request bodies are `false`.
 */
+ readonly capabilities: {
19
+ canGetResponseBody: boolean;
20
+ canStreamResponseBody: boolean;
21
+ canGetRequestBody: boolean;
22
+ providesResourceType: boolean;
23
+ canWaitForHtml: boolean;
24
+ supportsStagedCapture: boolean;
25
+ };
26
/** Options stored by the constructor. */
+ private options;
27
/** @param options Adapter options; the implementation defaults to `{}`. */
+ constructor(options?: CdpAdapterOptions);
28
/**
 * Creates a CDP session for a "cdp-tab" target; the implementation throws for
 * any other target kind.
 *
 * @param target Intercept target; only `kind` and `tabId` are read.
 * @param handlers Event and error callbacks handed to the session.
 * @param _options Not read by the implementation.
 */
+ start(target: InterceptTarget, handlers: NetworkEventHandlers, _options?: Record<string, unknown>): Promise<InterceptSession>;
29
+ }
@@ -0,0 +1,40 @@
1
+ import { createChromeDebuggerClient } from "./chrome-debugger-client.js";
2
+ import { createCdpSession } from "./session.js";
3
+ /**
4
+ * Internal adapter for the capture plugin.
5
+ *
6
+ * Note: this is intentionally NOT published as a separate package.
7
+ */
8
+ export class CdpAdapter {
9
+ constructor(options = {}) {
10
+ this.name = "cdp";
11
+ this.capabilities = {
12
+ canGetResponseBody: true,
13
+ canStreamResponseBody: false,
14
+ canGetRequestBody: false,
15
+ providesResourceType: true,
16
+ canWaitForHtml: true,
17
+ supportsStagedCapture: true
18
+ };
19
+ this.options = options;
20
+ }
21
+ async start(target, handlers, _options) {
22
+ if (target.kind !== "cdp-tab") {
23
+ throw new Error("CdpAdapter only supports cdp-tab targets.");
24
+ }
25
+ const protocolVersion = this.options.protocolVersion ?? "1.3";
26
+ const clientFactory = this.options.clientFactory;
27
+ const client = (await clientFactory?.({ tabId: target.tabId, protocolVersion })) ??
28
+ createChromeDebuggerClient({ tabId: target.tabId }, protocolVersion);
29
+ const ownsClient = true;
30
+ const base = await createCdpSession({
31
+ client,
32
+ handlers,
33
+ ownsClient,
34
+ triggerActions: this.options.triggerActions
35
+ });
36
+ // `cdp-tab` sessions already support `waitForHtml()` and `startCapture()`.
37
+ // This wrapper exists to keep this adapter entry point contained.
38
+ return base;
39
+ }
40
+ }
@@ -0,0 +1,11 @@
1
+ import type { ResourceType } from "@pagepocket/lib";
2
+ import type { CdpClient } from "./types.js";
3
/** Decodes a base64 string into bytes (uses `Buffer` when the global exists, otherwise `atob`). */
+ export declare const decodeBase64: (input: string) => Uint8Array<ArrayBuffer>;
4
/** Encodes a string as UTF-8 bytes with `TextEncoder`. */
+ export declare const encodeUtf8: (input: string) => Uint8Array<ArrayBuffer>;
5
/**
 * Flattens a header record to string values: null/undefined values are
 * dropped, arrays are joined with ", ", everything else goes through `String()`.
 */
+ export declare const normalizeHeaders: (headers?: Record<string, unknown>) => Record<string, string>;
6
/**
 * Lower-cases a CDP resource type; returns `undefined` for falsy input.
 * NOTE(review): the implementation also returns unrecognised values
 * lower-cased, so the result is not guaranteed to be a `ResourceType` member.
 */
+ export declare const mapResourceType: (input?: string) => ResourceType | undefined;
7
/** Guesses a resource type from a MIME type; `undefined` when nothing matches. */
+ export declare const inferResourceTypeFromMime: (mimeType?: string) => ResourceType | undefined;
8
/**
 * Invokes a CDP method: through `client.send` when that is a function,
 * otherwise through `client[Domain][command]`. Throws when neither exists.
 */
+ export declare const callCdp: <T>(client: CdpClient, method: string, params?: Record<string, unknown>) => Promise<T>;
9
/**
 * Registers an event handler: through `client.on` when that is a function,
 * otherwise through `client[Domain][event]`. Returns an unsubscribe function,
 * which is a no-op for the domain-style path. Throws when neither exists.
 */
+ export declare const subscribe: (client: CdpClient, eventName: string, handler: (payload: unknown) => void) => () => void | undefined;
10
/** True when the error message contains "No data found for resource with given identifier". */
+ export declare const isNoBodyError: (error: unknown) => boolean;
11
/** Writes `label` and the JSON form of `data` to `console.info`. */
+ export declare const logInfo: (label: string, data: Record<string, unknown>) => void;
@@ -0,0 +1,112 @@
1
/**
 * Decodes a base64 string into a byte array.
 *
 * Uses the global `Buffer` when it exists; otherwise falls back to `atob` and
 * copies the resulting character codes.
 *
 * @param input Base64 text.
 * @returns The decoded bytes.
 */
export const decodeBase64 = (input) => {
    const NodeBuffer = globalThis.Buffer;
    if (!NodeBuffer) {
        const binary = atob(input);
        // `atob` yields one character per byte (codes 0-255).
        return Uint8Array.from(binary, (char) => char.charCodeAt(0));
    }
    return new Uint8Array(NodeBuffer.from(input, "base64"));
};
13
/**
 * Encodes a string as UTF-8.
 *
 * @param input Text to encode.
 * @returns The UTF-8 bytes produced by `TextEncoder`.
 */
export const encodeUtf8 = (input) => {
    const encoder = new TextEncoder();
    return encoder.encode(input);
};
14
/**
 * Flattens a header record so that every value is a string.
 *
 * Null and undefined values are dropped, array values are joined with ", ",
 * and every other value is converted with `String()`.
 *
 * @param headers Raw header record; may be missing.
 * @returns A new record of header name -> string value.
 */
export const normalizeHeaders = (headers) => {
    const normalized = {};
    if (!headers) {
        return normalized;
    }
    for (const [name, raw] of Object.entries(headers)) {
        if (raw === undefined || raw === null) {
            continue;
        }
        normalized[name] = Array.isArray(raw) ? raw.join(", ") : String(raw);
    }
    return normalized;
};
26
/**
 * Maps a CDP resource type name to its lower-case form.
 *
 * The known names ("document", "stylesheet", "script", "image", "font",
 * "media", "xhr", "fetch", "other") and any unrecognised name are treated
 * alike: the lower-cased input is returned.
 *
 * @param input CDP resource type; may be missing.
 * @returns The lower-cased name, or `undefined` when `input` is falsy.
 */
export const mapResourceType = (input) => {
    return input ? input.toLowerCase() : undefined;
};
53
/**
 * Guesses a resource type from a MIME type.
 *
 * The MIME type is lower-cased and checked against an ordered rule list; the
 * first matching rule wins.
 *
 * @param mimeType MIME type, possibly with parameters; may be missing.
 * @returns "document", "stylesheet", "script", "image", "font" or "media", or
 *   `undefined` when the input is falsy or no rule matches.
 */
export const inferResourceTypeFromMime = (mimeType) => {
    if (!mimeType) {
        return undefined;
    }
    const mime = mimeType.toLowerCase();
    const contains = (...needles) => needles.some((needle) => mime.includes(needle));
    const startsWith = (...prefixes) => prefixes.some((prefix) => mime.startsWith(prefix));
    // Order matters: earlier rules take precedence.
    const rules = [
        ["document", () => contains("text/html", "application/xhtml+xml")],
        ["stylesheet", () => contains("text/css")],
        ["script", () => contains("javascript", "ecmascript")],
        ["image", () => startsWith("image/")],
        ["font", () => startsWith("font/") || contains("woff", "ttf", "otf")],
        ["media", () => startsWith("audio/", "video/")]
    ];
    const match = rules.find(([, matches]) => matches());
    return match?.[0];
};
80
+ export const callCdp = async (client, method, params) => {
81
+ if (typeof client.send === "function") {
82
+ return (await client.send(method, params));
83
+ }
84
+ const [domain, command] = method.split(".");
85
+ const domainApi = client[domain];
86
+ const fn = domainApi?.[command];
87
+ if (typeof fn === "function") {
88
+ return (await fn(params));
89
+ }
90
+ throw new Error(`CDP session missing method ${method}.`);
91
+ };
92
+ export const subscribe = (client, eventName, handler) => {
93
+ if (typeof client.on === "function") {
94
+ client.on(eventName, handler);
95
+ return () => client.off?.(eventName, handler);
96
+ }
97
+ const [domain, event] = eventName.split(".");
98
+ const domainApi = client[domain];
99
+ const fn = domainApi?.[event];
100
+ if (typeof fn === "function") {
101
+ fn(handler);
102
+ return () => undefined;
103
+ }
104
+ throw new Error(`CDP session missing event ${eventName}.`);
105
+ };
106
/**
 * Tells whether an error reports that no body is available for a resource.
 *
 * @param error Any thrown value; non-Error values are stringified.
 * @returns True when the message contains
 *   "No data found for resource with given identifier".
 */
export const isNoBodyError = (error) => {
    const marker = "No data found for resource with given identifier";
    const text = error instanceof Error ? error.message : String(error);
    return text.indexOf(marker) !== -1;
};
110
+ export const logInfo = (label, data) => {
111
+ console.info(`[pagepocket][cdp-adapter] ${label} ${JSON.stringify(data)}`);
112
+ };
@@ -0,0 +1,2 @@
1
+ import type { CdpClient, ChromeDebuggerTarget } from "./types.js";
2
/**
 * Creates a CdpClient backed by the `chrome.debugger` extension API.
 * The implementation throws when `chrome.debugger` is not available and
 * attaches to the target on the first `send`.
 *
 * @param target Debuggee passed to the chrome.debugger calls; events are
 *   filtered by its `tabId`.
 * @param protocolVersion Version string passed to `chrome.debugger.attach`.
 */
+ export declare const createChromeDebuggerClient: (target: ChromeDebuggerTarget, protocolVersion: string) => CdpClient;
@@ -0,0 +1,89 @@
1
+ const getChromeGlobal = () => globalThis.chrome;
2
+ export const createChromeDebuggerClient = (target, protocolVersion) => {
3
+ const chromeGlobal = getChromeGlobal();
4
+ const chromeDebugger = chromeGlobal?.debugger;
5
+ if (!chromeDebugger) {
6
+ throw new Error("chrome.debugger API is not available in this environment.");
7
+ }
8
+ const chromeRuntime = chromeGlobal?.runtime;
9
+ let attached = false;
10
+ let closed = false;
11
+ const listeners = new Map();
12
+ const assertNoLastError = (action) => {
13
+ const lastError = chromeRuntime?.lastError;
14
+ if (lastError?.message) {
15
+ throw new Error(`${action} failed: ${lastError.message}`);
16
+ }
17
+ };
18
+ const attach = () => new Promise((resolve, reject) => {
19
+ chromeDebugger.attach(target, protocolVersion, () => {
20
+ try {
21
+ assertNoLastError("chrome.debugger.attach");
22
+ attached = true;
23
+ resolve();
24
+ }
25
+ catch (error) {
26
+ reject(error);
27
+ }
28
+ });
29
+ });
30
+ const detach = () => new Promise((resolve, reject) => {
31
+ chromeDebugger.detach(target, () => {
32
+ try {
33
+ assertNoLastError("chrome.debugger.detach");
34
+ attached = false;
35
+ resolve();
36
+ }
37
+ catch (error) {
38
+ reject(error);
39
+ }
40
+ });
41
+ });
42
+ const ensureAttached = async () => {
43
+ if (attached)
44
+ return;
45
+ await attach();
46
+ };
47
+ return {
48
+ send: async (method, params) => {
49
+ await ensureAttached();
50
+ return new Promise((resolve, reject) => {
51
+ chromeDebugger.sendCommand(target, method, params ?? {}, (result) => {
52
+ try {
53
+ assertNoLastError(`chrome.debugger.sendCommand(${method})`);
54
+ resolve(result);
55
+ }
56
+ catch (error) {
57
+ reject(error);
58
+ }
59
+ });
60
+ });
61
+ },
62
+ on: (event, listener) => {
63
+ const handler = (source, method, params) => {
64
+ if (source.tabId !== target.tabId)
65
+ return;
66
+ if (method !== event)
67
+ return;
68
+ listener(params);
69
+ };
70
+ listeners.set(listener, handler);
71
+ chromeDebugger.onEvent.addListener(handler);
72
+ },
73
+ off: (_event, listener) => {
74
+ const handler = listeners.get(listener);
75
+ if (!handler)
76
+ return;
77
+ listeners.delete(listener);
78
+ chromeDebugger.onEvent.removeListener(handler);
79
+ },
80
+ close: async () => {
81
+ if (closed)
82
+ return;
83
+ closed = true;
84
+ if (!attached)
85
+ return;
86
+ await detach();
87
+ }
88
+ };
89
+ };
@@ -0,0 +1,6 @@
1
+ import { type NetworkEventHandlers, type TriggerAction } from "@pagepocket/lib";
2
+ import type { CdpClient } from "./types.js";
3
/**
 * Builds page-level helpers bound to one CDP client.
 *
 * @param client CDP client used for `Page.enable` and `Runtime.evaluate`.
 * @param handlers Network event handlers; the implementation only calls
 *   `handlers.onError` when a trigger action fails.
 */
+ export declare const createPageActions: (client: CdpClient, handlers: NetworkEventHandlers) => {
4
/** Best-effort `Page.enable`; the implementation swallows any failure. */
+ ensurePageEnabled: () => Promise<void>;
5
/** Runs the given trigger actions in order; does nothing for an empty or omitted list. */
+ runTriggerActions: (actions?: TriggerAction[]) => Promise<void>;
6
+ };
@@ -0,0 +1,58 @@
1
+ import { TriggerActionValues } from "@pagepocket/lib";
2
+ import { callCdp } from "./cdp-utils.js";
3
/**
 * Builds page-level helpers bound to one CDP client.
 *
 * Returns `ensurePageEnabled`, a best-effort `Page.enable`, and
 * `runTriggerActions`, which runs the requested trigger actions in order by
 * evaluating scripts in the page through `Runtime.evaluate`. A failing action
 * is reported through `handlers.onError` and does not stop the remaining ones.
 */
+ export const createPageActions = (client, handlers) => {
4
// Enables the Page domain; failures are deliberately ignored (see the catch).
+ const ensurePageEnabled = async () => {
5
+ try {
6
+ await callCdp(client, "Page.enable");
7
+ }
8
+ catch {
9
+ // Page domain may be unavailable; ignore.
10
+ }
11
+ };
12
// Runs each requested action once, in the order given.
+ const runTriggerActions = async (actions = []) => {
13
+ if (actions.length === 0) {
14
+ return;
15
+ }
16
+ await ensurePageEnabled();
17
+ for (const action of actions) {
18
// HOVER: dispatch a synthetic `mouseover` at the centre of every element.
+ if (action === TriggerActionValues.HOVER) {
19
+ try {
20
+ await callCdp(client, "Runtime.evaluate", {
21
+ expression: `(() => {
22
+ const elements = Array.from(document.querySelectorAll('*'));
23
+ for (const el of elements) {
24
+ const rect = el.getBoundingClientRect();
25
+ const x = rect.left + rect.width / 2;
26
+ const y = rect.top + rect.height / 2;
27
+ const ev = new MouseEvent('mouseover', { bubbles: true, cancelable: true, clientX: x, clientY: y });
28
+ el.dispatchEvent(ev);
29
+ }
30
+ })();`
31
+ });
32
+ }
33
+ catch (error) {
34
+ handlers.onError?.(error);
35
+ }
36
+ }
37
// SCROLL_TO_END: scroll the window to the document's scroll height.
+ if (action === TriggerActionValues.SCROLL_TO_END) {
38
+ try {
39
+ await callCdp(client, "Runtime.evaluate", {
40
+ expression: `(() => {
41
+ const scrollHeight = document.documentElement?.scrollHeight ?? document.body?.scrollHeight;
42
+ if (typeof scrollHeight === 'number') {
43
+ window.scrollTo({ top: scrollHeight, behavior: 'instant' });
44
+ }
45
+ })();`
46
+ });
47
+ }
48
+ catch (error) {
49
+ handlers.onError?.(error);
50
+ }
51
+ }
52
+ }
53
+ };
54
+ return {
55
+ ensurePageEnabled,
56
+ runTriggerActions
57
+ };
58
+ };
@@ -0,0 +1,16 @@
1
/** HTML document captured from a response body. */
+ type HtmlArtifact = {
2
/** Response body decoded with `TextDecoder`. */
+ htmlString: string;
3
/** The implementation sets this to the response URL. */
+ baseUrl: string;
4
/** The implementation sets this to the response URL as well. */
+ url?: string;
5
/** Content type passed to `tryResolveHtml`, if any. */
+ contentType?: string;
6
+ };
7
/** Creates a one-shot "HTML is available" milestone. */
+ export declare const createHtmlMilestone: () => {
8
/**
 * Settles once the first non-empty body has been accepted.
 * NOTE(review): declared as `Promise<never>`, but the implementation resolves
 * it with the HtmlArtifact - the declared type looks too narrow.
 */
+ htmlPromise: Promise<never>;
9
/**
 * Offers a candidate body. Ignored when an artifact already exists or when
 * `bodyBytes` is null or empty.
 */
+ tryResolveHtml: (input: {
10
+ url: string;
11
+ contentType?: string;
12
+ bodyBytes: Uint8Array | null;
13
+ }) => void;
14
/** Returns the accepted artifact, or `null` when none has been accepted yet. */
+ getHtmlArtifact: () => HtmlArtifact | null;
15
+ };
16
+ export {};
@@ -0,0 +1,29 @@
1
+ export const createHtmlMilestone = () => {
2
+ let htmlArtifact = null;
3
+ let resolveHtml = null;
4
+ const htmlPromise = new Promise((resolve, _reject) => {
5
+ resolveHtml = resolve;
6
+ });
7
+ const tryResolveHtml = (input) => {
8
+ if (htmlArtifact) {
9
+ return;
10
+ }
11
+ if (!input.bodyBytes || input.bodyBytes.byteLength === 0) {
12
+ return;
13
+ }
14
+ const htmlString = new TextDecoder().decode(input.bodyBytes);
15
+ htmlArtifact = {
16
+ htmlString,
17
+ baseUrl: input.url,
18
+ url: input.url,
19
+ contentType: input.contentType
20
+ };
21
+ resolveHtml?.(htmlArtifact);
22
+ resolveHtml = null;
23
+ };
24
+ return {
25
+ htmlPromise,
26
+ tryResolveHtml,
27
+ getHtmlArtifact: () => htmlArtifact
28
+ };
29
+ };
@@ -0,0 +1,27 @@
1
+ import type { NetworkEventHandlers, NetworkRequestEvent } from "@pagepocket/lib";
2
+ import type { RequestInfo, RequestWillBeSent, ResponseReceived } from "../types.js";
3
/**
 * Creates the per-session bookkeeping for CDP requests.
 *
 * @param handlers Network event handlers; the implementation calls
 *   `handlers.onEvent` for every request and redirect-response event it emits.
 */
+ export declare const createRequestState: (handlers: NetworkEventHandlers) => {
4
/** CDP request id -> logical request id currently in use. */
+ activeRequestId: Map<string, string>;
5
/** Logical request id -> URL. */
+ requestUrls: Map<string, string>;
6
/** CDP request id -> latest request info (url, frame, resource type, initiator). */
+ requestInfo: Map<string, RequestInfo>;
7
/** CDP request id -> latest request event that was emitted. */
+ requestEvents: Map<string, NetworkRequestEvent>;
8
/** Returns the active logical id, or `<cdpRequestId>:0` when none is recorded. */
+ getLogicalRequestId: (cdpRequestId: string) => string;
9
/**
 * Records a `requestWillBeSent` payload and emits the matching request event.
 * When the payload carries a redirect response, a response event for the
 * previous logical request is emitted first.
 */
+ handleRequestWillBeSent: (payload: RequestWillBeSent, timestampMs: number) => {
10
+ cdpRequestId: string;
11
+ logicalRequestId: string;
12
+ };
13
/**
 * Records a `responseReceived` payload. May emit a synthesized or updated
 * request event when a resource type only becomes known from the response.
 */
+ handleResponseReceived: (payload: ResponseReceived, timestampMs: number) => {
14
+ cdpRequestId: string;
15
+ logicalRequestId: string;
16
+ response: {
17
+ url: string;
18
+ status: number;
19
+ statusText?: string;
20
+ headers?: Record<string, unknown>;
21
+ mimeType?: string;
22
+ fromDiskCache?: boolean;
23
+ fromServiceWorker?: boolean;
24
+ };
25
/** Type reported by the payload, else the type inferred from the MIME type. */
+ resolvedType: import("@pagepocket/lib").ResourceType | undefined;
26
+ };
27
+ };
@@ -0,0 +1,110 @@
1
+ import { inferResourceTypeFromMime, mapResourceType, normalizeHeaders } from "../cdp-utils.js";
2
/**
 * Creates the per-session bookkeeping for CDP requests.
 *
 * A CDP request id can be announced several times (each `requestWillBeSent`
 * for the same id gets the next sequence number), so every announcement is
 * given a logical id of the form `<cdpRequestId>:<sequence>`. Events are
 * emitted through `handlers.onEvent`.
 */
+ export const createRequestState = (handlers) => {
3
// CDP request id -> logical id currently in use.
+ const activeRequestId = new Map();
4
// CDP request id -> last sequence number handed out.
+ const requestSequence = new Map();
5
// Logical request id -> URL.
+ const requestUrls = new Map();
6
// CDP request id -> latest request info.
+ const requestInfo = new Map();
7
// CDP request id -> latest request event emitted.
+ const requestEvents = new Map();
8
// Falls back to sequence 0 when no request was recorded for the id.
+ const getLogicalRequestId = (cdpRequestId) => activeRequestId.get(cdpRequestId) ?? `${cdpRequestId}:0`;
9
+ const handleRequestWillBeSent = (payload, timestampMs) => {
10
+ const cdpRequestId = payload.requestId;
11
// A redirect response belongs to the previous logical request; emit it
// before the sequence number is advanced below.
+ if (payload.redirectResponse) {
12
+ const previousRequestId = getLogicalRequestId(cdpRequestId);
13
+ const redirectResponse = payload.redirectResponse;
14
+ const redirectEvent = {
15
+ type: "response",
16
+ requestId: previousRequestId,
17
+ url: redirectResponse.url,
18
+ status: redirectResponse.status,
19
+ statusText: redirectResponse.statusText ?? "",
20
+ headers: normalizeHeaders(redirectResponse.headers),
21
+ mimeType: redirectResponse.mimeType,
22
+ fromDiskCache: redirectResponse.fromDiskCache,
23
+ fromServiceWorker: redirectResponse.fromServiceWorker,
24
+ timestamp: timestampMs
25
+ };
26
+ handlers.onEvent(redirectEvent);
27
+ }
28
// First announcement gets sequence 0, every further one the next number.
+ const sequence = (requestSequence.get(cdpRequestId) ?? -1) + 1;
29
+ requestSequence.set(cdpRequestId, sequence);
30
+ const logicalRequestId = `${cdpRequestId}:${sequence}`;
31
+ activeRequestId.set(cdpRequestId, logicalRequestId);
32
+ const url = payload.request.url;
33
+ requestUrls.set(logicalRequestId, url);
34
+ requestInfo.set(cdpRequestId, {
35
+ url,
36
+ frameId: payload.frameId,
37
+ resourceType: mapResourceType(payload.type),
38
+ initiator: payload.initiator
39
+ });
40
+ const requestEvent = {
41
+ type: "request",
42
+ requestId: logicalRequestId,
43
+ url,
44
// An empty method is reported as "GET".
+ method: payload.request.method || "GET",
45
+ headers: normalizeHeaders(payload.request.headers),
46
+ frameId: payload.frameId,
47
+ resourceType: mapResourceType(payload.type),
48
+ initiator: payload.initiator,
49
+ timestamp: timestampMs
50
+ };
51
+ requestEvents.set(cdpRequestId, requestEvent);
52
+ handlers.onEvent(requestEvent);
53
+ return { cdpRequestId, logicalRequestId };
54
+ };
55
+ const handleResponseReceived = (payload, timestampMs) => {
56
+ const cdpRequestId = payload.requestId;
57
+ const logicalRequestId = getLogicalRequestId(cdpRequestId);
58
+ const response = payload.response;
59
// No request was recorded for this logical id: take the URL from the response.
+ if (!requestUrls.has(logicalRequestId)) {
60
+ requestUrls.set(logicalRequestId, response.url);
61
+ }
62
+ const storedRequest = requestEvents.get(cdpRequestId);
63
+ const existingInfo = requestInfo.get(cdpRequestId);
64
// Prefer the type reported by the payload, then the MIME-based guess.
+ const responseType = mapResourceType(payload.type);
65
+ const inferred = inferResourceTypeFromMime(response.mimeType);
66
+ const resolvedType = responseType ?? inferred;
67
// Fill in request info when it is missing or has no resource type yet.
+ if ((!existingInfo || !existingInfo.resourceType) && resolvedType) {
68
+ requestInfo.set(cdpRequestId, {
69
+ url: response.url,
70
+ frameId: existingInfo?.frameId ?? payload.frameId,
71
+ resourceType: resolvedType,
72
+ initiator: existingInfo?.initiator
73
+ });
74
+ }
75
// Response without a recorded request: emit a synthesized GET request event.
+ if (!storedRequest && resolvedType) {
76
+ const synthesizedRequest = {
77
+ type: "request",
78
+ requestId: logicalRequestId,
79
+ url: response.url,
80
+ method: "GET",
81
+ headers: {},
82
+ frameId: payload.frameId,
83
+ resourceType: resolvedType,
84
+ initiator: undefined,
85
+ timestamp: timestampMs
86
+ };
87
+ requestEvents.set(cdpRequestId, synthesizedRequest);
88
+ handlers.onEvent(synthesizedRequest);
89
+ }
// Recorded request lacked a resource type: re-emit it with the resolved
// type and the response's timestamp.
+ else if (storedRequest && resolvedType && !storedRequest.resourceType) {
91
+ const updatedRequest = {
92
+ ...storedRequest,
93
+ resourceType: resolvedType,
94
+ timestamp: timestampMs
95
+ };
96
+ requestEvents.set(cdpRequestId, updatedRequest);
97
+ handlers.onEvent(updatedRequest);
98
+ }
99
+ return { cdpRequestId, logicalRequestId, response, resolvedType };
100
+ };
101
// `requestSequence` stays private; everything else is exposed.
+ return {
102
+ activeRequestId,
103
+ requestUrls,
104
+ requestInfo,
105
+ requestEvents,
106
+ getLogicalRequestId,
107
+ handleRequestWillBeSent,
108
+ handleResponseReceived
109
+ };
110
+ };
@@ -0,0 +1,6 @@
1
/** Creates a resolver that turns CDP event times into millisecond timestamps. */
+ export declare const createTimestampResolver: () => {
2
/**
 * Resolves the time of one event in milliseconds.
 *
 * The implementation returns `wallTime * 1000` when `wallTime` is a number
 * (remembering `wallTime - timestamp` as an offset when both are present),
 * otherwise `(timestamp + offset) * 1000` when an offset is known for the
 * request or from the most recent event, otherwise `Date.now()`.
 */
+ resolveTimestampMs: (payload: {
3
+ timestamp?: number;
4
+ wallTime?: number;
5
+ }, requestId?: string) => number;
6
+ };
@@ -0,0 +1,24 @@
1
+ export const createTimestampResolver = () => {
2
+ const requestTimeOffsets = new Map();
3
+ let globalTimeOffset = null;
4
+ const resolveTimestampMs = (payload, requestId) => {
5
+ if (typeof payload.wallTime === "number") {
6
+ if (typeof payload.timestamp === "number") {
7
+ const offset = payload.wallTime - payload.timestamp;
8
+ if (requestId) {
9
+ requestTimeOffsets.set(requestId, offset);
10
+ }
11
+ globalTimeOffset = offset;
12
+ }
13
+ return payload.wallTime * 1000;
14
+ }
15
+ if (typeof payload.timestamp === "number") {
16
+ const offset = (requestId ? requestTimeOffsets.get(requestId) : undefined) ?? globalTimeOffset;
17
+ if (typeof offset === "number") {
18
+ return (payload.timestamp + offset) * 1000;
19
+ }
20
+ }
21
+ return Date.now();
22
+ };
23
+ return { resolveTimestampMs };
24
+ };
@@ -0,0 +1,10 @@
1
+ import type { InterceptSession, NetworkEventHandlers, TriggerAction } from "@pagepocket/lib";
2
+ import type { CdpClient } from "./types.js";
3
/** Arguments of createCdpSession. */
+ type CreateSessionOptions = {
4
/** CDP client the session talks to. */
+ client: CdpClient;
5
/** Callbacks that receive network events and errors. */
+ handlers: NetworkEventHandlers;
6
// NOTE(review): presumably decides whether the session closes the client on
// stop - the code that reads it is not in this part of the file; confirm.
+ ownsClient: boolean;
7
/** Optional trigger actions for the session. */
+ triggerActions?: TriggerAction[];
8
+ };
9
/**
 * Creates an intercept session on top of a CDP client. The implementation
 * starts by enabling the Network domain on the client.
 */
+ export declare const createCdpSession: ({ client, handlers, ownsClient, triggerActions }: CreateSessionOptions) => Promise<InterceptSession>;
10
+ export {};
@@ -0,0 +1,191 @@
1
+ import { callCdp, decodeBase64, encodeUtf8, inferResourceTypeFromMime, isNoBodyError, logInfo, normalizeHeaders, subscribe } from "./cdp-utils.js";
2
+ import { createPageActions } from "./page-actions.js";
3
+ import { createHtmlMilestone } from "./session/html-milestone.js";
4
+ import { createRequestState } from "./session/request-state.js";
5
+ import { createTimestampResolver } from "./session/time.js";
6
/**
 * Starts a network-capture session on a CDP client.
 *
 * The session enables the Network domain, subscribes to
 * `Network.requestWillBeSent`, `Network.responseReceived`,
 * `Network.loadingFinished` and `Network.loadingFailed`, and reports what it
 * sees through `handlers.onEvent`:
 *  - request events are produced by the request-state helper;
 *  - a `"response"` event is emitted on `loadingFinished`, carrying the body
 *    fetched via `Network.getResponseBody` (falling back to
 *    `Page.getResourceContent` when that yields nothing and a frame id is known);
 *  - a `"failed"` event is emitted on `loadingFailed`.
 *
 * @param options.client CDP client used for commands and event subscriptions.
 * @param options.handlers Receives events (`onEvent`) and errors (`onError`, optional).
 * @param options.ownsClient When true, `stop()` also calls `client.close`.
 * @param options.triggerActions Passed to `runTriggerActions` by `startCapture()`.
 * @returns A session object with `navigate`, `waitForHtml`, `startCapture`, `stop`.
 * @throws Whatever `callCdp(client, "Network.enable")` or `subscribe` throws;
 *         subscription failures are also reported to `handlers.onError` when
 *         they are `Error` instances, and any listeners registered before the
 *         failure are released first.
 */
export const createCdpSession = async ({ client, handlers, ownsClient, triggerActions }) => {
  await callCdp(client, "Network.enable");
  const { ensurePageEnabled, runTriggerActions } = createPageActions(client, handlers);
  const { htmlPromise, tryResolveHtml } = createHtmlMilestone();
  const requestState = createRequestState(handlers);
  // cdpRequestId -> { requestId: logical id, response }, held from
  // responseReceived until the request finishes or fails.
  const responses = new Map();
  const { resolveTimestampMs } = createTimestampResolver();

  const toError = (error) => (error instanceof Error ? error : new Error(String(error)));

  const handleRequestWillBeSent = (payload) => {
    const cdpRequestId = payload.requestId;
    const eventTimestamp = resolveTimestampMs(payload, cdpRequestId);
    requestState.handleRequestWillBeSent(payload, eventTimestamp);
  };

  const handleResponseReceived = (payload) => {
    const cdpRequestId = payload.requestId;
    const eventTimestamp = resolveTimestampMs(payload, cdpRequestId);
    const result = requestState.handleResponseReceived(payload, eventTimestamp);
    logInfo("response received", {
      requestId: cdpRequestId,
      url: result.response.url,
      status: result.response.status,
      mimeType: result.response.mimeType,
      fromDiskCache: result.response.fromDiskCache,
      fromServiceWorker: result.response.fromServiceWorker
    });
    // The response event itself is emitted later, from handleLoadingFinished.
    responses.set(cdpRequestId, {
      requestId: result.logicalRequestId,
      response: result.response
    });
  };

  // Fetches the body via Network.getResponseBody. Returns null when the error
  // is one isNoBodyError recognises; any other error is rethrown.
  const tryGetResponseBody = async (cdpRequestId) => {
    try {
      const result = await callCdp(client, "Network.getResponseBody", { requestId: cdpRequestId });
      if (result.base64Encoded) {
        return decodeBase64(result.body);
      }
      return encodeUtf8(result.body);
    } catch (error) {
      if (isNoBodyError(error)) {
        return null;
      }
      throw error;
    }
  };

  // Fallback body source via Page.getResourceContent. Deliberately best-effort:
  // any failure simply means "no body".
  const tryGetPageResourceContent = async (info) => {
    if (!info.frameId || !info.url) {
      return null;
    }
    try {
      const result = await callCdp(client, "Page.getResourceContent", {
        frameId: info.frameId,
        url: info.url
      });
      if (result.base64Encoded) {
        return decodeBase64(result.content);
      }
      return encodeUtf8(result.content);
    } catch {
      return null;
    }
  };

  const handleLoadingFinished = async (payload) => {
    const cdpRequestId = payload.requestId;
    const eventTimestamp = resolveTimestampMs(payload, cdpRequestId);
    const storedResponse = responses.get(cdpRequestId);
    if (!storedResponse) {
      logInfo("loadingFinished without response", {
        requestId: cdpRequestId
      });
      return;
    }
    // The entry is only needed once; drop it so the map does not grow for the
    // whole lifetime of the session.
    responses.delete(cdpRequestId);

    // If the request never got a resource type, infer one from the MIME type
    // and re-emit the stored request event with it.
    const inferred = inferResourceTypeFromMime(storedResponse.response.mimeType);
    const infoFromMime = requestState.requestInfo.get(cdpRequestId);
    if ((!infoFromMime || !infoFromMime.resourceType) && inferred) {
      requestState.requestInfo.set(cdpRequestId, {
        url: storedResponse.response.url,
        frameId: infoFromMime?.frameId,
        resourceType: inferred,
        initiator: infoFromMime?.initiator
      });
      const storedRequest = requestState.requestEvents.get(cdpRequestId);
      if (storedRequest && !storedRequest.resourceType) {
        const updatedRequest = {
          ...storedRequest,
          resourceType: inferred
        };
        requestState.requestEvents.set(cdpRequestId, updatedRequest);
        handlers.onEvent(updatedRequest);
      }
    }

    let bodyBytes = null;
    try {
      bodyBytes = await tryGetResponseBody(cdpRequestId);
    } catch (error) {
      // Report, but still emit the response event (without a body).
      handlers.onError?.(toError(error));
    }
    // An empty body is treated the same as a missing one.
    if (bodyBytes && bodyBytes.byteLength === 0) {
      bodyBytes = null;
    }
    if (!bodyBytes) {
      const fallbackInfo = requestState.requestInfo.get(cdpRequestId);
      if (fallbackInfo?.frameId) {
        bodyBytes = await tryGetPageResourceContent(fallbackInfo);
      }
    }
    logInfo("response body status", {
      requestId: cdpRequestId,
      url: storedResponse.response.url,
      resourceType: requestState.requestInfo.get(cdpRequestId)?.resourceType,
      bodyBytes: bodyBytes ? bodyBytes.byteLength : 0
    });

    const responseEvent = {
      type: "response",
      requestId: storedResponse.requestId,
      url: storedResponse.response.url,
      status: storedResponse.response.status,
      statusText: storedResponse.response.statusText,
      headers: normalizeHeaders(storedResponse.response.headers),
      mimeType: storedResponse.response.mimeType,
      fromDiskCache: storedResponse.response.fromDiskCache,
      fromServiceWorker: storedResponse.response.fromServiceWorker,
      timestamp: eventTimestamp,
      body: bodyBytes ? { kind: "buffer", data: bodyBytes } : undefined
    };
    handlers.onEvent(responseEvent);

    const infoForHtml = requestState.requestInfo.get(cdpRequestId);
    if (infoForHtml?.resourceType === "document") {
      tryResolveHtml({
        url: storedResponse.response.url,
        contentType: storedResponse.response.mimeType,
        bodyBytes
      });
    }
  };

  const handleLoadingFailed = (payload) => {
    const cdpRequestId = payload.requestId;
    // A failed request will never reach loadingFinished; release its entry.
    responses.delete(cdpRequestId);
    const logicalRequestId = requestState.getLogicalRequestId(cdpRequestId);
    const url = requestState.requestUrls.get(logicalRequestId) ?? "";
    const failedEvent = {
      type: "failed",
      requestId: logicalRequestId,
      url,
      errorText: payload.errorText,
      timestamp: resolveTimestampMs(payload, cdpRequestId)
    };
    handlers.onEvent(failedEvent);
  };

  // Functions returned by subscribe(); calling one removes that listener.
  const cleanupHandlers = [];

  // Runs every pending cleanup exactly once, even if some of them throw, and
  // returns the errors that were raised.
  const releaseSubscriptions = () => {
    const errors = [];
    for (const cleanup of cleanupHandlers.splice(0)) {
      try {
        cleanup();
      } catch (error) {
        errors.push(error);
      }
    }
    return errors;
  };

  try {
    cleanupHandlers.push(subscribe(client, "Network.requestWillBeSent", (payload) => handleRequestWillBeSent(payload)));
    cleanupHandlers.push(subscribe(client, "Network.responseReceived", (payload) => handleResponseReceived(payload)));
    cleanupHandlers.push(subscribe(client, "Network.loadingFailed", (payload) => handleLoadingFailed(payload)));
    cleanupHandlers.push(subscribe(client, "Network.loadingFinished", (payload) => {
      // Fire-and-forget, but never let a rejection go unhandled: anything
      // thrown while building or emitting the response event is reported.
      void handleLoadingFinished(payload).catch((error) => {
        handlers.onError?.(toError(error));
      });
    }));
  } catch (error) {
    // Do not leave listeners from the earlier, successful subscribe calls
    // attached to a client the caller will never get a session for. Errors
    // from this cleanup are dropped so the original failure is the one thrown.
    releaseSubscriptions();
    if (error instanceof Error) {
      handlers.onError?.(error);
    }
    throw error;
  }

  return {
    navigate: async (url) => {
      await ensurePageEnabled();
      await callCdp(client, "Page.navigate", { url });
    },
    waitForHtml: async () => {
      return htmlPromise;
    },
    startCapture: async () => {
      await runTriggerActions(triggerActions);
    },
    stop: async () => {
      const unsubscribeErrors = releaseSubscriptions();
      // Always attempt to disable the domain and close an owned client, even
      // when an unsubscribe failed; failure to disable is ignored.
      await callCdp(client, "Network.disable").catch(() => undefined);
      if (ownsClient) {
        await client.close?.();
      }
      if (unsubscribeErrors.length > 0) {
        throw unsubscribeErrors[0];
      }
    }
  };
};
@@ -0,0 +1,113 @@
1
+ import type { ResourceType } from "@pagepocket/lib";
2
/** Debugger target, identified by the numeric id of a tab. */
+ export type ChromeDebuggerTarget = {
3
+ tabId: number;
4
+ };
5
/**
 * Listener signature for debugger events: receives the originating target,
 * the event's method name and its optional parameters.
 */
+ export type ChromeDebuggerEvent = (source: ChromeDebuggerTarget, method: string, params?: Record<string, unknown>) => void;
6
/**
 * Callback-style debugger surface: attach to / detach from a target, send a
 * command to it, and add or remove event listeners.
 * NOTE(review): presumably mirrors the browser extension `chrome.debugger`
 * API — confirm against the code that consumes this type.
 */
+ export type ChromeDebuggerApi = {
7
+ attach: (target: ChromeDebuggerTarget, version: string, callback: () => void) => void;
8
+ detach: (target: ChromeDebuggerTarget, callback: () => void) => void;
9
+ sendCommand: (target: ChromeDebuggerTarget, method: string, params: Record<string, unknown>, callback: (result?: unknown) => void) => void;
10
+ onEvent: {
11
+ addListener: (listener: ChromeDebuggerEvent) => void;
12
+ removeListener: (listener: ChromeDebuggerEvent) => void;
13
+ };
14
+ };
15
/**
 * Minimal runtime surface: an optional `lastError` with an optional message.
 * NOTE(review): presumably read after callback-style debugger calls to detect
 * failures — the consumer is outside this excerpt; confirm.
 */
+ export type ChromeRuntimeApi = {
16
+ lastError?: {
17
+ message?: string;
18
+ };
19
+ };
20
/**
 * Shape of the global object that may provide the debugger and runtime
 * surfaces; both are optional, so consumers must check for their presence.
 */
+ export type ChromeGlobal = {
21
+ debugger?: ChromeDebuggerApi;
22
+ runtime?: ChromeRuntimeApi;
23
+ };
24
/**
 * Structural type for a CDP client. Every member is optional, and two calling
 * conventions are described side by side:
 *  - a generic one: `send(method, params)` plus `on` / `off` for events;
 *  - a per-domain one: `Network.*` and `Page.*` command methods, and
 *    `Network.<event>(listener)` registration functions.
 * `close`, when present, is called by the session's `stop()` if the session
 * owns the client.
 * NOTE(review): which convention is used for a given client is decided by the
 * `callCdp` / `subscribe` helpers, which are outside this excerpt — confirm there.
 */
+ export type CdpClient = {
25
+ send?: (method: string, params?: Record<string, unknown>) => Promise<unknown>;
26
+ on?: (event: string, listener: (payload: unknown) => void) => void;
27
+ off?: (event: string, listener: (payload: unknown) => void) => void;
28
+ close?: () => Promise<void>;
29
+ Network?: {
30
+ enable?: (params?: Record<string, unknown>) => Promise<void>;
31
+ disable?: () => Promise<void>;
32
+ getResponseBody?: (params: {
33
+ requestId: string;
34
+ }) => Promise<{
35
+ body: string;
36
+ base64Encoded?: boolean;
37
+ }>;
38
+ requestWillBeSent?: (listener: (payload: unknown) => void) => void;
39
+ responseReceived?: (listener: (payload: unknown) => void) => void;
40
+ loadingFailed?: (listener: (payload: unknown) => void) => void;
41
+ loadingFinished?: (listener: (payload: unknown) => void) => void;
42
+ };
43
+ Page?: {
44
+ enable?: () => Promise<void>;
45
+ navigate?: (params: {
46
+ url: string;
47
+ }) => Promise<void>;
48
+ getResourceContent?: (params: {
49
+ frameId: string;
50
+ url: string;
51
+ }) => Promise<{
52
+ content: string;
53
+ base64Encoded?: boolean;
54
+ }>;
55
+ };
56
+ };
57
/**
 * Payload of the `Network.requestWillBeSent` event as consumed by the session,
 * which forwards it to the request-state helper together with a resolved
 * timestamp.
 *
 * `timestamp` and `wallTime` feed the timestamp resolver, which multiplies
 * `wallTime` by 1000 to obtain milliseconds and uses `wallTime - timestamp` as
 * the offset for later events that carry only `timestamp`.
 * NOTE(review): `redirectResponse` is presumably set when this request follows
 * a redirect — its handling lives in the request-state helper; confirm there.
 */
+ export type RequestWillBeSent = {
58
+ requestId: string;
59
+ frameId?: string;
60
+ timestamp?: number;
61
+ wallTime?: number;
62
+ type?: string;
63
+ initiator?: {
64
+ type?: string;
65
+ url?: string;
66
+ };
67
+ request: {
68
+ url: string;
69
+ method: string;
70
+ headers?: Record<string, unknown>;
71
+ };
72
+ redirectResponse?: ResponseReceived["response"];
73
+ };
74
/**
 * Payload of the `Network.responseReceived` event as consumed by the session.
 * The nested `response` is what the session stores until the request finishes;
 * its `url`, `status`, `statusText`, `headers`, `mimeType`, `fromDiskCache` and
 * `fromServiceWorker` are copied onto the emitted `"response"` event, and
 * `mimeType` is also used to infer a resource type when none is known.
 */
+ export type ResponseReceived = {
75
+ requestId: string;
76
+ frameId?: string;
77
+ timestamp?: number;
78
+ wallTime?: number;
79
+ type?: string;
80
+ response: {
81
+ url: string;
82
+ status: number;
83
+ statusText?: string;
84
+ headers?: Record<string, unknown>;
85
+ mimeType?: string;
86
+ fromDiskCache?: boolean;
87
+ fromServiceWorker?: boolean;
88
+ };
89
+ };
90
/**
 * Payload of the `Network.loadingFailed` event as consumed by the session.
 * `errorText` is copied onto the emitted `"failed"` event; `timestamp` /
 * `wallTime` are used to resolve that event's timestamp.
 */
+ export type LoadingFailed = {
91
+ requestId: string;
92
+ timestamp?: number;
93
+ wallTime?: number;
94
+ errorText: string;
95
+ };
96
/**
 * Payload of the `Network.loadingFinished` event as consumed by the session.
 * `requestId` selects the stored response to emit; `timestamp` / `wallTime`
 * are used to resolve the emitted `"response"` event's timestamp.
 */
+ export type LoadingFinished = {
97
+ requestId: string;
98
+ timestamp?: number;
99
+ wallTime?: number;
100
+ };
101
/**
 * What the session keeps between `responseReceived` and `loadingFinished`.
 * `requestId` here is the logical request id returned by the request-state
 * helper (not the CDP request id, which is the key the entry is stored under).
 */
+ export type StoredResponse = {
102
+ requestId: string;
103
+ response: ResponseReceived["response"];
104
+ };
105
/**
 * Per-request metadata the session keeps keyed by CDP request id.
 *  - `url` and `frameId` are the arguments for the `Page.getResourceContent`
 *    fallback when no response body could be fetched.
 *  - `resourceType` may be filled in later from the response MIME type; the
 *    value `"document"` makes the session try to resolve the HTML milestone.
 *  - `initiator` is carried over unchanged when the entry is rewritten.
 */
+ export type RequestInfo = {
106
+ url: string;
107
+ frameId?: string;
108
+ resourceType?: ResourceType;
109
+ initiator?: {
110
+ type?: string;
111
+ url?: string;
112
+ };
113
+ };
@@ -0,0 +1 @@
1
+ export {};
package/package.json ADDED
@@ -0,0 +1,23 @@
1
+ {
2
+ "name": "@pagepocket/capture-http-cdp-unit",
3
+ "version": "0.8.0",
4
+ "description": "PagePocket plugin: capture HTTP events (CDP)",
5
+ "type": "module",
6
+ "main": "dist/index.js",
7
+ "types": "dist/index.d.ts",
8
+ "files": [
9
+ "dist"
10
+ ],
11
+ "license": "ISC",
12
+ "dependencies": {
13
+ "@pagepocket/lib": "0.8.0",
14
+ "@pagepocket/contracts": "0.8.0"
15
+ },
16
+ "devDependencies": {
17
+ "typescript": "^5.4.5"
18
+ },
19
+ "scripts": {
20
+ "build": "tsc -p tsconfig.json",
21
+ "test": "node -e \"process.exit(0)\""
22
+ }
23
+ }