@pagepocket/capture-http-cdp-unit 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/capture-http-cdp-plugin.d.ts +9 -0
- package/dist/capture-http-cdp-plugin.js +132 -0
- package/dist/capture-http-cdp-unit.d.ts +13 -0
- package/dist/capture-http-cdp-unit.js +123 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +1 -0
- package/dist/internal/cdp-adapter.d.ts +29 -0
- package/dist/internal/cdp-adapter.js +40 -0
- package/dist/internal/cdp-utils.d.ts +11 -0
- package/dist/internal/cdp-utils.js +112 -0
- package/dist/internal/chrome-debugger-client.d.ts +2 -0
- package/dist/internal/chrome-debugger-client.js +89 -0
- package/dist/internal/page-actions.d.ts +6 -0
- package/dist/internal/page-actions.js +58 -0
- package/dist/internal/session/html-milestone.d.ts +16 -0
- package/dist/internal/session/html-milestone.js +29 -0
- package/dist/internal/session/request-state.d.ts +27 -0
- package/dist/internal/session/request-state.js +110 -0
- package/dist/internal/session/time.d.ts +6 -0
- package/dist/internal/session/time.js +24 -0
- package/dist/internal/session.d.ts +10 -0
- package/dist/internal/session.js +191 -0
- package/dist/internal/types.d.ts +113 -0
- package/dist/internal/types.js +1 -0
- package/package.json +23 -0
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { type PagePocketContext, type PagePocketPlugin } from "@pagepocket/lib";
|
|
2
|
+
import { type CdpAdapterOptions } from "./internal/cdp-adapter.js";
|
|
3
|
+
/** Options accepted by `CaptureHttpCdpPlugin`; an alias of the internal `CdpAdapterOptions`. */
export type CaptureHttpCdpPluginOptions = CdpAdapterOptions;
|
|
4
|
+
/**
 * PagePocket plugin that captures HTTP traffic of a `cdp-tab` entry through a CDP adapter.
 *
 * `apply()` installs `ctx.capture` and registers `onInit` / `onBeforeNetwork` hooks;
 * the `onInit` hook throws when the entry kind is not `"cdp-tab"`.
 */
export declare class CaptureHttpCdpPlugin implements PagePocketPlugin {
|
|
5
|
+
readonly name = "plugin:capture-http-cdp";
|
|
6
|
+
private adapterOptions;
|
|
7
|
+
constructor(options?: CaptureHttpCdpPluginOptions);
|
|
8
|
+
apply(ctx: PagePocketContext): void;
|
|
9
|
+
}
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
import { createMemoryContentStore } from "@pagepocket/lib";
|
|
2
|
+
import { InflightTracker, networkIdle, normalizeCompletion, timeout } from "@pagepocket/lib";
|
|
3
|
+
import { CdpAdapter } from "./internal/cdp-adapter.js";
|
|
4
|
+
/**
 * Turns a header record into a list of `{ name, value }` entries.
 *
 * @param headers Record keyed by header name; may be missing.
 * @returns One entry per own enumerable key, or an empty list when `headers` is falsy.
 */
const headersRecordToList = (headers) => {
    if (headers) {
        return Object.entries(headers).map(([name, value]) => ({ name, value }));
    }
    return [];
};
|
|
9
|
+
/**
 * PagePocket plugin that records the HTTP traffic of a `cdp-tab` entry through
 * the Chrome DevTools Protocol.
 *
 * `apply()` installs `ctx.capture` and registers two hooks:
 * - `onInit` starts a CDP session and, when `ctx.html` is not set, waits for the HTML;
 * - `onBeforeNetwork` starts the capture, waits for a completion strategy and stops the session.
 */
export class CaptureHttpCdpPlugin {
    /**
     * @param options Options handed to `CdpAdapter`; `{}` is used when omitted.
     */
    constructor(options) {
        this.name = "plugin:capture-http-cdp";
        this.adapterOptions = options ?? {};
    }
    /**
     * Installs `ctx.capture` and registers the `onInit` / `onBeforeNetwork` hooks.
     *
     * @param ctx PagePocket context the plugin is applied to.
     */
    apply(ctx) {
        const contentStore = createMemoryContentStore("capture-http-cdp");
        const events = [];
        const capabilities = {
            requestHeaders: "approx",
            responseHeaders: "approx",
            requestBodies: false,
            responseBodies: "decoded",
            httpVersion: false,
            remoteIp: false,
            headerOrderPreserved: false
        };
        ctx.capture = {
            events,
            contentStore,
            capabilities
        };
        const inflightTracker = new InflightTracker();
        // Recording tasks still in flight: response bodies are stored asynchronously,
        // so a response event may be appended after the adapter callback has returned.
        const pendingRecords = new Set();
        /** Mirrors one adapter event into the tracker, the context and `events`. */
        const handleNetworkEvent = async (event) => {
            inflightTracker.handleEvent(event);
            ctx.emitNetworkEvent?.(event);
            if (event.type === "request") {
                events.push({
                    type: "http.request",
                    requestId: event.requestId,
                    url: event.url,
                    method: event.method,
                    headers: headersRecordToList(event.headers),
                    timestamp: event.timestamp,
                    frameId: event.frameId,
                    resourceType: event.resourceType,
                    initiator: event.initiator
                });
                return;
            }
            if (event.type === "failed") {
                events.push({
                    type: "http.failed",
                    requestId: event.requestId,
                    url: event.url,
                    errorText: event.errorText,
                    timestamp: event.timestamp
                });
                return;
            }
            // Anything else is treated as a response; the body (when present) goes to the content store.
            const bodyRef = event.body
                ? await contentStore.put(event.body, {
                    url: event.url,
                    mimeType: event.mimeType,
                    sizeHint: undefined
                })
                : undefined;
            events.push({
                type: "http.response",
                requestId: event.requestId,
                url: event.url,
                status: event.status,
                statusText: event.statusText,
                headers: headersRecordToList(event.headers),
                timestamp: event.timestamp,
                mimeType: event.mimeType,
                fromDiskCache: event.fromDiskCache,
                fromServiceWorker: event.fromServiceWorker,
                bodyRef,
                bodySize: undefined
            });
        };
        /** Starts recording an event, reports failures instead of leaving an unhandled rejection. */
        const recordNetworkEvent = (event) => {
            const task = handleNetworkEvent(event).catch((error) => {
                // eslint-disable-next-line no-console
                console.warn("[pagepocket][capture-http-cdp] failed to record network event", error);
            });
            pendingRecords.add(task);
            void task.then(() => pendingRecords.delete(task));
        };
        /** Best-effort stop used on error paths so the original error is the one that propagates. */
        const stopAfterFailure = async (session) => {
            try {
                await session.stop();
            }
            catch (stopError) {
                // eslint-disable-next-line no-console
                console.warn("[pagepocket][capture-http-cdp] failed to stop session", stopError);
            }
        };
        const stateKey = "captureHttpCdp.session";
        ctx.onInit(async () => {
            if (ctx.entry.kind !== "cdp-tab") {
                throw new Error(`CaptureHttpCdpPlugin requires entry kind "cdp-tab" (got ${String(ctx.entry.kind)})`);
            }
            const adapter = new CdpAdapter(this.adapterOptions);
            const session = await adapter.start({ kind: "cdp-tab", tabId: ctx.entry.tabId }, {
                onEvent(event) {
                    recordNetworkEvent(event);
                },
                onError(error) {
                    // eslint-disable-next-line no-console
                    console.warn("[pagepocket][capture-http-cdp] adapter error", error);
                }
            }, {
                timeoutMs: ctx.options.timeoutMs,
                maxDurationMs: ctx.options.maxDurationMs
            });
            ctx.state[stateKey] = session;
            if (!ctx.html) {
                try {
                    const html = await session.waitForHtml();
                    ctx.setHtml(html);
                }
                catch (error) {
                    // Do not leave the session running when initialisation fails.
                    await stopAfterFailure(session);
                    throw error;
                }
            }
        });
        ctx.onBeforeNetwork(async () => {
            const session = ctx.state[stateKey];
            if (!session) {
                throw new Error("CaptureHttpCdpPlugin internal error: missing session");
            }
            try {
                await session.startCapture();
                const completionStrategies = normalizeCompletion(ctx.options.completion);
                const idleMs = ctx.options.timeoutMs ?? 5000;
                const maxDurationMs = ctx.options.maxDurationMs;
                // Without configured strategies: network idle, optionally capped by a timeout.
                const completion = completionStrategies.length > 0
                    ? completionStrategies
                    : [networkIdle(idleMs), ...(maxDurationMs !== undefined ? [timeout(maxDurationMs)] : [])];
                // The first strategy to finish ends the capture (a single strategy is simply awaited).
                await Promise.race(completion.map((strategy) => strategy.wait({
                    now: () => Date.now(),
                    getStats: () => inflightTracker.getStats()
                })));
            }
            catch (error) {
                await stopAfterFailure(session);
                throw error;
            }
            await session.stop();
            // Let in-flight response records finish so `events` is complete when the hook returns.
            await Promise.all([...pendingRecords]);
        });
    }
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { Unit, type CaptureArtifacts } from "@pagepocket/lib";
|
|
2
|
+
import { type CdpAdapterOptions } from "./internal/cdp-adapter.js";
|
|
3
|
+
/** Options accepted by `CaptureHttpCdpUnit`; an alias of the internal `CdpAdapterOptions`. */
export type CaptureHttpCdpUnitOptions = CdpAdapterOptions;
|
|
4
|
+
/**
 * Unit that captures HTTP traffic of a `cdp-tab` entry through a CDP adapter.
 *
 * `run()` resolves with the capture artifacts (events, content store, capabilities)
 * and the HTML value, taken from `ctx.value.html` when set or from the session otherwise.
 */
export declare class CaptureHttpCdpUnit extends Unit {
|
|
5
|
+
readonly id = "captureHttpCdp";
|
|
6
|
+
readonly kind = "capture.http.cdp";
|
|
7
|
+
private adapterOptions;
|
|
8
|
+
constructor(options?: CaptureHttpCdpUnitOptions);
|
|
9
|
+
run(ctx: import("@pagepocket/lib").UnitContext, rt: import("@pagepocket/lib").UnitRuntime): Promise<{
|
|
10
|
+
capture: CaptureArtifacts;
|
|
11
|
+
html: {};
|
|
12
|
+
}>;
|
|
13
|
+
}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import { NETWORK } from "@pagepocket/contracts";
|
|
2
|
+
import { Unit, createMemoryContentStore, InflightTracker, mapKind, networkIdle, normalizeCompletion, throwRequiredEntryKind, timeout } from "@pagepocket/lib";
|
|
3
|
+
import { CdpAdapter } from "./internal/cdp-adapter.js";
|
|
4
|
+
/**
 * Flattens a header record into `{ name, value }` pairs.
 *
 * @param headers Record keyed by header name; may be missing.
 * @returns One pair per own enumerable key; `[]` when `headers` is falsy.
 */
const headersRecordToList = (headers) => {
    const pairs = [];
    if (headers) {
        for (const [name, value] of Object.entries(headers)) {
            pairs.push({ name, value });
        }
    }
    return pairs;
};
|
|
9
|
+
/**
 * Builders keyed by entry kind; each turns an entry into the target handed to the adapter.
 * Only `cdp-tab` entries are supported, and only their `tabId` is carried over.
 */
const targetBuilders = {
    "cdp-tab": ({ tabId }) => {
        const target = { kind: "cdp-tab", tabId };
        return target;
    }
};
|
|
15
|
+
/**
 * Unit that records the HTTP traffic of a `cdp-tab` entry through the Chrome
 * DevTools Protocol and returns it as capture artifacts.
 */
export class CaptureHttpCdpUnit extends Unit {
    /**
     * @param options Options handed to `CdpAdapter`; `{}` is used when omitted.
     */
    constructor(options) {
        super();
        this.id = "captureHttpCdp";
        this.kind = "capture.http.cdp";
        this.adapterOptions = options ?? {};
    }
    /**
     * Starts a CDP session for the runtime entry, captures traffic until a completion
     * strategy finishes, stops the session and returns what was recorded.
     *
     * @param ctx Unit context; `ctx.value.html` is reused when already present.
     * @param rt Unit runtime providing `entry`, `options` and `publish`.
     * @returns `{ capture, html }` where `capture` holds `events`, `contentStore` and `capabilities`.
     * @throws Whatever `mapKind` / `throwRequiredEntryKind` raise for unsupported entry kinds,
     * and any error from the session or a completion strategy (the session is stopped first).
     */
    async run(ctx, rt) {
        const target = mapKind(rt.entry, targetBuilders, {
            onUnsupportedKind: throwRequiredEntryKind("CaptureHttpCdpUnit", "cdp-tab")
        });
        const contentStore = createMemoryContentStore("capture-http-cdp");
        const events = [];
        const capabilities = {
            requestHeaders: "approx",
            responseHeaders: "approx",
            requestBodies: false,
            responseBodies: "decoded",
            httpVersion: false,
            remoteIp: false,
            headerOrderPreserved: false
        };
        const inflightTracker = new InflightTracker();
        // Recording tasks still in flight: response bodies are stored asynchronously,
        // so a response event may be appended after the adapter callback has returned.
        const pendingRecords = new Set();
        /** Mirrors one adapter event into the tracker, the NETWORK channel and `events`. */
        const handleNetworkEvent = async (event) => {
            inflightTracker.handleEvent(event);
            rt.publish(NETWORK, event);
            if (event.type === "request") {
                events.push({
                    type: "http.request",
                    requestId: event.requestId,
                    url: event.url,
                    method: event.method,
                    headers: headersRecordToList(event.headers),
                    timestamp: event.timestamp,
                    frameId: event.frameId,
                    resourceType: event.resourceType,
                    initiator: event.initiator
                });
                return;
            }
            if (event.type === "failed") {
                events.push({
                    type: "http.failed",
                    requestId: event.requestId,
                    url: event.url,
                    errorText: event.errorText,
                    timestamp: event.timestamp
                });
                return;
            }
            // Anything else is treated as a response; the body (when present) goes to the content store.
            const bodyRef = event.body
                ? await contentStore.put(event.body, {
                    url: event.url,
                    mimeType: event.mimeType,
                    sizeHint: undefined
                })
                : undefined;
            events.push({
                type: "http.response",
                requestId: event.requestId,
                url: event.url,
                status: event.status,
                statusText: event.statusText,
                headers: headersRecordToList(event.headers),
                timestamp: event.timestamp,
                mimeType: event.mimeType,
                fromDiskCache: event.fromDiskCache,
                fromServiceWorker: event.fromServiceWorker,
                bodyRef,
                bodySize: undefined
            });
        };
        /** Starts recording an event, reports failures instead of leaving an unhandled rejection. */
        const recordNetworkEvent = (event) => {
            const task = handleNetworkEvent(event).catch((error) => {
                console.warn("[pagepocket][capture-http-cdp] failed to record network event", error);
            });
            pendingRecords.add(task);
            void task.then(() => pendingRecords.delete(task));
        };
        const capture = { events, contentStore, capabilities };
        const adapter = new CdpAdapter(this.adapterOptions);
        const session = await adapter.start(target, {
            onEvent: (event) => {
                recordNetworkEvent(event);
            },
            onError: (error) => {
                console.warn("[pagepocket][capture-http-cdp] adapter error", error);
            }
        }, {
            timeoutMs: rt.options.timeoutMs,
            maxDurationMs: rt.options.maxDurationMs
        });
        let html;
        try {
            html = ctx.value.html ?? (await session.waitForHtml());
            await session.startCapture();
            const completionStrategies = normalizeCompletion(rt.options.completion);
            const idleMs = rt.options.timeoutMs ?? 5000;
            const maxDurationMs = rt.options.maxDurationMs;
            // Without configured strategies: network idle, optionally capped by a timeout.
            const completion = completionStrategies.length > 0
                ? completionStrategies
                : [networkIdle(idleMs), ...(maxDurationMs !== undefined ? [timeout(maxDurationMs)] : [])];
            // The first strategy to finish ends the capture (a single strategy is simply awaited).
            await Promise.race(completion.map((strategy) => strategy.wait({
                now: () => Date.now(),
                getStats: () => inflightTracker.getStats()
            })));
        }
        catch (error) {
            // Do not leave the session running on failure; keep the original error as the one thrown.
            try {
                await session.stop();
            }
            catch (stopError) {
                console.warn("[pagepocket][capture-http-cdp] failed to stop session", stopError);
            }
            throw error;
        }
        await session.stop();
        // Let in-flight response records finish so `capture.events` is complete when returned.
        await Promise.all([...pendingRecords]);
        return { capture, html };
    }
}
|
package/dist/index.d.ts
ADDED
package/dist/index.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { CaptureHttpCdpUnit } from "./capture-http-cdp-unit.js";
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import type { InterceptSession, InterceptTarget, NetworkEventHandlers, NetworkInterceptorAdapter, TriggerAction } from "@pagepocket/lib";
|
|
2
|
+
import type { CdpClient } from "./types.js";
|
|
3
|
+
/**
 * Options for `CdpAdapter`.
 *
 * - `protocolVersion`: protocol version used for the client; the adapter falls back to "1.3".
 * - `clientFactory`: optional factory for the CDP client; when it is absent or yields
 *   `null`/`undefined`, the adapter creates a `chrome.debugger` based client instead.
 * - `triggerActions`: actions forwarded to the session created by the adapter.
 */
export type CdpAdapterOptions = {
|
|
4
|
+
protocolVersion?: string;
|
|
5
|
+
clientFactory?: (options?: {
|
|
6
|
+
tabId: number;
|
|
7
|
+
protocolVersion: string;
|
|
8
|
+
}) => Promise<CdpClient> | CdpClient;
|
|
9
|
+
triggerActions?: TriggerAction[];
|
|
10
|
+
};
|
|
11
|
+
/**
|
|
12
|
+
* Internal adapter for the capture plugin.
|
|
13
|
+
*
|
|
14
|
+
* Note: this is intentionally NOT published as a separate package.
|
|
15
|
+
*/
|
|
16
|
+
// Network interceptor adapter backed by a CDP client; see the implementation in cdp-adapter.js.
export declare class CdpAdapter implements NetworkInterceptorAdapter {
|
|
17
|
+
readonly name = "cdp";
|
|
18
|
+
// Static feature flags advertised by the adapter (set once in the constructor).
readonly capabilities: {
|
|
19
|
+
canGetResponseBody: boolean;
|
|
20
|
+
canStreamResponseBody: boolean;
|
|
21
|
+
canGetRequestBody: boolean;
|
|
22
|
+
providesResourceType: boolean;
|
|
23
|
+
canWaitForHtml: boolean;
|
|
24
|
+
supportsStagedCapture: boolean;
|
|
25
|
+
};
|
|
26
|
+
private options;
|
|
27
|
+
constructor(options?: CdpAdapterOptions);
|
|
28
|
+
// Rejects unless `target.kind` is "cdp-tab"; `_options` is not read by the implementation.
start(target: InterceptTarget, handlers: NetworkEventHandlers, _options?: Record<string, unknown>): Promise<InterceptSession>;
|
|
29
|
+
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { createChromeDebuggerClient } from "./chrome-debugger-client.js";
|
|
2
|
+
import { createCdpSession } from "./session.js";
|
|
3
|
+
/**
 * Internal adapter for the capture plugin.
 *
 * Note: this is intentionally NOT published as a separate package.
 */
export class CdpAdapter {
    /**
     * @param options Adapter options (`protocolVersion`, `clientFactory`, `triggerActions`).
     */
    constructor(options = {}) {
        this.name = "cdp";
        this.capabilities = {
            canGetResponseBody: true,
            canStreamResponseBody: false,
            canGetRequestBody: false,
            providesResourceType: true,
            canWaitForHtml: true,
            supportsStagedCapture: true
        };
        this.options = options;
    }
    /**
     * Creates a CDP session for a `cdp-tab` target.
     *
     * @param target Intercept target; must have `kind === "cdp-tab"`.
     * @param handlers Event/error callbacks handed to the session.
     * @param _options Unused.
     * @returns The session produced by `createCdpSession`.
     * @throws Error when the target kind is not `cdp-tab`.
     */
    async start(target, handlers, _options) {
        const isCdpTab = target.kind === "cdp-tab";
        if (!isCdpTab) {
            throw new Error("CdpAdapter only supports cdp-tab targets.");
        }
        const protocolVersion = this.options.protocolVersion ?? "1.3";
        const makeClient = this.options.clientFactory;
        // A user-supplied factory wins; a nullish result falls back to the chrome.debugger client.
        const suppliedClient = await makeClient?.({ tabId: target.tabId, protocolVersion });
        const client = suppliedClient ?? createChromeDebuggerClient({ tabId: target.tabId }, protocolVersion);
        // `cdp-tab` sessions already support `waitForHtml()` and `startCapture()`,
        // so the session is returned as-is; the adapter always marks the client as owned.
        const session = await createCdpSession({
            client,
            handlers,
            ownsClient: true,
            triggerActions: this.options.triggerActions
        });
        return session;
    }
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { ResourceType } from "@pagepocket/lib";
|
|
2
|
+
import type { CdpClient } from "./types.js";
|
|
3
|
+
/** Decodes a base64 string into bytes (uses `globalThis.Buffer` when present, `atob` otherwise). */
export declare const decodeBase64: (input: string) => Uint8Array<ArrayBuffer>;
|
|
4
|
+
/** Encodes a string as UTF-8 bytes via `TextEncoder`. */
export declare const encodeUtf8: (input: string) => Uint8Array<ArrayBuffer>;
|
|
5
|
+
/** Converts header values to strings: arrays are joined with ", ", `null`/`undefined` entries are dropped. */
export declare const normalizeHeaders: (headers?: Record<string, unknown>) => Record<string, string>;
|
|
6
|
+
/** Lower-cases a CDP resource type; returns `undefined` for missing or empty input. */
export declare const mapResourceType: (input?: string) => ResourceType | undefined;
|
|
7
|
+
/** Guesses a resource type (document, stylesheet, script, image, font, media) from a MIME type; `undefined` when nothing matches. */
export declare const inferResourceTypeFromMime: (mimeType?: string) => ResourceType | undefined;
|
|
8
|
+
/** Invokes a CDP method via `client.send(method, params)` or, failing that, `client[Domain][command](params)`; throws when neither exists. */
export declare const callCdp: <T>(client: CdpClient, method: string, params?: Record<string, unknown>) => Promise<T>;
|
|
9
|
+
/** Registers `handler` for a CDP event via `client.on` or `client[Domain][event]`; returns a teardown function and throws when the client offers neither. */
export declare const subscribe: (client: CdpClient, eventName: string, handler: (payload: unknown) => void) => () => void | undefined;
|
|
10
|
+
/** True when the error text contains "No data found for resource with given identifier". */
export declare const isNoBodyError: (error: unknown) => boolean;
|
|
11
|
+
/** Writes `label` and the JSON-serialised `data` to `console.info` with a `[pagepocket][cdp-adapter]` prefix. */
export declare const logInfo: (label: string, data: Record<string, unknown>) => void;
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
/**
 * Decodes a base64 string into a fresh `Uint8Array`.
 *
 * @param input Base64 text.
 * @returns The decoded bytes.
 */
export const decodeBase64 = (input) => {
    const NodeBuffer = globalThis.Buffer;
    if (!NodeBuffer) {
        // No Buffer available: decode with atob, one byte per resulting character.
        return Uint8Array.from(atob(input), (ch) => ch.charCodeAt(0));
    }
    return new Uint8Array(NodeBuffer.from(input, "base64"));
};
|
|
13
|
+
/**
 * Encodes a string as UTF-8.
 *
 * @param input Text to encode.
 * @returns The UTF-8 bytes produced by `TextEncoder`.
 */
export const encodeUtf8 = (input) => {
    const encoder = new TextEncoder();
    return encoder.encode(input);
};
|
|
14
|
+
/**
 * Produces a string-valued copy of a header record.
 *
 * @param headers Raw headers; values may be of any type.
 * @returns A new record where arrays are joined with ", ", other values are passed
 * through `String()`, and `null`/`undefined` values are omitted. `{}` for falsy input.
 */
export const normalizeHeaders = (headers) => {
    const normalized = {};
    if (headers) {
        Object.entries(headers).forEach(([name, raw]) => {
            if (raw === undefined || raw === null) {
                return;
            }
            normalized[name] = Array.isArray(raw) ? raw.join(", ") : String(raw);
        });
    }
    return normalized;
};
|
|
26
|
+
/**
 * Maps a CDP resource type name to its lower-case form.
 *
 * Known names ("document", "stylesheet", "script", "image", "font", "media", "xhr",
 * "fetch", "other") and unrecognised names alike come back lower-cased.
 *
 * @param input Resource type as reported by CDP.
 * @returns The lower-cased type, or `undefined` when `input` is falsy.
 */
export const mapResourceType = (input) => {
    return input ? input.toLowerCase() : undefined;
};
|
|
53
|
+
/**
 * Infers a resource type from a MIME type string (case-insensitive).
 *
 * @param mimeType MIME type, possibly with parameters.
 * @returns "document", "stylesheet", "script", "image", "font" or "media";
 * `undefined` when the input is falsy or nothing matches.
 */
export const inferResourceTypeFromMime = (mimeType) => {
    if (!mimeType) {
        return undefined;
    }
    const mime = mimeType.toLowerCase();
    const containsAny = (...needles) => needles.some((needle) => mime.includes(needle));
    const startsWithAny = (...prefixes) => prefixes.some((prefix) => mime.startsWith(prefix));
    // Rules are evaluated in order; the first match wins.
    const rules = [
        ["document", () => containsAny("text/html", "application/xhtml+xml")],
        ["stylesheet", () => containsAny("text/css")],
        ["script", () => containsAny("javascript", "ecmascript")],
        ["image", () => startsWithAny("image/")],
        ["font", () => startsWithAny("font/") || containsAny("woff", "ttf", "otf")],
        ["media", () => startsWithAny("audio/", "video/")]
    ];
    const match = rules.find(([, applies]) => applies());
    return match ? match[0] : undefined;
};
|
|
80
|
+
/**
 * Invokes a CDP method on either style of client.
 *
 * Clients exposing `send(method, params)` are used directly. Otherwise the method name
 * is split as `Domain.command` and `client[Domain][command](params)` is called with the
 * domain object as `this`, so domain methods that rely on their receiver keep working.
 *
 * @param client CDP client.
 * @param method Fully qualified method name, e.g. "Page.enable".
 * @param params Optional method parameters.
 * @returns Whatever the client resolves with.
 * @throws Error when the client offers neither `send` nor the domain method.
 */
export const callCdp = async (client, method, params) => {
    if (typeof client.send === "function") {
        return (await client.send(method, params));
    }
    const [domain, command] = method.split(".");
    const domainApi = client[domain];
    const fn = domainApi?.[command];
    if (typeof fn === "function") {
        // Call through the domain object; a detached call would lose `this`.
        return (await fn.call(domainApi, params));
    }
    throw new Error(`CDP session missing method ${method}.`);
};
|
|
92
|
+
/**
 * Registers a handler for a CDP event on either style of client.
 *
 * Emitter-style clients (`on` / optional `off`) are used directly. Otherwise the event
 * name is split as `Domain.event` and `client[Domain][event](handler)` is called with the
 * domain object as `this`; when that call returns a function it is used to unsubscribe.
 *
 * @param client CDP client.
 * @param eventName Fully qualified event name, e.g. "Network.responseReceived".
 * @param handler Callback receiving the event payload.
 * @returns A teardown function; it is a no-op when the client gives no way to unsubscribe.
 * @throws Error when the client offers neither `on` nor the domain event method.
 */
export const subscribe = (client, eventName, handler) => {
    if (typeof client.on === "function") {
        client.on(eventName, handler);
        return () => client.off?.(eventName, handler);
    }
    const [domain, event] = eventName.split(".");
    const domainApi = client[domain];
    const fn = domainApi?.[event];
    if (typeof fn === "function") {
        // Call through the domain object; a detached call would lose `this`.
        const unsubscribe = fn.call(domainApi, handler);
        if (typeof unsubscribe === "function") {
            return () => {
                unsubscribe();
            };
        }
        return () => undefined;
    }
    throw new Error(`CDP session missing event ${eventName}.`);
};
|
|
106
|
+
/**
 * Tells whether an error carries the "no data found" text that CDP reports for a
 * resource without a retrievable body.
 *
 * @param error Error object or any other thrown value.
 * @returns `true` when the message (or the stringified value) contains that text.
 */
export const isNoBodyError = (error) => {
    let text;
    if (error instanceof Error) {
        text = error.message;
    }
    else {
        text = String(error);
    }
    return text.includes("No data found for resource with given identifier");
};
|
|
110
|
+
/**
 * Logs a labelled, JSON-serialised payload at info level.
 *
 * @param label Short description of the log entry.
 * @param data Payload passed through `JSON.stringify`.
 */
export const logInfo = (label, data) => {
    const serialized = JSON.stringify(data);
    const line = `[pagepocket][cdp-adapter] ${label} ${serialized}`;
    console.info(line);
};
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/** Reads the `chrome` extension global, if the environment exposes one. */
const getChromeGlobal = () => globalThis.chrome;
/**
 * Creates a CDP client on top of the `chrome.debugger` extension API.
 *
 * The debugger is attached lazily on the first `send()`; concurrent sends share a single
 * attach call. Event listeners are tracked per event name so the same listener can be
 * registered for several events and removed individually. `close()` removes every
 * remaining listener and detaches when attached.
 *
 * @param target Debuggee passed to `chrome.debugger` (its `tabId` is used to filter events).
 * @param protocolVersion Protocol version passed to `chrome.debugger.attach`.
 * @returns An object with `send`, `on`, `off` and `close`.
 * @throws Error when `chrome.debugger` is not available.
 */
export const createChromeDebuggerClient = (target, protocolVersion) => {
    const chromeGlobal = getChromeGlobal();
    const chromeDebugger = chromeGlobal?.debugger;
    if (!chromeDebugger) {
        throw new Error("chrome.debugger API is not available in this environment.");
    }
    const chromeRuntime = chromeGlobal?.runtime;
    let attached = false;
    let closed = false;
    // Attach call currently in flight, shared by concurrent callers.
    let attaching = null;
    // One record per registration: { event, listener, handler }.
    const registrations = [];
    /** chrome.* callbacks signal failure through `chrome.runtime.lastError`. */
    const assertNoLastError = (action) => {
        const lastError = chromeRuntime?.lastError;
        if (lastError?.message) {
            throw new Error(`${action} failed: ${lastError.message}`);
        }
    };
    const attach = () => new Promise((resolve, reject) => {
        chromeDebugger.attach(target, protocolVersion, () => {
            try {
                assertNoLastError("chrome.debugger.attach");
                attached = true;
                resolve();
            }
            catch (error) {
                reject(error);
            }
        });
    });
    const detach = () => new Promise((resolve, reject) => {
        chromeDebugger.detach(target, () => {
            try {
                assertNoLastError("chrome.debugger.detach");
                attached = false;
                resolve();
            }
            catch (error) {
                reject(error);
            }
        });
    });
    const ensureAttached = () => {
        if (attached) {
            return Promise.resolve();
        }
        if (!attaching) {
            // Cleared once settled so a failed attach can be retried by a later send.
            attaching = attach().finally(() => {
                attaching = null;
            });
        }
        return attaching;
    };
    return {
        send: async (method, params) => {
            await ensureAttached();
            return new Promise((resolve, reject) => {
                chromeDebugger.sendCommand(target, method, params ?? {}, (result) => {
                    try {
                        assertNoLastError(`chrome.debugger.sendCommand(${method})`);
                        resolve(result);
                    }
                    catch (error) {
                        reject(error);
                    }
                });
            });
        },
        on: (event, listener) => {
            // chrome.debugger.onEvent is global: keep only this tab's events of the requested name.
            const handler = (source, method, params) => {
                if (source.tabId !== target.tabId)
                    return;
                if (method !== event)
                    return;
                listener(params);
            };
            registrations.push({ event, listener, handler });
            chromeDebugger.onEvent.addListener(handler);
        },
        off: (event, listener) => {
            // Remove the most recent registration of this listener for this event.
            for (let i = registrations.length - 1; i >= 0; i -= 1) {
                const registration = registrations[i];
                if (registration.event === event && registration.listener === listener) {
                    registrations.splice(i, 1);
                    chromeDebugger.onEvent.removeListener(registration.handler);
                    return;
                }
            }
        },
        close: async () => {
            if (closed)
                return;
            closed = true;
            for (const { handler } of registrations.splice(0)) {
                chromeDebugger.onEvent.removeListener(handler);
            }
            if (attaching) {
                // Wait for an in-flight attach so a late success still gets detached.
                try {
                    await attaching;
                }
                catch {
                    // Attach failed; there is nothing to detach.
                }
            }
            if (!attached)
                return;
            await detach();
        }
    };
};
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { type NetworkEventHandlers, type TriggerAction } from "@pagepocket/lib";
|
|
2
|
+
import type { CdpClient } from "./types.js";
|
|
3
|
+
/**
 * Builds page-level helpers bound to a CDP client.
 *
 * `ensurePageEnabled` calls `Page.enable` and ignores failures; `runTriggerActions`
 * runs the given trigger actions in order and reports failures through `handlers.onError`.
 */
export declare const createPageActions: (client: CdpClient, handlers: NetworkEventHandlers) => {
|
|
4
|
+
ensurePageEnabled: () => Promise<void>;
|
|
5
|
+
runTriggerActions: (actions?: TriggerAction[]) => Promise<void>;
|
|
6
|
+
};
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import { TriggerActionValues } from "@pagepocket/lib";
|
|
2
|
+
import { callCdp } from "./cdp-utils.js";
|
|
3
|
+
/**
 * Builds page-level helpers bound to a CDP client.
 *
 * @param client CDP client used for `Page.enable` and `Runtime.evaluate`.
 * @param handlers Handlers whose optional `onError` receives evaluation failures.
 * @returns `{ ensurePageEnabled, runTriggerActions }`.
 */
export const createPageActions = (client, handlers) => {
    // Dispatches a synthetic `mouseover` at the centre of every element in the document.
    const hoverScript = `(() => {
const elements = Array.from(document.querySelectorAll('*'));
for (const el of elements) {
const rect = el.getBoundingClientRect();
const x = rect.left + rect.width / 2;
const y = rect.top + rect.height / 2;
const ev = new MouseEvent('mouseover', { bubbles: true, cancelable: true, clientX: x, clientY: y });
el.dispatchEvent(ev);
}
})();`;
    // Scrolls the window to the document's scroll height.
    const scrollToEndScript = `(() => {
const scrollHeight = document.documentElement?.scrollHeight ?? document.body?.scrollHeight;
if (typeof scrollHeight === 'number') {
window.scrollTo({ top: scrollHeight, behavior: 'instant' });
}
})();`;
    /** Evaluates a script in the page; failures go to `handlers.onError` instead of propagating. */
    const evaluateReportingErrors = async (expression) => {
        try {
            await callCdp(client, "Runtime.evaluate", { expression });
        }
        catch (error) {
            handlers.onError?.(error);
        }
    };
    /** Enables the Page domain on a best-effort basis. */
    const ensurePageEnabled = async () => {
        try {
            await callCdp(client, "Page.enable");
        }
        catch {
            // Page domain may be unavailable; ignore.
        }
    };
    /** Runs each trigger action in order; an empty list does nothing. */
    const runTriggerActions = async (actions = []) => {
        if (actions.length === 0) {
            return;
        }
        await ensurePageEnabled();
        for (const action of actions) {
            if (action === TriggerActionValues.HOVER) {
                await evaluateReportingErrors(hoverScript);
            }
            if (action === TriggerActionValues.SCROLL_TO_END) {
                await evaluateReportingErrors(scrollToEndScript);
            }
        }
    };
    return { ensurePageEnabled, runTriggerActions };
};
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
 * Decoded HTML document captured from a response.
 * The milestone helper fills `baseUrl` and `url` with the same response URL.
 */
type HtmlArtifact = {
|
|
2
|
+
htmlString: string;
|
|
3
|
+
baseUrl: string;
|
|
4
|
+
url?: string;
|
|
5
|
+
contentType?: string;
|
|
6
|
+
};
|
|
7
|
+
/**
 * Creates a one-shot "HTML received" milestone.
 *
 * `tryResolveHtml` records the first input with a non-empty body and resolves
 * `htmlPromise` with the resulting artifact; later calls are ignored.
 * `getHtmlArtifact` returns the recorded artifact, or `null` before that.
 */
export declare const createHtmlMilestone: () => {
|
|
8
|
+
htmlPromise: Promise<never>;
|
|
9
|
+
tryResolveHtml: (input: {
|
|
10
|
+
url: string;
|
|
11
|
+
contentType?: string;
|
|
12
|
+
bodyBytes: Uint8Array | null;
|
|
13
|
+
}) => void;
|
|
14
|
+
getHtmlArtifact: () => HtmlArtifact | null;
|
|
15
|
+
};
|
|
16
|
+
export {};
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
 * Creates a one-shot "HTML received" milestone.
 *
 * @returns
 * - `htmlPromise`: resolves with the artifact once a non-empty body is recorded (never rejects);
 * - `tryResolveHtml(input)`: records the first input whose `bodyBytes` is non-empty;
 * - `getHtmlArtifact()`: the recorded artifact, or `null` while none exists.
 */
export const createHtmlMilestone = () => {
    let artifact = null;
    let settle = null;
    const htmlPromise = new Promise((resolve) => {
        settle = resolve;
    });
    const tryResolveHtml = (input) => {
        const alreadyRecorded = Boolean(artifact);
        const hasBody = Boolean(input.bodyBytes) && input.bodyBytes.byteLength !== 0;
        if (alreadyRecorded || !hasBody) {
            return;
        }
        const decoder = new TextDecoder();
        artifact = {
            htmlString: decoder.decode(input.bodyBytes),
            baseUrl: input.url,
            url: input.url,
            contentType: input.contentType
        };
        // Resolve once, then drop the resolver.
        const resolve = settle;
        settle = null;
        resolve?.(artifact);
    };
    const getHtmlArtifact = () => artifact;
    return { htmlPromise, tryResolveHtml, getHtmlArtifact };
};
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import type { NetworkEventHandlers, NetworkRequestEvent } from "@pagepocket/lib";
|
|
2
|
+
import type { RequestInfo, RequestWillBeSent, ResponseReceived } from "../types.js";
|
|
3
|
+
/**
 * Creates the per-session bookkeeping for CDP requests.
 *
 * Logical request ids have the form `<cdpRequestId>:<sequence>`; `getLogicalRequestId`
 * falls back to `<cdpRequestId>:0` for ids it has not seen. The two handlers emit
 * events through `handlers.onEvent` and return the ids they worked with.
 */
export declare const createRequestState: (handlers: NetworkEventHandlers) => {
|
|
4
|
+
activeRequestId: Map<string, string>;
|
|
5
|
+
requestUrls: Map<string, string>;
|
|
6
|
+
requestInfo: Map<string, RequestInfo>;
|
|
7
|
+
requestEvents: Map<string, NetworkRequestEvent>;
|
|
8
|
+
getLogicalRequestId: (cdpRequestId: string) => string;
|
|
9
|
+
handleRequestWillBeSent: (payload: RequestWillBeSent, timestampMs: number) => {
|
|
10
|
+
cdpRequestId: string;
|
|
11
|
+
logicalRequestId: string;
|
|
12
|
+
};
|
|
13
|
+
handleResponseReceived: (payload: ResponseReceived, timestampMs: number) => {
|
|
14
|
+
cdpRequestId: string;
|
|
15
|
+
logicalRequestId: string;
|
|
16
|
+
response: {
|
|
17
|
+
url: string;
|
|
18
|
+
status: number;
|
|
19
|
+
statusText?: string;
|
|
20
|
+
headers?: Record<string, unknown>;
|
|
21
|
+
mimeType?: string;
|
|
22
|
+
fromDiskCache?: boolean;
|
|
23
|
+
fromServiceWorker?: boolean;
|
|
24
|
+
};
|
|
25
|
+
resolvedType: import("@pagepocket/lib").ResourceType | undefined;
|
|
26
|
+
};
|
|
27
|
+
};
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import { inferResourceTypeFromMime, mapResourceType, normalizeHeaders } from "../cdp-utils.js";
|
|
2
|
+
export const createRequestState = (handlers) => {
|
|
3
|
+
const activeRequestId = new Map();
|
|
4
|
+
const requestSequence = new Map();
|
|
5
|
+
const requestUrls = new Map();
|
|
6
|
+
const requestInfo = new Map();
|
|
7
|
+
const requestEvents = new Map();
|
|
8
|
+
const getLogicalRequestId = (cdpRequestId) => activeRequestId.get(cdpRequestId) ?? `${cdpRequestId}:0`;
|
|
9
|
+
const handleRequestWillBeSent = (payload, timestampMs) => {
|
|
10
|
+
const cdpRequestId = payload.requestId;
|
|
11
|
+
if (payload.redirectResponse) {
|
|
12
|
+
const previousRequestId = getLogicalRequestId(cdpRequestId);
|
|
13
|
+
const redirectResponse = payload.redirectResponse;
|
|
14
|
+
const redirectEvent = {
|
|
15
|
+
type: "response",
|
|
16
|
+
requestId: previousRequestId,
|
|
17
|
+
url: redirectResponse.url,
|
|
18
|
+
status: redirectResponse.status,
|
|
19
|
+
statusText: redirectResponse.statusText ?? "",
|
|
20
|
+
headers: normalizeHeaders(redirectResponse.headers),
|
|
21
|
+
mimeType: redirectResponse.mimeType,
|
|
22
|
+
fromDiskCache: redirectResponse.fromDiskCache,
|
|
23
|
+
fromServiceWorker: redirectResponse.fromServiceWorker,
|
|
24
|
+
timestamp: timestampMs
|
|
25
|
+
};
|
|
26
|
+
handlers.onEvent(redirectEvent);
|
|
27
|
+
}
|
|
28
|
+
const sequence = (requestSequence.get(cdpRequestId) ?? -1) + 1;
|
|
29
|
+
requestSequence.set(cdpRequestId, sequence);
|
|
30
|
+
const logicalRequestId = `${cdpRequestId}:${sequence}`;
|
|
31
|
+
activeRequestId.set(cdpRequestId, logicalRequestId);
|
|
32
|
+
const url = payload.request.url;
|
|
33
|
+
requestUrls.set(logicalRequestId, url);
|
|
34
|
+
requestInfo.set(cdpRequestId, {
|
|
35
|
+
url,
|
|
36
|
+
frameId: payload.frameId,
|
|
37
|
+
resourceType: mapResourceType(payload.type),
|
|
38
|
+
initiator: payload.initiator
|
|
39
|
+
});
|
|
40
|
+
const requestEvent = {
|
|
41
|
+
type: "request",
|
|
42
|
+
requestId: logicalRequestId,
|
|
43
|
+
url,
|
|
44
|
+
method: payload.request.method || "GET",
|
|
45
|
+
headers: normalizeHeaders(payload.request.headers),
|
|
46
|
+
frameId: payload.frameId,
|
|
47
|
+
resourceType: mapResourceType(payload.type),
|
|
48
|
+
initiator: payload.initiator,
|
|
49
|
+
timestamp: timestampMs
|
|
50
|
+
};
|
|
51
|
+
requestEvents.set(cdpRequestId, requestEvent);
|
|
52
|
+
handlers.onEvent(requestEvent);
|
|
53
|
+
return { cdpRequestId, logicalRequestId };
|
|
54
|
+
};
|
|
55
|
+
/**
 * Records a received response for a CDP request and makes sure a "request"
 * event carrying a resource type has been emitted for it.
 *
 * - Remembers the response URL for the logical request id when none is known yet.
 * - Resolves the resource type from `payload.type`, falling back to the
 *   response MIME type.
 * - Fills in `requestInfo` when it has no resource type yet.
 * - Emits a synthesized "request" event when no request was stored for this
 *   CDP id, or re-emits the stored request with the resolved type when the
 *   stored one has none.
 *
 * @param payload Response-received payload (`requestId`, `response`, optional `type` / `frameId`).
 * @param timestampMs Timestamp stamped on any request event emitted here.
 * @returns `{ cdpRequestId, logicalRequestId, response, resolvedType }`.
 */
const handleResponseReceived = (payload, timestampMs) => {
    const { requestId: cdpRequestId, response } = payload;
    const logicalRequestId = getLogicalRequestId(cdpRequestId);
    if (!requestUrls.has(logicalRequestId)) {
        requestUrls.set(logicalRequestId, response.url);
    }
    const typeFromPayload = mapResourceType(payload.type);
    const typeFromMime = inferResourceTypeFromMime(response.mimeType);
    const resolvedType = typeFromPayload ?? typeFromMime;
    const outcome = { cdpRequestId, logicalRequestId, response, resolvedType };
    // Without a resolved type there is nothing to record or emit.
    if (!resolvedType) {
        return outcome;
    }
    const priorRequest = requestEvents.get(cdpRequestId);
    const knownInfo = requestInfo.get(cdpRequestId);
    if (!knownInfo?.resourceType) {
        requestInfo.set(cdpRequestId, {
            url: response.url,
            frameId: knownInfo?.frameId ?? payload.frameId,
            resourceType: resolvedType,
            initiator: knownInfo?.initiator
        });
    }
    // A stored request that already carries a type is left untouched.
    if (priorRequest && priorRequest.resourceType) {
        return outcome;
    }
    const requestToEmit = priorRequest
        ? { ...priorRequest, resourceType: resolvedType, timestamp: timestampMs }
        : {
            type: "request",
            requestId: logicalRequestId,
            url: response.url,
            method: "GET",
            headers: {},
            frameId: payload.frameId,
            resourceType: resolvedType,
            initiator: undefined,
            timestamp: timestampMs
        };
    requestEvents.set(cdpRequestId, requestToEmit);
    handlers.onEvent(requestToEmit);
    return outcome;
};
|
|
101
|
+
return {
|
|
102
|
+
activeRequestId,
|
|
103
|
+
requestUrls,
|
|
104
|
+
requestInfo,
|
|
105
|
+
requestEvents,
|
|
106
|
+
getLogicalRequestId,
|
|
107
|
+
handleRequestWillBeSent,
|
|
108
|
+
handleResponseReceived
|
|
109
|
+
};
|
|
110
|
+
};
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
 * Creates a resolver that converts the timing fields of an event payload into
 * a millisecond timestamp.
 *
 * Whenever a payload carries both `wallTime` and `timestamp`, the difference
 * between them is remembered per request id and as the most recent global
 * value. Payloads that only carry `timestamp` are converted with the
 * remembered offset (per-request first, then global). When nothing can be
 * resolved, `Date.now()` is returned.
 *
 * @returns An object exposing `resolveTimestampMs(payload, requestId)`.
 */
export const createTimestampResolver = () => {
    const offsetsByRequest = new Map();
    let latestOffset = null;
    const isNumber = (value) => typeof value === "number";
    const rememberOffset = (offset, requestId) => {
        if (requestId) {
            offsetsByRequest.set(requestId, offset);
        }
        latestOffset = offset;
    };
    const lookupOffset = (requestId) => {
        const perRequest = requestId ? offsetsByRequest.get(requestId) : undefined;
        return perRequest ?? latestOffset;
    };
    const resolveTimestampMs = (payload, requestId) => {
        const hasTimestamp = isNumber(payload.timestamp);
        if (isNumber(payload.wallTime)) {
            if (hasTimestamp) {
                rememberOffset(payload.wallTime - payload.timestamp, requestId);
            }
            return payload.wallTime * 1000;
        }
        if (hasTimestamp) {
            const offset = lookupOffset(requestId);
            if (isNumber(offset)) {
                return (payload.timestamp + offset) * 1000;
            }
        }
        return Date.now();
    };
    return { resolveTimestampMs };
};
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { InterceptSession, NetworkEventHandlers, TriggerAction } from "@pagepocket/lib";
|
|
2
|
+
import type { CdpClient } from "./types.js";
|
|
3
|
+
/** Options accepted by {@link createCdpSession}. */
type CreateSessionOptions = {
    /** CDP client the session issues commands to and subscribes on. */
    client: CdpClient;
    /** Handlers that receive the captured network events. */
    handlers: NetworkEventHandlers;
    /** Whether the session should also close the client when it is stopped. */
    ownsClient: boolean;
    /** Optional actions run when capture starts. */
    triggerActions?: TriggerAction[];
};
/**
 * Session factory: takes one {@link CreateSessionOptions} object and resolves
 * to an `InterceptSession`.
 */
export declare const createCdpSession: (options: CreateSessionOptions) => Promise<InterceptSession>;
|
|
10
|
+
export {};
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
import { callCdp, decodeBase64, encodeUtf8, inferResourceTypeFromMime, isNoBodyError, logInfo, normalizeHeaders, subscribe } from "./cdp-utils.js";
|
|
2
|
+
import { createPageActions } from "./page-actions.js";
|
|
3
|
+
import { createHtmlMilestone } from "./session/html-milestone.js";
|
|
4
|
+
import { createRequestState } from "./session/request-state.js";
|
|
5
|
+
import { createTimestampResolver } from "./session/time.js";
|
|
6
|
+
/**
 * Creates a capture session on top of a CDP client.
 *
 * Enables the `Network` domain, subscribes to `Network.requestWillBeSent`,
 * `Network.responseReceived`, `Network.loadingFailed` and
 * `Network.loadingFinished`, and forwards what it observes to
 * `handlers.onEvent` ("request", "response" and "failed" events).
 *
 * @param options.client CDP client handed to `callCdp` / `subscribe`.
 * @param options.handlers Receives events through `onEvent` and errors through the optional `onError`.
 * @param options.ownsClient When true, `stop()` also calls `client.close()` if the client provides it.
 * @param options.triggerActions Passed to `runTriggerActions` by `startCapture()`.
 * @returns Session controls: `navigate`, `waitForHtml`, `startCapture`, `stop`.
 * @throws Rethrows failures from enabling the Network domain or from subscribing;
 *         subscriptions registered before a failed subscribe are released first.
 */
export const createCdpSession = async ({ client, handlers, ownsClient, triggerActions }) => {
    await callCdp(client, "Network.enable");
    const { ensurePageEnabled, runTriggerActions } = createPageActions(client, handlers);
    const { htmlPromise, tryResolveHtml } = createHtmlMilestone();
    const requestState = createRequestState(handlers);
    // Responses keyed by CDP request id, held only until the request finishes or fails.
    const responses = new Map();
    const { resolveTimestampMs } = createTimestampResolver();
    const toError = (error) => (error instanceof Error ? error : new Error(String(error)));
    const reportError = (error) => {
        handlers.onError?.(toError(error));
    };
    const handleRequestWillBeSent = (payload) => {
        const cdpRequestId = payload.requestId;
        const eventTimestamp = resolveTimestampMs(payload, cdpRequestId);
        requestState.handleRequestWillBeSent(payload, eventTimestamp);
    };
    const handleResponseReceived = (payload) => {
        const cdpRequestId = payload.requestId;
        const eventTimestamp = resolveTimestampMs(payload, cdpRequestId);
        const result = requestState.handleResponseReceived(payload, eventTimestamp);
        logInfo("response received", {
            requestId: cdpRequestId,
            url: result.response.url,
            status: result.response.status,
            mimeType: result.response.mimeType,
            fromDiskCache: result.response.fromDiskCache,
            fromServiceWorker: result.response.fromServiceWorker
        });
        responses.set(cdpRequestId, {
            requestId: result.logicalRequestId,
            response: result.response
        });
    };
    // Fetches the body via Network.getResponseBody; null when CDP reports there is no body.
    const tryGetResponseBody = async (cdpRequestId) => {
        try {
            const result = await callCdp(client, "Network.getResponseBody", { requestId: cdpRequestId });
            if (result.base64Encoded) {
                return decodeBase64(result.body);
            }
            return encodeUtf8(result.body);
        }
        catch (error) {
            if (isNoBodyError(error)) {
                return null;
            }
            throw error;
        }
    };
    // Best-effort fallback through Page.getResourceContent; any failure yields null.
    const tryGetPageResourceContent = async (info) => {
        if (!info.frameId || !info.url) {
            return null;
        }
        try {
            const result = await callCdp(client, "Page.getResourceContent", {
                frameId: info.frameId,
                url: info.url
            });
            if (result.base64Encoded) {
                return decodeBase64(result.content);
            }
            return encodeUtf8(result.content);
        }
        catch {
            return null;
        }
    };
    const handleLoadingFinished = async (payload) => {
        const cdpRequestId = payload.requestId;
        const eventTimestamp = resolveTimestampMs(payload, cdpRequestId);
        const storedResponse = responses.get(cdpRequestId);
        if (!storedResponse) {
            logInfo("loadingFinished without response", {
                requestId: cdpRequestId
            });
            return;
        }
        // The entry is only needed for this one event; drop it so the map does not
        // grow for the lifetime of the session.
        responses.delete(cdpRequestId);
        const inferred = inferResourceTypeFromMime(storedResponse.response.mimeType);
        const infoFromMime = requestState.requestInfo.get(cdpRequestId);
        if ((!infoFromMime || !infoFromMime.resourceType) && inferred) {
            requestState.requestInfo.set(cdpRequestId, {
                url: storedResponse.response.url,
                frameId: infoFromMime?.frameId,
                resourceType: inferred,
                initiator: infoFromMime?.initiator
            });
            const storedRequest = requestState.requestEvents.get(cdpRequestId);
            if (storedRequest && !storedRequest.resourceType) {
                const updatedRequest = {
                    ...storedRequest,
                    resourceType: inferred
                };
                requestState.requestEvents.set(cdpRequestId, updatedRequest);
                handlers.onEvent(updatedRequest);
            }
        }
        let bodyBytes = null;
        try {
            bodyBytes = await tryGetResponseBody(cdpRequestId);
        }
        catch (error) {
            // A body failure is reported but must not prevent the response event.
            reportError(error);
        }
        // An empty body is treated like a missing one so the fallback below can run.
        if (bodyBytes && bodyBytes.byteLength === 0) {
            bodyBytes = null;
        }
        if (!bodyBytes) {
            const fallbackInfo = requestState.requestInfo.get(cdpRequestId);
            if (fallbackInfo?.frameId) {
                bodyBytes = await tryGetPageResourceContent(fallbackInfo);
            }
        }
        logInfo("response body status", {
            requestId: cdpRequestId,
            url: storedResponse.response.url,
            resourceType: requestState.requestInfo.get(cdpRequestId)?.resourceType,
            bodyBytes: bodyBytes ? bodyBytes.byteLength : 0
        });
        const responseEvent = {
            type: "response",
            requestId: storedResponse.requestId,
            url: storedResponse.response.url,
            status: storedResponse.response.status,
            // Default to "" so this matches the redirect response events, which do the same.
            statusText: storedResponse.response.statusText ?? "",
            headers: normalizeHeaders(storedResponse.response.headers),
            mimeType: storedResponse.response.mimeType,
            fromDiskCache: storedResponse.response.fromDiskCache,
            fromServiceWorker: storedResponse.response.fromServiceWorker,
            timestamp: eventTimestamp,
            body: bodyBytes ? { kind: "buffer", data: bodyBytes } : undefined
        };
        handlers.onEvent(responseEvent);
        const infoForHtml = requestState.requestInfo.get(cdpRequestId);
        if (infoForHtml?.resourceType === "document") {
            tryResolveHtml({
                url: storedResponse.response.url,
                contentType: storedResponse.response.mimeType,
                bodyBytes
            });
        }
    };
    const handleLoadingFailed = (payload) => {
        const cdpRequestId = payload.requestId;
        const logicalRequestId = requestState.getLogicalRequestId(cdpRequestId);
        const url = requestState.requestUrls.get(logicalRequestId) ?? "";
        // A failed request never reaches loadingFinished; release any stored response now.
        responses.delete(cdpRequestId);
        const failedEvent = {
            type: "failed",
            requestId: logicalRequestId,
            url,
            errorText: payload.errorText,
            timestamp: resolveTimestampMs(payload, cdpRequestId)
        };
        handlers.onEvent(failedEvent);
    };
    const cleanupHandlers = [];
    // Runs every registered unsubscribe exactly once, even if one of them throws,
    // then rethrows the first failure.
    const releaseSubscriptions = () => {
        let failed = false;
        let firstError;
        for (const cleanup of cleanupHandlers.splice(0)) {
            try {
                cleanup();
            }
            catch (error) {
                if (!failed) {
                    failed = true;
                    firstError = error;
                }
            }
        }
        if (failed) {
            throw firstError;
        }
    };
    try {
        cleanupHandlers.push(subscribe(client, "Network.requestWillBeSent", (payload) => handleRequestWillBeSent(payload)));
        cleanupHandlers.push(subscribe(client, "Network.responseReceived", (payload) => handleResponseReceived(payload)));
        cleanupHandlers.push(subscribe(client, "Network.loadingFailed", (payload) => handleLoadingFailed(payload)));
        cleanupHandlers.push(subscribe(client, "Network.loadingFinished", (payload) => {
            // The handler is async; route rejections to onError instead of leaving
            // them as unhandled promise rejections.
            void handleLoadingFinished(payload).catch(reportError);
        }));
    }
    catch (error) {
        // Do not leave earlier subscriptions attached when a later one fails.
        try {
            releaseSubscriptions();
        }
        catch {
            // Keep the original subscription error as the one that is reported.
        }
        if (error instanceof Error) {
            handlers.onError?.(error);
        }
        throw error;
    }
    return {
        navigate: async (url) => {
            await ensurePageEnabled();
            await callCdp(client, "Page.navigate", { url });
        },
        waitForHtml: async () => {
            return htmlPromise;
        },
        startCapture: async () => {
            await runTriggerActions(triggerActions);
        },
        stop: async () => {
            try {
                releaseSubscriptions();
            }
            finally {
                // Always disable the domain and release an owned client, even when
                // an unsubscribe threw.
                await callCdp(client, "Network.disable").catch(() => undefined);
                if (ownsClient) {
                    await client.close?.();
                }
            }
        }
    };
};
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import type { ResourceType } from "@pagepocket/lib";
|
|
2
|
+
/** Debugger target descriptor: the numeric id of a tab. */
export type ChromeDebuggerTarget = { tabId: number };
|
|
5
|
+
/**
 * Listener signature for debugger events: the target that produced the event,
 * the event's method name, and its optional parameters.
 */
export type ChromeDebuggerEvent = (
    source: ChromeDebuggerTarget,
    method: string,
    params?: Record<string, unknown>
) => void;
|
|
6
|
+
/**
 * Callback-style debugger surface: attach to or detach from a target, send a
 * command, and add or remove an event listener. None of the callbacks takes
 * an error argument.
 * NOTE(review): this looks like a subset of the extension `chrome.debugger`
 * API — confirm against the client that consumes it.
 */
export type ChromeDebuggerApi = {
    attach: (
        target: ChromeDebuggerTarget,
        version: string,
        callback: () => void
    ) => void;
    detach: (
        target: ChromeDebuggerTarget,
        callback: () => void
    ) => void;
    sendCommand: (
        target: ChromeDebuggerTarget,
        method: string,
        params: Record<string, unknown>,
        callback: (result?: unknown) => void
    ) => void;
    onEvent: {
        addListener: (listener: ChromeDebuggerEvent) => void;
        removeListener: (listener: ChromeDebuggerEvent) => void;
    };
};
|
|
15
|
+
/**
 * Runtime surface exposing an optional `lastError` with an optional message.
 * NOTE(review): presumably read after callback-style debugger calls to detect
 * failures — confirm in the debugger client.
 */
export type ChromeRuntimeApi = {
    lastError?: { message?: string };
};
|
|
20
|
+
/**
 * Shape of a `chrome`-like global as far as this package types it: both the
 * debugger and the runtime surfaces are optional.
 */
export type ChromeGlobal = {
    /** Debugger surface, when available. */
    debugger?: ChromeDebuggerApi;
    /** Runtime surface, when available. */
    runtime?: ChromeRuntimeApi;
};
|
|
24
|
+
/**
 * Structural description of a CDP client. Every member is optional, so the
 * type admits both a generic `send` / `on` / `off` client and a client that
 * exposes per-domain objects (`Network`, `Page`).
 * NOTE(review): which members are actually used is decided by the helpers that
 * receive the client (`callCdp`, `subscribe`) — confirm there.
 */
export type CdpClient = {
    /** Generic command entry point. */
    send?: (method: string, params?: Record<string, unknown>) => Promise<unknown>;
    /** Registers a listener for a named event. */
    on?: (event: string, listener: (payload: unknown) => void) => void;
    /** Removes a listener for a named event. */
    off?: (event: string, listener: (payload: unknown) => void) => void;
    /** Closes the client. */
    close?: () => Promise<void>;
    /** Domain-object style access to the Network domain. */
    Network?: {
        enable?: (params?: Record<string, unknown>) => Promise<void>;
        disable?: () => Promise<void>;
        getResponseBody?: (params: { requestId: string }) => Promise<{ body: string; base64Encoded?: boolean }>;
        requestWillBeSent?: (listener: (payload: unknown) => void) => void;
        responseReceived?: (listener: (payload: unknown) => void) => void;
        loadingFailed?: (listener: (payload: unknown) => void) => void;
        loadingFinished?: (listener: (payload: unknown) => void) => void;
    };
    /** Domain-object style access to the Page domain. */
    Page?: {
        enable?: () => Promise<void>;
        navigate?: (params: { url: string }) => Promise<void>;
        getResourceContent?: (params: { frameId: string; url: string }) => Promise<{ content: string; base64Encoded?: boolean }>;
    };
};
|
|
57
|
+
/**
 * Fields this package reads from a `Network.requestWillBeSent` payload.
 */
export type RequestWillBeSent = {
    /** CDP request id. */
    requestId: string;
    frameId?: string;
    /** Used together with `wallTime` by the timestamp resolver to derive an offset. */
    timestamp?: number;
    /** When present, multiplied by 1000 by the timestamp resolver to yield milliseconds. */
    wallTime?: number;
    /** Raw resource type string; mapped with `mapResourceType` by the request-state handlers. */
    type?: string;
    initiator?: { type?: string; url?: string };
    request: { url: string; method: string; headers?: Record<string, unknown> };
    /** Response of the previous hop, present when this request follows a redirect. */
    redirectResponse?: ResponseReceived["response"];
};
|
|
74
|
+
/**
 * Fields this package reads from a `Network.responseReceived` payload.
 */
export type ResponseReceived = {
    /** CDP request id. */
    requestId: string;
    frameId?: string;
    /** Used together with `wallTime` by the timestamp resolver to derive an offset. */
    timestamp?: number;
    /** When present, multiplied by 1000 by the timestamp resolver to yield milliseconds. */
    wallTime?: number;
    /** Raw resource type string; mapped with `mapResourceType` by the request-state handlers. */
    type?: string;
    /** Response metadata; the body is not part of this payload. */
    response: {
        url: string;
        status: number;
        statusText?: string;
        headers?: Record<string, unknown>;
        /** Used to infer a resource type when `type` does not map to one. */
        mimeType?: string;
        fromDiskCache?: boolean;
        fromServiceWorker?: boolean;
    };
};
|
|
90
|
+
/**
 * Fields this package reads from a `Network.loadingFailed` payload.
 */
export type LoadingFailed = {
    /** CDP request id. */
    requestId: string;
    timestamp?: number;
    wallTime?: number;
    /** Copied into the "failed" event's `errorText`. */
    errorText: string;
};
|
|
96
|
+
/**
 * Fields this package reads from a `Network.loadingFinished` payload.
 */
export type LoadingFinished = {
    /** CDP request id. */
    requestId: string;
    timestamp?: number;
    wallTime?: number;
};
|
|
101
|
+
/**
 * Response metadata the session keeps between `responseReceived` and
 * `loadingFinished`.
 */
export type StoredResponse = {
    /** Logical request id (not the raw CDP id) used on the emitted response event. */
    requestId: string;
    /** The `response` object from the `responseReceived` payload. */
    response: ResponseReceived["response"];
};
|
|
105
|
+
/**
 * Per-request bookkeeping kept by the request state, keyed by CDP request id.
 */
export type RequestInfo = {
    url: string;
    /** Frame the request belongs to; needed for the `Page.getResourceContent` fallback. */
    frameId?: string;
    /** Resolved resource type, when one could be mapped or inferred. */
    resourceType?: ResourceType;
    initiator?: { type?: string; url?: string };
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/package.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@pagepocket/capture-http-cdp-unit",
|
|
3
|
+
"version": "0.8.0",
|
|
4
|
+
"description": "PagePocket plugin: capture HTTP events (CDP)",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "dist/index.js",
|
|
7
|
+
"types": "dist/index.d.ts",
|
|
8
|
+
"files": [
|
|
9
|
+
"dist"
|
|
10
|
+
],
|
|
11
|
+
"license": "ISC",
|
|
12
|
+
"dependencies": {
|
|
13
|
+
"@pagepocket/lib": "0.8.0",
|
|
14
|
+
"@pagepocket/contracts": "0.8.0"
|
|
15
|
+
},
|
|
16
|
+
"devDependencies": {
|
|
17
|
+
"typescript": "^5.4.5"
|
|
18
|
+
},
|
|
19
|
+
"scripts": {
|
|
20
|
+
"build": "tsc -p tsconfig.json",
|
|
21
|
+
"test": "node -e \"process.exit(0)\""
|
|
22
|
+
}
|
|
23
|
+
}
|