libretto 0.6.16 → 0.6.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/cli.js +32 -13
- package/dist/cli/commands/browser.js +2 -2
- package/dist/cli/commands/execution.js +1 -1
- package/dist/cli/commands/search.js +69 -0
- package/dist/cli/commands/update.js +122 -0
- package/dist/cli/core/context.js +4 -0
- package/dist/cli/core/daemon/daemon.js +3 -0
- package/dist/cli/core/experiments.js +14 -1
- package/dist/cli/core/providers/index.js +5 -1
- package/dist/cli/core/providers/steel.js +56 -0
- package/dist/cli/core/session-telemetry.js +143 -7
- package/dist/cli/core/skill-version.js +1 -0
- package/dist/cli/router.js +14 -3
- package/dist/shared/html-search/search-html.d.ts +9 -0
- package/dist/shared/html-search/search-html.js +46 -0
- package/dist/shared/html-search/search-html.spec.d.ts +2 -0
- package/dist/shared/html-search/search-html.spec.js +57 -0
- package/docs/releasing.md +3 -9
- package/package.json +2 -2
- package/scripts/generate-changelog.ts +207 -12
- package/skills/libretto/SKILL.md +22 -15
- package/skills/libretto/references/code-generation-rules.md +2 -2
- package/skills/libretto/references/configuration-file-reference.md +3 -2
- package/skills/libretto-readonly/SKILL.md +1 -1
- package/src/cli/cli.ts +38 -13
- package/src/cli/commands/browser.ts +2 -3
- package/src/cli/commands/execution.ts +1 -1
- package/src/cli/commands/search.ts +74 -0
- package/src/cli/commands/update.ts +149 -0
- package/src/cli/core/context.ts +4 -0
- package/src/cli/core/daemon/daemon.ts +3 -0
- package/src/cli/core/experiments.ts +15 -1
- package/src/cli/core/providers/index.ts +5 -1
- package/src/cli/core/providers/steel.ts +75 -0
- package/src/cli/core/session-telemetry.ts +176 -13
- package/src/cli/core/skill-version.ts +1 -1
- package/src/cli/core/telemetry.ts +19 -3
- package/src/cli/router.ts +13 -2
- package/src/shared/html-search/search-html.spec.ts +65 -0
- package/src/shared/html-search/search-html.ts +75 -0
|
@@ -810,7 +810,7 @@ export const runInput = SimpleCLI.input({
|
|
|
810
810
|
help: "Viewport size as WIDTHxHEIGHT (e.g. 1920x1080)",
|
|
811
811
|
}),
|
|
812
812
|
provider: SimpleCLI.option(z.string().optional(), {
|
|
813
|
-
help: "Browser provider (local, kernel, browserbase)",
|
|
813
|
+
help: "Browser provider (local, kernel, browserbase, steel)",
|
|
814
814
|
aliases: ["-p"],
|
|
815
815
|
}),
|
|
816
816
|
},
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { DaemonClient } from "../core/daemon/ipc.js";
|
|
3
|
+
import { resolveExperiments } from "../core/experiments.js";
|
|
4
|
+
import {
|
|
5
|
+
formatHtmlForSearch,
|
|
6
|
+
searchFormattedHtml,
|
|
7
|
+
} from "../../shared/html-search/search-html.js";
|
|
8
|
+
import { pageOption, sessionOption, withRequiredSession } from "./shared.js";
|
|
9
|
+
import { SimpleCLI } from "affordance";
|
|
10
|
+
|
|
11
|
+
/**
 * CLI input schema for `libretto search`.
 *
 * The `pattern` positional is declared optional at the schema level and then
 * required by the refinement below, so that a missing pattern is reported
 * with the usage string instead of a generic validation error.
 */
export const searchInput = SimpleCLI.input({
  positionals: [
    SimpleCLI.positional("pattern", z.string().optional(), {
      help: "JavaScript regex pattern to search for in the formatted HTML snapshot",
    }),
  ],
  named: {
    session: sessionOption(),
    page: pageOption(),
  },
}).refine(function hasPattern(input) {
  return input.pattern !== undefined;
}, "Usage: libretto search <regex> --session <name> [--page <id>]");
|
|
25
|
+
|
|
26
|
+
/**
 * `libretto search <regex>`: fetch the page HTML through the session daemon,
 * format it, and print every region that matches the pattern.
 *
 * Throws when the "search" experiment is disabled, when the session has no
 * daemon socket, when the daemon reports a failure, or when the daemon result
 * is not a string. The daemon client is always destroyed before returning.
 */
export const searchCommand = SimpleCLI.command({
  description: "Search the current page HTML snapshot",
})
  .input(searchInput)
  .use(withRequiredSession())
  .handle(async ({ input, ctx }) => {
    // Feature gate: the command is only usable once the experiment is on.
    const { search: searchEnabled } = resolveExperiments();
    if (!searchEnabled) {
      throw new Error(
        [
          'The "search" experiment is disabled.',
          "Enable it with: libretto experiments enable search",
        ].join("\n"),
      );
    }

    const socketPath = ctx.sessionState.daemonSocketPath;
    if (!socketPath) {
      throw new Error(
        `Session "${ctx.session}" has no daemon socket. Close and reopen it with: libretto open <url> --session ${ctx.session}`,
      );
    }

    const client = await DaemonClient.connect(socketPath);
    try {
      // Read-only execution: ask the daemon for the serialized page HTML.
      const response = await client.readonlyExec({
        code: "return await page.content()",
        pageId: input.page,
      });
      if (!response.ok) {
        throw new Error(response.message);
      }

      const html = response.data.result;
      if (typeof html !== "string") {
        throw new Error("Expected page.content() to return an HTML string.");
      }

      // The refinement on searchInput guarantees `pattern` is defined here.
      const matches = searchFormattedHtml(
        formatHtmlForSearch(html),
        input.pattern!,
      );
      if (matches.length === 0) {
        console.log(`No matches for /${input.pattern}/.`);
        return;
      }

      // Print each matched region, separated by a "--" line.
      matches.forEach((match, index) => {
        if (index > 0) console.log("--");
        console.log(match.lines.join("\n"));
      });
    } finally {
      client.destroy();
    }
  });
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import { spawnSync } from "node:child_process";
|
|
2
|
+
import { readFileSync } from "node:fs";
|
|
3
|
+
import { fileURLToPath } from "node:url";
|
|
4
|
+
import { SimpleCLI } from "affordance";
|
|
5
|
+
|
|
6
|
+
const UPDATE_COMMAND = "curl -fsSL https://libretto.sh/install.sh | bash";
|
|
7
|
+
|
|
8
|
+
type PackageManifest = {
|
|
9
|
+
version?: string;
|
|
10
|
+
};
|
|
11
|
+
|
|
12
|
+
/**
 * Read the `version` field from the package.json located three directories
 * above this module.
 *
 * @returns The version string recorded in the manifest.
 * @throws If the manifest has no (or an empty) `version` field. File-read and
 *   JSON-parse errors propagate unchanged.
 */
function readCurrentCliVersion(): string {
  const manifestUrl = new URL("../../../package.json", import.meta.url);
  const packageJsonPath = fileURLToPath(manifestUrl);
  const rawManifest = readFileSync(packageJsonPath, "utf8");
  const { version } = JSON.parse(rawManifest) as PackageManifest;

  if (version) {
    return version;
  }

  throw new Error(
    `Unable to determine current libretto version from ${packageJsonPath}.`,
  );
}
|
|
28
|
+
|
|
29
|
+
/**
 * Ask npm for the version currently published under the `latest` dist-tag of
 * the `libretto` package by running `npm view libretto@latest version`.
 *
 * @returns The trimmed version string printed by npm.
 * @throws If npm cannot be spawned, exits with a non-zero status, or prints
 *   nothing. Every error uses the same four-part message layout
 *   (Error / Known state / Try / Help).
 */
function readLatestNpmVersion(): string {
  // Shared message layout for all three failure modes below.
  const checkFailed = (knownState: string, extraLines: string[] = []): Error =>
    new Error(
      [
        "Error: failed to check the latest Libretto version on npm.",
        `Known state: ${knownState}`,
        ...extraLines,
        "Try: npm view libretto@latest version",
        "Help: libretto help update",
      ].join("\n"),
    );

  const { error, status, stdout, stderr } = spawnSync(
    "npm",
    ["view", "libretto@latest", "version"],
    { encoding: "utf8" },
  );

  // The process could not be started at all.
  if (error) {
    throw checkFailed(error.message);
  }

  // npm ran but reported a failure; include its stderr when it said anything.
  if (status !== 0) {
    const detail = stderr.trim();
    throw checkFailed(
      `npm exited with status ${status}.`,
      detail ? [`npm stderr: ${detail}`] : [],
    );
  }

  const version = stdout.trim();
  if (!version) {
    throw checkFailed("npm did not print a version.");
  }

  return version;
}
|
|
72
|
+
|
|
73
|
+
/**
 * CLI input schema for `libretto update`: no positionals and a single
 * `--dry-run` flag, exposed to the handler as `dryRun`.
 */
export const updateInput = SimpleCLI.input({
  positionals: [],
  named: {
    dryRun: SimpleCLI.flag({
      name: "dry-run",
      help: "Print the update command without running it",
    }),
  },
});
|
|
82
|
+
|
|
83
|
+
/**
 * Build the multi-line error message shown when the installer process ran but
 * did not finish successfully.
 *
 * @param status Exit status of the installer, or `null` when none was reported.
 * @param signal Name of the signal associated with the exit, if any.
 * @returns A four-line message: Error / Known state / Try / Help.
 */
function formatUpdateFailure(
  status: number | null,
  signal: string | null,
): string {
  let knownState: string;
  if (status !== null) {
    knownState = `installer exited with status ${status}.`;
  } else {
    // No exit status: report an interruption, naming the signal when known.
    const bySignal = signal ? ` by ${signal}` : "";
    knownState = `installer was interrupted${bySignal}.`;
  }

  const lines = [
    "Error: failed to update Libretto to the latest version.",
    `Known state: ${knownState}`,
    `Try: ${UPDATE_COMMAND}`,
    "Help: libretto help update",
  ];
  return lines.join("\n");
}
|
|
99
|
+
|
|
100
|
+
/**
 * `libretto update`: compare the installed CLI version with the version
 * published on npm and, when they differ, run the installer script.
 *
 * With `--dry-run` the installer command is only printed and nothing else
 * happens (no version lookup, no network access).
 *
 * Throws when the installer cannot be started or does not exit with status 0;
 * errors from the version lookups propagate unchanged.
 */
export const updateCommand = SimpleCLI.command({
  description: "Update Libretto to the latest version",
})
  .input(updateInput)
  .handle(async ({ input }) => {
    if (input.dryRun) {
      console.log("Update command:");
      console.log(`  ${UPDATE_COMMAND}`);
      console.log("No changes made.");
      return;
    }

    const currentVersion = readCurrentCliVersion();
    const latestVersion = readLatestNpmVersion();
    console.log(`Current version: ${currentVersion}`);
    console.log(`Latest version: ${latestVersion}`);

    if (currentVersion === latestVersion) {
      console.log(`Libretto is already up to date (${currentVersion}).`);
      console.log("No further action required.");
      return;
    }

    console.log("Updating Libretto to latest...");
    // UPDATE_COMMAND is a `curl ... | bash` pipeline. Without `pipefail` the
    // pipeline's exit status is that of the trailing `bash` alone, which exits
    // 0 on empty input, so a failed download would be reported as a successful
    // update. `pipefail` makes a curl failure surface as a non-zero status.
    const result = spawnSync(
      "bash",
      ["-lc", `set -o pipefail; ${UPDATE_COMMAND}`],
      {
        // Stream installer output straight to the user's terminal.
        stdio: "inherit",
        env: {
          ...process.env,
          LIBRETTO_VERSION: "latest",
        },
      },
    );

    // bash itself could not be spawned.
    if (result.error) {
      throw new Error(
        [
          "Error: failed to start the Libretto installer.",
          `Known state: ${result.error.message}`,
          `Try: ${UPDATE_COMMAND}`,
          "Help: libretto help update",
        ].join("\n"),
      );
    }

    if (result.status !== 0) {
      throw new Error(formatUpdateFailure(result.status, result.signal));
    }

    console.log("Libretto updated to latest.");
    console.log("No further action required.");
  });
|
package/src/cli/core/context.ts
CHANGED
|
@@ -34,6 +34,10 @@ export function getSessionNetworkLogPath(session: string): string {
|
|
|
34
34
|
return join(getSessionDir(session), "network.jsonl");
|
|
35
35
|
}
|
|
36
36
|
|
|
37
|
+
/**
 * Path of the `raw-network` directory inside the given session's directory.
 *
 * @param session Session name, passed through to `getSessionDir`.
 */
export function getSessionRawNetworkDir(session: string): string {
  const sessionDir = getSessionDir(session);
  return join(sessionDir, "raw-network");
}
|
|
40
|
+
|
|
37
41
|
/**
 * Path of the `actions.jsonl` file inside the given session's directory.
 *
 * @param session Session name, passed through to `getSessionDir`.
 */
export function getSessionActionsLogPath(session: string): string {
  const sessionDir = getSessionDir(session);
  return join(sessionDir, "actions.jsonl");
}
|
|
@@ -39,6 +39,7 @@ import {
|
|
|
39
39
|
import {
|
|
40
40
|
createLoggerForSession,
|
|
41
41
|
getSessionDir,
|
|
42
|
+
getSessionRawNetworkDir,
|
|
42
43
|
getSessionNetworkLogPath,
|
|
43
44
|
getSessionActionsLogPath,
|
|
44
45
|
getSessionProviderClosePath,
|
|
@@ -255,6 +256,7 @@ class BrowserDaemon {
|
|
|
255
256
|
// Telemetry — may fail on connect-mode reconnections where
|
|
256
257
|
// exposeFunction bindings already exist; log and continue.
|
|
257
258
|
const networkLogFile = getSessionNetworkLogPath(session);
|
|
259
|
+
const rawNetworkDir = getSessionRawNetworkDir(session);
|
|
258
260
|
const actionsLogFile = getSessionActionsLogPath(session);
|
|
259
261
|
const logger = createLoggerForSession(session);
|
|
260
262
|
|
|
@@ -263,6 +265,7 @@ class BrowserDaemon {
|
|
|
263
265
|
context,
|
|
264
266
|
initialPage: page,
|
|
265
267
|
includeUserDomActions: true,
|
|
268
|
+
rawNetworkDir,
|
|
266
269
|
logAction: (entry: TelemetryEntry) => {
|
|
267
270
|
appendFileSync(actionsLogFile, JSON.stringify(entry) + "\n");
|
|
268
271
|
},
|
|
@@ -11,7 +11,21 @@ export type ExperimentMetadata = {
|
|
|
11
11
|
defaultValue: boolean;
|
|
12
12
|
};
|
|
13
13
|
|
|
14
|
-
export const EXPERIMENTS: Readonly<Record<string, ExperimentMetadata>> = {
|
|
14
|
+
/**
 * Registry of known experiments, keyed by experiment name.
 *
 * Each entry carries a display title, a one-sentence summary, longer
 * newline-joined documentation, and the value used when the user has not
 * set the experiment explicitly.
 */
export const EXPERIMENTS: Readonly<Record<string, ExperimentMetadata>> = {
  // Gates the `libretto search` command; off unless explicitly enabled.
  search: {
    title: "HTML Search",
    oneSentenceDescription:
      "Adds a search command that greps the current page's formatted HTML snapshot.",
    // Paragraphs are separated by empty entries, i.e. blank lines once joined.
    docs: [
      "Adds a search command for inspecting the current page's HTML snapshot with a JavaScript regex.",
      "",
      "Usage: libretto search <regex> --session <name> [--page <id>]",
      "",
      "The command captures page HTML through read-only execution, condenses and formats it, then prints matching regions with up to four lines of surrounding context.",
    ].join("\n"),
    defaultValue: false,
  },
};
|
|
15
29
|
|
|
16
30
|
export type ExperimentName = string;
|
|
17
31
|
export type Experiments = Record<ExperimentName, boolean>;
|
|
@@ -2,12 +2,14 @@ import { readLibrettoConfig } from "../config.js";
|
|
|
2
2
|
import { createBrowserbaseProvider } from "./browserbase.js";
|
|
3
3
|
import { createKernelProvider } from "./kernel.js";
|
|
4
4
|
import { createLibrettoCloudProvider } from "./libretto-cloud.js";
|
|
5
|
+
import { createSteelProvider } from "./steel.js";
|
|
5
6
|
import type { ProviderApi } from "./types.js";
|
|
6
7
|
|
|
7
8
|
const VALID_PROVIDERS = new Set([
|
|
8
9
|
"local",
|
|
9
10
|
"kernel",
|
|
10
11
|
"browserbase",
|
|
12
|
+
"steel",
|
|
11
13
|
"libretto-cloud",
|
|
12
14
|
] as const);
|
|
13
15
|
export type ProviderName =
|
|
@@ -56,12 +58,14 @@ export function getCloudProviderApi(name: string): ProviderApi {
|
|
|
56
58
|
return createKernelProvider();
|
|
57
59
|
case "browserbase":
|
|
58
60
|
return createBrowserbaseProvider();
|
|
61
|
+
case "steel":
|
|
62
|
+
return createSteelProvider();
|
|
59
63
|
case "libretto-cloud":
|
|
60
64
|
console.warn("Note: The libretto-cloud provider is in alpha.");
|
|
61
65
|
return createLibrettoCloudProvider();
|
|
62
66
|
default:
|
|
63
67
|
throw new Error(
|
|
64
|
-
`Unknown provider "${name}". Valid cloud providers: kernel, browserbase`,
|
|
68
|
+
`Unknown provider "${name}". Valid cloud providers: kernel, browserbase, steel`,
|
|
65
69
|
);
|
|
66
70
|
}
|
|
67
71
|
}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import type { ProviderApi } from "./types.js";
|
|
2
|
+
|
|
3
|
+
const DEFAULT_STEEL_API_ENDPOINT = "https://api.steel.dev";
|
|
4
|
+
const DEFAULT_STEEL_CONNECT_ENDPOINT = "wss://connect.steel.dev";
|
|
5
|
+
|
|
6
|
+
type SteelSessionResponse = {
|
|
7
|
+
id: string;
|
|
8
|
+
sessionViewerUrl?: string;
|
|
9
|
+
};
|
|
10
|
+
|
|
11
|
+
export type SteelProviderOptions = {
|
|
12
|
+
apiKey?: string;
|
|
13
|
+
};
|
|
14
|
+
|
|
15
|
+
/**
 * Create a provider that opens and releases browser sessions through the
 * Steel HTTP API.
 *
 * Configuration:
 * - API key: `options.apiKey`, falling back to the `STEEL_API_KEY` env var.
 * - API base URL: `STEEL_BASE_URL` env var, else the default Steel endpoint.
 *   Trailing slashes are ignored.
 * - CDP connect URL: `STEEL_CONNECT_URL` env var, else the default.
 *
 * @param options Optional overrides; currently only `apiKey`.
 * @returns An object with `createSession` and `closeSession`.
 * @throws Immediately if no API key is available.
 */
export function createSteelProvider(
  options: SteelProviderOptions = {},
): ProviderApi {
  const apiKey = options.apiKey ?? process.env.STEEL_API_KEY;
  if (!apiKey) throw new Error("STEEL_API_KEY is required for Steel provider.");

  // Strip trailing slashes so a base URL such as "https://host/" does not
  // produce request paths like "https://host//v1/sessions".
  const endpoint = (
    process.env.STEEL_BASE_URL ?? DEFAULT_STEEL_API_ENDPOINT
  ).replace(/\/+$/, "");
  const connectEndpoint =
    process.env.STEEL_CONNECT_URL ?? DEFAULT_STEEL_CONNECT_ENDPOINT;

  // Both endpoints take the same headers and an empty JSON body.
  const requestInit = {
    method: "POST",
    headers: {
      "steel-api-key": apiKey,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({}),
  };

  return {
    /** Create a Steel session and return its id, CDP endpoint and viewer URL. */
    async createSession() {
      const resp = await fetch(`${endpoint}/v1/sessions`, requestInit);
      if (!resp.ok) {
        const body = await resp.text();
        throw new Error(`Steel API error (${resp.status}): ${body}`);
      }

      const json = (await resp.json()) as Partial<SteelSessionResponse> | null;
      const sessionId = json?.id;
      // Without this check a malformed response would silently yield a CDP
      // endpoint containing "sessionId=undefined".
      if (typeof sessionId !== "string" || sessionId.length === 0) {
        throw new Error(
          "Steel API error: create-session response did not include a session id.",
        );
      }

      return {
        sessionId,
        cdpEndpoint: buildSteelCdpEndpoint(connectEndpoint, apiKey, sessionId),
        liveViewUrl: json?.sessionViewerUrl,
      };
    },

    /** Release the given Steel session; throws if the API rejects the call. */
    async closeSession(sessionId) {
      // Encode the id so it can never alter the request path.
      const releaseUrl = `${endpoint}/v1/sessions/${encodeURIComponent(
        sessionId,
      )}/release`;
      const resp = await fetch(releaseUrl, requestInit);
      if (!resp.ok) {
        const body = await resp.text();
        throw new Error(
          `Steel API error closing session ${sessionId} (${resp.status}): ${body}`,
        );
      }
      return {};
    },
  };
}
|
|
65
|
+
|
|
66
|
+
/**
 * Build the CDP connection URL for a Steel session by setting the `apiKey`
 * and `sessionId` query parameters on the connect endpoint.
 *
 * Existing query parameters on `connectEndpoint` are kept; existing `apiKey`
 * or `sessionId` values are replaced.
 *
 * @throws TypeError if `connectEndpoint` is not a valid absolute URL.
 */
function buildSteelCdpEndpoint(
  connectEndpoint: string,
  apiKey: string,
  sessionId: string,
): string {
  const cdpUrl = new URL(connectEndpoint);
  // Object.entries preserves insertion order: apiKey first, then sessionId.
  for (const [name, value] of Object.entries({ apiKey, sessionId })) {
    cdpUrl.searchParams.set(name, value);
  }
  return cdpUrl.href;
}
|
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
import type { BrowserContext, Page } from "playwright";
|
|
2
|
+
import { mkdirSync, writeFileSync } from "node:fs";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { gzipSync } from "node:zlib";
|
|
2
5
|
import {
|
|
3
6
|
filterSemanticClasses,
|
|
4
7
|
INTERACTIVE_ROLE_NAMES,
|
|
@@ -16,18 +19,69 @@ type InstallSessionTelemetryOptions = {
|
|
|
16
19
|
logAction: (entry: TelemetryEntry) => void;
|
|
17
20
|
logNetwork: (entry: TelemetryEntry) => void;
|
|
18
21
|
includeUserDomActions?: boolean;
|
|
22
|
+
rawNetworkDir?: string;
|
|
19
23
|
};
|
|
20
24
|
|
|
25
|
+
const BODY_PREVIEW_CHARS = 4096;
|
|
26
|
+
const MAX_SAVED_BODY_BYTES = 10 * 1024 * 1024;
|
|
27
|
+
const LOG_RESOURCE_TYPES = new Set(["document", "xhr", "fetch"]);
|
|
28
|
+
const SKIP_RESOURCE_TYPES = new Set(["image", "font", "media", "stylesheet"]);
|
|
29
|
+
const NOISE_URL_RE =
|
|
30
|
+
/(google-analytics|googletagmanager|googleadservices|googlesyndication|doubleclick|facebook\.com\/tr|pinterest|criteo|snapchat|2mdn\.net|adtrafficquality|safeframe|recaptcha|analytics|beacon|pixel|\/ads?\/|\/collect|\/event|\/pagead\/|\/gmp\/conversion|\/ccm\/|\/rmkt\/|favicon|\.map(?:\?|$))/i;
|
|
31
|
+
const TEXT_CONTENT_TYPE_RE =
|
|
32
|
+
/json|html|text|xml|graphql|javascript|x-www-form-urlencoded/i;
|
|
33
|
+
|
|
34
|
+
/**
 * Decide whether a request/response pair is recorded in the network log.
 *
 * Rejected outright: `chrome-extension://` URLs, URLs matching NOISE_URL_RE,
 * and the "ping" resource type. Of what remains, an entry is logged when its
 * resource type is in LOG_RESOURCE_TYPES or its method is POST, PUT, PATCH or
 * DELETE; everything else is dropped.
 */
function shouldLogNetworkEntry(
  method: string,
  url: string,
  resourceType: string,
): boolean {
  const isNoise =
    url.startsWith("chrome-extension://") ||
    NOISE_URL_RE.test(url) ||
    resourceType === "ping";
  if (isNoise) return false;

  if (LOG_RESOURCE_TYPES.has(resourceType)) return true;

  switch (method) {
    case "POST":
    case "PUT":
    case "PATCH":
    case "DELETE":
      return true;
    default:
      break;
  }

  // NOTE(review): this skip-list check cannot change the outcome because the
  // fall-through below also returns false. If skipped resource types were
  // meant to be excluded even for mutating methods, the check would have to
  // run before the method test — confirm intent before reordering.
  if (SKIP_RESOURCE_TYPES.has(resourceType)) return false;
  return false;
}
|
|
47
|
+
|
|
48
|
+
/**
 * True when a content-type value is present and matches TEXT_CONTENT_TYPE_RE.
 *
 * @param contentType Content-type header value, or `null` when absent.
 */
function isTextLikeContentType(contentType: string | null): boolean {
  if (contentType === null) {
    return false;
  }
  return TEXT_CONTENT_TYPE_RE.test(contentType);
}
|
|
51
|
+
|
|
52
|
+
/**
 * Return at most the first BODY_PREVIEW_CHARS UTF-16 code units of `value`.
 */
function bodyPreview(value: string): string {
  const preview = value.slice(0, BODY_PREVIEW_CHARS);
  return preview;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Write a gzip-compressed copy of a request or response body into
 * `rawNetworkDir`, creating the directory if needed.
 *
 * The file is named `<id zero-padded to 6 digits>.<kind>.<ext>.gz`, where
 * `ext` is "json" or "html" when the content type contains that substring
 * (checked in that order) and "txt" otherwise.
 *
 * @param rawNetworkDir Target directory; when falsy nothing is written.
 * @param id Numeric identifier of the network entry.
 * @param kind Whether the body belongs to the request or the response.
 * @param contentType Content-type header value, or `null`.
 * @param body Body text to compress and store.
 * @returns `raw-network/<filename>` for the written file, or `null` when no
 *   directory was supplied.
 */
function saveBodySidecar(
  rawNetworkDir: string | undefined,
  id: number,
  kind: "request" | "response",
  contentType: string | null,
  body: string,
): string | null {
  if (!rawNetworkDir) {
    return null;
  }

  mkdirSync(rawNetworkDir, { recursive: true });

  let extension = "txt";
  if (contentType?.includes("json")) {
    extension = "json";
  } else if (contentType?.includes("html")) {
    extension = "html";
  }

  const paddedId = String(id).padStart(6, "0");
  const filename = `${paddedId}.${kind}.${extension}.gz`;
  const compressed = gzipSync(body);
  writeFileSync(join(rawNetworkDir, filename), compressed);

  // The returned path always uses the fixed "raw-network/" prefix.
  return `raw-network/${filename}`;
}
|
|
74
|
+
|
|
21
75
|
export async function installSessionTelemetry(
|
|
22
76
|
options: InstallSessionTelemetryOptions,
|
|
23
77
|
): Promise<void> {
|
|
24
|
-
const
|
|
25
|
-
|
|
26
|
-
const { context, initialPage, logAction, logNetwork } = options;
|
|
78
|
+
const { context, initialPage, logAction, logNetwork, rawNetworkDir } =
|
|
79
|
+
options;
|
|
27
80
|
const includeUserDomActions = options.includeUserDomActions ?? false;
|
|
28
81
|
const pageIdCache = new WeakMap<Page, string>();
|
|
29
82
|
const wrappedPages = new WeakSet<Page>();
|
|
30
83
|
const exposedPages = new WeakSet<Page>();
|
|
84
|
+
let networkId = 0;
|
|
31
85
|
|
|
32
86
|
const resolvePageId = async (page: Page): Promise<string> => {
|
|
33
87
|
if (pageIdCache.has(page)) return pageIdCache.get(page)!;
|
|
@@ -748,20 +802,129 @@ export async function installSessionTelemetry(
|
|
|
748
802
|
page.on("response", async (response) => {
|
|
749
803
|
const request = response.request();
|
|
750
804
|
const url = request.url();
|
|
751
|
-
|
|
752
|
-
|
|
805
|
+
const method = request.method();
|
|
806
|
+
const resourceType = request.resourceType();
|
|
807
|
+
if (!shouldLogNetworkEntry(method, url, resourceType)) return;
|
|
808
|
+
|
|
809
|
+
const id = ++networkId;
|
|
810
|
+
const requestHeaders = request.headers();
|
|
811
|
+
const responseHeaders = response.headers();
|
|
812
|
+
const contentType = responseHeaders["content-type"] ?? null;
|
|
813
|
+
const requestContentType = requestHeaders["content-type"] ?? null;
|
|
814
|
+
const requestBody = request.postData();
|
|
815
|
+
const requestBodyBytes =
|
|
816
|
+
requestBody === null ? null : Buffer.byteLength(requestBody);
|
|
817
|
+
let requestBodyPath: string | null = null;
|
|
818
|
+
let requestBodyOmittedReason: string | null = null;
|
|
819
|
+
let responseBodyPreview: string | null = null;
|
|
820
|
+
let responseBodyPath: string | null = null;
|
|
821
|
+
let responseBodyBytes: number | null = null;
|
|
822
|
+
let responseBodyTruncated = false;
|
|
823
|
+
let responseBodyOmittedReason: string | null = null;
|
|
824
|
+
let errorText: string | null = null;
|
|
825
|
+
|
|
826
|
+
if (requestBody === null) {
|
|
827
|
+
requestBodyOmittedReason = "no-request-body";
|
|
828
|
+
} else if (!isTextLikeContentType(requestContentType)) {
|
|
829
|
+
requestBodyOmittedReason = "binary-content-type";
|
|
830
|
+
} else if (requestBodyBytes !== null && requestBodyBytes > MAX_SAVED_BODY_BYTES) {
|
|
831
|
+
requestBodyOmittedReason = "body-too-large";
|
|
832
|
+
} else {
|
|
833
|
+
requestBodyPath = saveBodySidecar(
|
|
834
|
+
rawNetworkDir,
|
|
835
|
+
id,
|
|
836
|
+
"request",
|
|
837
|
+
requestContentType,
|
|
838
|
+
requestBody,
|
|
839
|
+
);
|
|
840
|
+
}
|
|
841
|
+
|
|
842
|
+
if (!isTextLikeContentType(contentType) || !LOG_RESOURCE_TYPES.has(resourceType)) {
|
|
843
|
+
responseBodyOmittedReason = "binary-content-type";
|
|
844
|
+
} else {
|
|
845
|
+
try {
|
|
846
|
+
const responseBody = await response.text();
|
|
847
|
+
responseBodyBytes = Buffer.byteLength(responseBody);
|
|
848
|
+
responseBodyPreview = bodyPreview(responseBody);
|
|
849
|
+
if (responseBodyBytes > MAX_SAVED_BODY_BYTES) {
|
|
850
|
+
responseBodyTruncated = true;
|
|
851
|
+
responseBodyOmittedReason = "body-too-large";
|
|
852
|
+
} else {
|
|
853
|
+
responseBodyPath = saveBodySidecar(
|
|
854
|
+
rawNetworkDir,
|
|
855
|
+
id,
|
|
856
|
+
"response",
|
|
857
|
+
contentType,
|
|
858
|
+
responseBody,
|
|
859
|
+
);
|
|
860
|
+
}
|
|
861
|
+
} catch (error: any) {
|
|
862
|
+
responseBodyOmittedReason = "read-error";
|
|
863
|
+
errorText = error?.message ?? String(error);
|
|
864
|
+
}
|
|
865
|
+
}
|
|
866
|
+
|
|
753
867
|
emitNetwork({
|
|
868
|
+
id,
|
|
754
869
|
pageId,
|
|
755
|
-
method
|
|
870
|
+
method,
|
|
756
871
|
url,
|
|
872
|
+
resourceType,
|
|
757
873
|
status: response.status(),
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
874
|
+
statusText: response.statusText(),
|
|
875
|
+
contentType,
|
|
876
|
+
requestHeaders,
|
|
877
|
+
responseHeaders,
|
|
878
|
+
requestBodyPreview: requestBody ? bodyPreview(requestBody) : null,
|
|
879
|
+
requestBodyPath,
|
|
880
|
+
requestBodyBytes,
|
|
881
|
+
requestBodyTruncated:
|
|
882
|
+
requestBody !== null &&
|
|
883
|
+
requestBodyBytes !== null &&
|
|
884
|
+
requestBodyBytes > MAX_SAVED_BODY_BYTES,
|
|
885
|
+
requestBodyOmittedReason,
|
|
886
|
+
responseBodyPreview,
|
|
887
|
+
responseBodyPath,
|
|
888
|
+
responseBodyBytes,
|
|
889
|
+
responseBodyTruncated,
|
|
890
|
+
responseBodyOmittedReason,
|
|
891
|
+
errorText,
|
|
892
|
+
postData: requestBody ? bodyPreview(requestBody) : undefined,
|
|
893
|
+
responseBody: null,
|
|
894
|
+
size: null,
|
|
895
|
+
durationMs: null,
|
|
896
|
+
});
|
|
897
|
+
});
|
|
898
|
+
|
|
899
|
+
page.on("requestfailed", async (request) => {
|
|
900
|
+
const url = request.url();
|
|
901
|
+
const method = request.method();
|
|
902
|
+
const resourceType = request.resourceType();
|
|
903
|
+
if (!shouldLogNetworkEntry(method, url, resourceType)) return;
|
|
904
|
+
|
|
905
|
+
const id = ++networkId;
|
|
906
|
+
emitNetwork({
|
|
907
|
+
id,
|
|
908
|
+
pageId,
|
|
909
|
+
method,
|
|
910
|
+
url,
|
|
911
|
+
resourceType,
|
|
912
|
+
status: null,
|
|
913
|
+
statusText: null,
|
|
914
|
+
contentType: null,
|
|
915
|
+
requestHeaders: request.headers(),
|
|
916
|
+
responseHeaders: null,
|
|
917
|
+
requestBodyPreview: null,
|
|
918
|
+
requestBodyPath: null,
|
|
919
|
+
requestBodyBytes: null,
|
|
920
|
+
requestBodyTruncated: false,
|
|
921
|
+
requestBodyOmittedReason: null,
|
|
922
|
+
responseBodyPreview: null,
|
|
923
|
+
responseBodyPath: null,
|
|
924
|
+
responseBodyBytes: null,
|
|
925
|
+
responseBodyTruncated: false,
|
|
926
|
+
responseBodyOmittedReason: "request-failed",
|
|
927
|
+
errorText: request.failure()?.errorText ?? null,
|
|
765
928
|
responseBody: null,
|
|
766
929
|
size: null,
|
|
767
930
|
durationMs: null,
|
|
@@ -11,16 +11,32 @@ import {
|
|
|
11
11
|
import { assertSessionStateExistsOrThrow } from "./session.js";
|
|
12
12
|
|
|
13
13
|
export type NetworkLogEntry = {
|
|
14
|
+
id?: number;
|
|
14
15
|
ts: string;
|
|
15
16
|
pageId?: string;
|
|
16
17
|
method: string;
|
|
17
18
|
url: string;
|
|
18
|
-
|
|
19
|
+
resourceType?: string;
|
|
20
|
+
status: number | null;
|
|
21
|
+
statusText?: string | null;
|
|
19
22
|
contentType: string | null;
|
|
23
|
+
requestHeaders?: Record<string, string> | null;
|
|
24
|
+
responseHeaders?: Record<string, string> | null;
|
|
25
|
+
requestBodyPreview?: string | null;
|
|
26
|
+
requestBodyPath?: string | null;
|
|
27
|
+
requestBodyBytes?: number | null;
|
|
28
|
+
requestBodyTruncated?: boolean;
|
|
29
|
+
requestBodyOmittedReason?: string | null;
|
|
30
|
+
responseBodyPreview?: string | null;
|
|
31
|
+
responseBodyPath?: string | null;
|
|
32
|
+
responseBodyBytes?: number | null;
|
|
33
|
+
responseBodyTruncated?: boolean;
|
|
34
|
+
responseBodyOmittedReason?: string | null;
|
|
35
|
+
errorText?: string | null;
|
|
20
36
|
postData?: string;
|
|
21
37
|
responseBody?: string | null;
|
|
22
|
-
size
|
|
23
|
-
durationMs
|
|
38
|
+
size?: number | null;
|
|
39
|
+
durationMs?: number | null;
|
|
24
40
|
};
|
|
25
41
|
|
|
26
42
|
export function readNetworkLog(
|