libretto 0.4.4 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +106 -36
- package/dist/cli/cli.js +39 -113
- package/dist/cli/commands/ai.js +1 -1
- package/dist/cli/commands/browser.js +87 -60
- package/dist/cli/commands/execution.js +201 -88
- package/dist/cli/commands/init.js +30 -8
- package/dist/cli/commands/logs.js +5 -6
- package/dist/cli/commands/shared.js +30 -29
- package/dist/cli/commands/snapshot.js +26 -39
- package/dist/cli/core/ai-config.js +9 -2
- package/dist/cli/core/api-snapshot-analyzer.js +15 -5
- package/dist/cli/core/browser.js +141 -33
- package/dist/cli/core/context.js +7 -18
- package/dist/cli/core/session-telemetry.js +5 -2
- package/dist/cli/core/session.js +23 -10
- package/dist/cli/core/snapshot-analyzer.js +16 -33
- package/dist/cli/core/snapshot-api-config.js +2 -6
- package/dist/cli/core/telemetry.js +10 -2
- package/dist/cli/framework/simple-cli.js +45 -25
- package/dist/cli/router.js +14 -21
- package/dist/cli/workers/run-integration-runtime.js +26 -7
- package/dist/cli/workers/run-integration-worker-protocol.js +3 -1
- package/dist/cli/workers/run-integration-worker.js +1 -4
- package/dist/index.d.ts +1 -2
- package/dist/index.js +7 -10
- package/dist/runtime/download/download.js +5 -1
- package/dist/runtime/extract/extract.js +11 -2
- package/dist/runtime/network/network.js +8 -1
- package/dist/runtime/recovery/agent.js +6 -2
- package/dist/runtime/recovery/errors.js +3 -1
- package/dist/runtime/recovery/recovery.js +3 -1
- package/dist/shared/condense-dom/condense-dom.js +6 -13
- package/dist/shared/config/config.d.ts +1 -9
- package/dist/shared/config/config.js +0 -18
- package/dist/shared/config/index.d.ts +2 -1
- package/dist/shared/config/index.js +0 -10
- package/dist/shared/debug/pause.js +9 -3
- package/dist/shared/instrumentation/instrument.js +101 -5
- package/dist/shared/llm/ai-sdk-adapter.js +3 -1
- package/dist/shared/llm/client.js +3 -1
- package/dist/shared/logger/index.js +4 -1
- package/dist/shared/paths/paths.js +2 -1
- package/dist/shared/paths/repo-root.d.ts +3 -0
- package/dist/shared/paths/repo-root.js +24 -0
- package/dist/shared/run/api.js +3 -1
- package/dist/shared/run/browser.js +7 -2
- package/dist/shared/state/session-state.d.ts +2 -1
- package/dist/shared/state/session-state.js +5 -2
- package/dist/shared/visualization/ghost-cursor.js +19 -10
- package/dist/shared/visualization/highlight.js +9 -6
- package/dist/shared/workflow/workflow.d.ts +4 -5
- package/dist/shared/workflow/workflow.js +3 -5
- package/package.json +11 -8
- package/scripts/check-skills-sync.mjs +25 -0
- package/scripts/compare-eval-summary.mjs +47 -0
- package/scripts/postinstall.mjs +26 -17
- package/scripts/prepare-release.sh +97 -0
- package/scripts/skills-libretto.mjs +103 -0
- package/scripts/summarize-evals.mjs +135 -0
- package/scripts/sync-skills.mjs +12 -0
- package/skills/libretto/SKILL.md +130 -377
- package/skills/libretto/references/auth-profiles.md +30 -0
- package/skills/libretto/{code-generation-rules.md → references/code-generation-rules.md} +27 -42
- package/skills/libretto/references/configuration-file-reference.md +53 -0
- package/skills/libretto/references/pages-and-page-targeting.md +29 -0
- package/skills/libretto/references/site-security-review.md +143 -0
- package/src/cli/cli.ts +86 -0
- package/src/cli/commands/ai.ts +35 -0
- package/src/cli/commands/browser.ts +189 -0
- package/src/cli/commands/execution.ts +822 -0
- package/src/cli/commands/init.ts +350 -0
- package/src/cli/commands/logs.ts +128 -0
- package/src/cli/commands/shared.ts +69 -0
- package/src/cli/commands/snapshot.ts +312 -0
- package/src/cli/core/ai-config.ts +264 -0
- package/src/cli/core/api-snapshot-analyzer.ts +108 -0
- package/src/cli/core/browser.ts +976 -0
- package/src/cli/core/context.ts +127 -0
- package/src/cli/core/pause-signals.ts +35 -0
- package/src/cli/core/session-telemetry.ts +564 -0
- package/src/cli/core/session.ts +223 -0
- package/src/cli/core/snapshot-analyzer.ts +855 -0
- package/src/cli/core/snapshot-api-config.ts +231 -0
- package/src/cli/core/telemetry.ts +459 -0
- package/src/cli/framework/simple-cli.ts +1340 -0
- package/src/cli/index.ts +13 -0
- package/src/cli/router.ts +20 -0
- package/src/cli/workers/run-integration-runtime.ts +338 -0
- package/src/cli/workers/run-integration-worker-protocol.ts +16 -0
- package/src/cli/workers/run-integration-worker.ts +72 -0
- package/src/index.ts +127 -0
- package/src/runtime/download/download.ts +104 -0
- package/src/runtime/download/index.ts +7 -0
- package/src/runtime/extract/extract.ts +102 -0
- package/src/runtime/extract/index.ts +1 -0
- package/src/runtime/network/index.ts +5 -0
- package/src/runtime/network/network.ts +119 -0
- package/{dist/runtime/recovery/agent.cjs → src/runtime/recovery/agent.ts} +114 -76
- package/src/runtime/recovery/errors.ts +155 -0
- package/src/runtime/recovery/index.ts +7 -0
- package/src/runtime/recovery/recovery.ts +53 -0
- package/{dist/shared/condense-dom/condense-dom.cjs → src/shared/condense-dom/condense-dom.ts} +249 -124
- package/src/shared/config/config.ts +3 -0
- package/src/shared/config/index.ts +0 -0
- package/src/shared/debug/index.ts +1 -0
- package/src/shared/debug/pause.ts +91 -0
- package/src/shared/instrumentation/errors.ts +84 -0
- package/src/shared/instrumentation/index.ts +9 -0
- package/src/shared/instrumentation/instrument.ts +406 -0
- package/src/shared/llm/ai-sdk-adapter.ts +81 -0
- package/{dist/shared/llm/client.cjs → src/shared/llm/client.ts} +86 -80
- package/src/shared/llm/index.ts +3 -0
- package/src/shared/llm/types.ts +63 -0
- package/src/shared/logger/index.ts +13 -0
- package/src/shared/logger/logger.ts +358 -0
- package/src/shared/logger/sinks.ts +148 -0
- package/src/shared/paths/paths.ts +110 -0
- package/src/shared/paths/repo-root.ts +27 -0
- package/src/shared/run/api.ts +6 -0
- package/src/shared/run/browser.ts +107 -0
- package/src/shared/state/index.ts +11 -0
- package/src/shared/state/session-state.ts +77 -0
- package/src/shared/visualization/ghost-cursor.ts +213 -0
- package/src/shared/visualization/highlight.ts +149 -0
- package/src/shared/visualization/index.ts +18 -0
- package/src/shared/workflow/workflow.ts +36 -0
- package/dist/index.cjs +0 -144
- package/dist/index.d.cts +0 -21
- package/dist/runtime/download/download.cjs +0 -70
- package/dist/runtime/download/download.d.cts +0 -35
- package/dist/runtime/download/index.cjs +0 -30
- package/dist/runtime/download/index.d.cts +0 -3
- package/dist/runtime/extract/extract.cjs +0 -88
- package/dist/runtime/extract/extract.d.cts +0 -23
- package/dist/runtime/extract/index.cjs +0 -28
- package/dist/runtime/extract/index.d.cts +0 -5
- package/dist/runtime/network/index.cjs +0 -28
- package/dist/runtime/network/index.d.cts +0 -4
- package/dist/runtime/network/network.cjs +0 -91
- package/dist/runtime/network/network.d.cts +0 -28
- package/dist/runtime/recovery/agent.d.cts +0 -13
- package/dist/runtime/recovery/errors.cjs +0 -124
- package/dist/runtime/recovery/errors.d.cts +0 -31
- package/dist/runtime/recovery/index.cjs +0 -34
- package/dist/runtime/recovery/index.d.cts +0 -7
- package/dist/runtime/recovery/recovery.cjs +0 -55
- package/dist/runtime/recovery/recovery.d.cts +0 -12
- package/dist/shared/condense-dom/condense-dom.d.cts +0 -34
- package/dist/shared/config/config.cjs +0 -44
- package/dist/shared/config/config.d.cts +0 -10
- package/dist/shared/config/index.cjs +0 -32
- package/dist/shared/config/index.d.cts +0 -1
- package/dist/shared/debug/index.cjs +0 -28
- package/dist/shared/debug/index.d.cts +0 -1
- package/dist/shared/debug/pause.cjs +0 -86
- package/dist/shared/debug/pause.d.cts +0 -12
- package/dist/shared/instrumentation/errors.cjs +0 -81
- package/dist/shared/instrumentation/errors.d.cts +0 -12
- package/dist/shared/instrumentation/index.cjs +0 -35
- package/dist/shared/instrumentation/index.d.cts +0 -6
- package/dist/shared/instrumentation/instrument.cjs +0 -206
- package/dist/shared/instrumentation/instrument.d.cts +0 -32
- package/dist/shared/llm/ai-sdk-adapter.cjs +0 -71
- package/dist/shared/llm/ai-sdk-adapter.d.cts +0 -22
- package/dist/shared/llm/client.d.cts +0 -13
- package/dist/shared/llm/index.cjs +0 -31
- package/dist/shared/llm/index.d.cts +0 -5
- package/dist/shared/llm/types.cjs +0 -16
- package/dist/shared/llm/types.d.cts +0 -67
- package/dist/shared/logger/index.cjs +0 -37
- package/dist/shared/logger/index.d.cts +0 -2
- package/dist/shared/logger/logger.cjs +0 -232
- package/dist/shared/logger/logger.d.cts +0 -86
- package/dist/shared/logger/sinks.cjs +0 -160
- package/dist/shared/logger/sinks.d.cts +0 -9
- package/dist/shared/paths/paths.cjs +0 -104
- package/dist/shared/paths/paths.d.cts +0 -10
- package/dist/shared/run/api.cjs +0 -28
- package/dist/shared/run/api.d.cts +0 -2
- package/dist/shared/run/browser.cjs +0 -98
- package/dist/shared/run/browser.d.cts +0 -22
- package/dist/shared/state/index.cjs +0 -38
- package/dist/shared/state/index.d.cts +0 -2
- package/dist/shared/state/session-state.cjs +0 -92
- package/dist/shared/state/session-state.d.cts +0 -40
- package/dist/shared/visualization/ghost-cursor.cjs +0 -174
- package/dist/shared/visualization/ghost-cursor.d.cts +0 -37
- package/dist/shared/visualization/highlight.cjs +0 -134
- package/dist/shared/visualization/highlight.d.cts +0 -22
- package/dist/shared/visualization/index.cjs +0 -45
- package/dist/shared/visualization/index.d.cts +0 -3
- package/dist/shared/workflow/workflow.cjs +0 -47
- package/dist/shared/workflow/workflow.d.cts +0 -21
- package/skills/libretto/integration-approach-selection.md +0 -174
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import type { Page } from "playwright";
|
|
2
|
+
import type z from "zod";
|
|
3
|
+
import {
|
|
4
|
+
type MinimalLogger,
|
|
5
|
+
defaultLogger,
|
|
6
|
+
} from "../../shared/logger/logger.js";
|
|
7
|
+
import type { LLMClient } from "../../shared/llm/types.js";
|
|
8
|
+
|
|
9
|
+
/** Arguments accepted by `extractFromPage`. */
export type ExtractOptions<T extends z.ZodType> = {
  /** Playwright page the screenshot and HTML are captured from. */
  page: Page;
  /** Instruction text interpolated into the LLM prompt. */
  instruction: string;
  /** Zod schema handed to the LLM client as the expected output shape. */
  schema: T;
  /** Client whose `generateObjectFromMessages` performs the extraction. */
  llmClient: LLMClient;
  /** Logger for the completion message; `defaultLogger` is used when omitted. */
  logger?: MinimalLogger;
  /** Optional CSS selector to scope extraction to a specific element. */
  selector?: string;
};
|
|
18
|
+
|
|
19
|
+
/**
 * Generic AI-powered data extraction from page elements.
 *
 * Captures a PNG screenshot (of the element matching `selector` when one is
 * given, otherwise of the page through a CDP `Page.captureScreenshot` call)
 * together with a truncated HTML snapshot, then asks the LLM to return
 * structured data matching the provided Zod schema.
 *
 * The HTML snapshot is best-effort: when it cannot be read a warning is logged
 * and the prompt is built from the screenshot alone. Screenshot and LLM
 * failures propagate to the caller.
 *
 * @param options - Page, instruction, schema, LLM client and optional
 *   logger/selector.
 * @returns The object produced by `llmClient.generateObjectFromMessages`.
 */
export async function extractFromPage<T extends z.ZodType>(
  options: ExtractOptions<T>,
): Promise<z.infer<T>> {
  const {
    page,
    instruction,
    schema,
    selector,
    logger = defaultLogger,
    llmClient,
  } = options;

  // HTML is only supplementary context, so it is capped to keep the prompt bounded.
  const ELEMENT_HTML_LIMIT = 30000;
  const PAGE_HTML_LIMIT = 50000;
  const clip = (html: string, limit: number): string =>
    html.length > limit ? html.slice(0, limit) + "\n... [truncated]" : html;
  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  let screenshot: string;
  let domContent: string | undefined;

  if (selector) {
    const element = page.locator(selector);
    await element.waitFor({ state: "visible", timeout: 10_000 });

    const screenshotBuffer = await element.screenshot();
    screenshot = screenshotBuffer.toString("base64");

    try {
      domContent = clip(await element.innerHTML(), ELEMENT_HTML_LIMIT);
    } catch (domError) {
      // Best-effort: continue with the screenshot only, but leave a trace.
      logger.warn("extractFromPage: failed to capture element HTML", {
        selector,
        domError: describeError(domError),
      });
      domContent = undefined;
    }
  } else {
    const cdpClient = await page.context().newCDPSession(page);
    try {
      await cdpClient.send("Page.enable");
      const { data } = await cdpClient.send("Page.captureScreenshot", {
        format: "png",
      });
      screenshot = data;
    } finally {
      // Release the session so repeated calls do not accumulate CDP sessions.
      // Detach errors (e.g. page already closed) must not mask the real outcome.
      await cdpClient.detach().catch(() => undefined);
    }

    try {
      domContent = clip(await page.content(), PAGE_HTML_LIMIT);
    } catch (domError) {
      // Best-effort: continue with the screenshot only, but leave a trace.
      logger.warn("extractFromPage: failed to capture page HTML", {
        domError: describeError(domError),
      });
      domContent = undefined;
    }
  }

  const prompt = `You are analyzing a screenshot${selector ? " of a specific element" : ""} from a web page to extract structured data.

Instruction: ${instruction}

${domContent ? `Here is the HTML content for additional context:\n<html>\n${domContent}\n</html>` : ""}

Extract the requested information from the screenshot and return it in the specified format. Be precise and only extract what is visible.`;

  const result = await llmClient.generateObjectFromMessages({
    schema,
    messages: [
      {
        role: "user",
        content: [
          { type: "text", text: prompt },
          { type: "image", image: `data:image/png;base64,${screenshot}` },
        ],
      },
    ],
    temperature: 0,
  });

  // Only the first 100 characters of the instruction are logged.
  logger.info("extractFromPage completed", {
    selector,
    instruction: instruction.slice(0, 100),
  });

  return result;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { extractFromPage, type ExtractOptions } from "./extract.js";
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import type { Page } from "playwright";
|
|
2
|
+
import type z from "zod";
|
|
3
|
+
import type { MinimalLogger } from "../../shared/logger/logger.js";
|
|
4
|
+
|
|
5
|
+
/** Describes the HTTP request that `pageRequest` issues from inside the page. */
export type RequestConfig = {
  /** Target URL handed to the in-page `fetch()`. */
  url: string;
  /** HTTP verb; `pageRequest` falls back to "GET" when omitted. */
  method?: "GET" | "POST" | "PUT" | "DELETE" | "PATCH";
  /** Extra request headers, copied before being sent. */
  headers?: Record<string, string>;
  /** Request payload: a pre-serialized string or an object to serialize. */
  body?: Record<string, any> | string;
  /** How to serialize the body. Defaults to "json". */
  bodyType?: "json" | "form";
  /** How to parse the response. Defaults to "json". */
  responseType?: "json" | "text" | "xml";
};
|
|
15
|
+
|
|
16
|
+
/** Optional behaviour switches for `pageRequest`. */
export type PageRequestOptions<T extends z.ZodType | undefined = undefined> = {
  /** Receives the request log entries; nothing is logged when omitted. */
  logger?: MinimalLogger;
  /** Optional Zod schema to validate the response body. */
  schema?: T;
};
|
|
21
|
+
|
|
22
|
+
/**
 * Result type of `pageRequest`: the schema's inferred type when a Zod schema
 * is supplied, otherwise `any`.
 */
type PageRequestResult<T extends z.ZodType | undefined> =
  T extends z.ZodType ? z.infer<T> : any;
|
|
25
|
+
|
|
26
|
+
/**
 * Executes a fetch() call inside the browser context via page.evaluate().
 * Provides typed request config, automatic response parsing, optional Zod
 * validation, and logging.
 *
 * Request body handling:
 * - `bodyType: "form"` URL-encodes object bodies; `"json"` (default)
 *   JSON-stringifies them. String bodies are sent unchanged.
 * - A default `Content-Type` is added only when the caller has not supplied
 *   one (matched case-insensitively), so explicit headers are never
 *   overwritten or duplicated.
 *
 * Response handling:
 * - The body is always read as text first. For a successful response with
 *   `responseType: "json"` it is then JSON-parsed; an empty body yields `null`.
 *   "text" and "xml" both return the raw text.
 * - For a non-OK response the raw text is kept, so the HTTP status error is
 *   reported (with up to 500 characters of the body in the log) even when the
 *   server answered with a non-JSON error page.
 *
 * @param page - Playwright page whose browser context performs the fetch.
 * @param config - URL, method, headers, body and parsing options.
 * @param options - Optional logger and Zod schema.
 * @returns The parsed response body, validated by `schema` when provided.
 * @throws Error when the response status is not OK; whatever `schema.parse`
 *   throws when validation fails; any error raised by the in-page fetch or
 *   JSON parsing.
 */
export async function pageRequest<T extends z.ZodType | undefined = undefined>(
  page: Page,
  config: RequestConfig,
  options?: PageRequestOptions<T>,
): Promise<PageRequestResult<T>> {
  const {
    url,
    method = "GET",
    headers = {},
    body,
    bodyType = "json",
    responseType = "json",
  } = config;
  const { logger, schema } = options ?? {};

  const startTime = Date.now();

  // Build fetch options to pass into page.evaluate
  const fetchHeaders: Record<string, string> = { ...headers };
  let fetchBody: string | undefined;

  if (body !== undefined) {
    // Header names are case-insensitive; respect any caller-provided value.
    const hasContentType = Object.keys(fetchHeaders).some(
      (name) => name.toLowerCase() === "content-type",
    );

    if (bodyType === "form") {
      if (!hasContentType) {
        fetchHeaders["Content-Type"] = "application/x-www-form-urlencoded";
      }
      fetchBody =
        typeof body === "string"
          ? body
          : new URLSearchParams(
              Object.entries(body).map(([k, v]) => [k, String(v)]),
            ).toString();
    } else {
      if (!hasContentType) {
        fetchHeaders["Content-Type"] = "application/json";
      }
      fetchBody = typeof body === "string" ? body : JSON.stringify(body);
    }
  }

  // The callback runs inside the page, so it may only use its own arguments.
  const result = await page.evaluate(
    async ({ url, method, headers, body, responseType }) => {
      const res = await fetch(url, {
        method,
        headers,
        body: body ?? undefined,
      });

      const status = res.status;
      const ok = res.ok;

      // Read as text first so a non-JSON error page cannot hide the status.
      const rawText = await res.text();
      let data: any = rawText;

      if (ok && responseType === "json") {
        data = rawText.trim() === "" ? null : JSON.parse(rawText);
      }

      return { status, ok, data };
    },
    { url, method, headers: fetchHeaders, body: fetchBody, responseType },
  );

  const duration = Date.now() - startTime;

  if (!result.ok) {
    logger?.warn("network:request:error", {
      method,
      url,
      status: result.status,
      duration,
      body:
        typeof result.data === "string" ? result.data.slice(0, 500) : undefined,
    });
    throw new Error(
      `pageRequest failed: ${method} ${url} returned ${result.status}`,
    );
  }

  logger?.info("network:request", {
    method,
    url,
    status: result.status,
    duration,
  });

  if (schema) {
    return schema.parse(result.data) as PageRequestResult<T>;
  }

  return result.data as PageRequestResult<T>;
}
|
|
@@ -1,32 +1,33 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
}
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
function delay(ms) {
|
|
1
|
+
import type { Page } from "playwright";
|
|
2
|
+
import {
|
|
3
|
+
type MinimalLogger,
|
|
4
|
+
defaultLogger,
|
|
5
|
+
} from "../../shared/logger/logger.js";
|
|
6
|
+
import type { LLMClient } from "../../shared/llm/types.js";
|
|
7
|
+
|
|
8
|
+
/** Discriminated union (on `type`) of the browser actions this module models. */
type BrowserAction =
  // Pointer actions at a single coordinate.
  | { type: "click"; x: number; y: number; button?: string }
  | { type: "double_click"; x: number; y: number }
  | { type: "move"; x: number; y: number }
  // Scroll by (scroll_x, scroll_y) at position (x, y).
  | { type: "scroll"; x: number; y: number; scroll_x: number; scroll_y: number }
  // Drag along a list of points.
  | { type: "drag"; path: { x: number; y: number }[] }
  // Keyboard input.
  | { type: "keypress"; keys: string[] }
  | { type: "type"; text: string }
  // Actions that carry no payload.
  | { type: "wait" }
  | { type: "screenshot" }
  | { type: "done" };
|
|
25
|
+
|
|
26
|
+
/** Returns a promise that resolves (with no value) once a `setTimeout` of `ms` milliseconds fires. */
function delay(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
|
29
|
-
|
|
29
|
+
|
|
30
|
+
const KEY_MAPPINGS: Record<string, string> = {
|
|
30
31
|
ENTER: "Enter",
|
|
31
32
|
RETURN: "Enter",
|
|
32
33
|
TAB: "Tab",
|
|
@@ -49,16 +50,25 @@ const KEY_MAPPINGS = {
|
|
|
49
50
|
SHIFT: "Shift",
|
|
50
51
|
META: "Meta",
|
|
51
52
|
CMD: "Meta",
|
|
52
|
-
COMMAND: "Meta"
|
|
53
|
+
COMMAND: "Meta",
|
|
53
54
|
};
|
|
54
|
-
|
|
55
|
+
|
|
56
|
+
/**
 * Looks up the upper-cased key name in `KEY_MAPPINGS`; names without an entry
 * are returned unchanged.
 */
function mapKeyName(key: string): string {
  const normalized = key.toUpperCase();
  const mapped = KEY_MAPPINGS[normalized];
  return mapped ?? key;
}
|
|
57
|
-
|
|
59
|
+
|
|
60
|
+
async function executeBrowserAction(
|
|
61
|
+
page: Page,
|
|
62
|
+
action: BrowserAction,
|
|
63
|
+
logger: MinimalLogger = defaultLogger,
|
|
64
|
+
): Promise<void> {
|
|
58
65
|
switch (action.type) {
|
|
59
66
|
case "click": {
|
|
60
67
|
const { x, y, button = "left" } = action;
|
|
61
|
-
const playwrightButton =
|
|
68
|
+
const playwrightButton =
|
|
69
|
+
button === "wheel" || button === "back" || button === "forward"
|
|
70
|
+
? ("left" as const)
|
|
71
|
+
: (button as "left" | "right" | "middle");
|
|
62
72
|
await page.mouse.click(x, y, { button: playwrightButton });
|
|
63
73
|
logger.info(`Clicked at (${x}, ${y}) with ${button} button`);
|
|
64
74
|
break;
|
|
@@ -90,7 +100,7 @@ async function executeBrowserAction(page, action, logger = import_logger.default
|
|
|
90
100
|
break;
|
|
91
101
|
}
|
|
92
102
|
case "wait": {
|
|
93
|
-
await delay(
|
|
103
|
+
await delay(2000);
|
|
94
104
|
logger.info("Waited 2 seconds");
|
|
95
105
|
break;
|
|
96
106
|
}
|
|
@@ -110,7 +120,9 @@ async function executeBrowserAction(page, action, logger = import_logger.default
|
|
|
110
120
|
if (point) await page.mouse.move(point.x, point.y);
|
|
111
121
|
}
|
|
112
122
|
await page.mouse.up();
|
|
113
|
-
logger.info(
|
|
123
|
+
logger.info(
|
|
124
|
+
`Dragged from (${start.x}, ${start.y}) to (${end.x}, ${end.y})`,
|
|
125
|
+
);
|
|
114
126
|
}
|
|
115
127
|
break;
|
|
116
128
|
}
|
|
@@ -125,56 +137,80 @@ async function executeBrowserAction(page, action, logger = import_logger.default
|
|
|
125
137
|
}
|
|
126
138
|
}
|
|
127
139
|
}
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
140
|
+
|
|
141
|
+
import { z } from "zod";
|
|
142
|
+
|
|
143
|
+
const recoveryActionSchema = z.object({
|
|
144
|
+
reasoning: z
|
|
145
|
+
.string()
|
|
146
|
+
.describe("Your reasoning about what you see and what action to take"),
|
|
147
|
+
action: z.discriminatedUnion("type", [
|
|
148
|
+
z.object({
|
|
149
|
+
type: z.literal("click"),
|
|
150
|
+
x: z.number(),
|
|
151
|
+
y: z.number(),
|
|
152
|
+
}),
|
|
153
|
+
z.object({
|
|
154
|
+
type: z.literal("type"),
|
|
155
|
+
text: z.string(),
|
|
135
156
|
}),
|
|
136
|
-
|
|
137
|
-
type:
|
|
138
|
-
|
|
157
|
+
z.object({
|
|
158
|
+
type: z.literal("keypress"),
|
|
159
|
+
keys: z.array(z.string()),
|
|
139
160
|
}),
|
|
140
|
-
|
|
141
|
-
type:
|
|
142
|
-
|
|
161
|
+
z.object({
|
|
162
|
+
type: z.literal("scroll"),
|
|
163
|
+
x: z.number(),
|
|
164
|
+
y: z.number(),
|
|
165
|
+
scroll_x: z.number(),
|
|
166
|
+
scroll_y: z.number(),
|
|
143
167
|
}),
|
|
144
|
-
|
|
145
|
-
type:
|
|
146
|
-
x: import_zod.z.number(),
|
|
147
|
-
y: import_zod.z.number(),
|
|
148
|
-
scroll_x: import_zod.z.number(),
|
|
149
|
-
scroll_y: import_zod.z.number()
|
|
168
|
+
z.object({
|
|
169
|
+
type: z.literal("wait"),
|
|
150
170
|
}),
|
|
151
|
-
|
|
152
|
-
type:
|
|
171
|
+
z.object({
|
|
172
|
+
type: z.literal("done"),
|
|
153
173
|
}),
|
|
154
|
-
|
|
155
|
-
type: import_zod.z.literal("done")
|
|
156
|
-
})
|
|
157
|
-
])
|
|
174
|
+
]),
|
|
158
175
|
});
|
|
159
|
-
|
|
176
|
+
|
|
177
|
+
/**
|
|
178
|
+
* Executes a vision-based recovery agent to recover from browser automation failures.
|
|
179
|
+
* Takes a screenshot, sends it to the LLM with the instruction, and executes
|
|
180
|
+
* the LLM's suggested browser actions.
|
|
181
|
+
*/
|
|
182
|
+
export async function executeRecoveryAgent(
|
|
183
|
+
page: Page,
|
|
184
|
+
instruction: string,
|
|
185
|
+
logger?: MinimalLogger,
|
|
186
|
+
llmClient?: LLMClient,
|
|
187
|
+
): Promise<void> {
|
|
160
188
|
if (!llmClient) {
|
|
161
189
|
return;
|
|
162
190
|
}
|
|
163
|
-
const log = logger ??
|
|
191
|
+
const log = logger ?? defaultLogger;
|
|
164
192
|
log.info("Executing vision-based recovery agent", { instruction });
|
|
193
|
+
|
|
165
194
|
const viewport = page.viewportSize();
|
|
166
195
|
if (!viewport) {
|
|
167
196
|
throw new Error("Viewport size not found");
|
|
168
197
|
}
|
|
169
|
-
|
|
198
|
+
|
|
199
|
+
let screenshot: string;
|
|
170
200
|
try {
|
|
171
|
-
screenshot = (
|
|
201
|
+
screenshot = (
|
|
202
|
+
await page.screenshot({ fullPage: false, timeout: 10000 })
|
|
203
|
+
).toString("base64");
|
|
172
204
|
} catch (screenshotError) {
|
|
173
205
|
log.warn("Failed to take screenshot for recovery agent, skipping", {
|
|
174
|
-
screenshotError:
|
|
206
|
+
screenshotError:
|
|
207
|
+
screenshotError instanceof Error
|
|
208
|
+
? screenshotError.message
|
|
209
|
+
: String(screenshotError),
|
|
175
210
|
});
|
|
176
211
|
throw new Error("Failed to take screenshot for recovery agent");
|
|
177
212
|
}
|
|
213
|
+
|
|
178
214
|
const maxSteps = 3;
|
|
179
215
|
for (let step = 1; step <= maxSteps; step++) {
|
|
180
216
|
const result = await llmClient.generateObjectFromMessages({
|
|
@@ -190,34 +226,36 @@ async function executeRecoveryAgent(page, instruction, logger, llmClient) {
|
|
|
190
226
|
Your task: ${instruction}
|
|
191
227
|
|
|
192
228
|
Viewport: ${viewport.width}x${viewport.height}px. Complete this in as few steps as possible.
|
|
193
|
-
Analyze the screenshot and decide what action to take. If the task is complete or no action is needed, use the "done" action type
|
|
229
|
+
Analyze the screenshot and decide what action to take. If the task is complete or no action is needed, use the "done" action type.`,
|
|
194
230
|
},
|
|
195
231
|
{
|
|
196
232
|
type: "image",
|
|
197
|
-
image: `data:image/png;base64,${screenshot}
|
|
198
|
-
}
|
|
199
|
-
]
|
|
200
|
-
}
|
|
233
|
+
image: `data:image/png;base64,${screenshot}`,
|
|
234
|
+
},
|
|
235
|
+
],
|
|
236
|
+
},
|
|
201
237
|
],
|
|
202
|
-
temperature: 0
|
|
238
|
+
temperature: 0,
|
|
203
239
|
});
|
|
240
|
+
|
|
204
241
|
log.info(`Recovery step ${step}/${maxSteps}`, {
|
|
205
242
|
reasoning: result.reasoning,
|
|
206
|
-
action: result.action
|
|
243
|
+
action: result.action,
|
|
207
244
|
});
|
|
245
|
+
|
|
208
246
|
if (result.action.type === "done") {
|
|
209
247
|
log.info("Recovery agent completed - no more actions needed");
|
|
210
248
|
break;
|
|
211
249
|
}
|
|
250
|
+
|
|
212
251
|
await executeBrowserAction(page, result.action, log);
|
|
213
|
-
await delay(
|
|
252
|
+
await delay(2000);
|
|
253
|
+
|
|
254
|
+
// Take new screenshot for next iteration
|
|
214
255
|
screenshot = (await page.screenshot({ fullPage: false })).toString(
|
|
215
|
-
"base64"
|
|
256
|
+
"base64",
|
|
216
257
|
);
|
|
217
258
|
}
|
|
259
|
+
|
|
218
260
|
log.info("Recovery agent execution completed");
|
|
219
261
|
}
|
|
220
|
-
// Annotate the CommonJS export names for ESM import in node:
|
|
221
|
-
0 && (module.exports = {
|
|
222
|
-
executeRecoveryAgent
|
|
223
|
-
});
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
import type { Page } from "playwright";
|
|
2
|
+
import {
|
|
3
|
+
type MinimalLogger,
|
|
4
|
+
defaultLogger,
|
|
5
|
+
} from "../../shared/logger/logger.js";
|
|
6
|
+
import type { LLMClient } from "../../shared/llm/types.js";
|
|
7
|
+
import { z } from "zod";
|
|
8
|
+
|
|
9
|
+
/** A known error type used to classify submission errors. */
export type KnownSubmissionError = {
  /** Identifier the LLM is asked to echo back when this error is matched. */
  id: string;
  /** What the LLM should look for on screen. */
  errorPatterns: string[];
  /** The friendly message returned when this error is matched. */
  userMessage: string;
};
|
|
19
|
+
|
|
20
|
+
/** Value returned by `detectSubmissionError` when a known error was matched. */
export type DetectedSubmissionError = {
  /** Always `true`; unmatched cases are reported by throwing instead. */
  matched: true;
  /** `id` of the matched `KnownSubmissionError`. */
  errorId: string;
  /** `userMessage` of the matched `KnownSubmissionError`. */
  message: string;
};
|
|
25
|
+
|
|
26
|
+
/** Structured answer requested from the LLM by `detectSubmissionError`. */
const detectSubmissionErrorSchema = z.object({
  hasError: z.boolean().describe("Whether an error is visible on the page"),
  matchedKnownErrorId: z.string().nullable().describe("The ID of the matched known error, or null if no match"),
  errorMessage: z.string().nullable().describe("The error message visible on screen, or null if no error"),
});
|
|
37
|
+
|
|
38
|
+
/**
 * Uses screenshot + LLM vision to detect if an error occurred during a submission process.
 * Captures a screenshot via CDP (handles unresponsive pages), sends it to the LLM
 * together with a truncated DOM snapshot, and checks the answer against the
 * provided known error patterns.
 *
 * The original `error` is what callers see in every non-matching outcome:
 * when the screenshot cannot be taken, when the LLM call itself fails, when
 * the LLM reports no error, or when the reported error is not a known one.
 * The DOM snapshot is best-effort and its failure only produces a warning.
 *
 * @param page - Playwright page to inspect.
 * @param error - The error that triggered the check; rethrown when unmatched.
 * @param logContext - Context string placed in the prompt and used as the log message.
 * @param llmClient - Client used to analyze the screenshot.
 * @param knownErrors - Known error definitions to match against.
 * @param logger - Optional logger; `defaultLogger` is used when omitted.
 * @returns DetectedSubmissionError if a known error is matched
 * @throws The original error if no known error matches
 */
export async function detectSubmissionError(
  page: Page,
  error: unknown,
  logContext: string,
  llmClient: LLMClient,
  knownErrors: KnownSubmissionError[] = [],
  logger?: MinimalLogger,
): Promise<DetectedSubmissionError> {
  const log = logger ?? defaultLogger;
  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  // Capture screenshot using CDP to handle unresponsive pages
  let screenshot: string;
  let domSnapshot: string | undefined;

  try {
    const cdpClient = await page.context().newCDPSession(page);
    try {
      await cdpClient.send("Page.enable");
      const { data } = await cdpClient.send("Page.captureScreenshot", {
        format: "png",
      });
      screenshot = data;
    } finally {
      // Release the session so repeated checks do not accumulate CDP sessions.
      // Detach errors (e.g. page already closed) must not mask the real outcome.
      await cdpClient.detach().catch(() => undefined);
    }
  } catch (screenshotError) {
    log.warn(
      "Failed to take screenshot via CDP for error detection, skipping LLM analysis",
      { screenshotError, originalError: error },
    );
    throw error;
  }

  // Capture DOM snapshot for additional context
  try {
    const htmlContent = await page.content();
    domSnapshot =
      htmlContent.length > 50000
        ? htmlContent.slice(0, 50000) + "\n... [truncated]"
        : htmlContent;
  } catch (domError) {
    log.warn("Failed to capture DOM snapshot", {
      domError: describeError(domError),
    });
  }

  const knownErrorsDescription =
    knownErrors.length > 0
      ? `\nKnown error patterns to look for:\n${knownErrors.map((e, i) => `${i + 1}. ID: "${e.id}" - Patterns: ${e.errorPatterns.join(", ")}`).join("\n")}\n`
      : "";

  const prompt = `You are analyzing a screenshot and DOM of a web page to detect if an error occurred during a browser automation process.

Context: ${logContext}

${knownErrorsDescription}

Analyze the screenshot and DOM snapshot to determine:
1. Is there any error message, warning, or indication of failure visible on the page?
2. If yes, does it match any of the known error patterns listed above?
3. What is the exact error message or description of the problem?

IMPORTANT:
- Look carefully for error alerts, warning banners, error modals, red text, or any indication of failure
- Check the DOM snapshot for error messages that may not be visible in the screenshot
- If you see a known error pattern, use its exact ID in matchedKnownErrorId
- If there's an error but it doesn't match any known pattern, set matchedKnownErrorId to null
- If the page looks normal with no errors, set hasError to false

${domSnapshot ? `<dom_snapshot>\n${domSnapshot}\n</dom_snapshot>` : ""}`;

  let result: z.infer<typeof detectSubmissionErrorSchema>;
  try {
    result = await llmClient.generateObjectFromMessages({
      schema: detectSubmissionErrorSchema,
      messages: [
        {
          role: "user",
          content: [
            { type: "text", text: prompt },
            { type: "image", image: `data:image/png;base64,${screenshot}` },
          ],
        },
      ],
      temperature: 0,
    });
  } catch (llmError) {
    // A failing classifier must not hide the failure being classified.
    log.warn("LLM error detection failed, rethrowing original error", {
      llmError: describeError(llmError),
      originalError: error,
    });
    throw error;
  }

  if (!result.hasError) {
    log.info("No error detected by LLM", { result });
  } else if (result.matchedKnownErrorId) {
    // Only trust a matched ID when the LLM also reported a visible error.
    const knownError = knownErrors.find(
      (e) => e.id === result.matchedKnownErrorId,
    );
    if (knownError) {
      log.warn(logContext, {
        error,
        browserError: result.errorMessage,
        knownErrorId: result.matchedKnownErrorId,
      });
      return {
        matched: true,
        errorId: knownError.id,
        message: knownError.userMessage,
      };
    }
  }

  // Log and re-throw for unknown errors
  log.warn(logContext, {
    error,
    browserError: result.errorMessage,
  });
  throw error;
}
|