libretto 0.6.21 → 0.6.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -1
- package/README.template.md +5 -1
- package/dist/cli/commands/execution.js +8 -1
- package/dist/cli/core/browser.js +8 -3
- package/dist/cli/core/daemon/daemon.js +8 -6
- package/dist/cli/core/providers/kernel.js +107 -29
- package/dist/cli/core/providers/steel.js +10 -1
- package/dist/index.d.ts +3 -2
- package/dist/index.js +15 -1
- package/dist/runtime/recovery/agent.d.ts +50 -2
- package/dist/runtime/recovery/agent.js +159 -45
- package/dist/runtime/recovery/index.d.ts +2 -1
- package/dist/runtime/recovery/index.js +16 -2
- package/dist/runtime/recovery/page-fallbacks.d.ts +45 -0
- package/dist/runtime/recovery/page-fallbacks.js +389 -0
- package/dist/shared/state/index.d.ts +1 -1
- package/dist/shared/state/session-state.d.ts +4 -1
- package/dist/shared/state/session-state.js +2 -1
- package/dist/shared/workflow/workflow.d.ts +19 -6
- package/dist/shared/workflow/workflow.js +38 -9
- package/docs/reference/runtime/page-fallbacks.mdx +85 -0
- package/docs/understand-libretto/error-handling-and-recovery.mdx +45 -0
- package/package.json +4 -12
- package/skills/libretto/SKILL.md +8 -2
- package/skills/libretto/references/code-generation-rules.md +23 -6
- package/skills/libretto-readonly/SKILL.md +1 -1
- package/src/cli/commands/execution.ts +8 -1
- package/src/cli/core/browser.ts +7 -2
- package/src/cli/core/daemon/daemon.ts +9 -4
- package/src/cli/core/daemon/ipc.ts +1 -0
- package/src/cli/core/providers/kernel.ts +153 -29
- package/src/cli/core/providers/steel.ts +11 -1
- package/src/cli/core/providers/types.ts +3 -0
- package/src/index.ts +22 -2
- package/src/runtime/recovery/agent.ts +227 -50
- package/src/runtime/recovery/index.ts +21 -1
- package/src/runtime/recovery/page-fallbacks.ts +527 -0
- package/src/shared/state/index.ts +1 -0
- package/src/shared/state/session-state.ts +2 -0
- package/src/shared/workflow/workflow.ts +90 -20
|
@@ -33,6 +33,83 @@ const KEY_MAPPINGS = {
|
|
|
33
33
|
function mapKeyName(key) {
|
|
34
34
|
return KEY_MAPPINGS[key.toUpperCase()] ?? key;
|
|
35
35
|
}
|
|
36
|
+
/**
 * Restricts a number to the range bounded by `min` and `max`.
 *
 * The lower bound is applied first and the upper bound second, so when
 * `min` is greater than `max` the result is `max`.
 *
 * @param value - The number to restrict.
 * @param min - Lower bound.
 * @param max - Upper bound.
 * @returns `value` limited to the given bounds.
 */
function clamp(value, min, max) {
  const raisedToMin = Math.max(value, min);
  return Math.min(raisedToMin, max);
}
|
|
39
|
+
/**
 * Scales a point by the per-axis factors in `scale` and keeps the result
 * inside the viewport.
 *
 * Each coordinate is multiplied by its scale factor and then clamped to
 * `[0, viewport dimension - 1]`; the upper bound never drops below 0.
 *
 * @param x - Horizontal coordinate before scaling.
 * @param y - Vertical coordinate before scaling.
 * @param scale - Object with `scaleX`, `scaleY`, `viewportWidth` and
 *   `viewportHeight`.
 * @returns An `{ x, y }` object with the scaled, clamped coordinates.
 */
function scalePoint(x, y, scale) {
  const maxX = Math.max(scale.viewportWidth - 1, 0);
  const maxY = Math.max(scale.viewportHeight - 1, 0);
  const scaledX = x * scale.scaleX;
  const scaledY = y * scale.scaleY;
  return {
    x: clamp(scaledX, 0, maxX),
    y: clamp(scaledY, 0, maxY)
  };
}
|
|
45
|
+
/**
 * Returns a copy of a browser action with its coordinates run through
 * `scalePoint`.
 *
 * - `click`, `double_click`, `move`: `x`/`y` are scaled.
 * - `scroll`: `x`/`y` are scaled and the scroll deltas are multiplied by
 *   the matching scale factor (deltas are not clamped).
 * - `drag`: every point of `path` is scaled.
 * - `keypress`, `type`, `wait`, `screenshot`, `done`: the same action
 *   object is returned untouched.
 *
 * Any other action type yields `undefined`.
 *
 * @param action - The action to scale; dispatched on `action.type`.
 * @param scale - Scale factors and viewport size, as used by `scalePoint`.
 * @returns The scaled action, the original action, or `undefined`.
 */
function scaleBrowserAction(action, scale) {
  const kind = action.type;

  // Actions that carry a single pointer position.
  if (kind === "click" || kind === "double_click" || kind === "move") {
    const scaledPoint = scalePoint(action.x, action.y, scale);
    return { ...action, ...scaledPoint };
  }

  if (kind === "scroll") {
    const anchor = scalePoint(action.x, action.y, scale);
    return {
      ...action,
      ...anchor,
      scroll_x: action.scroll_x * scale.scaleX,
      scroll_y: action.scroll_y * scale.scaleY
    };
  }

  if (kind === "drag") {
    const scaledPath = action.path.map((waypoint) => scalePoint(waypoint.x, waypoint.y, scale));
    return { ...action, path: scaledPath };
  }

  // Actions without coordinates pass through unchanged.
  const hasNoCoordinates =
    kind === "keypress" ||
    kind === "type" ||
    kind === "wait" ||
    kind === "screenshot" ||
    kind === "done";
  return hasNoCoordinates ? action : undefined;
}
|
|
81
|
+
/**
 * Reads the pixel dimensions from the header of a PNG image.
 *
 * A PNG file starts with an 8-byte signature followed by the IHDR chunk:
 * 4-byte chunk length, 4-byte chunk type ("IHDR"), then the big-endian
 * 32-bit width (offset 16) and height (offset 20).
 *
 * @param buffer - Buffer holding the PNG data (at least the first 24 bytes).
 * @returns `{ width, height }` in pixels.
 * @throws Error if the buffer does not start with the PNG signature, is too
 *   short to contain the IHDR dimensions, does not begin with an IHDR
 *   chunk, or declares a zero width or height.
 */
function readPngDimensions(buffer) {
  const pngSignature = "89504e470d0a1a0a";
  // signature (8) + chunk length (4) + chunk type (4) + width (4) + height (4)
  const minimumHeaderLength = 24;

  if (buffer.subarray(0, 8).toString("hex") !== pngSignature) {
    throw new Error("Recovery screenshot is not a PNG image.");
  }
  // Fail with a descriptive error instead of the RangeError that
  // readUInt32BE raises when asked to read past the end of the buffer.
  if (buffer.length < minimumHeaderLength) {
    throw new Error("Recovery screenshot is a truncated PNG image.");
  }
  // The offsets below are only meaningful when the first chunk is IHDR.
  if (buffer.subarray(12, 16).toString("latin1") !== "IHDR") {
    throw new Error("Recovery screenshot PNG is missing its IHDR header.");
  }

  const width = buffer.readUInt32BE(16);
  const height = buffer.readUInt32BE(20);
  // Zero is not a valid PNG dimension and would make any ratio computed
  // from these values non-finite.
  if (width === 0 || height === 0) {
    throw new Error("Recovery screenshot PNG has zero width or height.");
  }
  return { width, height };
}
|
|
91
|
+
/**
 * Captures a screenshot of the current viewport and works out how to map
 * screenshot pixels onto viewport coordinates.
 *
 * The screenshot is taken with `fullPage: false`, `scale: "css"` and a
 * 10000 ms timeout. Its pixel size is read with `readPngDimensions`, and
 * the scale factors are the viewport size divided by the screenshot size
 * on each axis.
 *
 * @param page - Page object exposing `viewportSize()` and `screenshot()`.
 * @returns `{ screenshot, dimensions, scale }`, where `scale` holds
 *   `scaleX`, `scaleY`, `viewportWidth` and `viewportHeight`.
 * @throws Error("Viewport size not found") when the page reports no
 *   viewport size; errors from `page.screenshot` or `readPngDimensions`
 *   propagate unchanged.
 */
async function takeViewportScreenshot(page) {
  const viewportSize = page.viewportSize();
  if (!viewportSize) {
    throw new Error("Viewport size not found");
  }

  const captureOptions = { fullPage: false, scale: "css", timeout: 10000 };
  const screenshot = await page.screenshot(captureOptions);
  const dimensions = readPngDimensions(screenshot);

  const scale = {
    scaleX: viewportSize.width / dimensions.width,
    scaleY: viewportSize.height / dimensions.height,
    viewportWidth: viewportSize.width,
    viewportHeight: viewportSize.height
  };
  return { screenshot, dimensions, scale };
}
|
|
36
113
|
async function executeBrowserAction(page, action, logger = defaultLogger) {
|
|
37
114
|
switch (action.type) {
|
|
38
115
|
case "click": {
|
|
@@ -109,56 +186,79 @@ async function executeBrowserAction(page, action, logger = defaultLogger) {
|
|
|
109
186
|
import { z } from "zod";
|
|
110
187
|
const recoveryActionSchema = z.object({
|
|
111
188
|
reasoning: z.string().describe("Your reasoning about what you see and what action to take"),
|
|
112
|
-
action: z.
|
|
113
|
-
z.
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
z.
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
}),
|
|
122
|
-
z.object({
|
|
123
|
-
type: z.literal("keypress"),
|
|
124
|
-
keys: z.array(z.string())
|
|
125
|
-
}),
|
|
126
|
-
z.object({
|
|
127
|
-
type: z.literal("scroll"),
|
|
128
|
-
x: z.number(),
|
|
129
|
-
y: z.number(),
|
|
130
|
-
scroll_x: z.number(),
|
|
131
|
-
scroll_y: z.number()
|
|
132
|
-
}),
|
|
133
|
-
z.object({
|
|
134
|
-
type: z.literal("wait")
|
|
135
|
-
}),
|
|
136
|
-
z.object({
|
|
137
|
-
type: z.literal("done")
|
|
138
|
-
})
|
|
139
|
-
])
|
|
189
|
+
action: z.object({
|
|
190
|
+
type: z.enum(["click", "type", "keypress", "scroll", "wait", "done"]).describe("The browser action to execute."),
|
|
191
|
+
x: z.number().nullable().describe("The screenshot pixel x coordinate for click/scroll."),
|
|
192
|
+
y: z.number().nullable().describe("The screenshot pixel y coordinate for click/scroll."),
|
|
193
|
+
text: z.string().nullable().describe("Text for type actions."),
|
|
194
|
+
keys: z.array(z.string()).nullable().describe("Keys for keypress actions."),
|
|
195
|
+
scroll_x: z.number().nullable().describe("Horizontal scroll delta."),
|
|
196
|
+
scroll_y: z.number().nullable().describe("Vertical scroll delta.")
|
|
197
|
+
})
|
|
140
198
|
});
|
|
141
|
-
|
|
199
|
+
/**
 * Returns `value` when it is of type `number`; otherwise throws.
 *
 * @param value - Candidate value taken from a recovery action.
 * @param field - Field name used in the error message.
 * @returns `value` unchanged.
 * @throws Error(`Recovery action is missing <field>.`) when `value` is not
 *   a number (for example `null`).
 */
function numberOrThrow(value, field) {
  if (typeof value !== "number") {
    throw new Error(`Recovery action is missing ${field}.`);
  }
  return value;
}
|
|
203
|
+
/**
 * Converts the flat, nullable action object produced by the recovery
 * schema into an action object that only carries the fields relevant to
 * its type.
 *
 * - `click`: requires numeric `x` and `y`.
 * - `type`: uses `text`, falling back to an empty string.
 * - `keypress`: uses `keys`, falling back to an empty array.
 * - `scroll`: requires numeric `x`, `y`, `scroll_x` and `scroll_y`.
 * - `wait` / `done`: carry no extra fields.
 *
 * Any other type yields `undefined`.
 *
 * @param action - Object with `type` and the nullable fields listed above.
 * @returns The normalized action, or `undefined` for an unknown type.
 * @throws Error from `numberOrThrow` when a required numeric field is not
 *   a number.
 */
function normalizeRecoveryAction(action) {
  const kind = action.type;

  if (kind === "click") {
    const x = numberOrThrow(action.x, "x");
    const y = numberOrThrow(action.y, "y");
    return { type: "click", x, y };
  }

  if (kind === "type") {
    return { type: "type", text: action.text ?? "" };
  }

  if (kind === "keypress") {
    return { type: "keypress", keys: action.keys ?? [] };
  }

  if (kind === "scroll") {
    // Validate in the same field order so the first missing field is the
    // one reported.
    const x = numberOrThrow(action.x, "x");
    const y = numberOrThrow(action.y, "y");
    const scroll_x = numberOrThrow(action.scroll_x, "scroll_x");
    const scroll_y = numberOrThrow(action.scroll_y, "scroll_y");
    return { type: "scroll", x, y, scroll_x, scroll_y };
  }

  if (kind === "wait" || kind === "done") {
    return { type: kind };
  }

  return undefined;
}
|
|
229
|
+
/**
 * Summarizes a list of recovery steps as a status string.
 *
 * - `"skipped"`: no steps were recorded.
 * - `"no-action-needed"`: every step is a `done` action.
 * - `"action-taken"`: at least one non-`done` step, and the last step is
 *   `done`.
 * - `"incomplete"`: the last step is not `done`.
 *
 * @param steps - Array of step objects, each with an `action.type`.
 * @returns One of the four status strings above.
 */
function getRecoveryStatus(steps) {
  if (steps.length === 0) {
    return "skipped";
  }

  // Visit every step, counting the ones that actually did something.
  const actionCount = steps.reduce(
    (count, step) => (step.action.type !== "done" ? count + 1 : count),
    0
  );
  const finishedWithDone = steps.at(-1)?.action.type === "done";

  if (!finishedWithDone) {
    return "incomplete";
  }
  return actionCount === 0 ? "no-action-needed" : "action-taken";
}
|
|
243
|
+
const DEFAULT_RECOVERY_MAX_STEPS = 3;
|
|
244
|
+
async function executeRecoveryAgent(page, instruction, logger, model, maxSteps = DEFAULT_RECOVERY_MAX_STEPS) {
|
|
142
245
|
if (!model) {
|
|
143
|
-
return;
|
|
246
|
+
return { status: "skipped", steps: [] };
|
|
144
247
|
}
|
|
145
248
|
const log = logger ?? defaultLogger;
|
|
146
249
|
log.info("Executing vision-based recovery agent", { instruction });
|
|
147
|
-
|
|
148
|
-
if (!viewport) {
|
|
149
|
-
throw new Error("Viewport size not found");
|
|
150
|
-
}
|
|
151
|
-
let screenshot;
|
|
250
|
+
let screenshotState;
|
|
152
251
|
try {
|
|
153
|
-
|
|
252
|
+
screenshotState = await takeViewportScreenshot(page);
|
|
154
253
|
} catch (screenshotError) {
|
|
155
254
|
log.warn("Failed to take screenshot for recovery agent, skipping", {
|
|
156
255
|
screenshotError: screenshotError instanceof Error ? screenshotError.message : String(screenshotError)
|
|
157
256
|
});
|
|
158
257
|
throw new Error("Failed to take screenshot for recovery agent");
|
|
159
258
|
}
|
|
160
|
-
const
|
|
259
|
+
const steps = [];
|
|
161
260
|
for (let step = 1; step <= maxSteps; step++) {
|
|
261
|
+
const { screenshot, dimensions, scale } = screenshotState;
|
|
162
262
|
const { object: result } = await generateObject({
|
|
163
263
|
model,
|
|
164
264
|
schema: recoveryActionSchema,
|
|
@@ -172,33 +272,47 @@ async function executeRecoveryAgent(page, instruction, logger, model) {
|
|
|
172
272
|
|
|
173
273
|
Your task: ${instruction}
|
|
174
274
|
|
|
175
|
-
|
|
275
|
+
Screenshot: ${dimensions.width}x${dimensions.height}px. Coordinates must be screenshot pixel coordinates relative to the top-left corner of the screenshot. Complete this in as few steps as possible.
|
|
176
276
|
Analyze the screenshot and decide what action to take. If the task is complete or no action is needed, use the "done" action type.`
|
|
177
277
|
},
|
|
178
278
|
{
|
|
179
279
|
type: "image",
|
|
180
|
-
image:
|
|
280
|
+
image: screenshot
|
|
181
281
|
}
|
|
182
282
|
]
|
|
183
283
|
}
|
|
184
284
|
],
|
|
185
285
|
temperature: 0
|
|
186
286
|
});
|
|
287
|
+
const imageAction = normalizeRecoveryAction(result.action);
|
|
288
|
+
const action = scaleBrowserAction(imageAction, scale);
|
|
187
289
|
log.info(`Recovery step ${step}/${maxSteps}`, {
|
|
188
290
|
reasoning: result.reasoning,
|
|
189
|
-
|
|
291
|
+
imageAction,
|
|
292
|
+
action,
|
|
293
|
+
screenshot: dimensions,
|
|
294
|
+
scale
|
|
295
|
+
});
|
|
296
|
+
steps.push({
|
|
297
|
+
step,
|
|
298
|
+
reasoning: result.reasoning,
|
|
299
|
+
action
|
|
190
300
|
});
|
|
191
|
-
if (
|
|
301
|
+
if (action.type === "done") {
|
|
192
302
|
log.info("Recovery agent completed - no more actions needed");
|
|
193
303
|
break;
|
|
194
304
|
}
|
|
195
|
-
await executeBrowserAction(page,
|
|
305
|
+
await executeBrowserAction(page, action, log);
|
|
196
306
|
await delay(2e3);
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
307
|
+
if (step < maxSteps) {
|
|
308
|
+
screenshotState = await takeViewportScreenshot(page);
|
|
309
|
+
}
|
|
200
310
|
}
|
|
201
311
|
log.info("Recovery agent execution completed");
|
|
312
|
+
return {
|
|
313
|
+
status: getRecoveryStatus(steps),
|
|
314
|
+
steps
|
|
315
|
+
};
|
|
202
316
|
}
|
|
203
317
|
export {
|
|
204
318
|
executeRecoveryAgent
|
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
export { executeRecoveryAgent } from './agent.js';
|
|
1
|
+
export { BrowserAction, RecoveryAgentResult, RecoveryAgentStep, executeRecoveryAgent } from './agent.js';
|
|
2
2
|
export { attemptWithRecovery } from './recovery.js';
|
|
3
3
|
export { DetectedSubmissionError, KnownSubmissionError, detectSubmissionError } from './errors.js';
|
|
4
|
+
export { COMPUTER_USE_RECOVERY_MODELS, ComputerUseRecoveryActionOptions, POPUP_RECOVERY_INSTRUCTION, PopupRecoveryActionOptions, RecoveryAction, RecoveryActionContext, RecoveryActionHandler, RecoveryActionOptions, RecoveryActionResult, RecoveryActionTargetType, computerUseRecoveryAction, createRecoveryPage, popupRecoveryAction } from './page-fallbacks.js';
|
|
4
5
|
import 'playwright';
|
|
5
6
|
import '../../shared/logger/logger.js';
|
|
6
7
|
import 'ai';
|
|
@@ -1,10 +1,24 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import {
|
|
2
|
+
executeRecoveryAgent
|
|
3
|
+
} from "./agent.js";
|
|
2
4
|
import { attemptWithRecovery } from "./recovery.js";
|
|
3
5
|
import {
|
|
4
6
|
detectSubmissionError
|
|
5
7
|
} from "./errors.js";
|
|
8
|
+
import {
|
|
9
|
+
COMPUTER_USE_RECOVERY_MODELS,
|
|
10
|
+
POPUP_RECOVERY_INSTRUCTION,
|
|
11
|
+
computerUseRecoveryAction,
|
|
12
|
+
createRecoveryPage,
|
|
13
|
+
popupRecoveryAction
|
|
14
|
+
} from "./page-fallbacks.js";
|
|
6
15
|
export {
|
|
16
|
+
COMPUTER_USE_RECOVERY_MODELS,
|
|
17
|
+
POPUP_RECOVERY_INSTRUCTION,
|
|
7
18
|
attemptWithRecovery,
|
|
19
|
+
computerUseRecoveryAction,
|
|
20
|
+
createRecoveryPage,
|
|
8
21
|
detectSubmissionError,
|
|
9
|
-
executeRecoveryAgent
|
|
22
|
+
executeRecoveryAgent,
|
|
23
|
+
popupRecoveryAction
|
|
10
24
|
};
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import { Page } from 'playwright';
|
|
2
|
+
import { LanguageModel } from 'ai';
|
|
3
|
+
|
|
4
|
+
type RecoveryActionTargetType = "page" | "locator";
|
|
5
|
+
type RecoveryActionContext = {
|
|
6
|
+
page: Page;
|
|
7
|
+
targetType: RecoveryActionTargetType;
|
|
8
|
+
method: string;
|
|
9
|
+
args: readonly unknown[];
|
|
10
|
+
error: unknown;
|
|
11
|
+
};
|
|
12
|
+
type RecoveryActionResult = Record<string, unknown> | void;
|
|
13
|
+
type RecoveryActionHandler = (context: RecoveryActionContext) => Promise<RecoveryActionResult>;
|
|
14
|
+
type RecoveryAction = RecoveryActionHandler;
|
|
15
|
+
type RecoveryActionOptions = {
|
|
16
|
+
recoveryAction: RecoveryAction;
|
|
17
|
+
};
|
|
18
|
+
type ComputerUseRecoveryModelOptions = {
|
|
19
|
+
languageModel: LanguageModel;
|
|
20
|
+
} | {
|
|
21
|
+
provider: "openai";
|
|
22
|
+
apiKey: string;
|
|
23
|
+
model?: "gpt-5.5";
|
|
24
|
+
} | {
|
|
25
|
+
provider: "anthropic";
|
|
26
|
+
apiKey: string;
|
|
27
|
+
model?: "claude-sonnet-4-6";
|
|
28
|
+
};
|
|
29
|
+
type ComputerUseRecoveryActionOptions = ComputerUseRecoveryModelOptions & {
|
|
30
|
+
instruction: string;
|
|
31
|
+
maxSteps?: number;
|
|
32
|
+
};
|
|
33
|
+
type PopupRecoveryActionOptions = ComputerUseRecoveryModelOptions & {
|
|
34
|
+
maxSteps?: number;
|
|
35
|
+
};
|
|
36
|
+
declare const POPUP_RECOVERY_INSTRUCTION: string;
|
|
37
|
+
declare const COMPUTER_USE_RECOVERY_MODELS: {
|
|
38
|
+
readonly anthropic: "claude-sonnet-4-6";
|
|
39
|
+
readonly openai: "gpt-5.5";
|
|
40
|
+
};
|
|
41
|
+
declare function createRecoveryPage(page: Page, options: RecoveryActionOptions): Page;
|
|
42
|
+
declare function computerUseRecoveryAction(options: ComputerUseRecoveryActionOptions): RecoveryAction;
|
|
43
|
+
declare function popupRecoveryAction(options: PopupRecoveryActionOptions): RecoveryAction;
|
|
44
|
+
|
|
45
|
+
export { COMPUTER_USE_RECOVERY_MODELS, type ComputerUseRecoveryActionOptions, POPUP_RECOVERY_INSTRUCTION, type PopupRecoveryActionOptions, type RecoveryAction, type RecoveryActionContext, type RecoveryActionHandler, type RecoveryActionOptions, type RecoveryActionResult, type RecoveryActionTargetType, computerUseRecoveryAction, createRecoveryPage, popupRecoveryAction };
|