libretto 0.6.21 → 0.6.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/README.md +5 -1
  2. package/README.template.md +5 -1
  3. package/dist/cli/commands/execution.js +8 -1
  4. package/dist/cli/core/browser.js +8 -3
  5. package/dist/cli/core/daemon/daemon.js +8 -6
  6. package/dist/cli/core/providers/kernel.js +107 -29
  7. package/dist/cli/core/providers/steel.js +10 -1
  8. package/dist/index.d.ts +3 -2
  9. package/dist/index.js +15 -1
  10. package/dist/runtime/recovery/agent.d.ts +50 -2
  11. package/dist/runtime/recovery/agent.js +159 -45
  12. package/dist/runtime/recovery/index.d.ts +2 -1
  13. package/dist/runtime/recovery/index.js +16 -2
  14. package/dist/runtime/recovery/page-fallbacks.d.ts +45 -0
  15. package/dist/runtime/recovery/page-fallbacks.js +389 -0
  16. package/dist/shared/state/index.d.ts +1 -1
  17. package/dist/shared/state/session-state.d.ts +4 -1
  18. package/dist/shared/state/session-state.js +2 -1
  19. package/dist/shared/workflow/workflow.d.ts +19 -6
  20. package/dist/shared/workflow/workflow.js +38 -9
  21. package/docs/reference/runtime/page-fallbacks.mdx +85 -0
  22. package/docs/understand-libretto/error-handling-and-recovery.mdx +45 -0
  23. package/package.json +4 -12
  24. package/skills/libretto/SKILL.md +8 -2
  25. package/skills/libretto/references/code-generation-rules.md +23 -6
  26. package/skills/libretto-readonly/SKILL.md +1 -1
  27. package/src/cli/commands/execution.ts +8 -1
  28. package/src/cli/core/browser.ts +7 -2
  29. package/src/cli/core/daemon/daemon.ts +9 -4
  30. package/src/cli/core/daemon/ipc.ts +1 -0
  31. package/src/cli/core/providers/kernel.ts +153 -29
  32. package/src/cli/core/providers/steel.ts +11 -1
  33. package/src/cli/core/providers/types.ts +3 -0
  34. package/src/index.ts +22 -2
  35. package/src/runtime/recovery/agent.ts +227 -50
  36. package/src/runtime/recovery/index.ts +21 -1
  37. package/src/runtime/recovery/page-fallbacks.ts +527 -0
  38. package/src/shared/state/index.ts +1 -0
  39. package/src/shared/state/session-state.ts +2 -0
  40. package/src/shared/workflow/workflow.ts +90 -20
@@ -0,0 +1,527 @@
1
+ import type { FrameLocator, Locator, Page } from "playwright";
2
+ import type { LanguageModel } from "ai";
3
+ import { executeRecoveryAgent, type RecoveryAgentResult } from "./agent.js";
4
+ import { defaultLogger } from "../../shared/logger/logger.js";
5
+
6
/** Kind of object whose method call failed: the page itself or a locator. */
export type RecoveryActionTargetType = "page" | "locator";

/** Details about a failed call that are handed to a recovery handler. */
export type RecoveryActionContext = {
  /** Page passed along to the recovery handler. */
  page: Page;
  /** Whether the failing method was invoked on a page or on a locator. */
  targetType: RecoveryActionTargetType;
  /** Name of the method that threw. */
  method: string;
  /** Arguments the failing method was called with. */
  args: readonly unknown[];
  /** The value thrown by the failing call. */
  error: unknown;
};

/** Optional payload a recovery handler may resolve with. */
export type RecoveryActionResult = Record<string, unknown> | void;

/** Async callback that attempts to repair the page after a failed call. */
export type RecoveryActionHandler = (
  context: RecoveryActionContext,
) => Promise<RecoveryActionResult>;

/** Alias of {@link RecoveryActionHandler}. */
export type RecoveryAction = RecoveryActionHandler;

/** Options accepted when wrapping a page with recovery behavior. */
export type RecoveryActionOptions = {
  recoveryAction: RecoveryAction;
};

/** Caller supplies an already-constructed language model. */
type ProvidedLanguageModelOptions = {
  languageModel: LanguageModel;
};

/** Caller supplies OpenAI credentials; the model is created on demand. */
type OpenAIRecoveryModelOptions = {
  provider: "openai";
  apiKey: string;
  model?: "gpt-5.5";
};

/** Caller supplies Anthropic credentials; the model is created on demand. */
type AnthropicRecoveryModelOptions = {
  provider: "anthropic";
  apiKey: string;
  model?: "claude-sonnet-4-6";
};

/** The three mutually exclusive ways of selecting the recovery model. */
type ComputerUseRecoveryModelOptions =
  | ProvidedLanguageModelOptions
  | OpenAIRecoveryModelOptions
  | AnthropicRecoveryModelOptions;

/** Options for a computer-use recovery action driven by a custom instruction. */
export type ComputerUseRecoveryActionOptions =
  ComputerUseRecoveryModelOptions & {
    instruction: string;
    maxSteps?: number;
  };

/** Options for the popup-dismissal recovery action (instruction is built in). */
export type PopupRecoveryActionOptions = ComputerUseRecoveryModelOptions & {
  maxSteps?: number;
};
51
+
52
/** Instruction text used by the popup-dismissal recovery action. */
export const POPUP_RECOVERY_INSTRUCTION: string =
  "Look at the page for any popup, modal, cookie banner, overlay, dialog, or interstitial that blocks interaction. " +
  "If any blocking popup is visible, close it before returning done. " +
  "Prefer obvious close, dismiss, continue, accept, or X buttons. " +
  "Do not return done while a blocking overlay or dialog is still visible.";

/** Model identifier used for each hosted provider. */
export const COMPUTER_USE_RECOVERY_MODELS = {
  anthropic: "claude-sonnet-4-6",
  openai: "gpt-5.5",
} as const;
63
+
64
/** UI-driving method names listed for both pages and locators. */
const SHARED_UI_METHODS: readonly string[] = [
  "click",
  "dblclick",
  "tap",
  "hover",
  "fill",
  "type",
  "press",
  "pressSequentially",
  "check",
  "uncheck",
  "setChecked",
  "selectOption",
  "setInputFiles",
  "selectText",
  "dispatchEvent",
  "focus",
  "blur",
];

/** Locator-producing method names listed for pages, locators and frame locators. */
const SHARED_FACTORY_METHODS: readonly string[] = [
  "locator",
  "getByRole",
  "getByText",
  "getByLabel",
  "getByPlaceholder",
  "getByAltText",
  "getByTitle",
  "getByTestId",
];

/** Page methods that interact with the UI and are eligible for recovery. */
const PAGE_UI_METHODS = new Set<string>([...SHARED_UI_METHODS, "dragAndDrop"]);

/** Page methods that read or wait on state and are eligible for recovery. */
const PAGE_READ_METHODS = new Set<string>([
  "title",
  "content",
  "screenshot",
  "waitForLoadState",
  "waitForRequest",
  "waitForResponse",
  "waitForURL",
]);

/** Locator methods that interact with the UI and are eligible for recovery. */
const LOCATOR_UI_METHODS = new Set<string>([
  ...SHARED_UI_METHODS,
  "clear",
  "dragTo",
  "scrollIntoViewIfNeeded",
]);

/** Locator methods that read or wait on state and are eligible for recovery. */
const LOCATOR_READ_METHODS = new Set<string>([
  "textContent",
  "innerText",
  "innerHTML",
  "allTextContents",
  "allInnerTexts",
  "ariaSnapshot",
  "boundingBox",
  "count",
  "getAttribute",
  "inputValue",
  "isChecked",
  "isDisabled",
  "isEditable",
  "isEnabled",
  "isVisible",
  "isHidden",
  "screenshot",
  "waitFor",
]);

/** Page methods whose returned locator gets wrapped in a recovery proxy. */
const PAGE_LOCATOR_FACTORY_METHODS = new Set<string>(SHARED_FACTORY_METHODS);

/** Locator methods whose returned locator gets wrapped in a recovery proxy. */
const LOCATOR_FACTORY_METHODS = new Set<string>([
  ...SHARED_FACTORY_METHODS,
  "filter",
  "and",
  "or",
  "first",
  "last",
  "nth",
]);

/** Frame-locator methods whose result gets wrapped in a recovery proxy. */
const FRAME_LOCATOR_FACTORY_METHODS = new Set<string>([
  ...SHARED_FACTORY_METHODS,
  "owner",
  "first",
  "last",
  "nth",
  "frameLocator",
]);
182
+
183
/**
 * Reports whether `method` is one of the UI-interaction methods listed for
 * the given target type.
 */
function isUiMethod(
  targetType: RecoveryActionTargetType,
  method: string,
): boolean {
  if (targetType === "page") {
    return PAGE_UI_METHODS.has(method);
  }
  return LOCATOR_UI_METHODS.has(method);
}
191
+
192
/**
 * Reports whether `method` is one of the read/wait methods listed for the
 * given target type.
 */
function isReadMethod(
  targetType: RecoveryActionTargetType,
  method: string,
): boolean {
  if (targetType === "page") {
    return PAGE_READ_METHODS.has(method);
  }
  return LOCATOR_READ_METHODS.has(method);
}
200
+
201
/**
 * Reports whether a failed call to `method` on the given target type is
 * eligible for recovery, i.e. it is either a UI method or a read method.
 */
function isSupportedMethod(
  targetType: RecoveryActionTargetType,
  method: string,
): boolean {
  if (isUiMethod(targetType, method)) {
    return true;
  }
  return isReadMethod(targetType, method);
}
207
+
208
/**
 * Runs `args.invoke()`; if it throws for a supported method, runs the
 * configured recovery action once and then retries `args.invoke()` once.
 *
 * @returns The result of the first successful invocation.
 * @throws The original error when the method is unsupported or the retry
 *   fails; an `AggregateError` of `[originalError, recoveryError]` when the
 *   recovery action itself throws.
 */
async function runWithFallback<T>(args: {
  page: Page;
  targetType: RecoveryActionTargetType;
  method: string;
  methodArgs: readonly unknown[];
  invoke: () => T | Promise<T>;
  options: RecoveryActionOptions;
}): Promise<T> {
  // First attempt: on success there is nothing else to do.
  let firstFailure: unknown;
  try {
    return await args.invoke();
  } catch (thrown) {
    firstFailure = thrown;
  }

  const { targetType, method } = args;
  if (!isSupportedMethod(targetType, method)) {
    throw firstFailure;
  }

  defaultLogger.info("Action failed, attempting recovery", {
    targetType,
    method,
    argsCount: args.methodArgs.length,
    error: formatErrorForLog(firstFailure),
  });

  let recoveryResult: RecoveryActionResult;
  try {
    recoveryResult = await args.options.recoveryAction({
      page: args.page,
      targetType,
      method,
      args: args.methodArgs,
      error: firstFailure,
    });
  } catch (recoveryError) {
    defaultLogger.warn("Recovery action failed", {
      targetType,
      method,
      originalError: formatErrorForLog(firstFailure),
      recoveryError: formatErrorForLog(recoveryError),
    });
    // Surface both failures so neither is lost.
    throw new AggregateError(
      [firstFailure, recoveryError],
      "Recovery action failed after the original action failed.",
    );
  }

  defaultLogger.info("Recovery action completed, retrying original action", {
    targetType,
    method,
    recoveryResult,
  });

  try {
    const retried = await args.invoke();
    defaultLogger.info("Recovered action retry succeeded", {
      targetType,
      method,
    });
    return retried;
  } catch (retryError) {
    defaultLogger.warn("Recovered action retry failed", {
      targetType,
      method,
      originalError: formatErrorForLog(firstFailure),
      retryError: formatErrorForLog(retryError),
    });
    // The retry error is only logged; callers see the original failure.
    throw firstFailure;
  }
}
279
+
280
/**
 * Converts an arbitrary thrown value into a plain object suitable for
 * structured logging. This function never throws, so it is safe to call from
 * inside error-handling paths.
 *
 * @param error - Any thrown value (not necessarily an `Error`).
 * @returns `{ name, message, stack }` for `Error` instances; otherwise
 *   `{ value }` holding a string rendering of the thrown value.
 */
function formatErrorForLog(error: unknown): Record<string, unknown> {
  if (error instanceof Error) {
    return {
      name: error.name,
      message: error.message,
      stack: error.stack,
    };
  }

  if (typeof error === "object" && error !== null) {
    // String(obj) yields "[object Object]" for plain objects and throws for
    // null-prototype objects, so prefer JSON and fall back defensively.
    try {
      const serialized: string | undefined = JSON.stringify(error);
      if (serialized !== undefined) {
        return { value: serialized };
      }
    } catch {
      // Circular structure, BigInt member, or a throwing toJSON: fall through.
    }
    try {
      return { value: Object.prototype.toString.call(error) };
    } catch {
      return { value: "[unserializable object]" };
    }
  }

  // Primitives (including symbols) and functions convert safely.
  return { value: String(error) };
}
290
+
291
/**
 * Per-page lookup tables mapping each raw locator / frame locator to the
 * proxy already created for it, so the same raw object is wrapped only once.
 */
type ProxyCaches = {
  /** Raw locator -> its recovery proxy. */
  locators: WeakMap<Locator, Locator>;
  /** Raw frame locator -> its recovery proxy. */
  frameLocators: WeakMap<FrameLocator, FrameLocator>;
};
295
+
296
/**
 * Decides how a property read from a locator should be exposed through the
 * recovery proxy.
 *
 * - Non-function values are returned unchanged.
 * - Locator factory methods, `all` and `contentFrame` return results that are
 *   themselves wrapped in recovery proxies.
 * - Supported UI/read methods are routed through `runWithFallback`.
 * - Every other method is returned bound to the raw locator.
 */
function bindOrWrapLocatorMethod(
  locator: Locator,
  rawPage: Page,
  method: string,
  value: unknown,
  options: RecoveryActionOptions,
  caches: ProxyCaches,
): unknown {
  if (typeof value !== "function") {
    return value;
  }

  const original = value;
  const wrapLocator = (raw: Locator): Locator =>
    createFallbackLocator(raw, rawPage, options, caches);

  if (LOCATOR_FACTORY_METHODS.has(method)) {
    return (...callArgs: unknown[]) =>
      wrapLocator(original.apply(locator, callArgs) as Locator);
  }

  if (method === "all") {
    return async (...callArgs: unknown[]) => {
      const found = (await original.apply(locator, callArgs)) as Locator[];
      return found.map((item) => wrapLocator(item));
    };
  }

  if (method === "contentFrame") {
    return (...callArgs: unknown[]) =>
      createFallbackFrameLocator(
        original.apply(locator, callArgs) as FrameLocator,
        rawPage,
        options,
        caches,
      );
  }

  if (isSupportedMethod("locator", method)) {
    return (...callArgs: unknown[]) =>
      runWithFallback({
        page: rawPage,
        targetType: "locator",
        method,
        methodArgs: callArgs,
        invoke: () => original.apply(locator, callArgs),
        options,
      });
  }

  // Not recoverable: expose the raw method with `this` pinned to the locator.
  return original.bind(locator);
}
343
+
344
/**
 * Returns a recovery proxy for `locator`, reusing the cached proxy when this
 * raw locator has been wrapped before.
 *
 * String-keyed property reads go through `bindOrWrapLocatorMethod`;
 * symbol-keyed reads are forwarded untouched.
 */
function createFallbackLocator(
  locator: Locator,
  rawPage: Page,
  options: RecoveryActionOptions,
  caches: ProxyCaches,
): Locator {
  const existing = caches.locators.get(locator);
  if (existing) {
    return existing;
  }

  const handler: ProxyHandler<Locator> = {
    get(target, prop, receiver) {
      return typeof prop === "string"
        ? bindOrWrapLocatorMethod(
            target,
            rawPage,
            prop,
            // Read against the raw target so its own getters see the raw object.
            Reflect.get(target, prop, target),
            options,
            caches,
          )
        : Reflect.get(target, prop, receiver);
    },
  };
  const wrapped = new Proxy(locator, handler) as Locator;

  caches.locators.set(locator, wrapped);
  return wrapped;
}
372
+
373
/**
 * Returns a recovery proxy for `frameLocator`, reusing the cached proxy when
 * this raw frame locator has been wrapped before.
 *
 * Factory methods return wrapped results: `first`, `last`, `nth` and
 * `frameLocator` yield wrapped frame locators, the remaining factory methods
 * yield wrapped locators. All other methods are bound to the raw object.
 */
function createFallbackFrameLocator(
  frameLocator: FrameLocator,
  rawPage: Page,
  options: RecoveryActionOptions,
  caches: ProxyCaches,
): FrameLocator {
  const existing = caches.frameLocators.get(frameLocator);
  if (existing) {
    return existing;
  }

  const wrapped = new Proxy(frameLocator, {
    get(target, prop, receiver) {
      if (typeof prop !== "string") {
        return Reflect.get(target, prop, receiver);
      }

      const member = Reflect.get(target, prop, target);
      if (typeof member !== "function") {
        return member;
      }
      if (!FRAME_LOCATOR_FACTORY_METHODS.has(prop)) {
        return member.bind(target);
      }

      const yieldsFrameLocator =
        prop === "first" ||
        prop === "last" ||
        prop === "nth" ||
        prop === "frameLocator";

      return (...callArgs: unknown[]) => {
        const produced = member.apply(target, callArgs);
        return yieldsFrameLocator
          ? createFallbackFrameLocator(
              produced as FrameLocator,
              rawPage,
              options,
              caches,
            )
          : createFallbackLocator(
              produced as Locator,
              rawPage,
              options,
              caches,
            );
      };
    },
  }) as FrameLocator;

  caches.frameLocators.set(frameLocator, wrapped);
  return wrapped;
}
425
+
426
/**
 * Wraps `page` in a proxy that runs `options.recoveryAction` and retries once
 * when a supported page or locator method throws.
 *
 * @param page - The page to wrap; it is also the page passed to the recovery
 *   action.
 * @param options - Holds the recovery action to run after a failure.
 * @returns A proxy exposing the same interface as `page`.
 */
export function createRecoveryPage(
  page: Page,
  options: RecoveryActionOptions,
): Page {
  // Caches are scoped to this wrapped page.
  const caches: ProxyCaches = {
    locators: new WeakMap(),
    frameLocators: new WeakMap(),
  };

  const handler: ProxyHandler<Page> = {
    get(target, prop, receiver) {
      if (typeof prop !== "string") {
        return Reflect.get(target, prop, receiver);
      }

      const member = Reflect.get(target, prop, target);
      if (typeof member !== "function") {
        return member;
      }

      if (PAGE_LOCATOR_FACTORY_METHODS.has(prop)) {
        return (...callArgs: unknown[]) =>
          createFallbackLocator(
            member.apply(target, callArgs) as Locator,
            page,
            options,
            caches,
          );
      }

      if (prop === "frameLocator") {
        return (...callArgs: unknown[]) =>
          createFallbackFrameLocator(
            member.apply(target, callArgs) as FrameLocator,
            page,
            options,
            caches,
          );
      }

      if (isSupportedMethod("page", prop)) {
        return (...callArgs: unknown[]) =>
          runWithFallback({
            page,
            targetType: "page",
            method: prop,
            methodArgs: callArgs,
            invoke: () => member.apply(target, callArgs),
            options,
          });
      }

      // Not recoverable: expose the raw method with `this` pinned to the page.
      return member.bind(target);
    },
  };

  return new Proxy(page, handler) as Page;
}
474
+
475
/**
 * Resolves the language model to use for computer-use recovery.
 *
 * When `options` carries a `provider`, the matching AI SDK package is loaded
 * lazily and a model is created from `apiKey`; otherwise the caller-supplied
 * `languageModel` is returned as is.
 *
 * @throws Error when the requested model is not the supported one for the
 *   provider, or when the provider is neither "openai" nor "anthropic"
 *   (reachable from untyped callers; previously such values were silently
 *   treated as Anthropic).
 */
async function resolveComputerUseRecoveryModel(
  options: ComputerUseRecoveryActionOptions,
): Promise<LanguageModel> {
  if (!("provider" in options)) {
    return options.languageModel;
  }

  // Widen to string so values outside the declared union are still checked.
  const provider: string = options.provider;
  const apiKey = options.apiKey;
  const requestedModel: string | undefined = options.model;

  if (provider === "openai") {
    const model = requestedModel ?? COMPUTER_USE_RECOVERY_MODELS.openai;
    if (model !== COMPUTER_USE_RECOVERY_MODELS.openai) {
      throw new Error(
        `Unsupported OpenAI computer use recovery model "${model}". Supported model: ${COMPUTER_USE_RECOVERY_MODELS.openai}.`,
      );
    }
    const { createOpenAI } = await import("@ai-sdk/openai");
    return createOpenAI({ apiKey })(model);
  }

  if (provider === "anthropic") {
    const model = requestedModel ?? COMPUTER_USE_RECOVERY_MODELS.anthropic;
    if (model !== COMPUTER_USE_RECOVERY_MODELS.anthropic) {
      throw new Error(
        `Unsupported Anthropic computer use recovery model "${model}". Supported model: ${COMPUTER_USE_RECOVERY_MODELS.anthropic}.`,
      );
    }
    const { createAnthropic } = await import("@ai-sdk/anthropic");
    return createAnthropic({ apiKey })(model);
  }

  throw new Error(
    `Unsupported computer use recovery provider "${provider}". Supported providers: openai, anthropic.`,
  );
}
504
+
505
/**
 * Builds a recovery action that resolves the configured model and then runs
 * the recovery agent on the failing page with `options.instruction`.
 *
 * @param options - Model selection, instruction text and optional step limit.
 * @returns A recovery action resolving with the recovery agent's result.
 */
export function computerUseRecoveryAction(
  options: ComputerUseRecoveryActionOptions,
): RecoveryAction {
  const recover = async (
    context: RecoveryActionContext,
  ): Promise<RecoveryAgentResult> => {
    // The model is resolved on every invocation of the action.
    const languageModel = await resolveComputerUseRecoveryModel(options);
    const { instruction, maxSteps } = options;
    return executeRecoveryAgent(
      context.page,
      instruction,
      undefined,
      languageModel,
      maxSteps,
    );
  };
  return recover;
}
519
+
520
/**
 * Builds a computer-use recovery action whose instruction is the fixed
 * popup-dismissal text in `POPUP_RECOVERY_INSTRUCTION`.
 *
 * @param options - Model selection and optional step limit.
 */
export function popupRecoveryAction(
  options: PopupRecoveryActionOptions,
): RecoveryAction {
  const agentOptions: ComputerUseRecoveryActionOptions = {
    ...options,
    instruction: POPUP_RECOVERY_INSTRUCTION,
  };
  return computerUseRecoveryAction(agentOptions);
}
@@ -7,6 +7,7 @@ export {
7
7
  parseSessionStateContent,
8
8
  serializeSessionState,
9
9
  type SessionAccessMode,
10
+ type ProviderState,
10
11
  type SessionStatus,
11
12
  type SessionState,
12
13
  type SessionStateFile,
@@ -19,6 +19,7 @@ export const SessionViewportSchema = z.object({
19
19
  export const ProviderStateSchema = z.object({
20
20
  name: z.string(),
21
21
  sessionId: z.string(),
22
+ recordingUrl: z.string().url().optional(),
22
23
  });
23
24
 
24
25
  export const SessionStateFileSchema = z.object({
@@ -38,6 +39,7 @@ export const SessionStateFileSchema = z.object({
38
39
 
39
40
  export type SessionStatus = z.infer<typeof SessionStatusSchema>;
40
41
  export type SessionAccessMode = z.infer<typeof SessionAccessModeSchema>;
42
+ export type ProviderState = z.infer<typeof ProviderStateSchema>;
41
43
  export type SessionStateFile = z.infer<typeof SessionStateFileSchema>;
42
44
  export type SessionState = Omit<SessionStateFile, "version">;
43
45