libretto 0.6.22 → 0.6.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -88,16 +88,23 @@ function readPngDimensions(buffer) {
88
88
  height: buffer.readUInt32BE(20)
89
89
  };
90
90
  }
91
- async function takeViewportScreenshot(page) {
92
- const viewport = page.viewportSize();
93
- if (!viewport) {
94
- throw new Error("Viewport size not found");
95
- }
96
- const screenshot = await page.screenshot({
97
- fullPage: false,
98
- scale: "css",
99
- timeout: 1e4
100
- });
91
+ function toPositiveNumber(value) {
92
+ return typeof value === "number" && Number.isFinite(value) && value > 0 ? value : void 0;
93
+ }
94
+ async function getViewportFromPage(page) {
95
+ const metrics = await page.evaluate(() => ({
96
+ visualViewportWidth: window.visualViewport?.width,
97
+ visualViewportHeight: window.visualViewport?.height,
98
+ innerWidth: window.innerWidth,
99
+ innerHeight: window.innerHeight,
100
+ documentElementClientWidth: document.documentElement?.clientWidth,
101
+ documentElementClientHeight: document.documentElement?.clientHeight
102
+ }));
103
+ const width = toPositiveNumber(metrics.visualViewportWidth) ?? toPositiveNumber(metrics.innerWidth) ?? toPositiveNumber(metrics.documentElementClientWidth);
104
+ const height = toPositiveNumber(metrics.visualViewportHeight) ?? toPositiveNumber(metrics.innerHeight) ?? toPositiveNumber(metrics.documentElementClientHeight);
105
+ return width && height ? { width, height } : null;
106
+ }
107
+ function screenshotState(screenshot, viewport) {
101
108
  const dimensions = readPngDimensions(screenshot);
102
109
  return {
103
110
  screenshot,
@@ -110,6 +117,18 @@ async function takeViewportScreenshot(page) {
110
117
  }
111
118
  };
112
119
  }
120
+ async function takeViewportScreenshot(page) {
121
+ const viewport = page.viewportSize() ?? await getViewportFromPage(page).catch(() => null);
122
+ if (!viewport) {
123
+ throw new Error("Viewport size not found");
124
+ }
125
+ const screenshot = await page.screenshot({
126
+ fullPage: false,
127
+ scale: "css",
128
+ timeout: 1e4
129
+ });
130
+ return screenshotState(screenshot, viewport);
131
+ }
113
132
  async function executeBrowserAction(page, action, logger = defaultLogger) {
114
133
  switch (action.type) {
115
134
  case "click": {
@@ -247,9 +266,9 @@ async function executeRecoveryAgent(page, instruction, logger, model, maxSteps =
247
266
  }
248
267
  const log = logger ?? defaultLogger;
249
268
  log.info("Executing vision-based recovery agent", { instruction });
250
- let screenshotState;
269
+ let screenshotState2;
251
270
  try {
252
- screenshotState = await takeViewportScreenshot(page);
271
+ screenshotState2 = await takeViewportScreenshot(page);
253
272
  } catch (screenshotError) {
254
273
  log.warn("Failed to take screenshot for recovery agent, skipping", {
255
274
  screenshotError: screenshotError instanceof Error ? screenshotError.message : String(screenshotError)
@@ -258,7 +277,7 @@ async function executeRecoveryAgent(page, instruction, logger, model, maxSteps =
258
277
  }
259
278
  const steps = [];
260
279
  for (let step = 1; step <= maxSteps; step++) {
261
- const { screenshot, dimensions, scale } = screenshotState;
280
+ const { screenshot, dimensions, scale } = screenshotState2;
262
281
  const { object: result } = await generateObject({
263
282
  model,
264
283
  schema: recoveryActionSchema,
@@ -305,7 +324,7 @@ Analyze the screenshot and decide what action to take. If the task is complete o
305
324
  await executeBrowserAction(page, action, log);
306
325
  await delay(2e3);
307
326
  if (step < maxSteps) {
308
- screenshotState = await takeViewportScreenshot(page);
327
+ screenshotState2 = await takeViewportScreenshot(page);
309
328
  }
310
329
  }
311
330
  log.info("Recovery agent execution completed");
@@ -1,4 +1,5 @@
1
1
  import { executeRecoveryAgent } from "./agent.js";
2
+ import { defaultLogger } from "../../shared/logger/logger.js";
2
3
  const POPUP_RECOVERY_INSTRUCTION = [
3
4
  "Look at the page for any popup, modal, cookie banner, overlay, dialog, or interstitial that blocks interaction.",
4
5
  "If any blocking popup is visible, close it before returning done.",
@@ -143,17 +144,63 @@ async function runWithFallback(args) {
143
144
  if (!isSupportedMethod(baseContext.targetType, baseContext.method)) {
144
145
  throw originalError;
145
146
  }
147
+ defaultLogger.info("Action failed, attempting recovery", {
148
+ targetType: baseContext.targetType,
149
+ method: baseContext.method,
150
+ argsCount: baseContext.args.length,
151
+ error: formatErrorForLog(originalError)
152
+ });
153
+ let recoveryResult;
146
154
  try {
147
- await args.options.recoveryAction({
155
+ recoveryResult = await args.options.recoveryAction({
148
156
  ...baseContext,
149
157
  error: originalError
150
158
  });
151
- return await args.invoke();
152
- } catch {
159
+ } catch (recoveryError) {
160
+ defaultLogger.warn("Recovery action failed", {
161
+ targetType: baseContext.targetType,
162
+ method: baseContext.method,
163
+ originalError: formatErrorForLog(originalError),
164
+ recoveryError: formatErrorForLog(recoveryError)
165
+ });
166
+ throw new AggregateError(
167
+ [originalError, recoveryError],
168
+ "Recovery action failed after the original action failed."
169
+ );
170
+ }
171
+ defaultLogger.info("Recovery action completed, retrying original action", {
172
+ targetType: baseContext.targetType,
173
+ method: baseContext.method,
174
+ recoveryResult
175
+ });
176
+ try {
177
+ const result = await args.invoke();
178
+ defaultLogger.info("Recovered action retry succeeded", {
179
+ targetType: baseContext.targetType,
180
+ method: baseContext.method
181
+ });
182
+ return result;
183
+ } catch (retryError) {
184
+ defaultLogger.warn("Recovered action retry failed", {
185
+ targetType: baseContext.targetType,
186
+ method: baseContext.method,
187
+ originalError: formatErrorForLog(originalError),
188
+ retryError: formatErrorForLog(retryError)
189
+ });
153
190
  throw originalError;
154
191
  }
155
192
  }
156
193
  }
194
+ function formatErrorForLog(error) {
195
+ if (error instanceof Error) {
196
+ return {
197
+ name: error.name,
198
+ message: error.message,
199
+ stack: error.stack
200
+ };
201
+ }
202
+ return { value: String(error) };
203
+ }
157
204
  function bindOrWrapLocatorMethod(locator, rawPage, method, value, options, caches) {
158
205
  if (typeof value !== "function") return value;
159
206
  if (LOCATOR_FACTORY_METHODS.has(method)) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "libretto",
3
- "version": "0.6.22",
3
+ "version": "0.6.24",
4
4
  "description": "AI-powered browser automation library and CLI built on Playwright",
5
5
  "license": "MIT",
6
6
  "homepage": "https://libretto.sh",
@@ -31,30 +31,20 @@
31
31
  }
32
32
  },
33
33
  "peerDependencies": {
34
- "@ai-sdk/anthropic": "^3.0.58",
35
34
  "@ai-sdk/google": "^3.0.51",
36
- "@ai-sdk/google-vertex": "^4.0.80",
37
- "@ai-sdk/openai": "^3.0.41"
35
+ "@ai-sdk/google-vertex": "^4.0.80"
38
36
  },
39
37
  "peerDependenciesMeta": {
40
- "@ai-sdk/anthropic": {
41
- "optional": true
42
- },
43
38
  "@ai-sdk/google": {
44
39
  "optional": true
45
40
  },
46
41
  "@ai-sdk/google-vertex": {
47
42
  "optional": true
48
- },
49
- "@ai-sdk/openai": {
50
- "optional": true
51
43
  }
52
44
  },
53
45
  "devDependencies": {
54
- "@ai-sdk/anthropic": "^3.0.58",
55
46
  "@ai-sdk/google": "^3.0.51",
56
47
  "@ai-sdk/google-vertex": "^4.0.80",
57
- "@ai-sdk/openai": "^3.0.41",
58
48
  "@anthropic-ai/claude-agent-sdk": "^0.2.75",
59
49
  "@mariozechner/pi-agent-core": "^0.62.0",
60
50
  "@mariozechner/pi-ai": "^0.62.0",
@@ -70,6 +60,8 @@
70
60
  "vitest": "^4.1.5"
71
61
  },
72
62
  "dependencies": {
63
+ "@ai-sdk/anthropic": "^3.0.66",
64
+ "@ai-sdk/openai": "^3.0.66",
73
65
  "ai": "^6.0.116",
74
66
  "esbuild": "^0.27.0",
75
67
  "playwright": "^1.58.2",
@@ -4,7 +4,7 @@ description: "Browser automation CLI for building, maintaining, and running brow
4
4
  license: MIT
5
5
  metadata:
6
6
  author: saffron-health
7
- version: "0.6.22"
7
+ version: "0.6.24"
8
8
  ---
9
9
 
10
10
  ## How Libretto Works
@@ -4,7 +4,7 @@ description: "Read-only Libretto workflow for diagnosing live browser state with
4
4
  license: MIT
5
5
  metadata:
6
6
  author: saffron-health
7
- version: "0.6.22"
7
+ version: "0.6.24"
8
8
  ---
9
9
 
10
10
  ## How Libretto Read-Only Works
@@ -153,21 +153,41 @@ function readPngDimensions(buffer: Buffer): ImageDimensions {
153
153
  };
154
154
  }
155
155
 
156
- async function takeViewportScreenshot(page: Page): Promise<{
156
+ function toPositiveNumber(value: unknown): number | undefined {
157
+ return typeof value === "number" && Number.isFinite(value) && value > 0
158
+ ? value
159
+ : undefined;
160
+ }
161
+
162
+ async function getViewportFromPage(page: Page): Promise<ImageDimensions | null> {
163
+ const metrics = await page.evaluate(() => ({
164
+ visualViewportWidth: window.visualViewport?.width,
165
+ visualViewportHeight: window.visualViewport?.height,
166
+ innerWidth: window.innerWidth,
167
+ innerHeight: window.innerHeight,
168
+ documentElementClientWidth: document.documentElement?.clientWidth,
169
+ documentElementClientHeight: document.documentElement?.clientHeight,
170
+ }));
171
+ const width =
172
+ toPositiveNumber(metrics.visualViewportWidth) ??
173
+ toPositiveNumber(metrics.innerWidth) ??
174
+ toPositiveNumber(metrics.documentElementClientWidth);
175
+ const height =
176
+ toPositiveNumber(metrics.visualViewportHeight) ??
177
+ toPositiveNumber(metrics.innerHeight) ??
178
+ toPositiveNumber(metrics.documentElementClientHeight);
179
+
180
+ return width && height ? { width, height } : null;
181
+ }
182
+
183
+ function screenshotState(
184
+ screenshot: Buffer,
185
+ viewport: ImageDimensions,
186
+ ): {
157
187
  screenshot: Buffer;
158
188
  dimensions: ImageDimensions;
159
189
  scale: CoordinateScale;
160
- }> {
161
- const viewport = page.viewportSize();
162
- if (!viewport) {
163
- throw new Error("Viewport size not found");
164
- }
165
-
166
- const screenshot = await page.screenshot({
167
- fullPage: false,
168
- scale: "css",
169
- timeout: 10000,
170
- });
190
+ } {
171
191
  const dimensions = readPngDimensions(screenshot);
172
192
  return {
173
193
  screenshot,
@@ -181,6 +201,25 @@ async function takeViewportScreenshot(page: Page): Promise<{
181
201
  };
182
202
  }
183
203
 
204
+ async function takeViewportScreenshot(page: Page): Promise<{
205
+ screenshot: Buffer;
206
+ dimensions: ImageDimensions;
207
+ scale: CoordinateScale;
208
+ }> {
209
+ const viewport =
210
+ page.viewportSize() ?? (await getViewportFromPage(page).catch(() => null));
211
+ if (!viewport) {
212
+ throw new Error("Viewport size not found");
213
+ }
214
+
215
+ const screenshot = await page.screenshot({
216
+ fullPage: false,
217
+ scale: "css",
218
+ timeout: 10000,
219
+ });
220
+ return screenshotState(screenshot, viewport);
221
+ }
222
+
184
223
  async function executeBrowserAction(
185
224
  page: Page,
186
225
  action: BrowserAction,
@@ -1,6 +1,7 @@
1
1
  import type { FrameLocator, Locator, Page } from "playwright";
2
2
  import type { LanguageModel } from "ai";
3
3
  import { executeRecoveryAgent, type RecoveryAgentResult } from "./agent.js";
4
+ import { defaultLogger } from "../../shared/logger/logger.js";
4
5
 
5
6
  export type RecoveryActionTargetType = "page" | "locator";
6
7
 
@@ -225,18 +226,68 @@ async function runWithFallback<T>(args: {
225
226
  throw originalError;
226
227
  }
227
228
 
229
+ defaultLogger.info("Action failed, attempting recovery", {
230
+ targetType: baseContext.targetType,
231
+ method: baseContext.method,
232
+ argsCount: baseContext.args.length,
233
+ error: formatErrorForLog(originalError),
234
+ });
235
+
236
+ let recoveryResult: RecoveryActionResult;
228
237
  try {
229
- await args.options.recoveryAction({
238
+ recoveryResult = await args.options.recoveryAction({
230
239
  ...baseContext,
231
240
  error: originalError,
232
241
  });
233
- return await args.invoke();
234
- } catch {
242
+ } catch (recoveryError) {
243
+ defaultLogger.warn("Recovery action failed", {
244
+ targetType: baseContext.targetType,
245
+ method: baseContext.method,
246
+ originalError: formatErrorForLog(originalError),
247
+ recoveryError: formatErrorForLog(recoveryError),
248
+ });
249
+ throw new AggregateError(
250
+ [originalError, recoveryError],
251
+ "Recovery action failed after the original action failed.",
252
+ );
253
+ }
254
+
255
+ defaultLogger.info("Recovery action completed, retrying original action", {
256
+ targetType: baseContext.targetType,
257
+ method: baseContext.method,
258
+ recoveryResult,
259
+ });
260
+
261
+ try {
262
+ const result = await args.invoke();
263
+ defaultLogger.info("Recovered action retry succeeded", {
264
+ targetType: baseContext.targetType,
265
+ method: baseContext.method,
266
+ });
267
+ return result;
268
+ } catch (retryError) {
269
+ defaultLogger.warn("Recovered action retry failed", {
270
+ targetType: baseContext.targetType,
271
+ method: baseContext.method,
272
+ originalError: formatErrorForLog(originalError),
273
+ retryError: formatErrorForLog(retryError),
274
+ });
235
275
  throw originalError;
236
276
  }
237
277
  }
238
278
  }
239
279
 
280
+ function formatErrorForLog(error: unknown): Record<string, unknown> {
281
+ if (error instanceof Error) {
282
+ return {
283
+ name: error.name,
284
+ message: error.message,
285
+ stack: error.stack,
286
+ };
287
+ }
288
+ return { value: String(error) };
289
+ }
290
+
240
291
  type ProxyCaches = {
241
292
  locators: WeakMap<Locator, Locator>;
242
293
  frameLocators: WeakMap<FrameLocator, FrameLocator>;