libretto 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. package/README.md +109 -35
  2. package/dist/cli/cli.js +22 -97
  3. package/dist/cli/commands/browser.js +86 -59
  4. package/dist/cli/commands/execution.js +199 -86
  5. package/dist/cli/commands/init.js +34 -29
  6. package/dist/cli/commands/logs.js +4 -5
  7. package/dist/cli/commands/shared.js +30 -29
  8. package/dist/cli/commands/snapshot.js +26 -39
  9. package/dist/cli/core/ai-config.js +21 -4
  10. package/dist/cli/core/api-snapshot-analyzer.js +15 -5
  11. package/dist/cli/core/browser.js +207 -37
  12. package/dist/cli/core/context.js +4 -1
  13. package/dist/cli/core/session-telemetry.js +434 -174
  14. package/dist/cli/core/session.js +21 -8
  15. package/dist/cli/core/snapshot-analyzer.js +14 -31
  16. package/dist/cli/core/snapshot-api-config.js +2 -6
  17. package/dist/cli/core/telemetry.js +20 -4
  18. package/dist/cli/framework/simple-cli.js +45 -25
  19. package/dist/cli/router.js +14 -21
  20. package/dist/cli/workers/run-integration-runtime.js +24 -5
  21. package/dist/cli/workers/run-integration-worker-protocol.js +3 -1
  22. package/dist/cli/workers/run-integration-worker.js +1 -4
  23. package/dist/index.d.ts +1 -2
  24. package/dist/index.js +7 -10
  25. package/dist/runtime/download/download.js +5 -1
  26. package/dist/runtime/extract/extract.js +11 -2
  27. package/dist/runtime/network/network.js +8 -1
  28. package/dist/runtime/recovery/agent.js +6 -2
  29. package/dist/runtime/recovery/errors.js +3 -1
  30. package/dist/runtime/recovery/recovery.js +3 -1
  31. package/dist/shared/condense-dom/condense-dom.js +17 -69
  32. package/dist/shared/config/config.d.ts +1 -9
  33. package/dist/shared/config/config.js +0 -18
  34. package/dist/shared/config/index.d.ts +2 -1
  35. package/dist/shared/config/index.js +0 -10
  36. package/dist/shared/debug/pause.js +9 -3
  37. package/dist/shared/dom-semantics.d.ts +8 -0
  38. package/dist/shared/dom-semantics.js +69 -0
  39. package/dist/shared/instrumentation/instrument.js +101 -5
  40. package/dist/shared/llm/ai-sdk-adapter.js +3 -1
  41. package/dist/shared/llm/client.js +3 -1
  42. package/dist/shared/logger/index.js +4 -1
  43. package/dist/shared/run/api.js +3 -1
  44. package/dist/shared/run/browser.js +47 -3
  45. package/dist/shared/state/session-state.d.ts +2 -1
  46. package/dist/shared/state/session-state.js +5 -2
  47. package/dist/shared/visualization/ghost-cursor.js +36 -14
  48. package/dist/shared/visualization/highlight.js +9 -6
  49. package/dist/shared/workflow/workflow.d.ts +4 -5
  50. package/dist/shared/workflow/workflow.js +3 -5
  51. package/package.json +6 -2
  52. package/scripts/check-skills-sync.mjs +25 -0
  53. package/scripts/compare-eval-summary.mjs +47 -0
  54. package/scripts/postinstall.mjs +15 -15
  55. package/scripts/prepare-release.sh +97 -0
  56. package/scripts/skills-libretto.mjs +103 -0
  57. package/scripts/summarize-evals.mjs +135 -0
  58. package/scripts/sync-skills.mjs +12 -0
  59. package/skills/libretto/SKILL.md +132 -54
  60. package/skills/libretto/references/action-logs.md +101 -0
  61. package/skills/libretto/references/auth-profiles.md +1 -2
  62. package/skills/libretto/references/code-generation-rules.md +210 -0
  63. package/skills/libretto/references/configuration-file-reference.md +53 -0
  64. package/skills/libretto/references/pages-and-page-targeting.md +1 -1
  65. package/skills/libretto/references/site-security-review.md +143 -0
  66. package/src/cli/cli.ts +23 -110
  67. package/src/cli/commands/browser.ts +94 -70
  68. package/src/cli/commands/execution.ts +233 -102
  69. package/src/cli/commands/init.ts +37 -33
  70. package/src/cli/commands/logs.ts +7 -7
  71. package/src/cli/commands/shared.ts +36 -37
  72. package/src/cli/commands/snapshot.ts +44 -59
  73. package/src/cli/core/ai-config.ts +24 -4
  74. package/src/cli/core/api-snapshot-analyzer.ts +17 -6
  75. package/src/cli/core/browser.ts +260 -49
  76. package/src/cli/core/context.ts +7 -2
  77. package/src/cli/core/session-telemetry.ts +449 -197
  78. package/src/cli/core/session.ts +21 -7
  79. package/src/cli/core/snapshot-analyzer.ts +26 -46
  80. package/src/cli/core/snapshot-api-config.ts +170 -175
  81. package/src/cli/core/telemetry.ts +39 -4
  82. package/src/cli/framework/simple-cli.ts +144 -77
  83. package/src/cli/router.ts +13 -21
  84. package/src/cli/workers/run-integration-runtime.ts +36 -9
  85. package/src/cli/workers/run-integration-worker-protocol.ts +2 -0
  86. package/src/cli/workers/run-integration-worker.ts +1 -4
  87. package/src/index.ts +73 -66
  88. package/src/runtime/download/download.ts +62 -58
  89. package/src/runtime/download/index.ts +5 -5
  90. package/src/runtime/extract/extract.ts +71 -61
  91. package/src/runtime/network/index.ts +3 -3
  92. package/src/runtime/network/network.ts +99 -93
  93. package/src/runtime/recovery/agent.ts +217 -212
  94. package/src/runtime/recovery/errors.ts +107 -104
  95. package/src/runtime/recovery/index.ts +3 -3
  96. package/src/runtime/recovery/recovery.ts +38 -35
  97. package/src/shared/condense-dom/condense-dom.ts +27 -82
  98. package/src/shared/config/config.ts +0 -19
  99. package/src/shared/config/index.ts +0 -5
  100. package/src/shared/debug/pause.ts +57 -51
  101. package/src/shared/dom-semantics.ts +68 -0
  102. package/src/shared/instrumentation/errors.ts +64 -62
  103. package/src/shared/instrumentation/index.ts +5 -5
  104. package/src/shared/instrumentation/instrument.ts +339 -209
  105. package/src/shared/llm/ai-sdk-adapter.ts +58 -55
  106. package/src/shared/llm/client.ts +181 -174
  107. package/src/shared/llm/types.ts +39 -39
  108. package/src/shared/logger/index.ts +11 -4
  109. package/src/shared/logger/logger.ts +312 -306
  110. package/src/shared/logger/sinks.ts +118 -114
  111. package/src/shared/paths/paths.ts +50 -49
  112. package/src/shared/paths/repo-root.ts +17 -17
  113. package/src/shared/run/api.ts +5 -1
  114. package/src/shared/run/browser.ts +65 -3
  115. package/src/shared/state/index.ts +9 -9
  116. package/src/shared/state/session-state.ts +46 -43
  117. package/src/shared/visualization/ghost-cursor.ts +180 -149
  118. package/src/shared/visualization/highlight.ts +89 -86
  119. package/src/shared/visualization/index.ts +13 -13
  120. package/src/shared/workflow/workflow.ts +19 -25
  121. package/skills/libretto/references/reverse-engineering-network-requests.md +0 -39
  122. package/skills/libretto/references/user-action-log.md +0 -31
@@ -1,172 +1,177 @@
1
1
  import type { Page } from "playwright";
2
- import { type MinimalLogger, defaultLogger } from "../../shared/logger/logger.js";
2
+ import {
3
+ type MinimalLogger,
4
+ defaultLogger,
5
+ } from "../../shared/logger/logger.js";
3
6
  import type { LLMClient } from "../../shared/llm/types.js";
4
7
 
5
8
  type BrowserAction =
6
- | { type: "click"; x: number; y: number; button?: string }
7
- | { type: "double_click"; x: number; y: number }
8
- | {
9
- type: "scroll";
10
- x: number;
11
- y: number;
12
- scroll_x: number;
13
- scroll_y: number;
14
- }
15
- | { type: "keypress"; keys: string[] }
16
- | { type: "type"; text: string }
17
- | { type: "wait" }
18
- | { type: "screenshot" }
19
- | { type: "drag"; path: { x: number; y: number }[] }
20
- | { type: "move"; x: number; y: number }
21
- | { type: "done" };
9
+ | { type: "click"; x: number; y: number; button?: string }
10
+ | { type: "double_click"; x: number; y: number }
11
+ | {
12
+ type: "scroll";
13
+ x: number;
14
+ y: number;
15
+ scroll_x: number;
16
+ scroll_y: number;
17
+ }
18
+ | { type: "keypress"; keys: string[] }
19
+ | { type: "type"; text: string }
20
+ | { type: "wait" }
21
+ | { type: "screenshot" }
22
+ | { type: "drag"; path: { x: number; y: number }[] }
23
+ | { type: "move"; x: number; y: number }
24
+ | { type: "done" };
22
25
 
23
26
  function delay(ms: number): Promise<void> {
24
- return new Promise((resolve) => setTimeout(resolve, ms));
27
+ return new Promise((resolve) => setTimeout(resolve, ms));
25
28
  }
26
29
 
27
30
  const KEY_MAPPINGS: Record<string, string> = {
28
- ENTER: "Enter",
29
- RETURN: "Enter",
30
- TAB: "Tab",
31
- SPACE: " ",
32
- BACKSPACE: "Backspace",
33
- DELETE: "Delete",
34
- ESCAPE: "Escape",
35
- ESC: "Escape",
36
- UP: "ArrowUp",
37
- DOWN: "ArrowDown",
38
- LEFT: "ArrowLeft",
39
- RIGHT: "ArrowRight",
40
- HOME: "Home",
41
- END: "End",
42
- PAGEUP: "PageUp",
43
- PAGEDOWN: "PageDown",
44
- CTRL: "Control",
45
- CONTROL: "Control",
46
- ALT: "Alt",
47
- SHIFT: "Shift",
48
- META: "Meta",
49
- CMD: "Meta",
50
- COMMAND: "Meta",
31
+ ENTER: "Enter",
32
+ RETURN: "Enter",
33
+ TAB: "Tab",
34
+ SPACE: " ",
35
+ BACKSPACE: "Backspace",
36
+ DELETE: "Delete",
37
+ ESCAPE: "Escape",
38
+ ESC: "Escape",
39
+ UP: "ArrowUp",
40
+ DOWN: "ArrowDown",
41
+ LEFT: "ArrowLeft",
42
+ RIGHT: "ArrowRight",
43
+ HOME: "Home",
44
+ END: "End",
45
+ PAGEUP: "PageUp",
46
+ PAGEDOWN: "PageDown",
47
+ CTRL: "Control",
48
+ CONTROL: "Control",
49
+ ALT: "Alt",
50
+ SHIFT: "Shift",
51
+ META: "Meta",
52
+ CMD: "Meta",
53
+ COMMAND: "Meta",
51
54
  };
52
55
 
53
56
  function mapKeyName(key: string): string {
54
- return KEY_MAPPINGS[key.toUpperCase()] ?? key;
57
+ return KEY_MAPPINGS[key.toUpperCase()] ?? key;
55
58
  }
56
59
 
57
60
  async function executeBrowserAction(
58
- page: Page,
59
- action: BrowserAction,
60
- logger: MinimalLogger = defaultLogger,
61
+ page: Page,
62
+ action: BrowserAction,
63
+ logger: MinimalLogger = defaultLogger,
61
64
  ): Promise<void> {
62
- switch (action.type) {
63
- case "click": {
64
- const { x, y, button = "left" } = action;
65
- const playwrightButton =
66
- button === "wheel" || button === "back" || button === "forward"
67
- ? ("left" as const)
68
- : (button as "left" | "right" | "middle");
69
- await page.mouse.click(x, y, { button: playwrightButton });
70
- logger.info(`Clicked at (${x}, ${y}) with ${button} button`);
71
- break;
72
- }
73
- case "double_click": {
74
- const { x, y } = action;
75
- await page.mouse.dblclick(x, y);
76
- logger.info(`Double-clicked at (${x}, ${y})`);
77
- break;
78
- }
79
- case "scroll": {
80
- const { x, y, scroll_x, scroll_y } = action;
81
- await page.mouse.move(x, y);
82
- await page.evaluate(`window.scrollBy(${scroll_x}, ${scroll_y})`);
83
- logger.info(`Scrolled at (${x}, ${y}) by (${scroll_x}, ${scroll_y})`);
84
- break;
85
- }
86
- case "keypress": {
87
- for (const key of action.keys) {
88
- const mapped = mapKeyName(key);
89
- await page.keyboard.press(mapped);
90
- logger.info(`Pressed key: ${key} (mapped to ${mapped})`);
91
- }
92
- break;
93
- }
94
- case "type": {
95
- await page.keyboard.type(action.text);
96
- logger.info(`Typed text: ${action.text}`);
97
- break;
98
- }
99
- case "wait": {
100
- await delay(2000);
101
- logger.info("Waited 2 seconds");
102
- break;
103
- }
104
- case "screenshot": {
105
- logger.info("Screenshot action (no-op, taken automatically)");
106
- break;
107
- }
108
- case "drag": {
109
- const { path } = action;
110
- const start = path[0];
111
- const end = path[path.length - 1];
112
- if (path.length >= 2 && start && end) {
113
- await page.mouse.move(start.x, start.y);
114
- await page.mouse.down();
115
- for (let i = 1; i < path.length; i++) {
116
- const point = path[i];
117
- if (point) await page.mouse.move(point.x, point.y);
118
- }
119
- await page.mouse.up();
120
- logger.info(`Dragged from (${start.x}, ${start.y}) to (${end.x}, ${end.y})`);
121
- }
122
- break;
123
- }
124
- case "move": {
125
- const { x, y } = action;
126
- await page.mouse.move(x, y);
127
- logger.info(`Moved mouse to (${x}, ${y})`);
128
- break;
129
- }
130
- case "done": {
131
- break;
132
- }
133
- }
65
+ switch (action.type) {
66
+ case "click": {
67
+ const { x, y, button = "left" } = action;
68
+ const playwrightButton =
69
+ button === "wheel" || button === "back" || button === "forward"
70
+ ? ("left" as const)
71
+ : (button as "left" | "right" | "middle");
72
+ await page.mouse.click(x, y, { button: playwrightButton });
73
+ logger.info(`Clicked at (${x}, ${y}) with ${button} button`);
74
+ break;
75
+ }
76
+ case "double_click": {
77
+ const { x, y } = action;
78
+ await page.mouse.dblclick(x, y);
79
+ logger.info(`Double-clicked at (${x}, ${y})`);
80
+ break;
81
+ }
82
+ case "scroll": {
83
+ const { x, y, scroll_x, scroll_y } = action;
84
+ await page.mouse.move(x, y);
85
+ await page.evaluate(`window.scrollBy(${scroll_x}, ${scroll_y})`);
86
+ logger.info(`Scrolled at (${x}, ${y}) by (${scroll_x}, ${scroll_y})`);
87
+ break;
88
+ }
89
+ case "keypress": {
90
+ for (const key of action.keys) {
91
+ const mapped = mapKeyName(key);
92
+ await page.keyboard.press(mapped);
93
+ logger.info(`Pressed key: ${key} (mapped to ${mapped})`);
94
+ }
95
+ break;
96
+ }
97
+ case "type": {
98
+ await page.keyboard.type(action.text);
99
+ logger.info(`Typed text: ${action.text}`);
100
+ break;
101
+ }
102
+ case "wait": {
103
+ await delay(2000);
104
+ logger.info("Waited 2 seconds");
105
+ break;
106
+ }
107
+ case "screenshot": {
108
+ logger.info("Screenshot action (no-op, taken automatically)");
109
+ break;
110
+ }
111
+ case "drag": {
112
+ const { path } = action;
113
+ const start = path[0];
114
+ const end = path[path.length - 1];
115
+ if (path.length >= 2 && start && end) {
116
+ await page.mouse.move(start.x, start.y);
117
+ await page.mouse.down();
118
+ for (let i = 1; i < path.length; i++) {
119
+ const point = path[i];
120
+ if (point) await page.mouse.move(point.x, point.y);
121
+ }
122
+ await page.mouse.up();
123
+ logger.info(
124
+ `Dragged from (${start.x}, ${start.y}) to (${end.x}, ${end.y})`,
125
+ );
126
+ }
127
+ break;
128
+ }
129
+ case "move": {
130
+ const { x, y } = action;
131
+ await page.mouse.move(x, y);
132
+ logger.info(`Moved mouse to (${x}, ${y})`);
133
+ break;
134
+ }
135
+ case "done": {
136
+ break;
137
+ }
138
+ }
134
139
  }
135
140
 
136
141
  import { z } from "zod";
137
142
 
138
143
  const recoveryActionSchema = z.object({
139
- reasoning: z
140
- .string()
141
- .describe("Your reasoning about what you see and what action to take"),
142
- action: z.discriminatedUnion("type", [
143
- z.object({
144
- type: z.literal("click"),
145
- x: z.number(),
146
- y: z.number(),
147
- }),
148
- z.object({
149
- type: z.literal("type"),
150
- text: z.string(),
151
- }),
152
- z.object({
153
- type: z.literal("keypress"),
154
- keys: z.array(z.string()),
155
- }),
156
- z.object({
157
- type: z.literal("scroll"),
158
- x: z.number(),
159
- y: z.number(),
160
- scroll_x: z.number(),
161
- scroll_y: z.number(),
162
- }),
163
- z.object({
164
- type: z.literal("wait"),
165
- }),
166
- z.object({
167
- type: z.literal("done"),
168
- }),
169
- ]),
144
+ reasoning: z
145
+ .string()
146
+ .describe("Your reasoning about what you see and what action to take"),
147
+ action: z.discriminatedUnion("type", [
148
+ z.object({
149
+ type: z.literal("click"),
150
+ x: z.number(),
151
+ y: z.number(),
152
+ }),
153
+ z.object({
154
+ type: z.literal("type"),
155
+ text: z.string(),
156
+ }),
157
+ z.object({
158
+ type: z.literal("keypress"),
159
+ keys: z.array(z.string()),
160
+ }),
161
+ z.object({
162
+ type: z.literal("scroll"),
163
+ x: z.number(),
164
+ y: z.number(),
165
+ scroll_x: z.number(),
166
+ scroll_y: z.number(),
167
+ }),
168
+ z.object({
169
+ type: z.literal("wait"),
170
+ }),
171
+ z.object({
172
+ type: z.literal("done"),
173
+ }),
174
+ ]),
170
175
  });
171
176
 
172
177
  /**
@@ -175,82 +180,82 @@ const recoveryActionSchema = z.object({
175
180
  * the LLM's suggested browser actions.
176
181
  */
177
182
  export async function executeRecoveryAgent(
178
- page: Page,
179
- instruction: string,
180
- logger?: MinimalLogger,
181
- llmClient?: LLMClient,
183
+ page: Page,
184
+ instruction: string,
185
+ logger?: MinimalLogger,
186
+ llmClient?: LLMClient,
182
187
  ): Promise<void> {
183
- if (!llmClient) {
184
- return;
185
- }
186
- const log = logger ?? defaultLogger;
187
- log.info("Executing vision-based recovery agent", { instruction });
188
+ if (!llmClient) {
189
+ return;
190
+ }
191
+ const log = logger ?? defaultLogger;
192
+ log.info("Executing vision-based recovery agent", { instruction });
188
193
 
189
- const viewport = page.viewportSize();
190
- if (!viewport) {
191
- throw new Error("Viewport size not found");
192
- }
194
+ const viewport = page.viewportSize();
195
+ if (!viewport) {
196
+ throw new Error("Viewport size not found");
197
+ }
193
198
 
194
- let screenshot: string;
195
- try {
196
- screenshot = (
197
- await page.screenshot({ fullPage: false, timeout: 10000 })
198
- ).toString("base64");
199
- } catch (screenshotError) {
200
- log.warn("Failed to take screenshot for recovery agent, skipping", {
201
- screenshotError:
202
- screenshotError instanceof Error
203
- ? screenshotError.message
204
- : String(screenshotError),
205
- });
206
- throw new Error("Failed to take screenshot for recovery agent");
207
- }
199
+ let screenshot: string;
200
+ try {
201
+ screenshot = (
202
+ await page.screenshot({ fullPage: false, timeout: 10000 })
203
+ ).toString("base64");
204
+ } catch (screenshotError) {
205
+ log.warn("Failed to take screenshot for recovery agent, skipping", {
206
+ screenshotError:
207
+ screenshotError instanceof Error
208
+ ? screenshotError.message
209
+ : String(screenshotError),
210
+ });
211
+ throw new Error("Failed to take screenshot for recovery agent");
212
+ }
208
213
 
209
- const maxSteps = 3;
210
- for (let step = 1; step <= maxSteps; step++) {
211
- const result = await llmClient.generateObjectFromMessages({
212
- schema: recoveryActionSchema,
213
- messages: [
214
- {
215
- role: "user",
216
- content: [
217
- {
218
- type: "text",
219
- text: `You are an expert browser support agent. Your job is to resolve issues when browser automation encounters unexpected website behavior (e.g., popups blocking interaction).
214
+ const maxSteps = 3;
215
+ for (let step = 1; step <= maxSteps; step++) {
216
+ const result = await llmClient.generateObjectFromMessages({
217
+ schema: recoveryActionSchema,
218
+ messages: [
219
+ {
220
+ role: "user",
221
+ content: [
222
+ {
223
+ type: "text",
224
+ text: `You are an expert browser support agent. Your job is to resolve issues when browser automation encounters unexpected website behavior (e.g., popups blocking interaction).
220
225
 
221
226
  Your task: ${instruction}
222
227
 
223
228
  Viewport: ${viewport.width}x${viewport.height}px. Complete this in as few steps as possible.
224
229
  Analyze the screenshot and decide what action to take. If the task is complete or no action is needed, use the "done" action type.`,
225
- },
226
- {
227
- type: "image",
228
- image: `data:image/png;base64,${screenshot}`,
229
- },
230
- ],
231
- },
232
- ],
233
- temperature: 0,
234
- });
230
+ },
231
+ {
232
+ type: "image",
233
+ image: `data:image/png;base64,${screenshot}`,
234
+ },
235
+ ],
236
+ },
237
+ ],
238
+ temperature: 0,
239
+ });
235
240
 
236
- log.info(`Recovery step ${step}/${maxSteps}`, {
237
- reasoning: result.reasoning,
238
- action: result.action,
239
- });
241
+ log.info(`Recovery step ${step}/${maxSteps}`, {
242
+ reasoning: result.reasoning,
243
+ action: result.action,
244
+ });
240
245
 
241
- if (result.action.type === "done") {
242
- log.info("Recovery agent completed - no more actions needed");
243
- break;
244
- }
246
+ if (result.action.type === "done") {
247
+ log.info("Recovery agent completed - no more actions needed");
248
+ break;
249
+ }
245
250
 
246
- await executeBrowserAction(page, result.action, log);
247
- await delay(2000);
251
+ await executeBrowserAction(page, result.action, log);
252
+ await delay(2000);
248
253
 
249
- // Take new screenshot for next iteration
250
- screenshot = (await page.screenshot({ fullPage: false })).toString(
251
- "base64",
252
- );
253
- }
254
+ // Take new screenshot for next iteration
255
+ screenshot = (await page.screenshot({ fullPage: false })).toString(
256
+ "base64",
257
+ );
258
+ }
254
259
 
255
- log.info("Recovery agent execution completed");
260
+ log.info("Recovery agent execution completed");
256
261
  }