@ticktockbent/charlotte 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/CHANGELOG.md +36 -0
  2. package/README.md +2 -4
  3. package/dist/browser/browser-manager.d.ts +3 -0
  4. package/dist/browser/browser-manager.d.ts.map +1 -1
  5. package/dist/browser/browser-manager.js +13 -2
  6. package/dist/browser/browser-manager.js.map +1 -1
  7. package/dist/cli.d.ts +1 -0
  8. package/dist/cli.d.ts.map +1 -1
  9. package/dist/cli.js +41 -16
  10. package/dist/cli.js.map +1 -1
  11. package/dist/index.js +2 -6
  12. package/dist/index.js.map +1 -1
  13. package/dist/renderer/interactive-extractor.d.ts.map +1 -1
  14. package/dist/renderer/interactive-extractor.js +10 -2
  15. package/dist/renderer/interactive-extractor.js.map +1 -1
  16. package/dist/renderer/layout-extractor.d.ts.map +1 -1
  17. package/dist/renderer/layout-extractor.js +10 -2
  18. package/dist/renderer/layout-extractor.js.map +1 -1
  19. package/dist/renderer/renderer-pipeline.d.ts +2 -2
  20. package/dist/renderer/renderer-pipeline.d.ts.map +1 -1
  21. package/dist/renderer/renderer-pipeline.js +5 -3
  22. package/dist/renderer/renderer-pipeline.js.map +1 -1
  23. package/dist/server.d.ts.map +1 -1
  24. package/dist/server.js +9 -4
  25. package/dist/server.js.map +1 -1
  26. package/dist/state/snapshot-store.d.ts +4 -0
  27. package/dist/state/snapshot-store.d.ts.map +1 -1
  28. package/dist/state/snapshot-store.js +15 -3
  29. package/dist/state/snapshot-store.js.map +1 -1
  30. package/dist/tools/dev-mode.d.ts.map +1 -1
  31. package/dist/tools/dev-mode.js +10 -10
  32. package/dist/tools/dev-mode.js.map +1 -1
  33. package/dist/tools/dialog.js +5 -5
  34. package/dist/tools/dialog.js.map +1 -1
  35. package/dist/tools/evaluate.d.ts +1 -1
  36. package/dist/tools/evaluate.d.ts.map +1 -1
  37. package/dist/tools/evaluate.js +3 -2
  38. package/dist/tools/evaluate.js.map +1 -1
  39. package/dist/tools/interaction-helpers.d.ts +55 -0
  40. package/dist/tools/interaction-helpers.d.ts.map +1 -0
  41. package/dist/tools/interaction-helpers.js +312 -0
  42. package/dist/tools/interaction-helpers.js.map +1 -0
  43. package/dist/tools/interaction.d.ts +1 -17
  44. package/dist/tools/interaction.d.ts.map +1 -1
  45. package/dist/tools/interaction.js +120 -500
  46. package/dist/tools/interaction.js.map +1 -1
  47. package/dist/tools/meta-tool.d.ts +2 -2
  48. package/dist/tools/meta-tool.js +3 -3
  49. package/dist/tools/monitoring.js +9 -9
  50. package/dist/tools/monitoring.js.map +1 -1
  51. package/dist/tools/navigation.d.ts.map +1 -1
  52. package/dist/tools/navigation.js +36 -76
  53. package/dist/tools/navigation.js.map +1 -1
  54. package/dist/tools/observation.d.ts.map +1 -1
  55. package/dist/tools/observation.js +94 -98
  56. package/dist/tools/observation.js.map +1 -1
  57. package/dist/tools/session.d.ts.map +1 -1
  58. package/dist/tools/session.js +135 -184
  59. package/dist/tools/session.js.map +1 -1
  60. package/dist/tools/tool-groups.d.ts +8 -8
  61. package/dist/tools/tool-groups.d.ts.map +1 -1
  62. package/dist/tools/tool-groups.js +113 -142
  63. package/dist/tools/tool-groups.js.map +1 -1
  64. package/dist/tools/tool-helpers.d.ts +18 -0
  65. package/dist/tools/tool-helpers.d.ts.map +1 -1
  66. package/dist/tools/tool-helpers.js +69 -2
  67. package/dist/tools/tool-helpers.js.map +1 -1
  68. package/dist/tools/wait-for.d.ts +5 -0
  69. package/dist/tools/wait-for.d.ts.map +1 -0
  70. package/dist/tools/wait-for.js +169 -0
  71. package/dist/tools/wait-for.js.map +1 -0
  72. package/dist/types/config.d.ts +16 -0
  73. package/dist/types/config.d.ts.map +1 -1
  74. package/dist/types/config.js +15 -0
  75. package/dist/types/config.js.map +1 -1
  76. package/dist/utils/wait.js +11 -5
  77. package/dist/utils/wait.js.map +1 -1
  78. package/package.json +1 -1
@@ -2,322 +2,15 @@ import * as fs from "node:fs/promises";
2
2
  import { z } from "zod";
3
3
  import { CharlotteError, CharlotteErrorCode } from "../types/errors.js";
4
4
  import { logger } from "../utils/logger.js";
5
- import { renderActivePage, renderAfterAction, resolveElement, formatPageResponse, handleToolError, coercedBoolean, } from "./tool-helpers.js";
6
- /** Maps short modifier names to Puppeteer KeyInput values. */
7
- const MODIFIER_KEY_MAP = {
8
- ctrl: "Control",
9
- shift: "Shift",
10
- alt: "Alt",
11
- meta: "Meta",
12
- };
13
- /**
14
- * Click an element by backend node ID using CDP to get coordinates,
15
- * or more simply by resolving to an XPath/selector and using page.click.
16
- *
17
- * The most reliable approach: use CDP to get the element's coordinates, then click at those coords.
18
- */
19
- async function clickElementByBackendNodeId(page, backendNodeId, clickType = "left", modifiers = []) {
20
- // Get the element's box model to find clickable coordinates
21
- const cdpSession = await page.createCDPSession();
22
- try {
23
- // First, scroll the element into view
24
- await cdpSession.send("DOM.scrollIntoViewIfNeeded", { backendNodeId });
25
- // Get box model for coordinates
26
- const { model } = await cdpSession.send("DOM.getBoxModel", {
27
- backendNodeId,
28
- });
29
- if (!model) {
30
- throw new CharlotteError(CharlotteErrorCode.ELEMENT_NOT_FOUND, "Element has no visible box model — it may be hidden or zero-sized.", "Call charlotte:observe to check the element's state.");
31
- }
32
- // content quad: [x1,y1, x2,y2, x3,y3, x4,y4]
33
- const contentQuad = model.content;
34
- const centerX = (contentQuad[0] + contentQuad[2] + contentQuad[4] + contentQuad[6]) / 4;
35
- const centerY = (contentQuad[1] + contentQuad[3] + contentQuad[5] + contentQuad[7]) / 4;
36
- // Hold down modifier keys before the click
37
- for (const modifier of modifiers) {
38
- const modifierKey = MODIFIER_KEY_MAP[modifier];
39
- await page.keyboard.down(modifierKey);
40
- }
41
- try {
42
- if (clickType === "right") {
43
- await page.mouse.click(centerX, centerY, { button: "right" });
44
- }
45
- else if (clickType === "double") {
46
- await page.mouse.click(centerX, centerY, { clickCount: 2 });
47
- }
48
- else {
49
- await page.mouse.click(centerX, centerY);
50
- }
51
- }
52
- finally {
53
- // Release modifier keys in reverse order (always release even if click fails)
54
- for (const modifier of [...modifiers].reverse()) {
55
- const modifierKey = MODIFIER_KEY_MAP[modifier];
56
- await page.keyboard.up(modifierKey);
57
- }
58
- }
59
- }
60
- finally {
61
- await cdpSession.detach();
62
- }
63
- }
64
- /**
65
- * Wait for any navigation triggered by an action, or fall back to a brief settle pause.
66
- *
67
- * Listens for the `framenavigated` CDP event to detect if a click caused navigation.
68
- * If navigation is detected within `detectionWindowMs`, waits for the page load event
69
- * (up to `loadTimeoutMs`). If no navigation fires, returns after `settleMs`.
70
- *
71
- * Also races against dialog events — if the action triggers a JavaScript dialog
72
- * (alert, confirm, prompt, beforeunload), the action promise will block indefinitely.
73
- * This function detects that and returns early so the caller can surface `pending_dialog`.
74
- */
75
- export async function waitForPossibleNavigation(page, action, { detectionWindowMs = 500, loadTimeoutMs = 10000, settleMs = 50 } = {}) {
76
- let navigationDetected = false;
77
- let dialogDetected = false;
78
- // Listen for navigation start via page event (fires on any navigation)
79
- const navigationStartPromise = new Promise((resolve) => {
80
- const handler = () => {
81
- navigationDetected = true;
82
- page.off("framenavigated", handler);
83
- resolve();
84
- };
85
- page.on("framenavigated", handler);
86
- // Clean up listener if no navigation fires within detection window
87
- setTimeout(() => {
88
- page.off("framenavigated", handler);
89
- resolve();
90
- }, detectionWindowMs);
91
- });
92
- // Listen for dialog (blocks the action from completing)
93
- const dialogPromise = new Promise((resolve) => {
94
- const handler = () => {
95
- dialogDetected = true;
96
- page.off("dialog", handler);
97
- resolve();
98
- };
99
- page.on("dialog", handler);
100
- // Clean up on timeout — if no dialog fires, we don't need this listener
101
- setTimeout(() => {
102
- page.off("dialog", handler);
103
- resolve();
104
- }, detectionWindowMs);
105
- });
106
- // Race: action vs dialog
107
- const actionPromise = action();
108
- await Promise.race([
109
- actionPromise.then(() => "action"),
110
- dialogPromise.then(() => "dialog"),
111
- ]);
112
- if (dialogDetected) {
113
- // Dialog is blocking the action. Don't await actionPromise — it will
114
- // resolve later when the dialog is handled via charlotte:dialog.
115
- // Guard against unhandled rejection from the fire-and-forget promise.
116
- actionPromise.catch(() => {
117
- logger.debug("Post-dialog action promise rejected (expected)");
118
- });
119
- return;
120
- }
121
- // Action completed normally — check for navigation
122
- await navigationStartPromise;
123
- if (navigationDetected) {
124
- // Navigation occurred — wait for the page to finish loading
125
- try {
126
- await page.waitForNavigation({ waitUntil: "load", timeout: loadTimeoutMs });
127
- }
128
- catch {
129
- // Page may have already finished loading before we called waitForNavigation,
130
- // or the load timed out. Either way, render what we have.
131
- logger.debug("Post-navigation load wait ended (page may already be loaded)");
132
- }
133
- }
134
- else {
135
- // No navigation — brief settle for in-page DOM updates
136
- await new Promise((resolve) => setTimeout(resolve, settleMs));
137
- }
138
- }
139
- /**
140
- * Focus an element by backend node ID using CDP.
141
- */
142
- async function focusElementByBackendNodeId(page, backendNodeId) {
143
- const cdpSession = await page.createCDPSession();
144
- try {
145
- await cdpSession.send("DOM.focus", { backendNodeId });
146
- }
147
- finally {
148
- await cdpSession.detach();
149
- }
150
- }
151
- /**
152
- * Hover over an element by backend node ID.
153
- */
154
- async function hoverElementByBackendNodeId(page, backendNodeId) {
155
- const cdpSession = await page.createCDPSession();
156
- try {
157
- await cdpSession.send("DOM.scrollIntoViewIfNeeded", { backendNodeId });
158
- const { model } = await cdpSession.send("DOM.getBoxModel", {
159
- backendNodeId,
160
- });
161
- if (!model) {
162
- throw new CharlotteError(CharlotteErrorCode.ELEMENT_NOT_FOUND, "Element has no visible box model for hover.");
163
- }
164
- const contentQuad = model.content;
165
- const centerX = (contentQuad[0] + contentQuad[2] + contentQuad[4] + contentQuad[6]) / 4;
166
- const centerY = (contentQuad[1] + contentQuad[3] + contentQuad[5] + contentQuad[7]) / 4;
167
- await page.mouse.move(centerX, centerY);
168
- }
169
- finally {
170
- await cdpSession.detach();
171
- }
172
- }
173
- /**
174
- * Get the center coordinates of an element by backend node ID.
175
- * Scrolls the element into view first.
176
- */
177
- async function getElementCenter(page, backendNodeId) {
178
- const cdpSession = await page.createCDPSession();
179
- try {
180
- await cdpSession.send("DOM.scrollIntoViewIfNeeded", { backendNodeId });
181
- const { model } = await cdpSession.send("DOM.getBoxModel", {
182
- backendNodeId,
183
- });
184
- if (!model) {
185
- throw new CharlotteError(CharlotteErrorCode.ELEMENT_NOT_FOUND, "Element has no visible box model — it may be hidden or zero-sized.", "Call charlotte:observe to check the element's state.");
186
- }
187
- const contentQuad = model.content;
188
- return {
189
- x: (contentQuad[0] + contentQuad[2] + contentQuad[4] + contentQuad[6]) / 4,
190
- y: (contentQuad[1] + contentQuad[3] + contentQuad[5] + contentQuad[7]) / 4,
191
- };
192
- }
193
- finally {
194
- await cdpSession.detach();
195
- }
196
- }
197
- /**
198
- * Drag one element to another using mouse primitives.
199
- * Sequence: move to source → mousedown → move to target → mouseup
200
- * Includes intermediate move steps and delays to ensure drag events fire reliably.
201
- */
202
- async function dragElementToElement(page, sourceBackendNodeId, targetBackendNodeId) {
203
- const sourceCenter = await getElementCenter(page, sourceBackendNodeId);
204
- const targetCenter = await getElementCenter(page, targetBackendNodeId);
205
- // Move to source and press down
206
- await page.mouse.move(sourceCenter.x, sourceCenter.y);
207
- await page.mouse.down();
208
- // Intermediate move to trigger dragstart (some browsers need movement to begin a drag)
209
- await page.mouse.move(sourceCenter.x + (targetCenter.x - sourceCenter.x) * 0.25, sourceCenter.y + (targetCenter.y - sourceCenter.y) * 0.25, { steps: 5 });
210
- await new Promise((resolve) => setTimeout(resolve, 50));
211
- // Move to target
212
- await page.mouse.move(targetCenter.x, targetCenter.y, { steps: 10 });
213
- await new Promise((resolve) => setTimeout(resolve, 50));
214
- // Release
215
- await page.mouse.up();
216
- }
217
- /**
218
- * Type text into an input element. Uses CDP to focus, optionally clears, then types via keyboard.
219
- */
220
- async function typeIntoElement(page, backendNodeId, text, clearFirst, pressEnter) {
221
- // Focus the element
222
- await focusElementByBackendNodeId(page, backendNodeId);
223
- if (clearFirst) {
224
- // Select all text then delete — works cross-platform
225
- await page.keyboard.down("Control");
226
- await page.keyboard.press("a");
227
- await page.keyboard.up("Control");
228
- await page.keyboard.press("Backspace");
229
- }
230
- // Type the text character by character
231
- await page.keyboard.type(text);
232
- if (pressEnter) {
233
- await page.keyboard.press("Enter");
234
- }
235
- }
236
- /**
237
- * Select a value in a <select> element using CDP to set the value and dispatch change events.
238
- */
239
- async function selectOptionByBackendNodeId(page, backendNodeId, value) {
240
- const cdpSession = await page.createCDPSession();
241
- try {
242
- // Resolve the node to get a remote object reference
243
- const { object } = await cdpSession.send("DOM.resolveNode", {
244
- backendNodeId,
245
- });
246
- if (!object?.objectId) {
247
- throw new CharlotteError(CharlotteErrorCode.ELEMENT_NOT_FOUND, "Could not resolve select element.");
248
- }
249
- // Use Runtime.callFunctionOn to set the value and fire events
250
- await cdpSession.send("Runtime.callFunctionOn", {
251
- objectId: object.objectId,
252
- functionDeclaration: `function(targetValue) {
253
- const options = Array.from(this.options);
254
- const matchByValue = options.find(o => o.value === targetValue);
255
- const matchByText = options.find(o => o.textContent.trim() === targetValue);
256
- const match = matchByValue || matchByText;
257
- if (match) {
258
- this.value = match.value;
259
- this.dispatchEvent(new Event('input', { bubbles: true }));
260
- this.dispatchEvent(new Event('change', { bubbles: true }));
261
- } else {
262
- throw new Error('Option "' + targetValue + '" not found');
263
- }
264
- }`,
265
- arguments: [{ value }],
266
- });
267
- }
268
- finally {
269
- await cdpSession.detach();
270
- }
271
- }
272
- /**
273
- * Submit a form by backend node ID — calls form.submit() via CDP.
274
- */
275
- async function submitFormByBackendNodeId(page, backendNodeId) {
276
- const cdpSession = await page.createCDPSession();
277
- try {
278
- const { object } = await cdpSession.send("DOM.resolveNode", {
279
- backendNodeId,
280
- });
281
- if (!object?.objectId) {
282
- throw new CharlotteError(CharlotteErrorCode.ELEMENT_NOT_FOUND, "Could not resolve form element.");
283
- }
284
- await cdpSession.send("Runtime.callFunctionOn", {
285
- objectId: object.objectId,
286
- functionDeclaration: `function() {
287
- this.dispatchEvent(new Event('submit', { bubbles: true, cancelable: true }));
288
- }`,
289
- });
290
- }
291
- finally {
292
- await cdpSession.detach();
293
- }
294
- }
295
- /**
296
- * Set files on a file input element using CDP DOM.setFileInputFiles.
297
- * Validates that the target element is actually an <input type="file">.
298
- */
299
- async function setFileInputFiles(page, backendNodeId, filePaths) {
300
- const cdpSession = await page.createCDPSession();
301
- try {
302
- const { node } = await cdpSession.send("DOM.describeNode", { backendNodeId });
303
- const isFileInput = node.nodeName === "INPUT" &&
304
- (node.attributes ?? []).some((attr, i, arr) => attr === "type" && arr[i + 1] === "file");
305
- if (!isFileInput) {
306
- throw new CharlotteError(CharlotteErrorCode.SESSION_ERROR, "Element is not a file input.", "Use charlotte:find to locate file_input elements.");
307
- }
308
- await cdpSession.send("DOM.setFileInputFiles", {
309
- files: filePaths,
310
- backendNodeId,
311
- });
312
- }
313
- finally {
314
- await cdpSession.detach();
315
- }
316
- }
5
+ import { ensureReady, renderActivePage, renderAfterAction, resolveElement, formatPageResponse, handleToolError, coercedBoolean, } from "./tool-helpers.js";
6
+ import { MODIFIER_KEY_MAP, clickElementByBackendNodeId, focusElementByBackendNodeId, hoverElementByBackendNodeId, dragElementToElement, typeIntoElement, selectOptionByBackendNodeId, submitFormByBackendNodeId, setFileInputFiles, waitForPossibleNavigation, } from "./interaction-helpers.js";
7
+ import { registerWaitForTools } from "./wait-for.js";
8
+ // Re-export for backward compatibility (used by dialog and popup integration tests)
9
+ export { waitForPossibleNavigation } from "./interaction-helpers.js";
317
10
  export function registerInteractionTools(server, deps) {
318
11
  const tools = {};
319
- // ─── charlotte:click ───
320
- tools["charlotte:click"] = server.registerTool("charlotte:click", {
12
+ // ─── charlotte_click ───
13
+ tools["charlotte_click"] = server.registerTool("charlotte_click", {
321
14
  description: "Click an interactive element on the page. Returns full page representation after the click.",
322
15
  inputSchema: {
323
16
  element_id: z.string().describe("Target element ID from page representation"),
@@ -332,7 +25,7 @@ export function registerInteractionTools(server, deps) {
332
25
  },
333
26
  }, async ({ element_id, click_type, modifiers }) => {
334
27
  try {
335
- await deps.browserManager.ensureConnected();
28
+ await ensureReady(deps);
336
29
  const { page, backendNodeId } = await resolveElement(deps, element_id);
337
30
  const clickVariant = click_type ?? "left";
338
31
  const activeModifiers = modifiers ?? [];
@@ -349,8 +42,8 @@ export function registerInteractionTools(server, deps) {
349
42
  return handleToolError(error);
350
43
  }
351
44
  });
352
- // ─── charlotte:click_at ───
353
- tools["charlotte:click_at"] = server.registerTool("charlotte:click_at", {
45
+ // ─── charlotte_click_at ───
46
+ tools["charlotte_click_at"] = server.registerTool("charlotte_click_at", {
354
47
  description: "Click at specific page coordinates. Use when target elements are not in the accessibility tree (custom widgets, canvas, non-semantic interactive divs). Dispatches real CDP-level mouse events. Returns full page representation after the click.",
355
48
  inputSchema: {
356
49
  x: z.number().describe("X coordinate in page pixels"),
@@ -366,7 +59,7 @@ export function registerInteractionTools(server, deps) {
366
59
  },
367
60
  }, async ({ x, y, click_type, modifiers }) => {
368
61
  try {
369
- await deps.browserManager.ensureConnected();
62
+ await ensureReady(deps);
370
63
  const page = deps.pageManager.getActivePage();
371
64
  const clickVariant = click_type ?? "left";
372
65
  const activeModifiers = modifiers ?? [];
@@ -413,8 +106,8 @@ export function registerInteractionTools(server, deps) {
413
106
  return handleToolError(error);
414
107
  }
415
108
  });
416
- // ─── charlotte:type ───
417
- tools["charlotte:type"] = server.registerTool("charlotte:type", {
109
+ // ─── charlotte_type ───
110
+ tools["charlotte_type"] = server.registerTool("charlotte_type", {
418
111
  description: "Type text into an input element. Returns full page representation after typing.",
419
112
  inputSchema: {
420
113
  element_id: z.string().describe("Target input element ID"),
@@ -425,20 +118,30 @@ export function registerInteractionTools(server, deps) {
425
118
  press_enter: coercedBoolean
426
119
  .optional()
427
120
  .describe("Press Enter after typing (default: false)"),
121
+ slowly: coercedBoolean
122
+ .optional()
123
+ .describe("Type one character at a time with a delay between keystrokes. Use for sites with autocomplete, search-as-you-type, or per-key validation (default: false)"),
124
+ character_delay: z
125
+ .number()
126
+ .min(1)
127
+ .optional()
128
+ .describe("Milliseconds between keystrokes (implies slowly: true). Default when slowly is true: 50ms"),
428
129
  },
429
- }, async ({ element_id, text, clear_first, press_enter }) => {
130
+ }, async ({ element_id, text, clear_first, press_enter, slowly, character_delay }) => {
430
131
  try {
431
- await deps.browserManager.ensureConnected();
132
+ await ensureReady(deps);
432
133
  const { page, backendNodeId } = await resolveElement(deps, element_id);
433
134
  const shouldClearFirst = clear_first ?? true;
434
135
  const shouldPressEnter = press_enter ?? false;
136
+ const delayMs = character_delay ?? (slowly ? 50 : undefined);
435
137
  logger.info("Typing into element", {
436
138
  element_id,
437
139
  textLength: text.length,
438
140
  clearFirst: shouldClearFirst,
439
141
  pressEnter: shouldPressEnter,
142
+ characterDelay: delayMs,
440
143
  });
441
- await typeIntoElement(page, backendNodeId, text, shouldClearFirst, shouldPressEnter);
144
+ await typeIntoElement(page, backendNodeId, text, shouldClearFirst, shouldPressEnter, delayMs);
442
145
  const representation = await renderAfterAction(deps);
443
146
  return formatPageResponse(representation);
444
147
  }
@@ -446,8 +149,8 @@ export function registerInteractionTools(server, deps) {
446
149
  return handleToolError(error);
447
150
  }
448
151
  });
449
- // ─── charlotte:select ───
450
- tools["charlotte:select"] = server.registerTool("charlotte:select", {
152
+ // ─── charlotte_select ───
153
+ tools["charlotte_select"] = server.registerTool("charlotte_select", {
451
154
  description: "Select an option in a select/dropdown element. Returns full page representation after selection.",
452
155
  inputSchema: {
453
156
  element_id: z.string().describe("Target select element ID"),
@@ -455,7 +158,7 @@ export function registerInteractionTools(server, deps) {
455
158
  },
456
159
  }, async ({ element_id, value }) => {
457
160
  try {
458
- await deps.browserManager.ensureConnected();
161
+ await ensureReady(deps);
459
162
  const { page, backendNodeId } = await resolveElement(deps, element_id);
460
163
  logger.info("Selecting option", { element_id, value });
461
164
  await selectOptionByBackendNodeId(page, backendNodeId, value);
@@ -466,15 +169,15 @@ export function registerInteractionTools(server, deps) {
466
169
  return handleToolError(error);
467
170
  }
468
171
  });
469
- // ─── charlotte:toggle ───
470
- tools["charlotte:toggle"] = server.registerTool("charlotte:toggle", {
172
+ // ─── charlotte_toggle ───
173
+ tools["charlotte_toggle"] = server.registerTool("charlotte_toggle", {
471
174
  description: "Toggle a checkbox or switch element. Returns full page representation after toggle.",
472
175
  inputSchema: {
473
176
  element_id: z.string().describe("Target checkbox or switch element ID"),
474
177
  },
475
178
  }, async ({ element_id }) => {
476
179
  try {
477
- await deps.browserManager.ensureConnected();
180
+ await ensureReady(deps);
478
181
  const { page, backendNodeId } = await resolveElement(deps, element_id);
479
182
  logger.info("Toggling element", { element_id });
480
183
  // Toggle by clicking the element
@@ -487,20 +190,20 @@ export function registerInteractionTools(server, deps) {
487
190
  return handleToolError(error);
488
191
  }
489
192
  });
490
- // ─── charlotte:submit ───
491
- tools["charlotte:submit"] = server.registerTool("charlotte:submit", {
193
+ // ─── charlotte_submit ───
194
+ tools["charlotte_submit"] = server.registerTool("charlotte_submit", {
492
195
  description: "Submit a form. Can submit by form ID or by clicking its submit button. Returns full page representation after submission.",
493
196
  inputSchema: {
494
197
  form_id: z.string().describe("Form ID from page representation"),
495
198
  },
496
199
  }, async ({ form_id }) => {
497
200
  try {
498
- await deps.browserManager.ensureConnected();
201
+ await ensureReady(deps);
499
202
  // Find the form in the current representation
500
203
  const representation = await renderActivePage(deps, { detail: "minimal" });
501
204
  const form = representation.forms.find((f) => f.id === form_id);
502
205
  if (!form) {
503
- throw new CharlotteError(CharlotteErrorCode.ELEMENT_NOT_FOUND, `Form '${form_id}' not found on page.`, "Call charlotte:observe to get current page state and verify form IDs.");
206
+ throw new CharlotteError(CharlotteErrorCode.ELEMENT_NOT_FOUND, `Form '${form_id}' not found on page.`, "Call charlotte_observe to get current page state and verify form IDs.");
504
207
  }
505
208
  const page = deps.pageManager.getActivePage();
506
209
  // If the form has a submit button, click it
@@ -528,8 +231,8 @@ export function registerInteractionTools(server, deps) {
528
231
  return handleToolError(error);
529
232
  }
530
233
  });
531
- // ─── charlotte:scroll ───
532
- tools["charlotte:scroll"] = server.registerTool("charlotte:scroll", {
234
+ // ─── charlotte_scroll ───
235
+ tools["charlotte_scroll"] = server.registerTool("charlotte_scroll", {
533
236
  description: "Scroll the page or a specific container. Returns full page representation after scrolling.",
534
237
  inputSchema: {
535
238
  direction: z.enum(["up", "down", "left", "right"]).describe("Scroll direction"),
@@ -541,14 +244,15 @@ export function registerInteractionTools(server, deps) {
541
244
  },
542
245
  }, async ({ direction, amount, element_id }) => {
543
246
  try {
544
- await deps.browserManager.ensureConnected();
247
+ await ensureReady(deps);
545
248
  const page = deps.pageManager.getActivePage();
546
249
  const scrollAmount = amount ?? "page";
547
250
  logger.info("Scrolling", { direction, amount: scrollAmount, element_id });
548
251
  // Calculate pixel distance
549
252
  const viewport = page.viewport();
550
- const viewportWidth = viewport?.width ?? 1280;
551
- const viewportHeight = viewport?.height ?? 720;
253
+ const { defaultViewport } = deps.config;
254
+ const viewportWidth = viewport?.width ?? defaultViewport.width;
255
+ const viewportHeight = viewport?.height ?? defaultViewport.height;
552
256
  let pixelDistance;
553
257
  if (scrollAmount === "page") {
554
258
  pixelDistance =
@@ -617,15 +321,15 @@ export function registerInteractionTools(server, deps) {
617
321
  return handleToolError(error);
618
322
  }
619
323
  });
620
- // ─── charlotte:hover ───
621
- tools["charlotte:hover"] = server.registerTool("charlotte:hover", {
324
+ // ─── charlotte_hover ───
325
+ tools["charlotte_hover"] = server.registerTool("charlotte_hover", {
622
326
  description: "Hover over an element to trigger hover states. Returns full page representation after hover.",
623
327
  inputSchema: {
624
328
  element_id: z.string().describe("Target element ID"),
625
329
  },
626
330
  }, async ({ element_id }) => {
627
331
  try {
628
- await deps.browserManager.ensureConnected();
332
+ await ensureReady(deps);
629
333
  const { page, backendNodeId } = await resolveElement(deps, element_id);
630
334
  logger.info("Hovering element", { element_id });
631
335
  await hoverElementByBackendNodeId(page, backendNodeId);
@@ -636,8 +340,8 @@ export function registerInteractionTools(server, deps) {
636
340
  return handleToolError(error);
637
341
  }
638
342
  });
639
- // ─── charlotte:drag ───
640
- tools["charlotte:drag"] = server.registerTool("charlotte:drag", {
343
+ // ─── charlotte_drag ───
344
+ tools["charlotte_drag"] = server.registerTool("charlotte_drag", {
641
345
  description: "Drag an element to another element. Uses mouse primitives to simulate drag-and-drop. Returns full page representation after the drag.",
642
346
  inputSchema: {
643
347
  source_id: z.string().describe("Element ID of the drag source"),
@@ -645,7 +349,7 @@ export function registerInteractionTools(server, deps) {
645
349
  },
646
350
  }, async ({ source_id, target_id }) => {
647
351
  try {
648
- await deps.browserManager.ensureConnected();
352
+ await ensureReady(deps);
649
353
  const { page, backendNodeId: sourceNodeId } = await resolveElement(deps, source_id);
650
354
  const { backendNodeId: targetNodeId } = await resolveElement(deps, target_id);
651
355
  logger.info("Dragging element", { source_id, target_id });
@@ -659,8 +363,8 @@ export function registerInteractionTools(server, deps) {
659
363
  return handleToolError(error);
660
364
  }
661
365
  });
662
- // ─── charlotte:key ───
663
- tools["charlotte:key"] = server.registerTool("charlotte:key", {
366
+ // ─── charlotte_key ───
367
+ tools["charlotte_key"] = server.registerTool("charlotte_key", {
664
368
  description: "Send keyboard input to the page or a specific element. Supports single key with modifiers, or a sequence of keys. Use for keyboard-driven UIs (games, terminals, code editors) and non-input elements with keydown listeners.",
665
369
  inputSchema: {
666
370
  key: z
@@ -687,7 +391,7 @@ export function registerInteractionTools(server, deps) {
687
391
  },
688
392
  }, async ({ key, keys, modifiers, element_id, delay }) => {
689
393
  try {
690
- await deps.browserManager.ensureConnected();
394
+ await ensureReady(deps);
691
395
  const page = deps.pageManager.getActivePage();
692
396
  // Validate: exactly one of key or keys must be provided
693
397
  if (key && keys) {
@@ -734,8 +438,8 @@ export function registerInteractionTools(server, deps) {
734
438
  return handleToolError(error);
735
439
  }
736
440
  });
737
- // ─── charlotte:upload ───
738
- tools["charlotte:upload"] = server.registerTool("charlotte:upload", {
441
+ // ─── charlotte_upload ───
442
+ tools["charlotte_upload"] = server.registerTool("charlotte_upload", {
739
443
  description: "Set files on a file input element. Validates that files exist and that the target is a file input. Returns full page representation after upload.",
740
444
  inputSchema: {
741
445
  element_id: z.string().describe("Target file input element ID"),
@@ -743,7 +447,7 @@ export function registerInteractionTools(server, deps) {
743
447
  },
744
448
  }, async ({ element_id, paths }) => {
745
449
  try {
746
- await deps.browserManager.ensureConnected();
450
+ await ensureReady(deps);
747
451
  const { page, backendNodeId } = await resolveElement(deps, element_id);
748
452
  // Validate all files exist before sending to CDP
749
453
  for (const filePath of paths) {
@@ -763,166 +467,82 @@ export function registerInteractionTools(server, deps) {
763
467
  return handleToolError(error);
764
468
  }
765
469
  });
766
- // ─── charlotte:wait_for ───
767
- tools["charlotte:wait_for"] = server.registerTool("charlotte:wait_for", {
768
- description: "Wait for a condition to be met on the page. Returns page representation when the condition is satisfied, or a TIMEOUT error.",
470
+ // ─── charlotte_fill_form ───
471
+ const FILLABLE_TYPES = new Set([
472
+ "text_input", "textarea", "select", "checkbox", "radio", "toggle", "date_input", "color_input",
473
+ ]);
474
+ tools["charlotte_fill_form"] = server.registerTool("charlotte_fill_form", {
475
+ description: "Fill multiple form fields in a single call. Auto-detects element types (text input, select, checkbox, etc.) and applies the appropriate action. Returns a single page representation with delta covering all changes. Validates all fields before mutating any — if one field is invalid, no fields are changed.",
769
476
  inputSchema: {
770
- element_id: z.string().optional().describe("Wait for specific element to appear/change"),
771
- state: z
772
- .enum(["visible", "hidden", "enabled", "disabled", "exists", "removed"])
773
- .optional()
774
- .describe("Target element state to wait for"),
775
- text: z.string().optional().describe("Wait for text to appear on the page"),
776
- selector: z.string().optional().describe("Wait for CSS selector to match"),
777
- js: z.string().optional().describe("Wait for JS expression to return truthy"),
778
- timeout: z.number().optional().describe("Max wait in ms (default: 10000)"),
477
+ fields: z
478
+ .array(z.object({
479
+ element_id: z.string().describe("Element ID of the form field"),
480
+ value: z.string().describe("Value to set: text for inputs/textareas, option value or text for selects. For checkbox/radio/toggle the element is clicked (toggling its state) and value is ignored."),
481
+ }))
482
+ .min(1)
483
+ .describe("Array of {element_id, value} pairs to fill"),
779
484
  },
780
- }, async ({ element_id, state, text, selector, js, timeout }) => {
485
+ }, async ({ fields }) => {
781
486
  try {
782
- await deps.browserManager.ensureConnected();
783
- const page = deps.pageManager.getActivePage();
784
- const waitTimeout = timeout ?? 10000;
785
- // Validate that at least one condition is provided
786
- if (!element_id && !text && !selector && !js) {
787
- throw new CharlotteError(CharlotteErrorCode.SESSION_ERROR, "At least one wait condition is required (element_id, text, selector, or js).");
487
+ await ensureReady(deps);
488
+ // Render to get element types from the interactive array
489
+ const representation = await renderActivePage(deps, { detail: "minimal" });
490
+ // Validate all fields up front before performing any actions
491
+ const resolvedFields = [];
492
+ for (const field of fields) {
493
+ // Check type before resolving — gives better errors for non-fillable elements
494
+ const element = representation.interactive.find((el) => el.id === field.element_id);
495
+ if (!element) {
496
+ // Fall through to resolveElement for proper "not found" with suggestions
497
+ await resolveElement(deps, field.element_id);
498
+ // If resolveElement didn't throw, the element exists but isn't interactive
499
+ throw new CharlotteError(CharlotteErrorCode.ELEMENT_NOT_FOUND, `Element '${field.element_id}' is not an interactive form field.`, "Call charlotte_find to locate form fields by role or text.");
500
+ }
501
+ if (!FILLABLE_TYPES.has(element.type)) {
502
+ const hint = element.type === "file_input"
503
+ ? "Use charlotte_upload for file inputs."
504
+ : "fill_form supports: text_input, textarea, select, checkbox, radio, toggle, date_input, color_input.";
505
+ throw new CharlotteError(CharlotteErrorCode.ELEMENT_NOT_INTERACTIVE, `Element '${field.element_id}' is type '${element.type}' which cannot be filled.`, hint);
506
+ }
507
+ const resolved = await resolveElement(deps, field.element_id);
508
+ resolvedFields.push({
509
+ backendNodeId: resolved.backendNodeId,
510
+ type: element.type,
511
+ value: field.value,
512
+ page: resolved.page,
513
+ });
788
514
  }
789
- logger.info("Waiting for condition", {
790
- element_id,
791
- state,
792
- text,
793
- selector,
794
- js,
795
- timeout: waitTimeout,
796
- });
797
- // Build a composite wait condition
798
- const satisfied = await pollWaitForCondition(deps, page, { element_id, state, text, selector, js }, waitTimeout);
799
- if (!satisfied) {
800
- const representation = await renderAfterAction(deps);
801
- const timeoutError = new CharlotteError(CharlotteErrorCode.TIMEOUT, `Wait condition not met within ${waitTimeout}ms.`, "The current page state is included in the response. Consider increasing timeout or adjusting your condition.");
802
- return {
803
- content: [
804
- {
805
- type: "text",
806
- text: JSON.stringify({
807
- ...timeoutError.toResponse(),
808
- page: representation,
809
- }),
810
- },
811
- ],
812
- isError: true,
813
- };
515
+ logger.info("Filling form fields", { fieldCount: resolvedFields.length });
516
+ // Fill each field using the appropriate action
517
+ for (const field of resolvedFields) {
518
+ switch (field.type) {
519
+ case "text_input":
520
+ case "textarea":
521
+ case "date_input":
522
+ case "color_input":
523
+ await typeIntoElement(field.page, field.backendNodeId, field.value, true, false);
524
+ break;
525
+ case "select":
526
+ await selectOptionByBackendNodeId(field.page, field.backendNodeId, field.value);
527
+ break;
528
+ case "checkbox":
529
+ case "radio":
530
+ case "toggle":
531
+ await clickElementByBackendNodeId(field.page, field.backendNodeId, "left");
532
+ break;
533
+ }
814
534
  }
815
- const representation = await renderAfterAction(deps);
816
- return formatPageResponse(representation);
535
+ // Single render after all fields are filled
536
+ const result = await renderAfterAction(deps);
537
+ return formatPageResponse(result);
817
538
  }
818
539
  catch (error) {
819
540
  return handleToolError(error);
820
541
  }
821
542
  });
543
+ // ─── charlotte_wait_for (delegated to wait-for.ts) ───
544
+ const waitForTools = registerWaitForTools(server, deps);
545
+ Object.assign(tools, waitForTools);
822
546
  return tools;
823
547
  }
824
- /**
825
- * Poll for complex wait_for conditions that may involve element state checks.
826
- */
827
- async function pollWaitForCondition(deps, page, condition, timeoutMs) {
828
- const pollInterval = 100;
829
- const deadline = Date.now() + timeoutMs;
830
- while (Date.now() < deadline) {
831
- let allSatisfied = true;
832
- // Check element_id + state condition
833
- if (condition.element_id) {
834
- const targetState = condition.state ?? "exists";
835
- const elementSatisfied = await checkElementCondition(deps, condition.element_id, targetState);
836
- if (!elementSatisfied)
837
- allSatisfied = false;
838
- }
839
- // Check text condition
840
- if (allSatisfied && condition.text) {
841
- const textFound = await page.evaluate((searchText) => {
842
- return document.body?.innerText?.includes(searchText) ?? false;
843
- }, condition.text);
844
- if (!textFound)
845
- allSatisfied = false;
846
- }
847
- // Check selector condition
848
- if (allSatisfied && condition.selector) {
849
- const selectorMatched = await page.$(condition.selector);
850
- if (!selectorMatched)
851
- allSatisfied = false;
852
- }
853
- // Check JS condition via CDP Runtime.evaluate (matches evaluate.ts pattern)
854
- if (allSatisfied && condition.js) {
855
- const cdpSession = await page.createCDPSession();
856
- try {
857
- const evalResult = await cdpSession.send("Runtime.evaluate", {
858
- expression: condition.js,
859
- returnByValue: true,
860
- awaitPromise: true,
861
- timeout: Math.max(0, deadline - Date.now()),
862
- });
863
- const isTruthy = !evalResult.exceptionDetails && !!evalResult.result.value;
864
- if (!isTruthy)
865
- allSatisfied = false;
866
- }
867
- catch {
868
- allSatisfied = false;
869
- }
870
- finally {
871
- await cdpSession.detach().catch(() => { });
872
- }
873
- }
874
- if (allSatisfied)
875
- return true;
876
- const remainingTime = deadline - Date.now();
877
- if (remainingTime <= 0)
878
- break;
879
- await new Promise((resolve) => setTimeout(resolve, Math.min(pollInterval, remainingTime)));
880
- }
881
- return false;
882
- }
883
- /**
884
- * Check if an element meets a specific state condition.
885
- */
886
- async function checkElementCondition(deps, elementId, targetState) {
887
- switch (targetState) {
888
- case "exists": {
889
- const backendNodeId = deps.elementIdGenerator.resolveId(elementId);
890
- return backendNodeId !== null;
891
- }
892
- case "removed": {
893
- const backendNodeId = deps.elementIdGenerator.resolveId(elementId);
894
- if (backendNodeId !== null) {
895
- // Re-render to check if it's truly still there
896
- await renderActivePage(deps, { detail: "minimal" });
897
- return deps.elementIdGenerator.resolveId(elementId) === null;
898
- }
899
- return true;
900
- }
901
- case "visible":
902
- case "hidden":
903
- case "enabled":
904
- case "disabled": {
905
- // Re-render to get fresh state
906
- const representation = await renderActivePage(deps, { detail: "minimal" });
907
- const element = representation.interactive.find((el) => el.id === elementId);
908
- if (!element) {
909
- // Element doesn't exist — "hidden" and "disabled" are satisfied, others not
910
- return targetState === "hidden" || targetState === "disabled";
911
- }
912
- switch (targetState) {
913
- case "visible":
914
- return element.state.visible === true;
915
- case "hidden":
916
- return element.state.visible === false;
917
- case "enabled":
918
- return element.state.enabled === true;
919
- case "disabled":
920
- return element.state.enabled === false;
921
- }
922
- return false;
923
- }
924
- default:
925
- return false;
926
- }
927
- }
928
548
  //# sourceMappingURL=interaction.js.map