@ticktockbent/charlotte 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. package/CHANGELOG.md +33 -0
  2. package/LICENSE +21 -0
  3. package/README.md +254 -0
  4. package/dist/browser/browser-manager.d.ts +14 -0
  5. package/dist/browser/browser-manager.d.ts.map +1 -0
  6. package/dist/browser/browser-manager.js +72 -0
  7. package/dist/browser/browser-manager.js.map +1 -0
  8. package/dist/browser/cdp-session.d.ts +7 -0
  9. package/dist/browser/cdp-session.d.ts.map +1 -0
  10. package/dist/browser/cdp-session.js +35 -0
  11. package/dist/browser/cdp-session.js.map +1 -0
  12. package/dist/browser/page-manager.d.ts +30 -0
  13. package/dist/browser/page-manager.d.ts.map +1 -0
  14. package/dist/browser/page-manager.js +123 -0
  15. package/dist/browser/page-manager.js.map +1 -0
  16. package/dist/dev/auditor.d.ts +39 -0
  17. package/dist/dev/auditor.d.ts.map +1 -0
  18. package/dist/dev/auditor.js +474 -0
  19. package/dist/dev/auditor.js.map +1 -0
  20. package/dist/dev/dev-mode-state.d.ts +24 -0
  21. package/dist/dev/dev-mode-state.d.ts.map +1 -0
  22. package/dist/dev/dev-mode-state.js +93 -0
  23. package/dist/dev/dev-mode-state.js.map +1 -0
  24. package/dist/dev/file-watcher.d.ts +20 -0
  25. package/dist/dev/file-watcher.d.ts.map +1 -0
  26. package/dist/dev/file-watcher.js +78 -0
  27. package/dist/dev/file-watcher.js.map +1 -0
  28. package/dist/dev/static-server.d.ts +18 -0
  29. package/dist/dev/static-server.d.ts.map +1 -0
  30. package/dist/dev/static-server.js +73 -0
  31. package/dist/dev/static-server.js.map +1 -0
  32. package/dist/index.d.ts +3 -0
  33. package/dist/index.d.ts.map +1 -0
  34. package/dist/index.js +60 -0
  35. package/dist/index.js.map +1 -0
  36. package/dist/renderer/accessibility-extractor.d.ts +19 -0
  37. package/dist/renderer/accessibility-extractor.d.ts.map +1 -0
  38. package/dist/renderer/accessibility-extractor.js +138 -0
  39. package/dist/renderer/accessibility-extractor.js.map +1 -0
  40. package/dist/renderer/content-extractor.d.ts +6 -0
  41. package/dist/renderer/content-extractor.d.ts.map +1 -0
  42. package/dist/renderer/content-extractor.js +150 -0
  43. package/dist/renderer/content-extractor.js.map +1 -0
  44. package/dist/renderer/dom-path.d.ts +4 -0
  45. package/dist/renderer/dom-path.d.ts.map +1 -0
  46. package/dist/renderer/dom-path.js +34 -0
  47. package/dist/renderer/dom-path.js.map +1 -0
  48. package/dist/renderer/element-id-generator.d.ts +19 -0
  49. package/dist/renderer/element-id-generator.d.ts.map +1 -0
  50. package/dist/renderer/element-id-generator.js +73 -0
  51. package/dist/renderer/element-id-generator.js.map +1 -0
  52. package/dist/renderer/interactive-extractor.d.ts +13 -0
  53. package/dist/renderer/interactive-extractor.d.ts.map +1 -0
  54. package/dist/renderer/interactive-extractor.js +161 -0
  55. package/dist/renderer/interactive-extractor.js.map +1 -0
  56. package/dist/renderer/layout-extractor.d.ts +8 -0
  57. package/dist/renderer/layout-extractor.d.ts.map +1 -0
  58. package/dist/renderer/layout-extractor.js +48 -0
  59. package/dist/renderer/layout-extractor.js.map +1 -0
  60. package/dist/renderer/renderer-pipeline.d.ts +26 -0
  61. package/dist/renderer/renderer-pipeline.d.ts.map +1 -0
  62. package/dist/renderer/renderer-pipeline.js +163 -0
  63. package/dist/renderer/renderer-pipeline.js.map +1 -0
  64. package/dist/server.d.ts +19 -0
  65. package/dist/server.d.ts.map +1 -0
  66. package/dist/server.js +39 -0
  67. package/dist/server.js.map +1 -0
  68. package/dist/state/differ.d.ts +9 -0
  69. package/dist/state/differ.d.ts.map +1 -0
  70. package/dist/state/differ.js +295 -0
  71. package/dist/state/differ.js.map +1 -0
  72. package/dist/state/snapshot-store.d.ts +52 -0
  73. package/dist/state/snapshot-store.d.ts.map +1 -0
  74. package/dist/state/snapshot-store.js +98 -0
  75. package/dist/state/snapshot-store.js.map +1 -0
  76. package/dist/tools/dev-mode.d.ts +4 -0
  77. package/dist/tools/dev-mode.d.ts.map +1 -0
  78. package/dist/tools/dev-mode.js +160 -0
  79. package/dist/tools/dev-mode.js.map +1 -0
  80. package/dist/tools/evaluate.d.ts +10 -0
  81. package/dist/tools/evaluate.d.ts.map +1 -0
  82. package/dist/tools/evaluate.js +109 -0
  83. package/dist/tools/evaluate.js.map +1 -0
  84. package/dist/tools/interaction.d.ts +4 -0
  85. package/dist/tools/interaction.d.ts.map +1 -0
  86. package/dist/tools/interaction.js +680 -0
  87. package/dist/tools/interaction.js.map +1 -0
  88. package/dist/tools/navigation.d.ts +4 -0
  89. package/dist/tools/navigation.d.ts.map +1 -0
  90. package/dist/tools/navigation.js +136 -0
  91. package/dist/tools/navigation.js.map +1 -0
  92. package/dist/tools/observation.d.ts +4 -0
  93. package/dist/tools/observation.d.ts.map +1 -0
  94. package/dist/tools/observation.js +278 -0
  95. package/dist/tools/observation.js.map +1 -0
  96. package/dist/tools/session.d.ts +4 -0
  97. package/dist/tools/session.d.ts.map +1 -0
  98. package/dist/tools/session.js +372 -0
  99. package/dist/tools/session.js.map +1 -0
  100. package/dist/tools/tool-helpers.d.ts +89 -0
  101. package/dist/tools/tool-helpers.d.ts.map +1 -0
  102. package/dist/tools/tool-helpers.js +127 -0
  103. package/dist/tools/tool-helpers.js.map +1 -0
  104. package/dist/types/config.d.ts +7 -0
  105. package/dist/types/config.d.ts.map +1 -0
  106. package/dist/types/config.js +7 -0
  107. package/dist/types/config.js.map +1 -0
  108. package/dist/types/element-id.d.ts +8 -0
  109. package/dist/types/element-id.d.ts.map +1 -0
  110. package/dist/types/element-id.js +19 -0
  111. package/dist/types/element-id.js.map +1 -0
  112. package/dist/types/errors.d.ts +22 -0
  113. package/dist/types/errors.d.ts.map +1 -0
  114. package/dist/types/errors.js +30 -0
  115. package/dist/types/errors.js.map +1 -0
  116. package/dist/types/page-representation.d.ts +84 -0
  117. package/dist/types/page-representation.d.ts.map +1 -0
  118. package/dist/types/page-representation.js +2 -0
  119. package/dist/types/page-representation.js.map +1 -0
  120. package/dist/types/snapshot.d.ts +22 -0
  121. package/dist/types/snapshot.d.ts.map +1 -0
  122. package/dist/types/snapshot.js +2 -0
  123. package/dist/types/snapshot.js.map +1 -0
  124. package/dist/utils/hash.d.ts +2 -0
  125. package/dist/utils/hash.d.ts.map +1 -0
  126. package/dist/utils/hash.js +6 -0
  127. package/dist/utils/hash.js.map +1 -0
  128. package/dist/utils/logger.d.ts +9 -0
  129. package/dist/utils/logger.d.ts.map +1 -0
  130. package/dist/utils/logger.js +31 -0
  131. package/dist/utils/logger.js.map +1 -0
  132. package/dist/utils/wait.d.ts +21 -0
  133. package/dist/utils/wait.d.ts.map +1 -0
  134. package/dist/utils/wait.js +55 -0
  135. package/dist/utils/wait.js.map +1 -0
  136. package/package.json +67 -0
@@ -0,0 +1,680 @@
1
+ import { z } from "zod";
2
+ import { CharlotteError, CharlotteErrorCode } from "../types/errors.js";
3
+ import { logger } from "../utils/logger.js";
4
+ import { renderActivePage, renderAfterAction, resolveElement, formatPageResponse, handleToolError, } from "./tool-helpers.js";
5
/**
 * Click the element identified by a CDP backend node ID.
 *
 * A temporary CDP session scrolls the element into view and fetches its box model;
 * the click is then dispatched with the page mouse at the center of the content quad.
 *
 * @param page - Page object exposing `createCDPSession()` and `mouse.click()`.
 * @param backendNodeId - Backend node ID of the element to click.
 * @param clickType - "right" or "double"; any other value (default "left") performs a plain click.
 * @throws {CharlotteError} ELEMENT_NOT_FOUND when the box model response carries no model.
 */
async function clickElementByBackendNodeId(page, backendNodeId, clickType = "left") {
    const session = await page.createCDPSession();
    try {
        // Bring the target into the viewport before measuring it.
        await session.send("DOM.scrollIntoViewIfNeeded", { backendNodeId });
        const boxModelResponse = await session.send("DOM.getBoxModel", { backendNodeId });
        const boxModel = boxModelResponse.model;
        if (!boxModel) {
            throw new CharlotteError(CharlotteErrorCode.ELEMENT_NOT_FOUND, "Element has no visible box model — it may be hidden or zero-sized.", "Call charlotte:observe to check the element's state.");
        }
        // The content quad is a flat list of four corner points: x1,y1 … x4,y4.
        const [x1, y1, x2, y2, x3, y3, x4, y4] = boxModel.content;
        const centerX = (x1 + x2 + x3 + x4) / 4;
        const centerY = (y1 + y2 + y3 + y4) / 4;
        switch (clickType) {
            case "right":
                await page.mouse.click(centerX, centerY, { button: "right" });
                break;
            case "double":
                await page.mouse.click(centerX, centerY, { clickCount: 2 });
                break;
            default:
                await page.mouse.click(centerX, centerY);
        }
    }
    finally {
        // The session is only needed for this one interaction.
        await session.detach();
    }
}
42
/**
 * Run an action and wait for any navigation it triggers, or fall back to a brief settle pause.
 *
 * A `framenavigated` listener is attached before the action is dispatched. If the event
 * fires before the detection window expires (or at any point while the action is still
 * running), the function waits for the page load event, up to `loadTimeoutMs`. Otherwise
 * it pauses for `settleMs` so in-page DOM updates can land.
 *
 * The listener and the detection timer are always released, including when `action`
 * rejects, so a failed action leaves no stray listener on the page.
 *
 * @param page - Page object exposing `on`, `off` and `waitForNavigation`.
 * @param action - Async callback that performs the interaction.
 * @param options - Optional timings.
 * @param options.detectionWindowMs - How long to watch for a navigation start (default 500).
 * @param options.loadTimeoutMs - Max time to wait for the load event (default 10000).
 * @param options.settleMs - Pause used when no navigation was detected (default 50).
 * @returns Resolves once the page has loaded or settled; rejects with whatever `action` rejects with.
 */
async function waitForPossibleNavigation(page, action, { detectionWindowMs = 500, loadTimeoutMs = 10000, settleMs = 50 } = {}) {
    let navigationDetected = false;
    let detectionTimer;
    let onFrameNavigated;
    // Resolves on the first navigation event or when the detection window expires.
    const navigationStartPromise = new Promise((resolve) => {
        onFrameNavigated = () => {
            navigationDetected = true;
            resolve();
        };
        page.on("framenavigated", onFrameNavigated);
        detectionTimer = setTimeout(resolve, detectionWindowMs);
    });
    try {
        // Dispatch the action, then wait out whatever is left of the detection window.
        await action();
        await navigationStartPromise;
    }
    finally {
        // Release both resources on every path so nothing outlives this call.
        clearTimeout(detectionTimer);
        page.off("framenavigated", onFrameNavigated);
    }
    if (!navigationDetected) {
        // No navigation — brief settle for in-page DOM updates.
        await new Promise((resolve) => setTimeout(resolve, settleMs));
        return;
    }
    try {
        // Navigation occurred — wait for the page to finish loading.
        await page.waitForNavigation({ waitUntil: "load", timeout: loadTimeoutMs });
    }
    catch {
        // The page may already have finished loading before waitForNavigation was called,
        // or the load timed out. Either way, the caller renders what is there.
        logger.debug("Post-navigation load wait ended (page may already be loaded)");
    }
}
85
/**
 * Give keyboard focus to an element via the CDP `DOM.focus` command.
 *
 * @param page - Page object exposing `createCDPSession()`.
 * @param backendNodeId - Backend node ID of the element to focus.
 */
async function focusElementByBackendNodeId(page, backendNodeId) {
    const session = await page.createCDPSession();
    const focusParams = { backendNodeId };
    try {
        await session.send("DOM.focus", focusParams);
    }
    finally {
        // Detach whether or not the focus command succeeded.
        await session.detach();
    }
}
97
/**
 * Scroll an element into the viewport via the CDP `DOM.scrollIntoViewIfNeeded` command.
 *
 * @param page - Page object exposing `createCDPSession()`.
 * @param backendNodeId - Backend node ID of the element to reveal.
 */
async function scrollIntoViewByBackendNodeId(page, backendNodeId) {
    const session = await page.createCDPSession();
    const scrollParams = { backendNodeId };
    try {
        await session.send("DOM.scrollIntoViewIfNeeded", scrollParams);
    }
    finally {
        // Detach whether or not the scroll command succeeded.
        await session.detach();
    }
}
109
/**
 * Move the mouse pointer over the element identified by a CDP backend node ID.
 *
 * The element is scrolled into view, its box model is read over a temporary CDP session,
 * and the pointer is moved to the center of the content quad.
 *
 * @param page - Page object exposing `createCDPSession()` and `mouse.move()`.
 * @param backendNodeId - Backend node ID of the element to hover.
 * @throws {CharlotteError} ELEMENT_NOT_FOUND when the box model response carries no model.
 */
async function hoverElementByBackendNodeId(page, backendNodeId) {
    const session = await page.createCDPSession();
    try {
        await session.send("DOM.scrollIntoViewIfNeeded", { backendNodeId });
        const boxModelResponse = await session.send("DOM.getBoxModel", { backendNodeId });
        const boxModel = boxModelResponse.model;
        if (!boxModel) {
            throw new CharlotteError(CharlotteErrorCode.ELEMENT_NOT_FOUND, "Element has no visible box model for hover.");
        }
        // The content quad is a flat list of four corner points: x1,y1 … x4,y4.
        const [x1, y1, x2, y2, x3, y3, x4, y4] = boxModel.content;
        const hoverX = (x1 + x2 + x3 + x4) / 4;
        const hoverY = (y1 + y2 + y3 + y4) / 4;
        await page.mouse.move(hoverX, hoverY);
    }
    finally {
        await session.detach();
    }
}
131
/**
 * Type text into an element: focus it, optionally clear its current content, type the
 * text through the page keyboard, and optionally press Enter.
 *
 * @param page - Page object exposing `keyboard.down/up/press/type`.
 * @param backendNodeId - Backend node ID of the element to type into.
 * @param text - Text to type.
 * @param clearFirst - When truthy, select all existing content and delete it first.
 * @param pressEnter - When truthy, press Enter after typing.
 */
async function typeIntoElement(page, backendNodeId, text, clearFirst, pressEnter) {
    await focusElementByBackendNodeId(page, backendNodeId);
    if (clearFirst) {
        // Select all, then delete.
        // NOTE(review): Control+A may not be "select all" on macOS — confirm on that platform.
        await page.keyboard.down("Control");
        try {
            await page.keyboard.press("a");
        }
        finally {
            // Always release the modifier; a stuck Control would corrupt later key presses.
            await page.keyboard.up("Control");
        }
        await page.keyboard.press("Backspace");
    }
    await page.keyboard.type(text);
    if (pressEnter) {
        await page.keyboard.press("Enter");
    }
}
150
/**
 * Select an option of a <select> element and fire `input` and `change` events on it.
 *
 * The node is resolved to a remote object, then an in-page function looks for an option
 * whose `value` equals `value`, falling back to one whose trimmed text equals it.
 *
 * `Runtime.callFunctionOn` reports in-page exceptions through `exceptionDetails` in its
 * response rather than by rejecting, so the response is inspected explicitly. Without
 * that check a missing option would be reported as a successful selection.
 *
 * @param page - Page object exposing `createCDPSession()`.
 * @param backendNodeId - Backend node ID of the select element.
 * @param value - Option value or visible text to select.
 * @throws {CharlotteError} ELEMENT_NOT_FOUND when the node cannot be resolved, or when the
 *   in-page selection throws (for example because no option matches).
 */
async function selectOptionByBackendNodeId(page, backendNodeId, value) {
    const cdpSession = await page.createCDPSession();
    try {
        // Resolve the node to get a remote object reference.
        const { object } = await cdpSession.send("DOM.resolveNode", {
            backendNodeId,
        });
        if (!object?.objectId) {
            throw new CharlotteError(CharlotteErrorCode.ELEMENT_NOT_FOUND, "Could not resolve select element.");
        }
        // Set the value and fire events inside the page.
        const callResponse = await cdpSession.send("Runtime.callFunctionOn", {
            objectId: object.objectId,
            functionDeclaration: `function(targetValue) {
        const options = Array.from(this.options);
        const matchByValue = options.find(o => o.value === targetValue);
        const matchByText = options.find(o => o.textContent.trim() === targetValue);
        const match = matchByValue || matchByText;
        if (match) {
          this.value = match.value;
          this.dispatchEvent(new Event('input', { bubbles: true }));
          this.dispatchEvent(new Event('change', { bubbles: true }));
        } else {
          throw new Error('Option "' + targetValue + '" not found');
        }
      }`,
            arguments: [{ value }],
        });
        const exceptionDetails = callResponse?.exceptionDetails;
        if (exceptionDetails) {
            const reason = exceptionDetails.exception?.description ?? exceptionDetails.text ?? "unknown error";
            throw new CharlotteError(CharlotteErrorCode.ELEMENT_NOT_FOUND, `Could not select option "${value}": ${reason}`, "Call charlotte:observe to check the select element and its available options.");
        }
    }
    finally {
        await cdpSession.detach();
    }
}
186
/**
 * Submit a form identified by a CDP backend node ID.
 *
 * Dispatching a synthetic `submit` event only runs event listeners; it does not make the
 * browser submit the form. The in-page function therefore prefers `requestSubmit()`.
 * Where that is unavailable it dispatches a cancelable `submit` event and calls
 * `submit()` unless a listener prevented the default.
 *
 * `Runtime.callFunctionOn` reports in-page exceptions through `exceptionDetails` rather
 * than by rejecting, so the response is inspected explicitly.
 *
 * @param page - Page object exposing `createCDPSession()`.
 * @param backendNodeId - Backend node ID of the form element.
 * @throws {CharlotteError} ELEMENT_NOT_FOUND when the node cannot be resolved or the
 *   in-page submission throws (for example because the node is not a form).
 */
async function submitFormByBackendNodeId(page, backendNodeId) {
    const cdpSession = await page.createCDPSession();
    try {
        const { object } = await cdpSession.send("DOM.resolveNode", {
            backendNodeId,
        });
        if (!object?.objectId) {
            throw new CharlotteError(CharlotteErrorCode.ELEMENT_NOT_FOUND, "Could not resolve form element.");
        }
        const callResponse = await cdpSession.send("Runtime.callFunctionOn", {
            objectId: object.objectId,
            functionDeclaration: `function() {
        if (typeof this.requestSubmit === 'function') {
          this.requestSubmit();
          return;
        }
        const submitEvent = new Event('submit', { bubbles: true, cancelable: true });
        if (this.dispatchEvent(submitEvent)) {
          this.submit();
        }
      }`,
        });
        const exceptionDetails = callResponse?.exceptionDetails;
        if (exceptionDetails) {
            const reason = exceptionDetails.exception?.description ?? exceptionDetails.text ?? "unknown error";
            throw new CharlotteError(CharlotteErrorCode.ELEMENT_NOT_FOUND, `Could not submit form: ${reason}`, "Call charlotte:observe to verify the form ID refers to a form element.");
        }
    }
    finally {
        await cdpSession.detach();
    }
}
209
/**
 * Register the page-interaction tools on the given server: click, type, select, toggle,
 * submit, scroll, hover, key and wait_for.
 *
 * Every handler first ensures the browser connection, performs its interaction, and
 * returns the formatted page representation rendered afterwards. Any thrown error is
 * converted into a tool response by `handleToolError`.
 *
 * @param server - Object exposing `registerTool(name, config, handler)`.
 * @param deps - Shared dependencies; this function reads `browserManager.ensureConnected()`,
 *   `pageManager.getActivePage()` and `elementIdGenerator.resolveId()`, and passes `deps`
 *   through to the render/resolve helpers.
 */
export function registerInteractionTools(server, deps) {
    // Short pause that lets in-page DOM updates land before re-rendering.
    const settle = () => new Promise((resolve) => setTimeout(resolve, 50));
    // ─── charlotte:click ───
    server.registerTool("charlotte:click", {
        description: "Click an interactive element on the page. Returns full page representation after the click.",
        inputSchema: {
            element_id: z.string().describe("Target element ID from page representation"),
            click_type: z
                .enum(["left", "right", "double"])
                .optional()
                .describe('Click type: "left" (default), "right", "double"'),
        },
    }, async ({ element_id, click_type }) => {
        try {
            await deps.browserManager.ensureConnected();
            const { page, backendNodeId } = await resolveElement(deps, element_id);
            const clickVariant = click_type ?? "left";
            logger.info("Clicking element", { element_id, clickType: clickVariant });
            // A click may navigate, so wait for a possible page load before rendering.
            await waitForPossibleNavigation(page, () => clickElementByBackendNodeId(page, backendNodeId, clickVariant));
            const representation = await renderAfterAction(deps);
            return formatPageResponse(representation);
        }
        catch (error) {
            return handleToolError(error);
        }
    });
    // ─── charlotte:type ───
    server.registerTool("charlotte:type", {
        description: "Type text into an input element. Returns full page representation after typing.",
        inputSchema: {
            element_id: z.string().describe("Target input element ID"),
            text: z.string().describe("Text to enter"),
            clear_first: z
                .boolean()
                .optional()
                .describe("Clear existing value before typing (default: true)"),
            press_enter: z
                .boolean()
                .optional()
                .describe("Press Enter after typing (default: false)"),
        },
    }, async ({ element_id, text, clear_first, press_enter }) => {
        try {
            await deps.browserManager.ensureConnected();
            const { page, backendNodeId } = await resolveElement(deps, element_id);
            const shouldClearFirst = clear_first ?? true;
            const shouldPressEnter = press_enter ?? false;
            // Only the text length is logged, never the typed text itself.
            logger.info("Typing into element", {
                element_id,
                textLength: text.length,
                clearFirst: shouldClearFirst,
                pressEnter: shouldPressEnter,
            });
            await typeIntoElement(page, backendNodeId, text, shouldClearFirst, shouldPressEnter);
            const representation = await renderAfterAction(deps);
            return formatPageResponse(representation);
        }
        catch (error) {
            return handleToolError(error);
        }
    });
    // ─── charlotte:select ───
    server.registerTool("charlotte:select", {
        description: "Select an option in a select/dropdown element. Returns full page representation after selection.",
        inputSchema: {
            element_id: z.string().describe("Target select element ID"),
            value: z.string().describe("Value or text of the option to select"),
        },
    }, async ({ element_id, value }) => {
        try {
            await deps.browserManager.ensureConnected();
            const { page, backendNodeId } = await resolveElement(deps, element_id);
            logger.info("Selecting option", { element_id, value });
            await selectOptionByBackendNodeId(page, backendNodeId, value);
            const representation = await renderAfterAction(deps);
            return formatPageResponse(representation);
        }
        catch (error) {
            return handleToolError(error);
        }
    });
    // ─── charlotte:toggle ───
    server.registerTool("charlotte:toggle", {
        description: "Toggle a checkbox or switch element. Returns full page representation after toggle.",
        inputSchema: {
            element_id: z.string().describe("Target checkbox or switch element ID"),
        },
    }, async ({ element_id }) => {
        try {
            await deps.browserManager.ensureConnected();
            const { page, backendNodeId } = await resolveElement(deps, element_id);
            logger.info("Toggling element", { element_id });
            // Toggle by clicking the element.
            await clickElementByBackendNodeId(page, backendNodeId, "left");
            await settle();
            const representation = await renderAfterAction(deps);
            return formatPageResponse(representation);
        }
        catch (error) {
            return handleToolError(error);
        }
    });
    // ─── charlotte:submit ───
    server.registerTool("charlotte:submit", {
        description: "Submit a form. Can submit by form ID or by clicking its submit button. Returns full page representation after submission.",
        inputSchema: {
            form_id: z.string().describe("Form ID from page representation"),
        },
    }, async ({ form_id }) => {
        try {
            await deps.browserManager.ensureConnected();
            // Find the form in the current representation.
            const representation = await renderActivePage(deps, { detail: "minimal" });
            const form = representation.forms.find((f) => f.id === form_id);
            if (!form) {
                throw new CharlotteError(CharlotteErrorCode.ELEMENT_NOT_FOUND, `Form '${form_id}' not found on page.`, "Call charlotte:observe to get current page state and verify form IDs.");
            }
            const page = deps.pageManager.getActivePage();
            if (form.submit) {
                // The form has a submit button: click it.
                const submitResolved = await resolveElement(deps, form.submit);
                logger.info("Submitting form via submit button", {
                    form_id,
                    submitButton: form.submit,
                });
                await waitForPossibleNavigation(page, () => clickElementByBackendNodeId(page, submitResolved.backendNodeId, "left"));
            }
            else {
                // No submit button known: submit through the form element itself.
                const formBackendNodeId = deps.elementIdGenerator.resolveId(form_id);
                if (formBackendNodeId === null) {
                    throw new CharlotteError(CharlotteErrorCode.ELEMENT_NOT_FOUND, `Could not resolve form '${form_id}' to a DOM element.`);
                }
                logger.info("Submitting form via submit event", { form_id });
                await waitForPossibleNavigation(page, () => submitFormByBackendNodeId(page, formBackendNodeId));
            }
            const updatedRepresentation = await renderAfterAction(deps);
            return formatPageResponse(updatedRepresentation);
        }
        catch (error) {
            return handleToolError(error);
        }
    });
    // ─── charlotte:scroll ───
    server.registerTool("charlotte:scroll", {
        description: "Scroll the page or a specific container. Returns full page representation after scrolling.",
        inputSchema: {
            direction: z
                .enum(["up", "down", "left", "right"])
                .describe("Scroll direction"),
            amount: z
                .string()
                .optional()
                .describe('Scroll amount: "page" (default), "half", or pixel value (e.g. "200")'),
            element_id: z
                .string()
                .optional()
                .describe("Scroll within a specific container element"),
        },
    }, async ({ direction, amount, element_id }) => {
        try {
            await deps.browserManager.ensureConnected();
            const page = deps.pageManager.getActivePage();
            const scrollAmount = amount ?? "page";
            logger.info("Scrolling", { direction, amount: scrollAmount, element_id });
            // Calculate the pixel distance; fall back to 1280x720 when no viewport is reported.
            const viewport = page.viewport();
            const viewportWidth = viewport?.width ?? 1280;
            const viewportHeight = viewport?.height ?? 720;
            const isHorizontal = direction === "left" || direction === "right";
            let pixelDistance;
            if (scrollAmount === "page") {
                pixelDistance = isHorizontal ? viewportWidth : viewportHeight;
            }
            else if (scrollAmount === "half") {
                pixelDistance = isHorizontal ? viewportWidth / 2 : viewportHeight / 2;
            }
            else {
                pixelDistance = Number.parseInt(scrollAmount, 10);
                if (Number.isNaN(pixelDistance)) {
                    throw new CharlotteError(CharlotteErrorCode.SESSION_ERROR, `Invalid scroll amount: "${scrollAmount}". Use "page", "half", or a pixel value.`);
                }
            }
            // Determine scroll deltas.
            let deltaX = 0;
            let deltaY = 0;
            switch (direction) {
                case "up":
                    deltaY = -pixelDistance;
                    break;
                case "down":
                    deltaY = pixelDistance;
                    break;
                case "left":
                    deltaX = -pixelDistance;
                    break;
                case "right":
                    deltaX = pixelDistance;
                    break;
            }
            if (element_id) {
                // Scroll within a specific container.
                const { backendNodeId } = await resolveElement(deps, element_id);
                const cdpSession = await page.createCDPSession();
                try {
                    const { object } = await cdpSession.send("DOM.resolveNode", {
                        backendNodeId,
                    });
                    if (!object?.objectId) {
                        // Fail loudly instead of reporting a scroll that never happened.
                        throw new CharlotteError(CharlotteErrorCode.ELEMENT_NOT_FOUND, `Could not resolve scroll container '${element_id}' to a DOM element.`, "Call charlotte:observe to get current page state and verify element IDs.");
                    }
                    await cdpSession.send("Runtime.callFunctionOn", {
                        objectId: object.objectId,
                        functionDeclaration: `function(dx, dy) {
                this.scrollBy(dx, dy);
              }`,
                        arguments: [{ value: deltaX }, { value: deltaY }],
                    });
                }
                finally {
                    await cdpSession.detach();
                }
            }
            else {
                // Scroll the page.
                await page.evaluate((dx, dy) => {
                    window.scrollBy(dx, dy);
                }, deltaX, deltaY);
            }
            await settle();
            const representation = await renderAfterAction(deps);
            return formatPageResponse(representation);
        }
        catch (error) {
            return handleToolError(error);
        }
    });
    // ─── charlotte:hover ───
    server.registerTool("charlotte:hover", {
        description: "Hover over an element to trigger hover states. Returns full page representation after hover.",
        inputSchema: {
            element_id: z.string().describe("Target element ID"),
        },
    }, async ({ element_id }) => {
        try {
            await deps.browserManager.ensureConnected();
            const { page, backendNodeId } = await resolveElement(deps, element_id);
            logger.info("Hovering element", { element_id });
            await hoverElementByBackendNodeId(page, backendNodeId);
            const representation = await renderAfterAction(deps);
            return formatPageResponse(representation);
        }
        catch (error) {
            return handleToolError(error);
        }
    });
    // ─── charlotte:key ───
    server.registerTool("charlotte:key", {
        description: 'Press a keyboard key, optionally with modifiers. Returns full page representation after keypress.',
        inputSchema: {
            key: z
                .string()
                .describe('Key name: "Escape", "Tab", "Enter", "ArrowDown", "ArrowUp", "ArrowLeft", "ArrowRight", "Backspace", "Delete", "Home", "End", "PageUp", "PageDown", or a single character'),
            modifiers: z
                .array(z.enum(["ctrl", "shift", "alt", "meta"]))
                .optional()
                .describe('Modifier keys to hold: ["ctrl"], ["shift"], ["alt"], ["meta"]'),
        },
    }, async ({ key, modifiers }) => {
        try {
            await deps.browserManager.ensureConnected();
            const page = deps.pageManager.getActivePage();
            logger.info("Pressing key", { key, modifiers });
            const activeModifiers = modifiers ?? [];
            // Track which modifiers actually went down so exactly those are released.
            const heldModifierKeys = [];
            try {
                for (const modifier of activeModifiers) {
                    const modifierKey = MODIFIER_KEY_MAP[modifier];
                    await page.keyboard.down(modifierKey);
                    heldModifierKeys.push(modifierKey);
                }
                await page.keyboard.press(key);
            }
            finally {
                // Release in reverse order, even when the key press failed, so no
                // modifier stays held for later interactions.
                for (const modifierKey of heldModifierKeys.reverse()) {
                    await page.keyboard.up(modifierKey);
                }
            }
            await settle();
            const representation = await renderAfterAction(deps);
            return formatPageResponse(representation);
        }
        catch (error) {
            return handleToolError(error);
        }
    });
    // ─── charlotte:wait_for ───
    server.registerTool("charlotte:wait_for", {
        description: "Wait for a condition to be met on the page. Returns page representation when the condition is satisfied, or a TIMEOUT error.",
        inputSchema: {
            element_id: z
                .string()
                .optional()
                .describe("Wait for specific element to appear/change"),
            state: z
                .enum(["visible", "hidden", "enabled", "disabled", "exists", "removed"])
                .optional()
                .describe("Target element state to wait for"),
            text: z
                .string()
                .optional()
                .describe("Wait for text to appear on the page"),
            selector: z
                .string()
                .optional()
                .describe("Wait for CSS selector to match"),
            js: z
                .string()
                .optional()
                .describe("Wait for JS expression to return truthy"),
            timeout: z
                .number()
                .optional()
                .describe("Max wait in ms (default: 10000)"),
        },
    }, async ({ element_id, state, text, selector, js, timeout }) => {
        try {
            await deps.browserManager.ensureConnected();
            const page = deps.pageManager.getActivePage();
            const waitTimeout = timeout ?? 10000;
            // At least one condition must be provided.
            if (!element_id && !text && !selector && !js) {
                throw new CharlotteError(CharlotteErrorCode.SESSION_ERROR, "At least one wait condition is required (element_id, text, selector, or js).");
            }
            logger.info("Waiting for condition", {
                element_id,
                state,
                text,
                selector,
                js,
                timeout: waitTimeout,
            });
            // All provided conditions must hold at the same time.
            const satisfied = await pollWaitForCondition(deps, page, { element_id, state, text, selector, js }, waitTimeout);
            if (!satisfied) {
                // On timeout, return the error together with the current page state.
                const representation = await renderAfterAction(deps);
                const timeoutError = new CharlotteError(CharlotteErrorCode.TIMEOUT, `Wait condition not met within ${waitTimeout}ms.`, "The current page state is included in the response. Consider increasing timeout or adjusting your condition.");
                return {
                    content: [
                        {
                            type: "text",
                            text: JSON.stringify({
                                ...timeoutError.toResponse(),
                                page: representation,
                            }),
                        },
                    ],
                    isError: true,
                };
            }
            const representation = await renderAfterAction(deps);
            return formatPageResponse(representation);
        }
        catch (error) {
            return handleToolError(error);
        }
    });
}
578
/**
 * Maps the modifier names accepted by the charlotte:key tool to the key names
 * passed to `page.keyboard.down()` / `page.keyboard.up()`.
 */
const MODIFIER_KEY_MAP = Object.fromEntries([
    ["ctrl", "Control"],
    ["shift", "Shift"],
    ["alt", "Alt"],
    ["meta", "Meta"],
]);
584
/**
 * Poll until every provided wait condition holds at the same time, or the timeout expires.
 *
 * Conditions are checked in order (element state, page text, CSS selector, JS expression)
 * and later checks are skipped once one fails. The conditions are always evaluated at
 * least once, so a `timeoutMs` of 0 means "check right now", and once more after the
 * final sleep so a condition that became true during that sleep is not missed.
 *
 * @param deps - Shared dependencies, forwarded to `checkElementCondition`.
 * @param page - Page object exposing `evaluate()` and `$()`.
 * @param condition - Object with optional `element_id`, `state`, `text`, `selector`, `js`.
 * @param timeoutMs - Maximum time to keep polling, in milliseconds.
 * @returns `true` when all provided conditions were satisfied, `false` on timeout.
 */
async function pollWaitForCondition(deps, page, condition, timeoutMs) {
    const pollIntervalMs = 100;
    const deadline = Date.now() + timeoutMs;
    for (;;) {
        let allSatisfied = true;
        // Element condition; the state defaults to "exists" when none was given.
        if (condition.element_id) {
            const targetState = condition.state ?? "exists";
            const elementSatisfied = await checkElementCondition(deps, condition.element_id, targetState);
            if (!elementSatisfied) {
                allSatisfied = false;
            }
        }
        // Text condition: substring match against the body's rendered text.
        if (allSatisfied && condition.text) {
            const textFound = await page.evaluate((searchText) => {
                return document.body?.innerText?.includes(searchText) ?? false;
            }, condition.text);
            if (!textFound) {
                allSatisfied = false;
            }
        }
        // Selector condition.
        if (allSatisfied && condition.selector) {
            const selectorMatched = await page.$(condition.selector);
            if (!selectorMatched) {
                allSatisfied = false;
            }
        }
        // JS condition.
        // NOTE(review): this eval()s a caller-supplied expression inside the page. That is
        // the purpose of the `js` condition, but it must only ever receive trusted input.
        if (allSatisfied && condition.js) {
            try {
                const jsResult = await page.evaluate((expression) => {
                    return !!eval(expression);
                }, condition.js);
                if (!jsResult) {
                    allSatisfied = false;
                }
            }
            catch {
                // An expression that throws counts as "not yet satisfied".
                allSatisfied = false;
            }
        }
        if (allSatisfied) {
            return true;
        }
        const remainingTime = deadline - Date.now();
        if (remainingTime <= 0) {
            return false;
        }
        await new Promise((resolve) => setTimeout(resolve, Math.min(pollIntervalMs, remainingTime)));
    }
}
635
/**
 * Check whether an element currently meets a target state.
 *
 * - "exists": the element ID resolves to a backend node. If the cached mapping does not
 *   know the ID, the page is re-rendered once and the lookup repeated, so an element
 *   that appears while polling is noticed.
 * - "removed": the ID no longer resolves after a re-render.
 * - "visible" / "hidden" / "enabled" / "disabled": read from a fresh minimal render of the
 *   interactive elements. An element missing from the render satisfies "hidden" and
 *   "disabled" only.
 *
 * NOTE(review): this relies on `renderActivePage` refreshing the mapping behind
 * `deps.elementIdGenerator.resolveId`, as the "removed" check already does — confirm.
 *
 * @param deps - Shared dependencies; reads `elementIdGenerator.resolveId()`.
 * @param elementId - Element ID from the page representation.
 * @param targetState - One of "exists", "removed", "visible", "hidden", "enabled", "disabled".
 * @returns `true` when the element meets the state; `false` otherwise, including for unknown states.
 */
async function checkElementCondition(deps, elementId, targetState) {
    const isResolvable = () => deps.elementIdGenerator.resolveId(elementId) !== null;
    switch (targetState) {
        case "exists": {
            if (isResolvable()) {
                return true;
            }
            // Unknown to the cached mapping — re-render to pick up newly added elements.
            await renderActivePage(deps, { detail: "minimal" });
            return isResolvable();
        }
        case "removed": {
            if (!isResolvable()) {
                return true;
            }
            // Re-render to check whether it is truly still there.
            await renderActivePage(deps, { detail: "minimal" });
            return !isResolvable();
        }
        case "visible":
        case "hidden":
        case "enabled":
        case "disabled": {
            // Re-render to get fresh state.
            const representation = await renderActivePage(deps, { detail: "minimal" });
            const element = representation.interactive.find((el) => el.id === elementId);
            if (!element) {
                // Element doesn't exist — "hidden" and "disabled" are satisfied, others not.
                return targetState === "hidden" || targetState === "disabled";
            }
            switch (targetState) {
                case "visible":
                    return element.state.visible === true;
                case "hidden":
                    return element.state.visible === false;
                case "enabled":
                    return element.state.enabled === true;
                case "disabled":
                    return element.state.enabled === false;
            }
            return false;
        }
        default:
            return false;
    }
}
680
+ //# sourceMappingURL=interaction.js.map