@geometra/mcp 1.19.18 → 1.19.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/server.js CHANGED
@@ -48,6 +48,20 @@ function nodeFilterShape() {
48
48
  busy: z.boolean().optional().describe('Match busy / in-progress state'),
49
49
  };
50
50
  }
51
+ function waitConditionShape() {
52
+ return {
53
+ ...nodeFilterShape(),
54
+ present: z.boolean().optional().default(true).describe('Wait for a matching node to exist (default true) or disappear'),
55
+ timeoutMs: z
56
+ .number()
57
+ .int()
58
+ .min(50)
59
+ .max(60_000)
60
+ .optional()
61
+ .default(10_000)
62
+ .describe('Maximum time to wait before returning an error (default 10000ms)'),
63
+ };
64
+ }
51
65
  const timeoutMsInput = z.number().int().min(50).max(60_000).optional();
52
66
  const fillFieldSchema = z.discriminatedUnion('kind', [
53
67
  z.object({
@@ -98,8 +112,14 @@ const formValuesRecordSchema = z.record(z.string(), formValueSchema);
98
112
  const batchActionSchema = z.discriminatedUnion('type', [
99
113
  z.object({
100
114
  type: z.literal('click'),
101
- x: z.number(),
102
- y: z.number(),
115
+ x: z.number().optional().describe('X coordinate to click'),
116
+ y: z.number().optional().describe('Y coordinate to click'),
117
+ ...nodeFilterShape(),
118
+ index: z.number().int().min(0).optional().describe('Which matching semantic target to click after sorting top-to-bottom'),
119
+ fullyVisible: z.boolean().optional().describe('When clicking by semantic target, require full visibility before clicking (default true)'),
120
+ maxRevealSteps: z.number().int().min(1).max(12).optional().describe('Maximum reveal attempts before clicking a semantic target'),
121
+ revealTimeoutMs: timeoutMsInput.describe('Per-scroll wait timeout while revealing a semantic target'),
122
+ waitFor: z.object(waitConditionShape()).optional().describe('Optional semantic condition to wait for after the click'),
103
123
  timeoutMs: timeoutMsInput,
104
124
  }),
105
125
  z.object({
@@ -183,7 +203,7 @@ export function createServer() {
183
203
 
184
204
  Use \`url\` (ws://…) only when a Geometra/native server or an already-running proxy is listening. If you accidentally pass \`https://…\` in \`url\`, MCP treats it like \`pageUrl\` and starts the proxy for you.
185
205
 
186
- Chromium opens **visible** by default unless \`headless: true\`. File upload / wheel / native \`<select>\` need the proxy path (\`pageUrl\` or ws to proxy).`, {
206
+ Chromium opens **visible** by default unless \`headless: true\`. File upload / wheel / native \`<select>\` need the proxy path (\`pageUrl\` or ws to proxy). Set \`returnForms: true\` and/or \`returnPageModel: true\` when you want a lower-turn startup response.`, {
187
207
  url: z
188
208
  .string()
189
209
  .optional()
@@ -218,6 +238,11 @@ Chromium opens **visible** by default unless \`headless: true\`. File upload / w
218
238
  .optional()
219
239
  .default(false)
220
240
  .describe('Include compact form schema discovery in the connect response so form flows can start in one turn.'),
241
+ returnPageModel: z
242
+ .boolean()
243
+ .optional()
244
+ .default(false)
245
+ .describe('Include geometra_page_model output in the connect response so exploration can start in one turn.'),
221
246
  formId: z.string().optional().describe('Optional form id filter when returnForms=true'),
222
247
  maxFields: z.number().int().min(1).max(120).optional().default(80).describe('Cap returned fields per form when returnForms=true'),
223
248
  onlyRequiredFields: z.boolean().optional().default(false).describe('Only include required fields when returnForms=true'),
@@ -226,6 +251,8 @@ Chromium opens **visible** by default unless \`headless: true\`. File upload / w
226
251
  includeContext: formSchemaContextInput(),
227
252
  sinceSchemaId: z.string().optional().describe('If the current schema matches this id, return changed=false without resending forms'),
228
253
  schemaFormat: formSchemaFormatInput(),
254
+ maxPrimaryActions: z.number().int().min(1).max(12).optional().default(6).describe('Cap top-level primary actions when returnPageModel=true'),
255
+ maxSectionsPerKind: z.number().int().min(1).max(16).optional().default(8).describe('Cap returned landmarks/forms/dialogs/lists per kind when returnPageModel=true'),
229
256
  detail: detailInput(),
230
257
  }, async (input) => {
231
258
  const normalized = normalizeConnectTarget({ url: input.url, pageUrl: input.pageUrl });
@@ -242,6 +269,10 @@ Chromium opens **visible** by default unless \`headless: true\`. File upload / w
242
269
  sinceSchemaId: input.sinceSchemaId,
243
270
  format: input.schemaFormat,
244
271
  };
272
+ const pageModelOptions = {
273
+ maxPrimaryActions: input.maxPrimaryActions,
274
+ maxSectionsPerKind: input.maxSectionsPerKind,
275
+ };
245
276
  try {
246
277
  if (target.kind === 'proxy') {
247
278
  const session = await connectThroughProxy({
@@ -261,7 +292,9 @@ Chromium opens **visible** by default unless \`headless: true\`. File upload / w
261
292
  autoCoercedFromUrl: target.autoCoercedFromUrl,
262
293
  detail: input.detail,
263
294
  returnForms: input.returnForms,
295
+ returnPageModel: input.returnPageModel,
264
296
  formSchema,
297
+ pageModelOptions,
265
298
  }), null, input.detail === 'verbose' ? 2 : undefined));
266
299
  }
267
300
  const session = await connect(target.wsUrl, {
@@ -277,7 +310,9 @@ Chromium opens **visible** by default unless \`headless: true\`. File upload / w
277
310
  autoCoercedFromUrl: false,
278
311
  detail: input.detail,
279
312
  returnForms: input.returnForms,
313
+ returnPageModel: input.returnPageModel,
280
314
  formSchema,
315
+ pageModelOptions,
281
316
  }), null, input.detail === 'verbose' ? 2 : undefined));
282
317
  }
283
318
  catch (e) {
@@ -321,63 +356,36 @@ This is the Geometra equivalent of Playwright's locator — but instant, structu
321
356
  });
322
357
  server.tool('geometra_wait_for', `Wait for a semantic UI condition without guessing sleep durations. Use this for slow SPA transitions, resume parsing, custom validation alerts, disabled submit buttons, and value/state confirmation before submit.
323
358
 
324
- The filter matches the same fields as geometra_query. Set \`present: false\` to wait for something to disappear (for example an alert or a "Parsing…" status).`, {
325
- ...nodeFilterShape(),
326
- present: z.boolean().optional().default(true).describe('Wait for a matching node to exist (default true) or disappear'),
327
- timeoutMs: z
328
- .number()
329
- .int()
330
- .min(50)
331
- .max(60_000)
332
- .optional()
333
- .default(10_000)
334
- .describe('Maximum time to wait before returning an error (default 10000ms)'),
335
- }, async ({ id, role, name, text, contextText, value, checked, disabled, focused, selected, expanded, invalid, required, busy, present, timeoutMs }) => {
359
+ The filter matches the same fields as geometra_query. Set \`present: false\` to wait for something to disappear (for example an alert or a "Parsing…" status).`, waitConditionShape(), async ({ id, role, name, text, contextText, value, checked, disabled, focused, selected, expanded, invalid, required, busy, present, timeoutMs }) => {
336
360
  const session = getSession();
337
361
  if (!session?.tree || !session?.layout)
338
362
  return err('Not connected. Call geometra_connect first.');
339
- const filter = {
340
- id,
341
- role,
342
- name,
343
- text,
344
- contextText,
345
- value,
346
- checked,
347
- disabled,
348
- focused,
349
- selected,
350
- expanded,
351
- invalid,
352
- required,
353
- busy,
354
- };
355
- if (!hasNodeFilter(filter))
356
- return err('Provide at least one wait filter (id, role, name, text, contextText, value, or state)');
357
- const matchesCondition = () => {
358
- if (!session.tree || !session.layout)
359
- return false;
360
- const a11y = sessionA11y(session);
361
- if (!a11y)
362
- return false;
363
- const matches = findNodes(a11y, filter);
364
- return present ? matches.length > 0 : matches.length === 0;
365
- };
366
- const startedAt = Date.now();
367
- const matched = await waitForUiCondition(session, matchesCondition, timeoutMs);
368
- const elapsedMs = Date.now() - startedAt;
369
- if (!matched) {
370
- return err(`Timed out after ${timeoutMs}ms waiting for ${present ? 'presence' : 'absence'} of ${JSON.stringify(filter)}.\nCurrent UI:\n${compactSessionSummary(session)}`);
371
- }
372
- if (!present) {
373
- return ok(`Condition satisfied after ${elapsedMs}ms: no nodes matched ${JSON.stringify(filter)}.`);
363
+ const waited = await waitForSemanticCondition(session, {
364
+ filter: {
365
+ id,
366
+ role,
367
+ name,
368
+ text,
369
+ contextText,
370
+ value,
371
+ checked,
372
+ disabled,
373
+ focused,
374
+ selected,
375
+ expanded,
376
+ invalid,
377
+ required,
378
+ busy,
379
+ },
380
+ present: present ?? true,
381
+ timeoutMs: timeoutMs ?? 10_000,
382
+ });
383
+ if (!waited.ok)
384
+ return err(waited.error);
385
+ if (!waited.value.present) {
386
+ return ok(waitConditionSuccessLine(waited.value));
374
387
  }
375
- const after = sessionA11y(session);
376
- if (!after)
377
- return ok(`Condition satisfied after ${elapsedMs}ms for ${JSON.stringify(filter)}.`);
378
- const matches = findNodes(after, filter);
379
- const result = sortA11yNodes(matches).slice(0, 8).map(node => formatNode(node, after, after.bounds));
380
- return ok(JSON.stringify(result, null, 2));
388
+ return ok(JSON.stringify(waited.value.matches.slice(0, 8), null, 2));
381
389
  });
382
390
  server.tool('geometra_fill_fields', `Fill several labeled form fields in one MCP call. This is the preferred high-level primitive for long forms.
383
391
 
@@ -653,7 +661,7 @@ Pass \`valuesById\` with field ids from \`geometra_form_schema\` for the most st
653
661
  });
654
662
  server.tool('geometra_run_actions', `Execute several Geometra actions in one MCP round trip and return one consolidated result. This is the preferred path for long, multi-step form fills where one-tool-per-field would otherwise create too much chatter.
655
663
 
656
- Supported step types: \`click\`, \`type\`, \`key\`, \`upload_files\`, \`pick_listbox_option\`, \`select_option\`, \`set_checked\`, \`wheel\`, \`wait_for\`, and \`fill_fields\`.`, {
664
+ Supported step types: \`click\`, \`type\`, \`key\`, \`upload_files\`, \`pick_listbox_option\`, \`select_option\`, \`set_checked\`, \`wheel\`, \`wait_for\`, and \`fill_fields\`. \`click\` steps can also carry a nested \`waitFor\` condition.`, {
657
665
  actions: z.array(batchActionSchema).min(1).max(80).describe('Ordered high-level action steps to run sequentially'),
658
666
  stopOnError: z.boolean().optional().default(true).describe('Stop at the first failing step (default true)'),
659
667
  includeSteps: z
@@ -832,10 +840,6 @@ Use the same filters as geometra_query, plus an optional match index when repeat
832
840
  const session = getSession();
833
841
  if (!session)
834
842
  return err('Not connected. Call geometra_connect first.');
835
- const matchIndex = index ?? 0;
836
- const requireFullyVisible = fullyVisible ?? true;
837
- const revealSteps = maxSteps ?? 6;
838
- const waitTimeout = timeoutMs ?? 2_500;
839
843
  const filter = {
840
844
  id,
841
845
  role,
@@ -854,55 +858,40 @@ Use the same filters as geometra_query, plus an optional match index when repeat
854
858
  };
855
859
  if (!hasNodeFilter(filter))
856
860
  return err('Provide at least one reveal filter (id, role, name, text, contextText, value, or state)');
857
- let attempts = 0;
858
- while (attempts <= revealSteps) {
859
- const a11y = sessionA11y(session);
860
- if (!a11y)
861
- return err('No UI tree available to reveal from');
862
- const matches = sortA11yNodes(findNodes(a11y, filter));
863
- if (matches.length === 0) {
864
- return err(`No elements found matching ${JSON.stringify(filter)}`);
865
- }
866
- if (matchIndex >= matches.length) {
867
- return err(`Requested reveal index ${matchIndex} but only ${matches.length} matching element(s) were found`);
868
- }
869
- const target = matches[matchIndex];
870
- const formatted = formatNode(target, a11y, a11y.bounds);
871
- const visible = requireFullyVisible ? formatted.visibility.fullyVisible : formatted.visibility.intersectsViewport;
872
- if (visible) {
873
- return ok(JSON.stringify({
874
- revealed: true,
875
- attempts,
876
- target: formatted,
877
- }, null, 2));
878
- }
879
- if (attempts === revealSteps) {
880
- return err(JSON.stringify({
881
- revealed: false,
882
- attempts,
883
- target: formatted,
884
- }, null, 2));
885
- }
886
- const deltaX = clamp(formatted.scrollHint.revealDeltaX, -Math.round(a11y.bounds.width * 0.75), Math.round(a11y.bounds.width * 0.75));
887
- let deltaY = clamp(formatted.scrollHint.revealDeltaY, -Math.round(a11y.bounds.height * 0.85), Math.round(a11y.bounds.height * 0.85));
888
- if (deltaY === 0 && !formatted.visibility.fullyVisible) {
889
- deltaY = formatted.visibility.offscreenAbove ? -Math.round(a11y.bounds.height * 0.4) : Math.round(a11y.bounds.height * 0.4);
890
- }
891
- await sendWheel(session, deltaY, {
892
- deltaX,
893
- x: formatted.center.x,
894
- y: formatted.center.y,
895
- }, waitTimeout);
896
- attempts++;
897
- }
898
- return err(`Failed to reveal ${JSON.stringify(filter)}`);
861
+ const revealed = await revealSemanticTarget(session, {
862
+ filter,
863
+ index: index ?? 0,
864
+ fullyVisible: fullyVisible ?? true,
865
+ maxSteps: maxSteps ?? 6,
866
+ timeoutMs: timeoutMs ?? 2_500,
867
+ });
868
+ if (!revealed.ok)
869
+ return err(revealed.error);
870
+ return ok(JSON.stringify({
871
+ revealed: true,
872
+ attempts: revealed.value.attempts,
873
+ target: revealed.value.target,
874
+ }, null, 2));
899
875
  });
900
876
  // ── click ────────────────────────────────────────────────────
901
- server.tool('geometra_click', `Click an element in the Geometra UI. Provide either the element's bounds (from geometra_query) or raw x,y coordinates. The click is dispatched server-side via the geometry protocol — no browser, no simulated DOM events.
877
+ server.tool('geometra_click', `Click an element in the Geometra UI. Provide either raw x,y coordinates or a semantic target (\`id\`, \`role\`, \`name\`, \`text\`, \`contextText\`, \`value\`, or state filters). You can also attach \`waitFor\` to block on the post-click semantic state in the same call. The click is dispatched server-side via the geometry protocol — no browser, no simulated DOM events.
902
878
 
903
879
  After clicking, returns a compact semantic delta when possible (dialogs/forms/lists/nodes changed). If nothing meaningful changed, returns a short current-UI overview.`, {
904
- x: z.number().describe('X coordinate to click (use center of element bounds from geometra_query)'),
905
- y: z.number().describe('Y coordinate to click'),
880
+ x: z.number().optional().describe('X coordinate to click (use center of element bounds from geometra_query)'),
881
+ y: z.number().optional().describe('Y coordinate to click'),
882
+ ...nodeFilterShape(),
883
+ index: z.number().int().min(0).optional().default(0).describe('Which matching semantic target to click after sorting top-to-bottom'),
884
+ fullyVisible: z.boolean().optional().default(true).describe('When clicking by semantic target, require full visibility before clicking (default true)'),
885
+ maxRevealSteps: z.number().int().min(1).max(12).optional().default(6).describe('Maximum reveal attempts before clicking a semantic target'),
886
+ revealTimeoutMs: z
887
+ .number()
888
+ .int()
889
+ .min(50)
890
+ .max(60_000)
891
+ .optional()
892
+ .default(2_500)
893
+ .describe('Per-scroll wait timeout while revealing a semantic target (default 2500ms)'),
894
+ waitFor: z.object(waitConditionShape()).optional().describe('Optional semantic condition to wait for after the click'),
906
895
  timeoutMs: z
907
896
  .number()
908
897
  .int()
@@ -911,14 +900,69 @@ After clicking, returns a compact semantic delta when possible (dialogs/forms/li
911
900
  .optional()
912
901
  .describe('Optional action wait timeout (use a longer value for slow submits or route transitions)'),
913
902
  detail: detailInput(),
914
- }, async ({ x, y, timeoutMs, detail }) => {
903
+ }, async ({ x, y, id, role, name, text, contextText, value, checked, disabled, focused, selected, expanded, invalid, required, busy, index, fullyVisible, maxRevealSteps, revealTimeoutMs, waitFor, timeoutMs, detail }) => {
915
904
  const session = getSession();
916
905
  if (!session)
917
906
  return err('Not connected. Call geometra_connect first.');
918
907
  const before = sessionA11y(session);
919
- const wait = await sendClick(session, x, y, timeoutMs);
908
+ const resolved = await resolveClickLocation(session, {
909
+ x,
910
+ y,
911
+ filter: {
912
+ id,
913
+ role,
914
+ name,
915
+ text,
916
+ contextText,
917
+ value,
918
+ checked,
919
+ disabled,
920
+ focused,
921
+ selected,
922
+ expanded,
923
+ invalid,
924
+ required,
925
+ busy,
926
+ },
927
+ index,
928
+ fullyVisible,
929
+ maxRevealSteps,
930
+ revealTimeoutMs,
931
+ });
932
+ if (!resolved.ok)
933
+ return err(resolved.error);
934
+ const wait = await sendClick(session, resolved.value.x, resolved.value.y, timeoutMs);
920
935
  const summary = postActionSummary(session, before, wait, detail);
921
- return ok(`Clicked at (${x}, ${y}).\n${summary}`);
936
+ const clickLine = !resolved.value.target
937
+ ? `Clicked at (${resolved.value.x}, ${resolved.value.y}).`
938
+ : `Clicked ${describeFormattedNode(resolved.value.target)} at (${resolved.value.x}, ${resolved.value.y})${resolved.value.revealAttempts && resolved.value.revealAttempts > 0 ? ` after ${resolved.value.revealAttempts} reveal step${resolved.value.revealAttempts === 1 ? '' : 's'}` : ''}.`;
939
+ const lines = [clickLine, summary];
940
+ if (waitFor) {
941
+ const postWait = await waitForSemanticCondition(session, {
942
+ filter: {
943
+ id: waitFor.id,
944
+ role: waitFor.role,
945
+ name: waitFor.name,
946
+ text: waitFor.text,
947
+ contextText: waitFor.contextText,
948
+ value: waitFor.value,
949
+ checked: waitFor.checked,
950
+ disabled: waitFor.disabled,
951
+ focused: waitFor.focused,
952
+ selected: waitFor.selected,
953
+ expanded: waitFor.expanded,
954
+ invalid: waitFor.invalid,
955
+ required: waitFor.required,
956
+ busy: waitFor.busy,
957
+ },
958
+ present: waitFor.present ?? true,
959
+ timeoutMs: waitFor.timeoutMs ?? 10_000,
960
+ });
961
+ if (!postWait.ok)
962
+ return err([...lines, postWait.error].join('\n'));
963
+ lines.push(`Post-click ${waitConditionSuccessLine(postWait.value)}`);
964
+ }
965
+ return ok(lines.filter(Boolean).join('\n'));
922
966
  });
923
967
  // ── type ─────────────────────────────────────────────────────
924
968
  server.tool('geometra_type', `Type text into the currently focused element. First click a textbox/input with geometra_click to focus it, then use this to type.
@@ -1188,7 +1232,7 @@ For a token-efficient semantic view, use geometra_snapshot (default compact). Fo
1188
1232
  return ok(JSON.stringify(session.layout, null, 2));
1189
1233
  });
1190
1234
  // ── disconnect ───────────────────────────────────────────────
1191
- server.tool('geometra_disconnect', `Disconnect from the Geometra server. Proxy-backed sessions keep the browser alive by default so the next geometra_connect can reuse it quickly; pass closeBrowser=true to fully tear down the proxy/browser.`, {
1235
+ server.tool('geometra_disconnect', `Disconnect from the Geometra server. Proxy-backed sessions keep compatible browsers alive by default so the next geometra_connect can reuse them quickly; pass closeBrowser=true to fully tear down the warm proxy/browser pool.`, {
1192
1236
  closeBrowser: z.boolean().optional().default(false).describe('Fully close the spawned proxy/browser instead of keeping it warm for reuse'),
1193
1237
  }, async ({ closeBrowser }) => {
1194
1238
  disconnect({ closeProxy: closeBrowser });
@@ -1338,12 +1382,23 @@ async function stabilizeInlineFormSchemas(session, options, opts) {
1338
1382
  }
1339
1383
  function connectResponsePayload(session, opts) {
1340
1384
  const payload = connectPayload(session, opts);
1341
- if (!opts.returnForms)
1385
+ if (!opts.returnForms && !opts.returnPageModel)
1342
1386
  return payload;
1343
- return {
1344
- ...payload,
1345
- formSchema: formSchemaResponsePayload(session, opts.formSchema ?? {}),
1346
- };
1387
+ const nextPayload = { ...payload };
1388
+ if (opts.returnForms) {
1389
+ nextPayload.formSchema = formSchemaResponsePayload(session, opts.formSchema ?? {});
1390
+ }
1391
+ if (opts.returnPageModel) {
1392
+ nextPayload.pageModel = pageModelResponsePayload(session, opts.pageModelOptions);
1393
+ }
1394
+ return nextPayload;
1395
+ }
1396
+ function pageModelResponsePayload(session, options) {
1397
+ const a11y = sessionA11y(session);
1398
+ if (!a11y) {
1399
+ return { available: false };
1400
+ }
1401
+ return buildPageModel(a11y, options);
1347
1402
  }
1348
1403
  async function ensureToolSession(target, missingConnectionMessage = 'Not connected. Call geometra_connect first.') {
1349
1404
  if (!target.url && !target.pageUrl) {
@@ -1608,6 +1663,154 @@ function waitStatusPayload(wait) {
1608
1663
  function compactFilterPayload(filter) {
1609
1664
  return Object.fromEntries(Object.entries(filter).filter(([, value]) => value !== undefined));
1610
1665
  }
1666
+ async function waitForSemanticCondition(session, options) {
1667
+ if (!hasNodeFilter(options.filter)) {
1668
+ return { ok: false, error: 'Provide at least one wait filter (id, role, name, text, contextText, value, or state)' };
1669
+ }
1670
+ const startedAt = Date.now();
1671
+ const matched = await waitForUiCondition(session, () => {
1672
+ const a11y = sessionA11y(session);
1673
+ if (!a11y)
1674
+ return false;
1675
+ const matches = findNodes(a11y, options.filter);
1676
+ return options.present ? matches.length > 0 : matches.length === 0;
1677
+ }, options.timeoutMs);
1678
+ const elapsedMs = Date.now() - startedAt;
1679
+ if (!matched) {
1680
+ return {
1681
+ ok: false,
1682
+ error: `Timed out after ${options.timeoutMs}ms waiting for ${options.present ? 'presence' : 'absence'} of ${JSON.stringify(options.filter)}.\nCurrent UI:\n${compactSessionSummary(session)}`,
1683
+ };
1684
+ }
1685
+ const after = sessionA11y(session);
1686
+ const matches = options.present && after
1687
+ ? sortA11yNodes(findNodes(after, options.filter)).slice(0, 8).map(node => formatNode(node, after, after.bounds))
1688
+ : [];
1689
+ return {
1690
+ ok: true,
1691
+ value: {
1692
+ filter: options.filter,
1693
+ present: options.present,
1694
+ elapsedMs,
1695
+ matchCount: matches.length,
1696
+ matches,
1697
+ },
1698
+ };
1699
+ }
1700
+ function waitConditionSuccessLine(result) {
1701
+ if (!result.present) {
1702
+ return `condition satisfied after ${result.elapsedMs}ms: no nodes matched ${JSON.stringify(result.filter)}.`;
1703
+ }
1704
+ return `condition satisfied after ${result.elapsedMs}ms with ${result.matchCount} matching node(s).`;
1705
+ }
1706
+ function waitConditionCompact(result) {
1707
+ return {
1708
+ present: result.present,
1709
+ elapsedMs: result.elapsedMs,
1710
+ filter: compactFilterPayload(result.filter),
1711
+ ...(result.present ? { matchCount: result.matchCount } : {}),
1712
+ };
1713
+ }
1714
+ async function revealSemanticTarget(session, options) {
1715
+ let attempts = 0;
1716
+ while (attempts <= options.maxSteps) {
1717
+ const a11y = sessionA11y(session);
1718
+ if (!a11y)
1719
+ return { ok: false, error: 'No UI tree available to reveal from' };
1720
+ const matches = sortA11yNodes(findNodes(a11y, options.filter));
1721
+ if (matches.length === 0) {
1722
+ return { ok: false, error: `No elements found matching ${JSON.stringify(options.filter)}` };
1723
+ }
1724
+ if (options.index >= matches.length) {
1725
+ return {
1726
+ ok: false,
1727
+ error: `Requested reveal index ${options.index} but only ${matches.length} matching element(s) were found`,
1728
+ };
1729
+ }
1730
+ const formatted = formatNode(matches[options.index], a11y, a11y.bounds);
1731
+ const visible = options.fullyVisible ? formatted.visibility.fullyVisible : formatted.visibility.intersectsViewport;
1732
+ if (visible) {
1733
+ return {
1734
+ ok: true,
1735
+ value: {
1736
+ attempts,
1737
+ target: formatted,
1738
+ },
1739
+ };
1740
+ }
1741
+ if (attempts === options.maxSteps) {
1742
+ return {
1743
+ ok: false,
1744
+ error: JSON.stringify({
1745
+ revealed: false,
1746
+ attempts,
1747
+ target: formatted,
1748
+ }, null, 2),
1749
+ };
1750
+ }
1751
+ const deltaX = clamp(formatted.scrollHint.revealDeltaX, -Math.round(a11y.bounds.width * 0.75), Math.round(a11y.bounds.width * 0.75));
1752
+ let deltaY = clamp(formatted.scrollHint.revealDeltaY, -Math.round(a11y.bounds.height * 0.85), Math.round(a11y.bounds.height * 0.85));
1753
+ if (deltaY === 0 && !formatted.visibility.fullyVisible) {
1754
+ deltaY = formatted.visibility.offscreenAbove ? -Math.round(a11y.bounds.height * 0.4) : Math.round(a11y.bounds.height * 0.4);
1755
+ }
1756
+ await sendWheel(session, deltaY, {
1757
+ deltaX,
1758
+ x: formatted.center.x,
1759
+ y: formatted.center.y,
1760
+ }, options.timeoutMs);
1761
+ attempts++;
1762
+ }
1763
+ return { ok: false, error: `Failed to reveal ${JSON.stringify(options.filter)}` };
1764
+ }
1765
+ async function resolveClickLocation(session, options) {
1766
+ const hasExplicitCoordinates = options.x !== undefined || options.y !== undefined;
1767
+ if (hasExplicitCoordinates) {
1768
+ if (options.x === undefined || options.y === undefined) {
1769
+ return { ok: false, error: 'Provide both x and y when clicking by coordinates' };
1770
+ }
1771
+ return {
1772
+ ok: true,
1773
+ value: {
1774
+ x: options.x,
1775
+ y: options.y,
1776
+ },
1777
+ };
1778
+ }
1779
+ if (!hasNodeFilter(options.filter)) {
1780
+ return {
1781
+ ok: false,
1782
+ error: 'Provide x and y, or at least one semantic target filter (id, role, name, text, contextText, value, or state)',
1783
+ };
1784
+ }
1785
+ const revealed = await revealSemanticTarget(session, {
1786
+ filter: options.filter,
1787
+ index: options.index ?? 0,
1788
+ fullyVisible: options.fullyVisible ?? true,
1789
+ maxSteps: options.maxRevealSteps ?? 6,
1790
+ timeoutMs: options.revealTimeoutMs ?? 2_500,
1791
+ });
1792
+ if (!revealed.ok)
1793
+ return revealed;
1794
+ return {
1795
+ ok: true,
1796
+ value: {
1797
+ x: revealed.value.target.center.x,
1798
+ y: revealed.value.target.center.y,
1799
+ target: revealed.value.target,
1800
+ revealAttempts: revealed.value.attempts,
1801
+ },
1802
+ };
1803
+ }
1804
+ function describeFormattedNode(node) {
1805
+ return `${node.role}${node.name ? ` ${JSON.stringify(node.name)}` : ''} (${node.id})`;
1806
+ }
1807
+ function compactNodeReference(node) {
1808
+ return {
1809
+ id: node.id,
1810
+ role: node.role,
1811
+ ...(node.name ? { name: node.name } : {}),
1812
+ };
1813
+ }
1611
1814
  function normalizeLookupKey(value) {
1612
1815
  return value.replace(/\s+/g, ' ').trim().toLowerCase();
1613
1816
  }
@@ -1817,12 +2020,72 @@ async function executeBatchAction(session, action, detail, includeSteps) {
1817
2020
  switch (action.type) {
1818
2021
  case 'click': {
1819
2022
  const before = sessionA11y(session);
1820
- const wait = await sendClick(session, action.x, action.y, action.timeoutMs);
2023
+ const resolved = await resolveClickLocation(session, {
2024
+ x: action.x,
2025
+ y: action.y,
2026
+ filter: {
2027
+ id: action.id,
2028
+ role: action.role,
2029
+ name: action.name,
2030
+ text: action.text,
2031
+ contextText: action.contextText,
2032
+ value: action.value,
2033
+ checked: action.checked,
2034
+ disabled: action.disabled,
2035
+ focused: action.focused,
2036
+ selected: action.selected,
2037
+ expanded: action.expanded,
2038
+ invalid: action.invalid,
2039
+ required: action.required,
2040
+ busy: action.busy,
2041
+ },
2042
+ index: action.index,
2043
+ fullyVisible: action.fullyVisible,
2044
+ maxRevealSteps: action.maxRevealSteps,
2045
+ revealTimeoutMs: action.revealTimeoutMs,
2046
+ });
2047
+ if (!resolved.ok)
2048
+ throw new Error(resolved.error);
2049
+ const wait = await sendClick(session, resolved.value.x, resolved.value.y, action.timeoutMs);
2050
+ const targetSummary = resolved.value.target
2051
+ ? `Clicked ${describeFormattedNode(resolved.value.target)} at (${resolved.value.x}, ${resolved.value.y}).`
2052
+ : `Clicked at (${resolved.value.x}, ${resolved.value.y}).`;
2053
+ let postWaitSummary;
2054
+ let postWaitCompact;
2055
+ if (action.waitFor) {
2056
+ const postWait = await waitForSemanticCondition(session, {
2057
+ filter: {
2058
+ id: action.waitFor.id,
2059
+ role: action.waitFor.role,
2060
+ name: action.waitFor.name,
2061
+ text: action.waitFor.text,
2062
+ contextText: action.waitFor.contextText,
2063
+ value: action.waitFor.value,
2064
+ checked: action.waitFor.checked,
2065
+ disabled: action.waitFor.disabled,
2066
+ focused: action.waitFor.focused,
2067
+ selected: action.waitFor.selected,
2068
+ expanded: action.waitFor.expanded,
2069
+ invalid: action.waitFor.invalid,
2070
+ required: action.waitFor.required,
2071
+ busy: action.waitFor.busy,
2072
+ },
2073
+ present: action.waitFor.present ?? true,
2074
+ timeoutMs: action.waitFor.timeoutMs ?? 10_000,
2075
+ });
2076
+ if (!postWait.ok) {
2077
+ throw new Error(`Post-click wait failed after ${targetSummary.toLowerCase()}\n${postWait.error}`);
2078
+ }
2079
+ postWaitSummary = `Post-click ${waitConditionSuccessLine(postWait.value)}`;
2080
+ postWaitCompact = waitConditionCompact(postWait.value);
2081
+ }
1821
2082
  return {
1822
- summary: `Clicked at (${action.x}, ${action.y}).\n${postActionSummary(session, before, wait, detail)}`,
2083
+ summary: [targetSummary, postActionSummary(session, before, wait, detail), postWaitSummary].filter(Boolean).join('\n'),
1823
2084
  compact: {
1824
- at: { x: action.x, y: action.y },
2085
+ at: { x: resolved.value.x, y: resolved.value.y },
2086
+ ...(resolved.value.target ? { target: compactNodeReference(resolved.value.target), revealSteps: resolved.value.revealAttempts ?? 0 } : {}),
1825
2087
  ...waitStatusPayload(wait),
2088
+ ...(postWaitCompact ? { postWait: postWaitCompact } : {}),
1826
2089
  },
1827
2090
  };
1828
2091
  }
@@ -1946,80 +2209,44 @@ async function executeBatchAction(session, action, detail, includeSteps) {
1946
2209
  case 'wait_for': {
1947
2210
  if (!session.tree || !session.layout)
1948
2211
  throw new Error('Not connected. Call geometra_connect first.');
1949
- const filter = {
1950
- id: action.id,
1951
- role: action.role,
1952
- name: action.name,
1953
- text: action.text,
1954
- contextText: action.contextText,
1955
- value: action.value,
1956
- checked: action.checked,
1957
- disabled: action.disabled,
1958
- focused: action.focused,
1959
- selected: action.selected,
1960
- expanded: action.expanded,
1961
- invalid: action.invalid,
1962
- required: action.required,
1963
- busy: action.busy,
1964
- };
1965
- if (!hasNodeFilter(filter)) {
1966
- throw new Error('wait_for step requires at least one filter');
1967
- }
1968
- const present = action.present ?? true;
1969
- const timeoutMs = action.timeoutMs ?? 10_000;
1970
- const startedAt = Date.now();
1971
- const matched = await waitForUiCondition(session, () => {
1972
- const a11y = sessionA11y(session);
1973
- if (!a11y)
1974
- return false;
1975
- const matches = findNodes(a11y, filter);
1976
- return present ? matches.length > 0 : matches.length === 0;
1977
- }, timeoutMs);
1978
- const elapsedMs = Date.now() - startedAt;
1979
- if (!matched) {
1980
- throw new Error(`Timed out after ${timeoutMs}ms waiting for ${present ? 'presence' : 'absence'} of ${JSON.stringify(filter)}`);
1981
- }
1982
- if (!present) {
1983
- return {
1984
- summary: `Condition satisfied after ${elapsedMs}ms: no nodes matched ${JSON.stringify(filter)}.`,
1985
- compact: {
1986
- present,
1987
- elapsedMs,
1988
- filter: compactFilterPayload(filter),
1989
- },
1990
- };
2212
+ const waited = await waitForSemanticCondition(session, {
2213
+ filter: {
2214
+ id: action.id,
2215
+ role: action.role,
2216
+ name: action.name,
2217
+ text: action.text,
2218
+ contextText: action.contextText,
2219
+ value: action.value,
2220
+ checked: action.checked,
2221
+ disabled: action.disabled,
2222
+ focused: action.focused,
2223
+ selected: action.selected,
2224
+ expanded: action.expanded,
2225
+ invalid: action.invalid,
2226
+ required: action.required,
2227
+ busy: action.busy,
2228
+ },
2229
+ present: action.present ?? true,
2230
+ timeoutMs: action.timeoutMs ?? 10_000,
2231
+ });
2232
+ if (!waited.ok) {
2233
+ throw new Error(waited.error);
1991
2234
  }
1992
- const after = sessionA11y(session);
1993
- if (!after) {
2235
+ if (!waited.value.present) {
1994
2236
  return {
1995
- summary: `Condition satisfied after ${elapsedMs}ms for ${JSON.stringify(filter)}.`,
1996
- compact: {
1997
- present,
1998
- elapsedMs,
1999
- filter: compactFilterPayload(filter),
2000
- },
2237
+ summary: waitConditionSuccessLine(waited.value),
2238
+ compact: waitConditionCompact(waited.value),
2001
2239
  };
2002
2240
  }
2003
- const matches = findNodes(after, filter);
2004
2241
  if (detail === 'verbose') {
2005
2242
  return {
2006
- summary: JSON.stringify(sortA11yNodes(matches).slice(0, 8).map(node => formatNode(node, after, after.bounds)), null, 2),
2007
- compact: {
2008
- present,
2009
- elapsedMs,
2010
- matchCount: matches.length,
2011
- filter: compactFilterPayload(filter),
2012
- },
2243
+ summary: JSON.stringify(waited.value.matches, null, 2),
2244
+ compact: waitConditionCompact(waited.value),
2013
2245
  };
2014
2246
  }
2015
2247
  return {
2016
- summary: `Condition satisfied after ${elapsedMs}ms with ${matches.length} matching node(s).`,
2017
- compact: {
2018
- present,
2019
- elapsedMs,
2020
- matchCount: matches.length,
2021
- filter: compactFilterPayload(filter),
2022
- },
2248
+ summary: waitConditionSuccessLine(waited.value),
2249
+ compact: waitConditionCompact(waited.value),
2023
2250
  };
2024
2251
  }
2025
2252
  case 'fill_fields': {