@geometra/mcp 1.19.18 → 1.19.20

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
package/dist/server.js CHANGED
@@ -48,6 +48,33 @@ function nodeFilterShape() {
48
48
  busy: z.boolean().optional().describe('Match busy / in-progress state'),
49
49
  };
50
50
  }
51
+ function waitConditionShape() {
52
+ return {
53
+ ...nodeFilterShape(),
54
+ present: z
55
+ .boolean()
56
+ .optional()
57
+ .default(true)
58
+ .describe('Wait until at least one node matches the filter (default true), or until no node matches (set false to wait out loading/parsing banners like “Parsing…” or “Parsing your resume”)'),
59
+ timeoutMs: z
60
+ .number()
61
+ .int()
62
+ .min(50)
63
+ .max(60_000)
64
+ .optional()
65
+ .default(10_000)
66
+ .describe('Maximum time to wait before returning an error (default 10000ms)'),
67
+ };
68
+ }
69
+ const GEOMETRA_QUERY_FILTER_REQUIRED_MESSAGE = 'Provide at least one filter (id, role, name, text, contextText, value, checked, disabled, focused, selected, expanded, invalid, required, or busy). ' +
70
+ 'This tool uses a strict schema: unknown keys are rejected. There is no textGone parameter — use text for substring matching. ' +
71
+ 'To wait until text disappears from the UI, use geometra_wait_for with text and present: false.';
72
+ const GEOMETRA_WAIT_FILTER_REQUIRED_MESSAGE = 'Provide at least one semantic filter (id, role, name, text, contextText, value, checked, disabled, focused, selected, expanded, invalid, required, or busy). ' +
73
+ 'This tool uses a strict schema: unknown keys are rejected. There is no textGone parameter — use text with a distinctive substring and present: false to wait until that text is gone ' +
74
+ '(common for “Parsing…”, “Parsing your resume”, or similar). Passing only present/timeoutMs is not enough without a filter.';
75
+ /** Strict input so unknown keys (e.g. textGone) fail parse; empty-filter checks happen in handlers / waitForSemanticCondition. */
76
+ const geometraQueryInputSchema = z.object(nodeFilterShape()).strict();
77
+ const geometraWaitForInputSchema = z.object(waitConditionShape()).strict();
51
78
  const timeoutMsInput = z.number().int().min(50).max(60_000).optional();
52
79
  const fillFieldSchema = z.discriminatedUnion('kind', [
53
80
  z.object({
@@ -98,8 +125,14 @@ const formValuesRecordSchema = z.record(z.string(), formValueSchema);
98
125
  const batchActionSchema = z.discriminatedUnion('type', [
99
126
  z.object({
100
127
  type: z.literal('click'),
101
- x: z.number(),
102
- y: z.number(),
128
+ x: z.number().optional().describe('X coordinate to click'),
129
+ y: z.number().optional().describe('Y coordinate to click'),
130
+ ...nodeFilterShape(),
131
+ index: z.number().int().min(0).optional().describe('Which matching semantic target to click after sorting top-to-bottom'),
132
+ fullyVisible: z.boolean().optional().describe('When clicking by semantic target, require full visibility before clicking (default true)'),
133
+ maxRevealSteps: z.number().int().min(1).max(12).optional().describe('Maximum reveal attempts before clicking a semantic target'),
134
+ revealTimeoutMs: timeoutMsInput.describe('Per-scroll wait timeout while revealing a semantic target'),
135
+ waitFor: z.object(waitConditionShape()).optional().describe('Optional semantic condition to wait for after the click'),
103
136
  timeoutMs: timeoutMsInput,
104
137
  }),
105
138
  z.object({
@@ -175,7 +208,7 @@ const batchActionSchema = z.discriminatedUnion('type', [
175
208
  }),
176
209
  ]);
177
210
  export function createServer() {
178
- const server = new McpServer({ name: 'geometra', version: '1.19.17' }, { capabilities: { tools: {} } });
211
+ const server = new McpServer({ name: 'geometra', version: '1.19.20' }, { capabilities: { tools: {} } });
179
212
  // ── connect ──────────────────────────────────────────────────
180
213
  server.tool('geometra_connect', `Connect to a Geometra WebSocket peer, or start \`geometra-proxy\` automatically for a normal web page.
181
214
 
@@ -183,7 +216,7 @@ export function createServer() {
183
216
 
184
217
  Use \`url\` (ws://…) only when a Geometra/native server or an already-running proxy is listening. If you accidentally pass \`https://…\` in \`url\`, MCP treats it like \`pageUrl\` and starts the proxy for you.
185
218
 
186
- Chromium opens **visible** by default unless \`headless: true\`. File upload / wheel / native \`<select>\` need the proxy path (\`pageUrl\` or ws to proxy).`, {
219
+ Chromium opens **visible** by default unless \`headless: true\`. File upload / wheel / native \`<select>\` need the proxy path (\`pageUrl\` or ws to proxy). Set \`returnForms: true\` and/or \`returnPageModel: true\` when you want a lower-turn startup response.`, {
187
220
  url: z
188
221
  .string()
189
222
  .optional()
@@ -218,6 +251,11 @@ Chromium opens **visible** by default unless \`headless: true\`. File upload / w
218
251
  .optional()
219
252
  .default(false)
220
253
  .describe('Include compact form schema discovery in the connect response so form flows can start in one turn.'),
254
+ returnPageModel: z
255
+ .boolean()
256
+ .optional()
257
+ .default(false)
258
+ .describe('Include geometra_page_model output in the connect response so exploration can start in one turn.'),
221
259
  formId: z.string().optional().describe('Optional form id filter when returnForms=true'),
222
260
  maxFields: z.number().int().min(1).max(120).optional().default(80).describe('Cap returned fields per form when returnForms=true'),
223
261
  onlyRequiredFields: z.boolean().optional().default(false).describe('Only include required fields when returnForms=true'),
@@ -226,6 +264,8 @@ Chromium opens **visible** by default unless \`headless: true\`. File upload / w
226
264
  includeContext: formSchemaContextInput(),
227
265
  sinceSchemaId: z.string().optional().describe('If the current schema matches this id, return changed=false without resending forms'),
228
266
  schemaFormat: formSchemaFormatInput(),
267
+ maxPrimaryActions: z.number().int().min(1).max(12).optional().default(6).describe('Cap top-level primary actions when returnPageModel=true'),
268
+ maxSectionsPerKind: z.number().int().min(1).max(16).optional().default(8).describe('Cap returned landmarks/forms/dialogs/lists per kind when returnPageModel=true'),
229
269
  detail: detailInput(),
230
270
  }, async (input) => {
231
271
  const normalized = normalizeConnectTarget({ url: input.url, pageUrl: input.pageUrl });
@@ -242,6 +282,10 @@ Chromium opens **visible** by default unless \`headless: true\`. File upload / w
242
282
  sinceSchemaId: input.sinceSchemaId,
243
283
  format: input.schemaFormat,
244
284
  };
285
+ const pageModelOptions = {
286
+ maxPrimaryActions: input.maxPrimaryActions,
287
+ maxSectionsPerKind: input.maxSectionsPerKind,
288
+ };
245
289
  try {
246
290
  if (target.kind === 'proxy') {
247
291
  const session = await connectThroughProxy({
@@ -261,7 +305,9 @@ Chromium opens **visible** by default unless \`headless: true\`. File upload / w
261
305
  autoCoercedFromUrl: target.autoCoercedFromUrl,
262
306
  detail: input.detail,
263
307
  returnForms: input.returnForms,
308
+ returnPageModel: input.returnPageModel,
264
309
  formSchema,
310
+ pageModelOptions,
265
311
  }), null, input.detail === 'verbose' ? 2 : undefined));
266
312
  }
267
313
  const session = await connect(target.wsUrl, {
@@ -277,7 +323,9 @@ Chromium opens **visible** by default unless \`headless: true\`. File upload / w
277
323
  autoCoercedFromUrl: false,
278
324
  detail: input.detail,
279
325
  returnForms: input.returnForms,
326
+ returnPageModel: input.returnPageModel,
280
327
  formSchema,
328
+ pageModelOptions,
281
329
  }), null, input.detail === 'verbose' ? 2 : undefined));
282
330
  }
283
331
  catch (e) {
@@ -287,7 +335,11 @@ Chromium opens **visible** by default unless \`headless: true\`. File upload / w
287
335
  // ── query ────────────────────────────────────────────────────
288
336
  server.tool('geometra_query', `Find elements in the current Geometra UI by stable id, role, name, text content, current value, or semantic state. Returns matching elements with their exact pixel bounds {x, y, width, height}, visible in-viewport bounds, an on-screen center point, visibility / scroll-reveal hints, role, name, value, state, and tree path.
289
337
 
290
- This is the Geometra equivalent of Playwright's locator — but instant, structured, and with no browser. Use the returned bounds to click elements or assert on layout.`, nodeFilterShape(), async ({ id, role, name, text, contextText, value, checked, disabled, focused, selected, expanded, invalid, required, busy }) => {
338
+ This is the Geometra equivalent of Playwright's locator — but instant, structured, and with no browser. Use the returned bounds to click elements or assert on layout.
339
+
340
+ Unknown parameter names are rejected (strict schema). To wait until visible text goes away (e.g. a parsing banner), use geometra_wait_for with that substring in text and present: false — there is no textGone field.`,
341
+ // SDK overload typings only list raw shapes; runtime accepts ZodObject via getZodSchemaObject().
342
+ geometraQueryInputSchema, async ({ id, role, name, text, contextText, value, checked, disabled, focused, selected, expanded, invalid, required, busy }) => {
291
343
  const session = getSession();
292
344
  if (!session?.tree || !session?.layout)
293
345
  return err('Not connected. Call geometra_connect first.');
@@ -311,7 +363,7 @@ This is the Geometra equivalent of Playwright's locator — but instant, structu
311
363
  busy,
312
364
  };
313
365
  if (!hasNodeFilter(filter))
314
- return err('Provide at least one query filter (id, role, name, text, contextText, value, or state)');
366
+ return err(GEOMETRA_QUERY_FILTER_REQUIRED_MESSAGE);
315
367
  const matches = findNodes(a11y, filter);
316
368
  if (matches.length === 0) {
317
369
  return ok(`No elements found matching ${JSON.stringify(filter)}`);
@@ -321,22 +373,11 @@ This is the Geometra equivalent of Playwright's locator — but instant, structu
321
373
  });
322
374
  server.tool('geometra_wait_for', `Wait for a semantic UI condition without guessing sleep durations. Use this for slow SPA transitions, resume parsing, custom validation alerts, disabled submit buttons, and value/state confirmation before submit.
323
375
 
324
- The filter matches the same fields as geometra_query. Set \`present: false\` to wait for something to disappear (for example an alert or a "Parsing" status).`, {
325
- ...nodeFilterShape(),
326
- present: z.boolean().optional().default(true).describe('Wait for a matching node to exist (default true) or disappear'),
327
- timeoutMs: z
328
- .number()
329
- .int()
330
- .min(50)
331
- .max(60_000)
332
- .optional()
333
- .default(10_000)
334
- .describe('Maximum time to wait before returning an error (default 10000ms)'),
335
- }, async ({ id, role, name, text, contextText, value, checked, disabled, focused, selected, expanded, invalid, required, busy, present, timeoutMs }) => {
376
+ The filter matches the same fields as geometra_query (strict schema — unknown keys error). Set \`present: false\` to wait until **no** node matches for example Ashby/Lever-style “Parsing your resume” or any “Parsing…” banner: \`{ "text": "Parsing", "present": false }\` (tune the substring to the site). Do not use a textGone parameter; use \`text\` + \`present: false\`.`, geometraWaitForInputSchema, async ({ id, role, name, text, contextText, value, checked, disabled, focused, selected, expanded, invalid, required, busy, present, timeoutMs }) => {
336
377
  const session = getSession();
337
378
  if (!session?.tree || !session?.layout)
338
379
  return err('Not connected. Call geometra_connect first.');
339
- const filter = {
380
+ const filterProbe = {
340
381
  id,
341
382
  role,
342
383
  name,
@@ -352,32 +393,19 @@ The filter matches the same fields as geometra_query. Set \`present: false\` to
352
393
  required,
353
394
  busy,
354
395
  };
355
- if (!hasNodeFilter(filter))
356
- return err('Provide at least one wait filter (id, role, name, text, contextText, value, or state)');
357
- const matchesCondition = () => {
358
- if (!session.tree || !session.layout)
359
- return false;
360
- const a11y = sessionA11y(session);
361
- if (!a11y)
362
- return false;
363
- const matches = findNodes(a11y, filter);
364
- return present ? matches.length > 0 : matches.length === 0;
365
- };
366
- const startedAt = Date.now();
367
- const matched = await waitForUiCondition(session, matchesCondition, timeoutMs);
368
- const elapsedMs = Date.now() - startedAt;
369
- if (!matched) {
370
- return err(`Timed out after ${timeoutMs}ms waiting for ${present ? 'presence' : 'absence'} of ${JSON.stringify(filter)}.\nCurrent UI:\n${compactSessionSummary(session)}`);
371
- }
372
- if (!present) {
373
- return ok(`Condition satisfied after ${elapsedMs}ms: no nodes matched ${JSON.stringify(filter)}.`);
396
+ if (!hasNodeFilter(filterProbe))
397
+ return err(GEOMETRA_WAIT_FILTER_REQUIRED_MESSAGE);
398
+ const waited = await waitForSemanticCondition(session, {
399
+ filter: filterProbe,
400
+ present: present ?? true,
401
+ timeoutMs: timeoutMs ?? 10_000,
402
+ });
403
+ if (!waited.ok)
404
+ return err(waited.error);
405
+ if (!waited.value.present) {
406
+ return ok(waitConditionSuccessLine(waited.value));
374
407
  }
375
- const after = sessionA11y(session);
376
- if (!after)
377
- return ok(`Condition satisfied after ${elapsedMs}ms for ${JSON.stringify(filter)}.`);
378
- const matches = findNodes(after, filter);
379
- const result = sortA11yNodes(matches).slice(0, 8).map(node => formatNode(node, after, after.bounds));
380
- return ok(JSON.stringify(result, null, 2));
408
+ return ok(JSON.stringify(waited.value.matches.slice(0, 8), null, 2));
381
409
  });
382
410
  server.tool('geometra_fill_fields', `Fill several labeled form fields in one MCP call. This is the preferred high-level primitive for long forms.
383
411
 
@@ -653,7 +681,7 @@ Pass \`valuesById\` with field ids from \`geometra_form_schema\` for the most st
653
681
  });
654
682
  server.tool('geometra_run_actions', `Execute several Geometra actions in one MCP round trip and return one consolidated result. This is the preferred path for long, multi-step form fills where one-tool-per-field would otherwise create too much chatter.
655
683
 
656
- Supported step types: \`click\`, \`type\`, \`key\`, \`upload_files\`, \`pick_listbox_option\`, \`select_option\`, \`set_checked\`, \`wheel\`, \`wait_for\`, and \`fill_fields\`.`, {
684
+ Supported step types: \`click\`, \`type\`, \`key\`, \`upload_files\`, \`pick_listbox_option\`, \`select_option\`, \`set_checked\`, \`wheel\`, \`wait_for\`, and \`fill_fields\`. \`click\` steps can also carry a nested \`waitFor\` condition.`, {
657
685
  actions: z.array(batchActionSchema).min(1).max(80).describe('Ordered high-level action steps to run sequentially'),
658
686
  stopOnError: z.boolean().optional().default(true).describe('Stop at the first failing step (default true)'),
659
687
  includeSteps: z
@@ -832,10 +860,6 @@ Use the same filters as geometra_query, plus an optional match index when repeat
832
860
  const session = getSession();
833
861
  if (!session)
834
862
  return err('Not connected. Call geometra_connect first.');
835
- const matchIndex = index ?? 0;
836
- const requireFullyVisible = fullyVisible ?? true;
837
- const revealSteps = maxSteps ?? 6;
838
- const waitTimeout = timeoutMs ?? 2_500;
839
863
  const filter = {
840
864
  id,
841
865
  role,
@@ -854,55 +878,40 @@ Use the same filters as geometra_query, plus an optional match index when repeat
854
878
  };
855
879
  if (!hasNodeFilter(filter))
856
880
  return err('Provide at least one reveal filter (id, role, name, text, contextText, value, or state)');
857
- let attempts = 0;
858
- while (attempts <= revealSteps) {
859
- const a11y = sessionA11y(session);
860
- if (!a11y)
861
- return err('No UI tree available to reveal from');
862
- const matches = sortA11yNodes(findNodes(a11y, filter));
863
- if (matches.length === 0) {
864
- return err(`No elements found matching ${JSON.stringify(filter)}`);
865
- }
866
- if (matchIndex >= matches.length) {
867
- return err(`Requested reveal index ${matchIndex} but only ${matches.length} matching element(s) were found`);
868
- }
869
- const target = matches[matchIndex];
870
- const formatted = formatNode(target, a11y, a11y.bounds);
871
- const visible = requireFullyVisible ? formatted.visibility.fullyVisible : formatted.visibility.intersectsViewport;
872
- if (visible) {
873
- return ok(JSON.stringify({
874
- revealed: true,
875
- attempts,
876
- target: formatted,
877
- }, null, 2));
878
- }
879
- if (attempts === revealSteps) {
880
- return err(JSON.stringify({
881
- revealed: false,
882
- attempts,
883
- target: formatted,
884
- }, null, 2));
885
- }
886
- const deltaX = clamp(formatted.scrollHint.revealDeltaX, -Math.round(a11y.bounds.width * 0.75), Math.round(a11y.bounds.width * 0.75));
887
- let deltaY = clamp(formatted.scrollHint.revealDeltaY, -Math.round(a11y.bounds.height * 0.85), Math.round(a11y.bounds.height * 0.85));
888
- if (deltaY === 0 && !formatted.visibility.fullyVisible) {
889
- deltaY = formatted.visibility.offscreenAbove ? -Math.round(a11y.bounds.height * 0.4) : Math.round(a11y.bounds.height * 0.4);
890
- }
891
- await sendWheel(session, deltaY, {
892
- deltaX,
893
- x: formatted.center.x,
894
- y: formatted.center.y,
895
- }, waitTimeout);
896
- attempts++;
897
- }
898
- return err(`Failed to reveal ${JSON.stringify(filter)}`);
881
+ const revealed = await revealSemanticTarget(session, {
882
+ filter,
883
+ index: index ?? 0,
884
+ fullyVisible: fullyVisible ?? true,
885
+ maxSteps: maxSteps ?? 6,
886
+ timeoutMs: timeoutMs ?? 2_500,
887
+ });
888
+ if (!revealed.ok)
889
+ return err(revealed.error);
890
+ return ok(JSON.stringify({
891
+ revealed: true,
892
+ attempts: revealed.value.attempts,
893
+ target: revealed.value.target,
894
+ }, null, 2));
899
895
  });
900
896
  // ── click ────────────────────────────────────────────────────
901
- server.tool('geometra_click', `Click an element in the Geometra UI. Provide either the element's bounds (from geometra_query) or raw x,y coordinates. The click is dispatched server-side via the geometry protocol — no browser, no simulated DOM events.
897
+ server.tool('geometra_click', `Click an element in the Geometra UI. Provide either raw x,y coordinates or a semantic target (\`id\`, \`role\`, \`name\`, \`text\`, \`contextText\`, \`value\`, or state filters). You can also attach \`waitFor\` to block on the post-click semantic state in the same call. The click is dispatched server-side via the geometry protocol — no browser, no simulated DOM events.
902
898
 
903
899
  After clicking, returns a compact semantic delta when possible (dialogs/forms/lists/nodes changed). If nothing meaningful changed, returns a short current-UI overview.`, {
904
- x: z.number().describe('X coordinate to click (use center of element bounds from geometra_query)'),
905
- y: z.number().describe('Y coordinate to click'),
900
+ x: z.number().optional().describe('X coordinate to click (use center of element bounds from geometra_query)'),
901
+ y: z.number().optional().describe('Y coordinate to click'),
902
+ ...nodeFilterShape(),
903
+ index: z.number().int().min(0).optional().default(0).describe('Which matching semantic target to click after sorting top-to-bottom'),
904
+ fullyVisible: z.boolean().optional().default(true).describe('When clicking by semantic target, require full visibility before clicking (default true)'),
905
+ maxRevealSteps: z.number().int().min(1).max(12).optional().default(6).describe('Maximum reveal attempts before clicking a semantic target'),
906
+ revealTimeoutMs: z
907
+ .number()
908
+ .int()
909
+ .min(50)
910
+ .max(60_000)
911
+ .optional()
912
+ .default(2_500)
913
+ .describe('Per-scroll wait timeout while revealing a semantic target (default 2500ms)'),
914
+ waitFor: z.object(waitConditionShape()).optional().describe('Optional semantic condition to wait for after the click'),
906
915
  timeoutMs: z
907
916
  .number()
908
917
  .int()
@@ -911,14 +920,69 @@ After clicking, returns a compact semantic delta when possible (dialogs/forms/li
911
920
  .optional()
912
921
  .describe('Optional action wait timeout (use a longer value for slow submits or route transitions)'),
913
922
  detail: detailInput(),
914
- }, async ({ x, y, timeoutMs, detail }) => {
923
+ }, async ({ x, y, id, role, name, text, contextText, value, checked, disabled, focused, selected, expanded, invalid, required, busy, index, fullyVisible, maxRevealSteps, revealTimeoutMs, waitFor, timeoutMs, detail }) => {
915
924
  const session = getSession();
916
925
  if (!session)
917
926
  return err('Not connected. Call geometra_connect first.');
918
927
  const before = sessionA11y(session);
919
- const wait = await sendClick(session, x, y, timeoutMs);
928
+ const resolved = await resolveClickLocation(session, {
929
+ x,
930
+ y,
931
+ filter: {
932
+ id,
933
+ role,
934
+ name,
935
+ text,
936
+ contextText,
937
+ value,
938
+ checked,
939
+ disabled,
940
+ focused,
941
+ selected,
942
+ expanded,
943
+ invalid,
944
+ required,
945
+ busy,
946
+ },
947
+ index,
948
+ fullyVisible,
949
+ maxRevealSteps,
950
+ revealTimeoutMs,
951
+ });
952
+ if (!resolved.ok)
953
+ return err(resolved.error);
954
+ const wait = await sendClick(session, resolved.value.x, resolved.value.y, timeoutMs);
920
955
  const summary = postActionSummary(session, before, wait, detail);
921
- return ok(`Clicked at (${x}, ${y}).\n${summary}`);
956
+ const clickLine = !resolved.value.target
957
+ ? `Clicked at (${resolved.value.x}, ${resolved.value.y}).`
958
+ : `Clicked ${describeFormattedNode(resolved.value.target)} at (${resolved.value.x}, ${resolved.value.y})${resolved.value.revealAttempts && resolved.value.revealAttempts > 0 ? ` after ${resolved.value.revealAttempts} reveal step${resolved.value.revealAttempts === 1 ? '' : 's'}` : ''}.`;
959
+ const lines = [clickLine, summary];
960
+ if (waitFor) {
961
+ const postWait = await waitForSemanticCondition(session, {
962
+ filter: {
963
+ id: waitFor.id,
964
+ role: waitFor.role,
965
+ name: waitFor.name,
966
+ text: waitFor.text,
967
+ contextText: waitFor.contextText,
968
+ value: waitFor.value,
969
+ checked: waitFor.checked,
970
+ disabled: waitFor.disabled,
971
+ focused: waitFor.focused,
972
+ selected: waitFor.selected,
973
+ expanded: waitFor.expanded,
974
+ invalid: waitFor.invalid,
975
+ required: waitFor.required,
976
+ busy: waitFor.busy,
977
+ },
978
+ present: waitFor.present ?? true,
979
+ timeoutMs: waitFor.timeoutMs ?? 10_000,
980
+ });
981
+ if (!postWait.ok)
982
+ return err([...lines, postWait.error].join('\n'));
983
+ lines.push(`Post-click ${waitConditionSuccessLine(postWait.value)}`);
984
+ }
985
+ return ok(lines.filter(Boolean).join('\n'));
922
986
  });
923
987
  // ── type ─────────────────────────────────────────────────────
924
988
  server.tool('geometra_type', `Type text into the currently focused element. First click a textbox/input with geometra_click to focus it, then use this to type.
@@ -1188,7 +1252,7 @@ For a token-efficient semantic view, use geometra_snapshot (default compact). Fo
1188
1252
  return ok(JSON.stringify(session.layout, null, 2));
1189
1253
  });
1190
1254
  // ── disconnect ───────────────────────────────────────────────
1191
- server.tool('geometra_disconnect', `Disconnect from the Geometra server. Proxy-backed sessions keep the browser alive by default so the next geometra_connect can reuse it quickly; pass closeBrowser=true to fully tear down the proxy/browser.`, {
1255
+ server.tool('geometra_disconnect', `Disconnect from the Geometra server. Proxy-backed sessions keep compatible browsers alive by default so the next geometra_connect can reuse them quickly; pass closeBrowser=true to fully tear down the warm proxy/browser pool.`, {
1192
1256
  closeBrowser: z.boolean().optional().default(false).describe('Fully close the spawned proxy/browser instead of keeping it warm for reuse'),
1193
1257
  }, async ({ closeBrowser }) => {
1194
1258
  disconnect({ closeProxy: closeBrowser });
@@ -1338,12 +1402,23 @@ async function stabilizeInlineFormSchemas(session, options, opts) {
1338
1402
  }
1339
1403
  function connectResponsePayload(session, opts) {
1340
1404
  const payload = connectPayload(session, opts);
1341
- if (!opts.returnForms)
1405
+ if (!opts.returnForms && !opts.returnPageModel)
1342
1406
  return payload;
1343
- return {
1344
- ...payload,
1345
- formSchema: formSchemaResponsePayload(session, opts.formSchema ?? {}),
1346
- };
1407
+ const nextPayload = { ...payload };
1408
+ if (opts.returnForms) {
1409
+ nextPayload.formSchema = formSchemaResponsePayload(session, opts.formSchema ?? {});
1410
+ }
1411
+ if (opts.returnPageModel) {
1412
+ nextPayload.pageModel = pageModelResponsePayload(session, opts.pageModelOptions);
1413
+ }
1414
+ return nextPayload;
1415
+ }
1416
+ function pageModelResponsePayload(session, options) {
1417
+ const a11y = sessionA11y(session);
1418
+ if (!a11y) {
1419
+ return { available: false };
1420
+ }
1421
+ return buildPageModel(a11y, options);
1347
1422
  }
1348
1423
  async function ensureToolSession(target, missingConnectionMessage = 'Not connected. Call geometra_connect first.') {
1349
1424
  if (!target.url && !target.pageUrl) {
@@ -1608,6 +1683,154 @@ function waitStatusPayload(wait) {
1608
1683
  function compactFilterPayload(filter) {
1609
1684
  return Object.fromEntries(Object.entries(filter).filter(([, value]) => value !== undefined));
1610
1685
  }
1686
+ async function waitForSemanticCondition(session, options) {
1687
+ if (!hasNodeFilter(options.filter)) {
1688
+ return { ok: false, error: GEOMETRA_WAIT_FILTER_REQUIRED_MESSAGE };
1689
+ }
1690
+ const startedAt = Date.now();
1691
+ const matched = await waitForUiCondition(session, () => {
1692
+ const a11y = sessionA11y(session);
1693
+ if (!a11y)
1694
+ return false;
1695
+ const matches = findNodes(a11y, options.filter);
1696
+ return options.present ? matches.length > 0 : matches.length === 0;
1697
+ }, options.timeoutMs);
1698
+ const elapsedMs = Date.now() - startedAt;
1699
+ if (!matched) {
1700
+ return {
1701
+ ok: false,
1702
+ error: `Timed out after ${options.timeoutMs}ms waiting for ${options.present ? 'presence' : 'absence'} of ${JSON.stringify(options.filter)}.\nCurrent UI:\n${compactSessionSummary(session)}`,
1703
+ };
1704
+ }
1705
+ const after = sessionA11y(session);
1706
+ const matches = options.present && after
1707
+ ? sortA11yNodes(findNodes(after, options.filter)).slice(0, 8).map(node => formatNode(node, after, after.bounds))
1708
+ : [];
1709
+ return {
1710
+ ok: true,
1711
+ value: {
1712
+ filter: options.filter,
1713
+ present: options.present,
1714
+ elapsedMs,
1715
+ matchCount: matches.length,
1716
+ matches,
1717
+ },
1718
+ };
1719
+ }
1720
+ function waitConditionSuccessLine(result) {
1721
+ if (!result.present) {
1722
+ return `condition satisfied after ${result.elapsedMs}ms: no nodes matched ${JSON.stringify(result.filter)}.`;
1723
+ }
1724
+ return `condition satisfied after ${result.elapsedMs}ms with ${result.matchCount} matching node(s).`;
1725
+ }
1726
+ function waitConditionCompact(result) {
1727
+ return {
1728
+ present: result.present,
1729
+ elapsedMs: result.elapsedMs,
1730
+ filter: compactFilterPayload(result.filter),
1731
+ ...(result.present ? { matchCount: result.matchCount } : {}),
1732
+ };
1733
+ }
1734
+ async function revealSemanticTarget(session, options) {
1735
+ let attempts = 0;
1736
+ while (attempts <= options.maxSteps) {
1737
+ const a11y = sessionA11y(session);
1738
+ if (!a11y)
1739
+ return { ok: false, error: 'No UI tree available to reveal from' };
1740
+ const matches = sortA11yNodes(findNodes(a11y, options.filter));
1741
+ if (matches.length === 0) {
1742
+ return { ok: false, error: `No elements found matching ${JSON.stringify(options.filter)}` };
1743
+ }
1744
+ if (options.index >= matches.length) {
1745
+ return {
1746
+ ok: false,
1747
+ error: `Requested reveal index ${options.index} but only ${matches.length} matching element(s) were found`,
1748
+ };
1749
+ }
1750
+ const formatted = formatNode(matches[options.index], a11y, a11y.bounds);
1751
+ const visible = options.fullyVisible ? formatted.visibility.fullyVisible : formatted.visibility.intersectsViewport;
1752
+ if (visible) {
1753
+ return {
1754
+ ok: true,
1755
+ value: {
1756
+ attempts,
1757
+ target: formatted,
1758
+ },
1759
+ };
1760
+ }
1761
+ if (attempts === options.maxSteps) {
1762
+ return {
1763
+ ok: false,
1764
+ error: JSON.stringify({
1765
+ revealed: false,
1766
+ attempts,
1767
+ target: formatted,
1768
+ }, null, 2),
1769
+ };
1770
+ }
1771
+ const deltaX = clamp(formatted.scrollHint.revealDeltaX, -Math.round(a11y.bounds.width * 0.75), Math.round(a11y.bounds.width * 0.75));
1772
+ let deltaY = clamp(formatted.scrollHint.revealDeltaY, -Math.round(a11y.bounds.height * 0.85), Math.round(a11y.bounds.height * 0.85));
1773
+ if (deltaY === 0 && !formatted.visibility.fullyVisible) {
1774
+ deltaY = formatted.visibility.offscreenAbove ? -Math.round(a11y.bounds.height * 0.4) : Math.round(a11y.bounds.height * 0.4);
1775
+ }
1776
+ await sendWheel(session, deltaY, {
1777
+ deltaX,
1778
+ x: formatted.center.x,
1779
+ y: formatted.center.y,
1780
+ }, options.timeoutMs);
1781
+ attempts++;
1782
+ }
1783
+ return { ok: false, error: `Failed to reveal ${JSON.stringify(options.filter)}` };
1784
+ }
1785
+ async function resolveClickLocation(session, options) {
1786
+ const hasExplicitCoordinates = options.x !== undefined || options.y !== undefined;
1787
+ if (hasExplicitCoordinates) {
1788
+ if (options.x === undefined || options.y === undefined) {
1789
+ return { ok: false, error: 'Provide both x and y when clicking by coordinates' };
1790
+ }
1791
+ return {
1792
+ ok: true,
1793
+ value: {
1794
+ x: options.x,
1795
+ y: options.y,
1796
+ },
1797
+ };
1798
+ }
1799
+ if (!hasNodeFilter(options.filter)) {
1800
+ return {
1801
+ ok: false,
1802
+ error: 'Provide x and y, or at least one semantic target filter (id, role, name, text, contextText, value, or state)',
1803
+ };
1804
+ }
1805
+ const revealed = await revealSemanticTarget(session, {
1806
+ filter: options.filter,
1807
+ index: options.index ?? 0,
1808
+ fullyVisible: options.fullyVisible ?? true,
1809
+ maxSteps: options.maxRevealSteps ?? 6,
1810
+ timeoutMs: options.revealTimeoutMs ?? 2_500,
1811
+ });
1812
+ if (!revealed.ok)
1813
+ return revealed;
1814
+ return {
1815
+ ok: true,
1816
+ value: {
1817
+ x: revealed.value.target.center.x,
1818
+ y: revealed.value.target.center.y,
1819
+ target: revealed.value.target,
1820
+ revealAttempts: revealed.value.attempts,
1821
+ },
1822
+ };
1823
+ }
1824
+ function describeFormattedNode(node) {
1825
+ return `${node.role}${node.name ? ` ${JSON.stringify(node.name)}` : ''} (${node.id})`;
1826
+ }
1827
+ function compactNodeReference(node) {
1828
+ return {
1829
+ id: node.id,
1830
+ role: node.role,
1831
+ ...(node.name ? { name: node.name } : {}),
1832
+ };
1833
+ }
1611
1834
  function normalizeLookupKey(value) {
1612
1835
  return value.replace(/\s+/g, ' ').trim().toLowerCase();
1613
1836
  }
@@ -1817,12 +2040,72 @@ async function executeBatchAction(session, action, detail, includeSteps) {
1817
2040
  switch (action.type) {
1818
2041
  case 'click': {
1819
2042
  const before = sessionA11y(session);
1820
- const wait = await sendClick(session, action.x, action.y, action.timeoutMs);
2043
+ const resolved = await resolveClickLocation(session, {
2044
+ x: action.x,
2045
+ y: action.y,
2046
+ filter: {
2047
+ id: action.id,
2048
+ role: action.role,
2049
+ name: action.name,
2050
+ text: action.text,
2051
+ contextText: action.contextText,
2052
+ value: action.value,
2053
+ checked: action.checked,
2054
+ disabled: action.disabled,
2055
+ focused: action.focused,
2056
+ selected: action.selected,
2057
+ expanded: action.expanded,
2058
+ invalid: action.invalid,
2059
+ required: action.required,
2060
+ busy: action.busy,
2061
+ },
2062
+ index: action.index,
2063
+ fullyVisible: action.fullyVisible,
2064
+ maxRevealSteps: action.maxRevealSteps,
2065
+ revealTimeoutMs: action.revealTimeoutMs,
2066
+ });
2067
+ if (!resolved.ok)
2068
+ throw new Error(resolved.error);
2069
+ const wait = await sendClick(session, resolved.value.x, resolved.value.y, action.timeoutMs);
2070
+ const targetSummary = resolved.value.target
2071
+ ? `Clicked ${describeFormattedNode(resolved.value.target)} at (${resolved.value.x}, ${resolved.value.y}).`
2072
+ : `Clicked at (${resolved.value.x}, ${resolved.value.y}).`;
2073
+ let postWaitSummary;
2074
+ let postWaitCompact;
2075
+ if (action.waitFor) {
2076
+ const postWait = await waitForSemanticCondition(session, {
2077
+ filter: {
2078
+ id: action.waitFor.id,
2079
+ role: action.waitFor.role,
2080
+ name: action.waitFor.name,
2081
+ text: action.waitFor.text,
2082
+ contextText: action.waitFor.contextText,
2083
+ value: action.waitFor.value,
2084
+ checked: action.waitFor.checked,
2085
+ disabled: action.waitFor.disabled,
2086
+ focused: action.waitFor.focused,
2087
+ selected: action.waitFor.selected,
2088
+ expanded: action.waitFor.expanded,
2089
+ invalid: action.waitFor.invalid,
2090
+ required: action.waitFor.required,
2091
+ busy: action.waitFor.busy,
2092
+ },
2093
+ present: action.waitFor.present ?? true,
2094
+ timeoutMs: action.waitFor.timeoutMs ?? 10_000,
2095
+ });
2096
+ if (!postWait.ok) {
2097
+ throw new Error(`Post-click wait failed after ${targetSummary.toLowerCase()}\n${postWait.error}`);
2098
+ }
2099
+ postWaitSummary = `Post-click ${waitConditionSuccessLine(postWait.value)}`;
2100
+ postWaitCompact = waitConditionCompact(postWait.value);
2101
+ }
1821
2102
  return {
1822
- summary: `Clicked at (${action.x}, ${action.y}).\n${postActionSummary(session, before, wait, detail)}`,
2103
+ summary: [targetSummary, postActionSummary(session, before, wait, detail), postWaitSummary].filter(Boolean).join('\n'),
1823
2104
  compact: {
1824
- at: { x: action.x, y: action.y },
2105
+ at: { x: resolved.value.x, y: resolved.value.y },
2106
+ ...(resolved.value.target ? { target: compactNodeReference(resolved.value.target), revealSteps: resolved.value.revealAttempts ?? 0 } : {}),
1825
2107
  ...waitStatusPayload(wait),
2108
+ ...(postWaitCompact ? { postWait: postWaitCompact } : {}),
1826
2109
  },
1827
2110
  };
1828
2111
  }
@@ -1946,80 +2229,44 @@ async function executeBatchAction(session, action, detail, includeSteps) {
1946
2229
  case 'wait_for': {
1947
2230
  if (!session.tree || !session.layout)
1948
2231
  throw new Error('Not connected. Call geometra_connect first.');
1949
- const filter = {
1950
- id: action.id,
1951
- role: action.role,
1952
- name: action.name,
1953
- text: action.text,
1954
- contextText: action.contextText,
1955
- value: action.value,
1956
- checked: action.checked,
1957
- disabled: action.disabled,
1958
- focused: action.focused,
1959
- selected: action.selected,
1960
- expanded: action.expanded,
1961
- invalid: action.invalid,
1962
- required: action.required,
1963
- busy: action.busy,
1964
- };
1965
- if (!hasNodeFilter(filter)) {
1966
- throw new Error('wait_for step requires at least one filter');
1967
- }
1968
- const present = action.present ?? true;
1969
- const timeoutMs = action.timeoutMs ?? 10_000;
1970
- const startedAt = Date.now();
1971
- const matched = await waitForUiCondition(session, () => {
1972
- const a11y = sessionA11y(session);
1973
- if (!a11y)
1974
- return false;
1975
- const matches = findNodes(a11y, filter);
1976
- return present ? matches.length > 0 : matches.length === 0;
1977
- }, timeoutMs);
1978
- const elapsedMs = Date.now() - startedAt;
1979
- if (!matched) {
1980
- throw new Error(`Timed out after ${timeoutMs}ms waiting for ${present ? 'presence' : 'absence'} of ${JSON.stringify(filter)}`);
1981
- }
1982
- if (!present) {
1983
- return {
1984
- summary: `Condition satisfied after ${elapsedMs}ms: no nodes matched ${JSON.stringify(filter)}.`,
1985
- compact: {
1986
- present,
1987
- elapsedMs,
1988
- filter: compactFilterPayload(filter),
1989
- },
1990
- };
2232
+ const waited = await waitForSemanticCondition(session, {
2233
+ filter: {
2234
+ id: action.id,
2235
+ role: action.role,
2236
+ name: action.name,
2237
+ text: action.text,
2238
+ contextText: action.contextText,
2239
+ value: action.value,
2240
+ checked: action.checked,
2241
+ disabled: action.disabled,
2242
+ focused: action.focused,
2243
+ selected: action.selected,
2244
+ expanded: action.expanded,
2245
+ invalid: action.invalid,
2246
+ required: action.required,
2247
+ busy: action.busy,
2248
+ },
2249
+ present: action.present ?? true,
2250
+ timeoutMs: action.timeoutMs ?? 10_000,
2251
+ });
2252
+ if (!waited.ok) {
2253
+ throw new Error(waited.error);
1991
2254
  }
1992
- const after = sessionA11y(session);
1993
- if (!after) {
2255
+ if (!waited.value.present) {
1994
2256
  return {
1995
- summary: `Condition satisfied after ${elapsedMs}ms for ${JSON.stringify(filter)}.`,
1996
- compact: {
1997
- present,
1998
- elapsedMs,
1999
- filter: compactFilterPayload(filter),
2000
- },
2257
+ summary: waitConditionSuccessLine(waited.value),
2258
+ compact: waitConditionCompact(waited.value),
2001
2259
  };
2002
2260
  }
2003
- const matches = findNodes(after, filter);
2004
2261
  if (detail === 'verbose') {
2005
2262
  return {
2006
- summary: JSON.stringify(sortA11yNodes(matches).slice(0, 8).map(node => formatNode(node, after, after.bounds)), null, 2),
2007
- compact: {
2008
- present,
2009
- elapsedMs,
2010
- matchCount: matches.length,
2011
- filter: compactFilterPayload(filter),
2012
- },
2263
+ summary: JSON.stringify(waited.value.matches, null, 2),
2264
+ compact: waitConditionCompact(waited.value),
2013
2265
  };
2014
2266
  }
2015
2267
  return {
2016
- summary: `Condition satisfied after ${elapsedMs}ms with ${matches.length} matching node(s).`,
2017
- compact: {
2018
- present,
2019
- elapsedMs,
2020
- matchCount: matches.length,
2021
- filter: compactFilterPayload(filter),
2022
- },
2268
+ summary: waitConditionSuccessLine(waited.value),
2269
+ compact: waitConditionCompact(waited.value),
2023
2270
  };
2024
2271
  }
2025
2272
  case 'fill_fields': {