@geometra/mcp 1.19.22 → 1.19.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/server.js CHANGED
@@ -1,8 +1,9 @@
1
1
  import { createHash } from 'node:crypto';
2
+ import { performance } from 'node:perf_hooks';
2
3
  import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
3
4
  import { z } from 'zod';
4
5
  import { formatConnectFailureMessage, isHttpUrl, normalizeConnectTarget } from './connect-utils.js';
5
- import { connect, connectThroughProxy, disconnect, getSession, sendClick, sendFillFields, sendType, sendKey, sendFileUpload, sendFieldText, sendFieldChoice, sendListboxPick, sendSelectOption, sendSetChecked, sendWheel, buildA11yTree, buildCompactUiIndex, buildFormRequiredSnapshot, buildPageModel, buildFormSchemas, expandPageSection, buildUiDelta, hasUiDelta, nodeIdForPath, summarizeCompactIndex, summarizePageModel, summarizeUiDelta, waitForUiCondition, } from './session.js';
6
+ import { connect, connectThroughProxy, disconnect, getSession, prewarmProxy, sendClick, sendFillFields, sendType, sendKey, sendFileUpload, sendFieldText, sendFieldChoice, sendListboxPick, sendSelectOption, sendSetChecked, sendWheel, buildA11yTree, buildCompactUiIndex, buildFormRequiredSnapshot, buildPageModel, buildFormSchemas, expandPageSection, buildUiDelta, hasUiDelta, nodeIdForPath, nodeContextForNode, summarizeCompactIndex, summarizePageModel, summarizeUiDelta, waitForUiCondition, } from './session.js';
6
7
  function checkedStateInput() {
7
8
  return z
8
9
  .union([z.boolean(), z.literal('mixed')])
@@ -11,10 +12,10 @@ function checkedStateInput() {
11
12
  }
12
13
  function detailInput() {
13
14
  return z
14
- .enum(['minimal', 'verbose'])
15
+ .enum(['terse', 'minimal', 'verbose'])
15
16
  .optional()
16
17
  .default('minimal')
17
- .describe('`minimal` (default) returns terse action summaries. Use `verbose` for a fuller current-UI fallback.');
18
+ .describe('`terse` returns compact machine-friendly JSON. `minimal` (default) returns short human-readable summaries. `verbose` adds fuller fallback context.');
18
19
  }
19
20
  function formSchemaFormatInput() {
20
21
  return z
@@ -23,6 +24,13 @@ function formSchemaFormatInput() {
23
24
  .default('compact')
24
25
  .describe('`compact` (default) returns readable JSON fields. Use `packed` for the smallest schema payload with short keys.');
25
26
  }
27
+ function pageModelModeInput() {
28
+ return z
29
+ .enum(['inline', 'deferred'])
30
+ .optional()
31
+ .default('inline')
32
+ .describe('When returnPageModel=true, `inline` includes the full page model in the connect response. `deferred` returns connect as soon as the transport is ready and lets the caller fetch geometra_page_model separately.');
33
+ }
26
34
  function formSchemaContextInput() {
27
35
  return z
28
36
  .enum(['auto', 'always', 'none'])
@@ -37,6 +45,9 @@ function nodeFilterShape() {
37
45
  name: z.string().optional().describe('Accessible name to match (exact or substring)'),
38
46
  text: z.string().optional().describe('Text content to search for (substring match)'),
39
47
  contextText: z.string().optional().describe('Ancestor / prompt text to disambiguate repeated controls with the same visible name'),
48
+ promptText: z.string().optional().describe('Nearby question/prompt text to disambiguate repeated controls or actions'),
49
+ sectionText: z.string().optional().describe('Containing section/landmark/form/dialog text to disambiguate repeated controls or actions'),
50
+ itemText: z.string().optional().describe('Nearby card/row/item label to disambiguate repeated actions like “Add to cart” or “Open incident”'),
40
51
  value: z.string().optional().describe('Displayed / current field value to match (substring match)'),
41
52
  checked: checkedStateInput(),
42
53
  disabled: z.boolean().optional().describe('Match disabled state'),
@@ -66,15 +77,22 @@ function waitConditionShape() {
66
77
  .describe('Maximum time to wait before returning an error (default 10000ms)'),
67
78
  };
68
79
  }
69
- const GEOMETRA_QUERY_FILTER_REQUIRED_MESSAGE = 'Provide at least one filter (id, role, name, text, contextText, value, checked, disabled, focused, selected, expanded, invalid, required, or busy). ' +
80
+ const GEOMETRA_QUERY_FILTER_REQUIRED_MESSAGE = 'Provide at least one filter (id, role, name, text, contextText, promptText, sectionText, itemText, value, checked, disabled, focused, selected, expanded, invalid, required, or busy). ' +
70
81
  'This tool uses a strict schema: unknown keys are rejected. There is no textGone parameter — use text for substring matching. ' +
71
82
  'To wait until text disappears from the UI, use geometra_wait_for with text and present: false, or geometra_wait_for_resume_parse for typical resume “Parsing…” banners.';
72
- const GEOMETRA_WAIT_FILTER_REQUIRED_MESSAGE = 'Provide at least one semantic filter (id, role, name, text, contextText, value, checked, disabled, focused, selected, expanded, invalid, required, or busy). ' +
83
+ const GEOMETRA_WAIT_FILTER_REQUIRED_MESSAGE = 'Provide at least one semantic filter (id, role, name, text, contextText, promptText, sectionText, itemText, value, checked, disabled, focused, selected, expanded, invalid, required, or busy). ' +
73
84
  'This tool uses a strict schema: unknown keys are rejected. There is no textGone parameter — use text with a distinctive substring and present: false to wait until that text is gone ' +
74
85
  '(common for “Parsing…”, “Parsing your resume”, or similar). Passing only present/timeoutMs is not enough without a filter.';
75
86
  /** Strict input so unknown keys (e.g. textGone) fail parse; empty-filter checks happen in handlers / waitForSemanticCondition. */
76
- const geometraQueryInputSchema = z.object(nodeFilterShape()).strict();
77
- const geometraWaitForInputSchema = z.object(waitConditionShape()).strict();
87
+ const geometraQueryInputSchema = z.object({
88
+ ...nodeFilterShape(),
89
+ maxResults: z.number().int().min(1).max(50).optional().describe('Optional cap on returned matches; terse mode defaults to 8'),
90
+ detail: detailInput(),
91
+ }).strict();
92
+ const geometraWaitForInputSchema = z.object({
93
+ ...waitConditionShape(),
94
+ detail: detailInput(),
95
+ }).strict();
78
96
  /** Same upper bound as geometra_wait_for; resume uploads often need the full minute. */
79
97
  const geometraWaitForResumeParseInputSchema = z
80
98
  .object({
@@ -271,7 +289,7 @@ export function createServer() {
271
289
 
272
290
  Use \`url\` (ws://…) only when a Geometra/native server or an already-running proxy is listening. If you accidentally pass \`https://…\` in \`url\`, MCP treats it like \`pageUrl\` and starts the proxy for you.
273
291
 
274
- Chromium opens **visible** by default unless \`headless: true\`. File upload / wheel / native \`<select>\` need the proxy path (\`pageUrl\` or ws to proxy). Set \`returnForms: true\` and/or \`returnPageModel: true\` when you want a lower-turn startup response.`, {
292
+ Chromium opens **visible** by default unless \`headless: true\`. File upload / wheel / native \`<select>\` need the proxy path (\`pageUrl\` or ws to proxy). Set \`returnForms: true\` and/or \`returnPageModel: true\` when you want a lower-turn startup response. When connect first-response latency matters more than inlining the page model, pair \`returnPageModel: true\` with \`pageModelMode: "deferred"\` and call \`geometra_page_model\` next.`, {
275
293
  url: z
276
294
  .string()
277
295
  .optional()
@@ -311,6 +329,7 @@ Chromium opens **visible** by default unless \`headless: true\`. File upload / w
311
329
  .optional()
312
330
  .default(false)
313
331
  .describe('Include geometra_page_model output in the connect response so exploration can start in one turn.'),
332
+ pageModelMode: pageModelModeInput(),
314
333
  formId: z.string().optional().describe('Optional form id filter when returnForms=true'),
315
334
  maxFields: z.number().int().min(1).max(120).optional().default(80).describe('Cap returned fields per form when returnForms=true'),
316
335
  onlyRequiredFields: z.boolean().optional().default(false).describe('Only include required fields when returnForms=true'),
@@ -341,6 +360,10 @@ Chromium opens **visible** by default unless \`headless: true\`. File upload / w
341
360
  maxPrimaryActions: input.maxPrimaryActions,
342
361
  maxSectionsPerKind: input.maxSectionsPerKind,
343
362
  };
363
+ const deferInlinePageModel = input.returnPageModel
364
+ && input.pageModelMode === 'deferred'
365
+ && !input.returnForms
366
+ && input.detail !== 'verbose';
344
367
  try {
345
368
  if (target.kind === 'proxy') {
346
369
  const session = await connectThroughProxy({
@@ -350,6 +373,8 @@ Chromium opens **visible** by default unless \`headless: true\`. File upload / w
350
373
  width: input.width,
351
374
  height: input.height,
352
375
  slowMo: input.slowMo,
376
+ awaitInitialFrame: deferInlinePageModel ? false : undefined,
377
+ eagerInitialExtract: deferInlinePageModel ? true : undefined,
353
378
  });
354
379
  if (input.returnForms) {
355
380
  await stabilizeInlineFormSchemas(session, formSchema);
@@ -361,6 +386,7 @@ Chromium opens **visible** by default unless \`headless: true\`. File upload / w
361
386
  detail: input.detail,
362
387
  returnForms: input.returnForms,
363
388
  returnPageModel: input.returnPageModel,
389
+ pageModelMode: input.pageModelMode,
364
390
  formSchema,
365
391
  pageModelOptions,
366
392
  }), null, input.detail === 'verbose' ? 2 : undefined));
@@ -368,6 +394,7 @@ Chromium opens **visible** by default unless \`headless: true\`. File upload / w
368
394
  const session = await connect(target.wsUrl, {
369
395
  width: input.width,
370
396
  height: input.height,
397
+ awaitInitialFrame: deferInlinePageModel ? false : undefined,
371
398
  });
372
399
  if (input.returnForms) {
373
400
  await stabilizeInlineFormSchemas(session, formSchema);
@@ -379,6 +406,7 @@ Chromium opens **visible** by default unless \`headless: true\`. File upload / w
379
406
  detail: input.detail,
380
407
  returnForms: input.returnForms,
381
408
  returnPageModel: input.returnPageModel,
409
+ pageModelMode: input.pageModelMode,
382
410
  formSchema,
383
411
  pageModelOptions,
384
412
  }), null, input.detail === 'verbose' ? 2 : undefined));
@@ -387,6 +415,43 @@ Chromium opens **visible** by default unless \`headless: true\`. File upload / w
387
415
  return err(`Failed to connect: ${formatConnectFailureMessage(e, target)}`);
388
416
  }
389
417
  });
418
+ // ── prepare browser ──────────────────────────────────────────
419
+ server.tool('geometra_prepare_browser', `Pre-launch and pre-navigate a reusable geometra-proxy browser for a normal web page without creating an active MCP session.
420
+
421
+ Use this when you can prepare ahead of the user-facing task so the next \`geometra_connect\` or one-call \`geometra_run_actions\` on the same \`pageUrl\` / viewport / headless settings skips the cold browser launch.`, {
422
+ pageUrl: z
423
+ .string()
424
+ .url()
425
+ .refine(isHttpUrl, 'pageUrl must use http:// or https://')
426
+ .describe('HTTP(S) page to open and keep warm for the next proxy-backed task.'),
427
+ port: z
428
+ .number()
429
+ .int()
430
+ .positive()
431
+ .max(65535)
432
+ .optional()
433
+ .describe('Preferred local port for spawned proxy (default: ephemeral OS-assigned port).'),
434
+ headless: z
435
+ .boolean()
436
+ .optional()
437
+ .describe('Run Chromium headless (default false = visible window).'),
438
+ width: z.number().int().positive().optional().describe('Viewport width for the warmed browser.'),
439
+ height: z.number().int().positive().optional().describe('Viewport height for the warmed browser.'),
440
+ slowMo: z
441
+ .number()
442
+ .int()
443
+ .nonnegative()
444
+ .optional()
445
+ .describe('Playwright slowMo (ms) for the warmed browser.'),
446
+ }, async ({ pageUrl, port, headless, width, height, slowMo }) => {
447
+ try {
448
+ const prepared = await prewarmProxy({ pageUrl, port, headless, width, height, slowMo });
449
+ return ok(JSON.stringify(prepared));
450
+ }
451
+ catch (e) {
452
+ return err(`Failed to prepare browser: ${e instanceof Error ? e.message : String(e)}`);
453
+ }
454
+ });
390
455
  // ── query ────────────────────────────────────────────────────
391
456
  server.registerTool('geometra_query', {
392
457
  description: `Find elements in the current Geometra UI by stable id, role, name, text content, current value, or semantic state. Returns matching elements with their exact pixel bounds {x, y, width, height}, visible in-viewport bounds, an on-screen center point, visibility / scroll-reveal hints, role, name, value, state, and tree path.
@@ -395,11 +460,11 @@ This is the Geometra equivalent of Playwright's locator — but instant, structu
395
460
 
396
461
  Unknown parameter names are rejected (strict schema). To wait until visible text goes away (e.g. a parsing banner), use geometra_wait_for with that substring in text and present: false — there is no textGone field.`,
397
462
  inputSchema: geometraQueryInputSchema,
398
- }, async ({ id, role, name, text, contextText, value, checked, disabled, focused, selected, expanded, invalid, required, busy }) => {
463
+ }, async ({ id, role, name, text, contextText, promptText, sectionText, itemText, value, checked, disabled, focused, selected, expanded, invalid, required, busy, maxResults, detail }) => {
399
464
  const session = getSession();
400
- if (!session?.tree || !session?.layout)
465
+ if (!session)
401
466
  return err('Not connected. Call geometra_connect first.');
402
- const a11y = sessionA11y(session);
467
+ const a11y = await sessionA11yWhenReady(session);
403
468
  if (!a11y)
404
469
  return err('No UI tree available');
405
470
  const filter = {
@@ -408,6 +473,9 @@ Unknown parameter names are rejected (strict schema). To wait until visible text
408
473
  name,
409
474
  text,
410
475
  contextText,
476
+ promptText,
477
+ sectionText,
478
+ itemText,
411
479
  value,
412
480
  checked,
413
481
  disabled,
@@ -422,19 +490,70 @@ Unknown parameter names are rejected (strict schema). To wait until visible text
422
490
  return err(GEOMETRA_QUERY_FILTER_REQUIRED_MESSAGE);
423
491
  const matches = findNodes(a11y, filter);
424
492
  if (matches.length === 0) {
493
+ if (detail === 'terse') {
494
+ return ok(JSON.stringify({ matchCount: 0, filter: compactFilterPayload(filter) }));
495
+ }
425
496
  return ok(`No elements found matching ${JSON.stringify(filter)}`);
426
497
  }
427
- const result = sortA11yNodes(matches).map(node => formatNode(node, a11y, a11y.bounds));
498
+ const formatted = sortA11yNodes(matches).map(node => formatNode(node, a11y, a11y.bounds));
499
+ if (detail === 'terse') {
500
+ const limited = formatted.slice(0, maxResults ?? 8);
501
+ return ok(JSON.stringify({
502
+ matchCount: formatted.length,
503
+ matches: limited.map(compactFormattedNode),
504
+ }));
505
+ }
506
+ const result = typeof maxResults === 'number' ? formatted.slice(0, maxResults) : formatted;
428
507
  return ok(JSON.stringify(result, null, 2));
429
508
  });
509
+ server.tool('geometra_find_action', `Resolve a clickable action by action label plus optional section, prompt, or item/card text. This is a narrower, lower-token path for repeated actions like "Open incident" in a queue row or "Add to cart" inside a product card.
510
+
511
+ Use this when geometra_page_model tells you the page shape, but you want one direct semantic action target instead of expanding a whole section.`, {
512
+ name: z.string().describe('Action label / accessible name to match'),
513
+ role: z.enum(['button', 'link']).optional().describe('Optional action role hint (button or link)'),
514
+ sectionText: z.string().optional().describe('Containing section/landmark/form/dialog text to disambiguate repeated actions'),
515
+ promptText: z.string().optional().describe('Nearby question/prompt text to disambiguate repeated actions'),
516
+ itemText: z.string().optional().describe('Nearby card/row/item label to disambiguate repeated actions'),
517
+ maxResults: z.number().int().min(1).max(12).optional().default(6).describe('Maximum number of matches to return'),
518
+ detail: detailInput(),
519
+ }, async ({ name, role, sectionText, promptText, itemText, maxResults, detail }) => {
520
+ const session = getSession();
521
+ if (!session)
522
+ return err('Not connected. Call geometra_connect first.');
523
+ const a11y = await sessionA11yWhenReady(session);
524
+ if (!a11y)
525
+ return err('No UI tree available');
526
+ const filter = {
527
+ ...(role ? { role } : {}),
528
+ name,
529
+ ...(sectionText ? { sectionText } : {}),
530
+ ...(promptText ? { promptText } : {}),
531
+ ...(itemText ? { itemText } : {}),
532
+ };
533
+ const matches = sortA11yNodes(findNodes(a11y, filter).filter(node => node.focusable && (node.role === 'button' || node.role === 'link')));
534
+ if (matches.length === 0) {
535
+ if (detail === 'terse') {
536
+ return ok(JSON.stringify({ matchCount: 0, filter: compactFilterPayload(filter) }));
537
+ }
538
+ return ok(`No actions found matching ${JSON.stringify(filter)}`);
539
+ }
540
+ const formatted = matches.slice(0, maxResults).map(node => formatNode(node, a11y, a11y.bounds));
541
+ if (detail === 'terse') {
542
+ return ok(JSON.stringify({
543
+ matchCount: matches.length,
544
+ matches: formatted.map(compactFormattedNode),
545
+ }));
546
+ }
547
+ return ok(JSON.stringify(formatted));
548
+ });
430
549
  server.registerTool('geometra_wait_for', {
431
550
  description: `Wait for a semantic UI condition without guessing sleep durations. Use this for slow SPA transitions, resume parsing, custom validation alerts, disabled submit buttons, and value/state confirmation before submit.
432
551
 
433
552
  The filter matches the same fields as geometra_query (strict schema — unknown keys error). Set \`present: false\` to wait until **no** node matches — for example Ashby/Lever-style “Parsing your resume” or any “Parsing…” banner: \`{ "text": "Parsing", "present": false }\` (tune the substring to the site). Do not use a textGone parameter; use \`text\` + \`present: false\`, or \`geometra_wait_for_resume_parse\` for the usual post-upload parsing banner.`,
434
553
  inputSchema: geometraWaitForInputSchema,
435
- }, async ({ id, role, name, text, contextText, value, checked, disabled, focused, selected, expanded, invalid, required, busy, present, timeoutMs }) => {
554
+ }, async ({ id, role, name, text, contextText, promptText, sectionText, itemText, value, checked, disabled, focused, selected, expanded, invalid, required, busy, present, timeoutMs, detail }) => {
436
555
  const session = getSession();
437
- if (!session?.tree || !session?.layout)
556
+ if (!session)
438
557
  return err('Not connected. Call geometra_connect first.');
439
558
  const filterProbe = {
440
559
  id,
@@ -442,6 +561,9 @@ The filter matches the same fields as geometra_query (strict schema — unknown
442
561
  name,
443
562
  text,
444
563
  contextText,
564
+ promptText,
565
+ sectionText,
566
+ itemText,
445
567
  value,
446
568
  checked,
447
569
  disabled,
@@ -461,6 +583,16 @@ The filter matches the same fields as geometra_query (strict schema — unknown
461
583
  });
462
584
  if (!waited.ok)
463
585
  return err(waited.error);
586
+ if (detail === 'terse') {
587
+ const compact = waitConditionCompact(waited.value);
588
+ const matches = waited.value.matches
589
+ .slice(0, 3)
590
+ .map(match => compactFormattedNode(match));
591
+ return ok(JSON.stringify({
592
+ ...compact,
593
+ ...(matches.length > 0 ? { matches } : {}),
594
+ }));
595
+ }
464
596
  if (!waited.value.present) {
465
597
  return ok(waitConditionSuccessLine(waited.value));
466
598
  }
@@ -473,7 +605,7 @@ Equivalent to \`geometra_wait_for\` with \`present: false\` and \`text\` set to
473
605
  inputSchema: geometraWaitForResumeParseInputSchema,
474
606
  }, async ({ text, timeoutMs }) => {
475
607
  const session = getSession();
476
- if (!session?.tree || !session?.layout)
608
+ if (!session)
477
609
  return err('Not connected. Call geometra_connect first.');
478
610
  const filter = { text };
479
611
  const waited = await waitForSemanticCondition(session, {
@@ -508,6 +640,30 @@ Use \`kind: "text"\` for textboxes / textareas, \`"choice"\` for selects / combo
508
640
  const resolvedFields = resolveFillFieldInputs(session, fields);
509
641
  if (!resolvedFields.ok)
510
642
  return err(resolvedFields.error);
643
+ if (!includeSteps) {
644
+ try {
645
+ const batched = await tryBatchedResolvedFields(session, resolvedFields.fields, detail);
646
+ if (batched.ok) {
647
+ const payload = {
648
+ completed: true,
649
+ execution: 'batched',
650
+ finalSource: batched.finalSource,
651
+ fieldCount: resolvedFields.fields.length,
652
+ successCount: resolvedFields.fields.length,
653
+ errorCount: 0,
654
+ final: batched.final,
655
+ };
656
+ if (failOnInvalid && batched.invalidRemaining > 0) {
657
+ return err(JSON.stringify(payload, null, detail === 'verbose' ? 2 : undefined));
658
+ }
659
+ return ok(JSON.stringify(payload, null, detail === 'verbose' ? 2 : undefined));
660
+ }
661
+ }
662
+ catch (e) {
663
+ const message = e instanceof Error ? e.message : String(e);
664
+ return err(message);
665
+ }
666
+ }
511
667
  const steps = [];
512
668
  let stoppedAt;
513
669
  for (let index = 0; index < resolvedFields.fields.length; index++) {
@@ -762,7 +918,14 @@ Pass \`valuesById\` with field ids from \`geometra_form_schema\` for the most st
762
918
  });
763
919
  server.tool('geometra_run_actions', `Execute several Geometra actions in one MCP round trip and return one consolidated result. This is the preferred path for long, multi-step form fills where one-tool-per-field would otherwise create too much chatter.
764
920
 
765
- Supported step types: \`click\`, \`type\`, \`key\`, \`upload_files\`, \`pick_listbox_option\`, \`select_option\`, \`set_checked\`, \`wheel\`, \`wait_for\`, and \`fill_fields\`. \`click\` steps can also carry a nested \`waitFor\` condition.`, {
921
+ Supported step types: \`click\`, \`type\`, \`key\`, \`upload_files\`, \`pick_listbox_option\`, \`select_option\`, \`set_checked\`, \`wheel\`, \`wait_for\`, and \`fill_fields\`. \`click\` steps can also carry a nested \`waitFor\` condition. Pass \`pageUrl\` / \`url\` to auto-connect so an entire flow can run in one MCP call.`, {
922
+ url: z.string().optional().describe('Optional target URL. Use a ws:// Geometra server URL or an http(s) page URL to auto-connect before running actions.'),
923
+ pageUrl: z.string().optional().describe('Optional http(s) page URL to auto-connect before running actions. Prefer this over url for browser pages.'),
924
+ port: z.number().int().min(0).max(65535).optional().describe('Preferred local port for an auto-spawned proxy (default: ephemeral OS-assigned port).'),
925
+ headless: z.boolean().optional().describe('Run Chromium headless when auto-spawning a proxy (default false = visible window).'),
926
+ width: z.number().int().positive().optional().describe('Viewport width for auto-connected sessions.'),
927
+ height: z.number().int().positive().optional().describe('Viewport height for auto-connected sessions.'),
928
+ slowMo: z.number().int().nonnegative().optional().describe('Playwright slowMo (ms) when auto-spawning a proxy.'),
766
929
  actions: z.array(batchActionSchema).min(1).max(80).describe('Ordered high-level action steps to run sequentially'),
767
930
  stopOnError: z.boolean().optional().default(true).describe('Stop at the first failing step (default true)'),
768
931
  includeSteps: z
@@ -770,24 +933,66 @@ Supported step types: \`click\`, \`type\`, \`key\`, \`upload_files\`, \`pick_lis
770
933
  .optional()
771
934
  .default(true)
772
935
  .describe('Include per-action step results in the JSON payload (default true). Set false for the smallest batch response.'),
936
+ output: z.enum(['full', 'final']).optional().default('full').describe('`full` (default) returns counts and optional step listings. `final` keeps only completion state plus final semantic signals.'),
773
937
  detail: detailInput(),
774
- }, async ({ actions, stopOnError, includeSteps, detail }) => {
775
- const session = getSession();
776
- if (!session)
777
- return err('Not connected. Call geometra_connect first.');
938
+ }, async ({ url, pageUrl, port, headless, width, height, slowMo, actions, stopOnError, includeSteps, output, detail }) => {
939
+ const resolved = await ensureToolSession({
940
+ url,
941
+ pageUrl,
942
+ port,
943
+ headless,
944
+ width,
945
+ height,
946
+ slowMo,
947
+ awaitInitialFrame: canDeferInitialFrameForRunActions(actions) ? false : undefined,
948
+ }, 'Not connected. Call geometra_connect first, or pass pageUrl/url to geometra_run_actions.');
949
+ if (!resolved.ok)
950
+ return err(resolved.error);
951
+ const session = resolved.session;
952
+ const connection = autoConnectionPayload(resolved);
778
953
  const steps = [];
779
954
  let stoppedAt;
780
955
  for (let index = 0; index < actions.length; index++) {
781
956
  const action = actions[index];
957
+ const startedAt = performance.now();
958
+ let uiTreeWaitMs = 0;
782
959
  try {
960
+ if (actionNeedsUiTree(action) && (!session.tree || !session.layout)) {
961
+ const uiTreeWaitStartedAt = performance.now();
962
+ await waitForUiCondition(session, () => Boolean(session.tree && session.layout), 2_000);
963
+ uiTreeWaitMs = performance.now() - uiTreeWaitStartedAt;
964
+ }
783
965
  const result = await executeBatchAction(session, action, detail, includeSteps);
966
+ const elapsedMs = Number((performance.now() - startedAt).toFixed(1));
784
967
  steps.push(detail === 'verbose'
785
- ? { index, type: action.type, ok: true, summary: result.summary }
786
- : { index, type: action.type, ok: true, ...result.compact });
968
+ ? {
969
+ index,
970
+ type: action.type,
971
+ ok: true,
972
+ elapsedMs,
973
+ ...(uiTreeWaitMs > 0 ? { uiTreeWaitMs: Number(uiTreeWaitMs.toFixed(1)) } : {}),
974
+ summary: result.summary,
975
+ }
976
+ : {
977
+ index,
978
+ type: action.type,
979
+ ok: true,
980
+ elapsedMs,
981
+ ...(uiTreeWaitMs > 0 ? { uiTreeWaitMs: Number(uiTreeWaitMs.toFixed(1)) } : {}),
982
+ ...result.compact,
983
+ });
787
984
  }
788
985
  catch (e) {
789
986
  const message = e instanceof Error ? e.message : String(e);
790
- steps.push({ index, type: action.type, ok: false, error: message });
987
+ const elapsedMs = Number((performance.now() - startedAt).toFixed(1));
988
+ steps.push({
989
+ index,
990
+ type: action.type,
991
+ ok: false,
992
+ elapsedMs,
993
+ ...(uiTreeWaitMs > 0 ? { uiTreeWaitMs: Number(uiTreeWaitMs.toFixed(1)) } : {}),
994
+ error: message,
995
+ });
791
996
  if (stopOnError) {
792
997
  stoppedAt = index;
793
998
  break;
@@ -797,15 +1002,23 @@ Supported step types: \`click\`, \`type\`, \`key\`, \`upload_files\`, \`pick_lis
797
1002
  const after = sessionA11y(session);
798
1003
  const successCount = steps.filter(step => step.ok === true).length;
799
1004
  const errorCount = steps.length - successCount;
800
- const payload = {
801
- completed: stoppedAt === undefined && steps.length === actions.length,
802
- stepCount: actions.length,
803
- successCount,
804
- errorCount,
805
- ...(includeSteps ? { steps } : {}),
806
- ...(stoppedAt !== undefined ? { stoppedAt } : {}),
807
- ...(after ? { final: sessionSignalsPayload(collectSessionSignals(after), detail) } : {}),
808
- };
1005
+ const payload = output === 'final'
1006
+ ? {
1007
+ ...connection,
1008
+ completed: stoppedAt === undefined && steps.length === actions.length,
1009
+ ...(stoppedAt !== undefined ? { stoppedAt } : {}),
1010
+ ...(after ? { final: sessionSignalsPayload(collectSessionSignals(after), detail) } : {}),
1011
+ }
1012
+ : {
1013
+ ...connection,
1014
+ completed: stoppedAt === undefined && steps.length === actions.length,
1015
+ stepCount: actions.length,
1016
+ successCount,
1017
+ errorCount,
1018
+ ...(includeSteps ? { steps } : {}),
1019
+ ...(stoppedAt !== undefined ? { stoppedAt } : {}),
1020
+ ...(after ? { final: sessionSignalsPayload(collectSessionSignals(after), detail) } : {}),
1021
+ };
809
1022
  return ok(JSON.stringify(payload, null, detail === 'verbose' ? 2 : undefined));
810
1023
  });
811
1024
  // ── page model ────────────────────────────────────────────────
@@ -830,9 +1043,9 @@ Use this first on normal HTML pages when you want to understand the page shape w
830
1043
  .describe('Cap returned landmarks/forms/dialogs/lists per kind (default 8).'),
831
1044
  }, async ({ maxPrimaryActions, maxSectionsPerKind }) => {
832
1045
  const session = getSession();
833
- if (!session?.tree || !session?.layout)
1046
+ if (!session)
834
1047
  return err('Not connected. Call geometra_connect first.');
835
- const a11y = sessionA11y(session);
1048
+ const a11y = await sessionA11yWhenReady(session);
836
1049
  if (!a11y)
837
1050
  return err('No UI tree available');
838
1051
  const model = buildPageModel(a11y, { maxPrimaryActions, maxSectionsPerKind });
@@ -861,6 +1074,9 @@ Unlike geometra_expand_section, this collapses repeated radio/button groups into
861
1074
  if (!resolved.ok)
862
1075
  return err(resolved.error);
863
1076
  const session = resolved.session;
1077
+ if (!(await ensureSessionUiTree(session, 4_000))) {
1078
+ return err('Timed out waiting for the initial UI tree after connect.');
1079
+ }
864
1080
  const payload = formSchemaResponsePayload(session, {
865
1081
  formId,
866
1082
  maxFields,
@@ -898,9 +1114,9 @@ Use this after geometra_page_model when you know which form/dialog/list/landmark
898
1114
  includeBounds: z.boolean().optional().default(false).describe('Include bounds for fields/actions/headings/items'),
899
1115
  }, async ({ id, maxHeadings, maxFields, fieldOffset, onlyRequiredFields, onlyInvalidFields, maxActions, actionOffset, maxLists, listOffset, maxItems, itemOffset, maxTextPreview, includeBounds, }) => {
900
1116
  const session = getSession();
901
- if (!session?.tree || !session?.layout)
1117
+ if (!session)
902
1118
  return err('Not connected. Call geometra_connect first.');
903
- const a11y = sessionA11y(session);
1119
+ const a11y = await sessionA11yWhenReady(session);
904
1120
  if (!a11y)
905
1121
  return err('No UI tree available');
906
1122
  const detail = expandPageSection(a11y, id, {
@@ -937,7 +1153,7 @@ Use the same filters as geometra_query, plus an optional match index when repeat
937
1153
  .optional()
938
1154
  .default(2_500)
939
1155
  .describe('Per-scroll wait timeout (default 2500ms)'),
940
- }, async ({ id, role, name, text, contextText, value, checked, disabled, focused, selected, expanded, invalid, required, busy, index, fullyVisible, maxSteps, timeoutMs }) => {
1156
+ }, async ({ id, role, name, text, contextText, promptText, sectionText, itemText, value, checked, disabled, focused, selected, expanded, invalid, required, busy, index, fullyVisible, maxSteps, timeoutMs }) => {
941
1157
  const session = getSession();
942
1158
  if (!session)
943
1159
  return err('Not connected. Call geometra_connect first.');
@@ -947,6 +1163,9 @@ Use the same filters as geometra_query, plus an optional match index when repeat
947
1163
  name,
948
1164
  text,
949
1165
  contextText,
1166
+ promptText,
1167
+ sectionText,
1168
+ itemText,
950
1169
  value,
951
1170
  checked,
952
1171
  disabled,
@@ -958,7 +1177,7 @@ Use the same filters as geometra_query, plus an optional match index when repeat
958
1177
  busy,
959
1178
  };
960
1179
  if (!hasNodeFilter(filter))
961
- return err('Provide at least one reveal filter (id, role, name, text, contextText, value, or state)');
1180
+ return err('Provide at least one reveal filter (id, role, name, text, contextText, promptText, sectionText, itemText, value, or state)');
962
1181
  const revealed = await revealSemanticTarget(session, {
963
1182
  filter,
964
1183
  index: index ?? 0,
@@ -1001,7 +1220,7 @@ After clicking, returns a compact semantic delta when possible (dialogs/forms/li
1001
1220
  .optional()
1002
1221
  .describe('Optional action wait timeout (use a longer value for slow submits or route transitions)'),
1003
1222
  detail: detailInput(),
1004
- }, async ({ x, y, id, role, name, text, contextText, value, checked, disabled, focused, selected, expanded, invalid, required, busy, index, fullyVisible, maxRevealSteps, revealTimeoutMs, waitFor, timeoutMs, detail }) => {
1223
+ }, async ({ x, y, id, role, name, text, contextText, promptText, sectionText, itemText, value, checked, disabled, focused, selected, expanded, invalid, required, busy, index, fullyVisible, maxRevealSteps, revealTimeoutMs, waitFor, timeoutMs, detail }) => {
1005
1224
  const session = getSession();
1006
1225
  if (!session)
1007
1226
  return err('Not connected. Call geometra_connect first.');
@@ -1015,6 +1234,9 @@ After clicking, returns a compact semantic delta when possible (dialogs/forms/li
1015
1234
  name,
1016
1235
  text,
1017
1236
  contextText,
1237
+ promptText,
1238
+ sectionText,
1239
+ itemText,
1018
1240
  value,
1019
1241
  checked,
1020
1242
  disabled,
@@ -1046,6 +1268,9 @@ After clicking, returns a compact semantic delta when possible (dialogs/forms/li
1046
1268
  name: waitFor.name,
1047
1269
  text: waitFor.text,
1048
1270
  contextText: waitFor.contextText,
1271
+ promptText: waitFor.promptText,
1272
+ sectionText: waitFor.sectionText,
1273
+ itemText: waitFor.itemText,
1049
1274
  value: waitFor.value,
1050
1275
  checked: waitFor.checked,
1051
1276
  disabled: waitFor.disabled,
@@ -1062,8 +1287,20 @@ After clicking, returns a compact semantic delta when possible (dialogs/forms/li
1062
1287
  if (!postWait.ok)
1063
1288
  return err([...lines, postWait.error].join('\n'));
1064
1289
  lines.push(`Post-click ${waitConditionSuccessLine(postWait.value)}`);
1290
+ const compact = {
1291
+ at: { x: resolved.value.x, y: resolved.value.y },
1292
+ ...(resolved.value.target ? { target: compactNodeReference(resolved.value.target), revealSteps: resolved.value.revealAttempts ?? 0 } : {}),
1293
+ ...waitStatusPayload(wait),
1294
+ postWait: waitConditionCompact(postWait.value),
1295
+ };
1296
+ return ok(detailText(lines.filter(Boolean).join('\n'), compact, detail));
1065
1297
  }
1066
- return ok(lines.filter(Boolean).join('\n'));
1298
+ const compact = {
1299
+ at: { x: resolved.value.x, y: resolved.value.y },
1300
+ ...(resolved.value.target ? { target: compactNodeReference(resolved.value.target), revealSteps: resolved.value.revealAttempts ?? 0 } : {}),
1301
+ ...waitStatusPayload(wait),
1302
+ };
1303
+ return ok(detailText(lines.filter(Boolean).join('\n'), compact, detail));
1067
1304
  });
1068
1305
  // ── type ─────────────────────────────────────────────────────
1069
1306
  server.tool('geometra_type', `Type text into the currently focused element. First click a textbox/input with geometra_click to focus it, then use this to type.
@@ -1085,7 +1322,10 @@ Each character is sent as a key event through the geometry protocol. Returns a c
1085
1322
  const before = sessionA11y(session);
1086
1323
  const wait = await sendType(session, text, timeoutMs);
1087
1324
  const summary = postActionSummary(session, before, wait, detail);
1088
- return ok(`Typed "${text}".\n${summary}`);
1325
+ return ok(detailText(`Typed "${text}".\n${summary}`, {
1326
+ ...compactTextValue(text),
1327
+ ...waitStatusPayload(wait),
1328
+ }, detail));
1089
1329
  });
1090
1330
  // ── key ──────────────────────────────────────────────────────
1091
1331
  server.tool('geometra_key', `Send a special key press (Enter, Tab, Escape, ArrowDown, etc.) to the Geometra UI. Useful for form submission, focus navigation, and keyboard shortcuts.`, {
@@ -1109,7 +1349,10 @@ Each character is sent as a key event through the geometry protocol. Returns a c
1109
1349
  const before = sessionA11y(session);
1110
1350
  const wait = await sendKey(session, key, { shift, ctrl, meta, alt }, timeoutMs);
1111
1351
  const summary = postActionSummary(session, before, wait, detail);
1112
- return ok(`Pressed ${formatKeyCombo(key, { shift, ctrl, meta, alt })}.\n${summary}`);
1352
+ return ok(detailText(`Pressed ${formatKeyCombo(key, { shift, ctrl, meta, alt })}.\n${summary}`, {
1353
+ key: formatKeyCombo(key, { shift, ctrl, meta, alt }),
1354
+ ...waitStatusPayload(wait),
1355
+ }, detail));
1113
1356
  });
1114
1357
  // ── upload files (proxy) ───────────────────────────────────────
1115
1358
  server.tool('geometra_upload_files', `Attach local files to a file input. Requires \`@geometra/proxy\` (paths exist on the proxy host).
@@ -1148,7 +1391,13 @@ Strategies: **auto** (default) tries chooser click if x,y given, else a labeled
1148
1391
  drop: dropX !== undefined && dropY !== undefined ? { x: dropX, y: dropY } : undefined,
1149
1392
  }, timeoutMs ?? 8_000);
1150
1393
  const summary = postActionSummary(session, before, wait, detail);
1151
- return ok(`Uploaded ${paths.length} file(s).\n${summary}`);
1394
+ return ok(detailText(`Uploaded ${paths.length} file(s).\n${summary}`, {
1395
+ fileCount: paths.length,
1396
+ ...(fieldLabel ? { fieldLabel } : {}),
1397
+ ...(strategy ? { strategy } : {}),
1398
+ ...waitStatusPayload(wait),
1399
+ ...(fieldLabel ? { readback: fieldStatePayload(session, fieldLabel) } : {}),
1400
+ }, detail));
1152
1401
  }
1153
1402
  catch (e) {
1154
1403
  return err(e.message);
@@ -1185,11 +1434,17 @@ Pass \`fieldLabel\` to open a labeled dropdown semantically instead of relying o
1185
1434
  }, timeoutMs);
1186
1435
  const summary = postActionSummary(session, before, wait, detail);
1187
1436
  const fieldSummary = fieldLabel ? summarizeFieldLabelState(session, fieldLabel) : undefined;
1188
- return ok([
1437
+ const summaryText = [
1189
1438
  `Picked listbox option "${label}".`,
1190
1439
  fieldSummary,
1191
1440
  summary,
1192
- ].filter(Boolean).join('\n'));
1441
+ ].filter(Boolean).join('\n');
1442
+ return ok(detailText(summaryText, {
1443
+ label,
1444
+ ...(fieldLabel ? { fieldLabel } : {}),
1445
+ ...waitStatusPayload(wait),
1446
+ ...(fieldLabel ? { readback: fieldStatePayload(session, fieldLabel) } : {}),
1447
+ }, detail));
1193
1448
  }
1194
1449
  catch (e) {
1195
1450
  return err(e.message);
@@ -1223,7 +1478,13 @@ Custom React/Vue dropdowns are not supported here — use \`geometra_pick_listbo
1223
1478
  try {
1224
1479
  const wait = await sendSelectOption(session, x, y, { value, label, index }, timeoutMs);
1225
1480
  const summary = postActionSummary(session, before, wait, detail);
1226
- return ok(`Selected option.\n${summary}`);
1481
+ return ok(detailText(`Selected option.\n${summary}`, {
1482
+ at: { x, y },
1483
+ ...(value !== undefined ? { value } : {}),
1484
+ ...(label !== undefined ? { label } : {}),
1485
+ ...(index !== undefined ? { index } : {}),
1486
+ ...waitStatusPayload(wait),
1487
+ }, detail));
1227
1488
  }
1228
1489
  catch (e) {
1229
1490
  return err(e.message);
@@ -1252,7 +1513,12 @@ Prefer this over raw coordinate clicks for custom forms that keep the real input
1252
1513
  try {
1253
1514
  const wait = await sendSetChecked(session, label, { checked, exact, controlType }, timeoutMs);
1254
1515
  const summary = postActionSummary(session, before, wait, detail);
1255
- return ok(`Set ${controlType ?? 'checkbox/radio'} "${label}" to ${String(checked ?? true)}.\n${summary}`);
1516
+ return ok(detailText(`Set ${controlType ?? 'checkbox/radio'} "${label}" to ${String(checked ?? true)}.\n${summary}`, {
1517
+ label,
1518
+ checked: checked ?? true,
1519
+ ...(controlType ? { controlType } : {}),
1520
+ ...waitStatusPayload(wait),
1521
+ }, detail));
1256
1522
  }
1257
1523
  catch (e) {
1258
1524
  return err(e.message);
@@ -1280,7 +1546,12 @@ Prefer this over raw coordinate clicks for custom forms that keep the real input
1280
1546
  try {
1281
1547
  const wait = await sendWheel(session, deltaY, { deltaX, x, y }, timeoutMs);
1282
1548
  const summary = postActionSummary(session, before, wait, detail);
1283
- return ok(`Wheel delta (${deltaX ?? 0}, ${deltaY}).\n${summary}`);
1549
+ return ok(detailText(`Wheel delta (${deltaX ?? 0}, ${deltaY}).\n${summary}`, {
1550
+ deltaY,
1551
+ ...(deltaX !== undefined ? { deltaX } : {}),
1552
+ ...(x !== undefined && y !== undefined ? { at: { x, y } } : {}),
1553
+ ...waitStatusPayload(wait),
1554
+ }, detail));
1284
1555
  }
1285
1556
  catch (e) {
1286
1557
  return err(e.message);
@@ -1308,9 +1579,9 @@ JSON is minified in compact view to save tokens. For a summary-first overview, u
1308
1579
  includeOptions: z.boolean().optional().default(false).describe('Include explicit choice option labels when view=form-required'),
1309
1580
  }, async ({ view, maxNodes, formId, maxFields, includeOptions }) => {
1310
1581
  const session = getSession();
1311
- if (!session?.tree || !session?.layout)
1582
+ if (!session)
1312
1583
  return err('Not connected. Call geometra_connect first.');
1313
- const a11y = sessionA11y(session);
1584
+ const a11y = await sessionA11yWhenReady(session);
1314
1585
  if (!a11y)
1315
1586
  return err('No UI tree available');
1316
1587
  if (view === 'full') {
@@ -1365,7 +1636,7 @@ function compactSessionSummary(session) {
1365
1636
  return sessionOverviewFromA11y(a11y);
1366
1637
  }
1367
1638
  function connectPayload(session, opts) {
1368
- const a11y = sessionA11y(session);
1639
+ const a11y = opts.detail === 'verbose' ? sessionA11y(session) : null;
1369
1640
  return {
1370
1641
  connected: true,
1371
1642
  transport: opts.transport,
@@ -1387,6 +1658,17 @@ function sessionA11y(session) {
1387
1658
  session.cachedA11yRevision = session.updateRevision;
1388
1659
  return a11y;
1389
1660
  }
1661
+ async function ensureSessionUiTree(session, timeoutMs = 4_000) {
1662
+ if (session.tree && session.layout)
1663
+ return true;
1664
+ return await waitForUiCondition(session, () => Boolean(session.tree && session.layout), timeoutMs);
1665
+ }
1666
+ async function sessionA11yWhenReady(session, timeoutMs = 4_000) {
1667
+ const ready = await ensureSessionUiTree(session, timeoutMs);
1668
+ if (!ready)
1669
+ return null;
1670
+ return sessionA11y(session);
1671
+ }
1390
1672
  function shortHash(value) {
1391
1673
  return createHash('sha1').update(value).digest('hex').slice(0, 12);
1392
1674
  }
@@ -1506,10 +1788,23 @@ function connectResponsePayload(session, opts) {
1506
1788
  nextPayload.formSchema = formSchemaResponsePayload(session, opts.formSchema ?? {});
1507
1789
  }
1508
1790
  if (opts.returnPageModel) {
1509
- nextPayload.pageModel = pageModelResponsePayload(session, opts.pageModelOptions);
1791
+ nextPayload.pageModel = opts.pageModelMode === 'deferred'
1792
+ ? deferredPageModelConnectPayload(session, opts.pageModelOptions)
1793
+ : pageModelResponsePayload(session, opts.pageModelOptions);
1510
1794
  }
1511
1795
  return nextPayload;
1512
1796
  }
1797
+ function deferredPageModelConnectPayload(session, options) {
1798
+ return {
1799
+ deferred: true,
1800
+ ready: Boolean(session.tree && session.layout),
1801
+ tool: 'geometra_page_model',
1802
+ options: {
1803
+ maxPrimaryActions: options?.maxPrimaryActions ?? 6,
1804
+ maxSectionsPerKind: options?.maxSectionsPerKind ?? 8,
1805
+ },
1806
+ };
1807
+ }
1513
1808
  function pageModelResponsePayload(session, options) {
1514
1809
  const a11y = sessionA11y(session);
1515
1810
  if (!a11y) {
@@ -1737,8 +2032,17 @@ function sessionSignalsPayload(signals, detail = 'minimal') {
1737
2032
  busyCount: signals.busyCount,
1738
2033
  alertCount: signals.alerts.length,
1739
2034
  invalidCount: signals.invalidFields.length,
1740
- alerts: detail === 'verbose' ? signals.alerts : signals.alerts.slice(0, 2),
1741
- invalidFields: detail === 'verbose' ? signals.invalidFields : signals.invalidFields.slice(0, 4),
2035
+ ...(detail === 'verbose'
2036
+ ? {
2037
+ alerts: signals.alerts,
2038
+ invalidFields: signals.invalidFields,
2039
+ }
2040
+ : detail === 'minimal'
2041
+ ? {
2042
+ alerts: signals.alerts.slice(0, 2),
2043
+ invalidFields: signals.invalidFields.slice(0, 4),
2044
+ }
2045
+ : {}),
1742
2046
  };
1743
2047
  }
1744
2048
  function compactTextValue(value, inlineLimit = 48) {
@@ -1834,6 +2138,10 @@ function inferRevealStepBudget(target, viewport) {
1834
2138
  return clamp(Math.max(6, Math.max(verticalSteps, horizontalSteps) + 1), 6, 48);
1835
2139
  }
1836
2140
  async function revealSemanticTarget(session, options) {
2141
+ const initialTreeReady = await ensureSessionUiTree(session, Math.max(4_000, options.timeoutMs));
2142
+ if (!initialTreeReady) {
2143
+ return { ok: false, error: 'Timed out waiting for the initial UI tree after connect.' };
2144
+ }
1837
2145
  let attempts = 0;
1838
2146
  let stepBudget = options.maxSteps;
1839
2147
  while (attempts <= (stepBudget ?? 48)) {
@@ -1904,7 +2212,7 @@ async function resolveClickLocation(session, options) {
1904
2212
  if (!hasNodeFilter(options.filter)) {
1905
2213
  return {
1906
2214
  ok: false,
1907
- error: 'Provide x and y, or at least one semantic target filter (id, role, name, text, contextText, value, or state)',
2215
+ error: 'Provide x and y, or at least one semantic target filter (id, role, name, text, contextText, promptText, sectionText, itemText, value, or state)',
1908
2216
  };
1909
2217
  }
1910
2218
  const revealed = await revealSemanticTarget(session, {
@@ -1936,6 +2244,18 @@ function compactNodeReference(node) {
1936
2244
  ...(node.name ? { name: node.name } : {}),
1937
2245
  };
1938
2246
  }
2247
+ function compactFormattedNode(node) {
2248
+ return {
2249
+ ...compactNodeReference(node),
2250
+ ...(node.context ? { context: node.context } : {}),
2251
+ ...(node.value ? { value: node.value } : {}),
2252
+ center: node.center,
2253
+ bounds: node.bounds,
2254
+ };
2255
+ }
2256
+ function detailText(summary, compact, detail) {
2257
+ return detail === 'terse' ? JSON.stringify(compact) : summary;
2258
+ }
1939
2259
  function normalizeLookupKey(value) {
1940
2260
  return value.replace(/\s+/g, ' ').trim().toLowerCase();
1941
2261
  }
@@ -2202,6 +2522,44 @@ function directLabelBatchFields(valuesByLabel) {
2202
2522
  }
2203
2523
  return fields;
2204
2524
  }
2525
+ async function tryBatchedResolvedFields(session, fields, detail) {
2526
+ let batchAckResult;
2527
+ try {
2528
+ const startRevision = session.updateRevision;
2529
+ const wait = await sendFillFields(session, fields);
2530
+ const ackResult = parseProxyFillAckResult(wait.result);
2531
+ batchAckResult = ackResult;
2532
+ if (ackResult && ackResult.invalidCount === 0) {
2533
+ return {
2534
+ ok: true,
2535
+ finalSource: 'proxy',
2536
+ final: ackResult,
2537
+ invalidRemaining: 0,
2538
+ };
2539
+ }
2540
+ await waitForDeferredBatchUpdate(session, startRevision, wait);
2541
+ await waitForBatchFieldReadback(session, fields);
2542
+ }
2543
+ catch (e) {
2544
+ if (canFallbackToSequentialFill(e))
2545
+ return { ok: false };
2546
+ throw e;
2547
+ }
2548
+ const after = sessionA11y(session);
2549
+ if (!after)
2550
+ return { ok: false };
2551
+ const signals = collectSessionSignals(after);
2552
+ const invalidRemaining = signals.invalidFields.length;
2553
+ if ((!batchAckResult || batchAckResult.invalidCount > 0) && invalidRemaining > 0) {
2554
+ return { ok: false };
2555
+ }
2556
+ return {
2557
+ ok: true,
2558
+ finalSource: 'session',
2559
+ final: sessionSignalsPayload(signals, detail),
2560
+ invalidRemaining,
2561
+ };
2562
+ }
2205
2563
  async function waitForDeferredBatchUpdate(session, startRevision, wait) {
2206
2564
  if (wait.status !== 'acknowledged' || session.updateRevision > startRevision)
2207
2565
  return;
@@ -2242,6 +2600,22 @@ function batchFieldReadbackMatches(a11y, field) {
2242
2600
  }
2243
2601
  }
2244
2602
  }
2603
+ function actionNeedsUiTree(action) {
2604
+ switch (action.type) {
2605
+ case 'wait_for':
2606
+ return true;
2607
+ case 'click':
2608
+ return action.x === undefined || action.y === undefined || Boolean(action.waitFor);
2609
+ default:
2610
+ return false;
2611
+ }
2612
+ }
2613
+ function canDeferInitialFrameForRunActions(actions) {
2614
+ const first = actions[0];
2615
+ if (!first)
2616
+ return false;
2617
+ return first.type === 'fill_fields';
2618
+ }
2245
2619
  async function executeBatchAction(session, action, detail, includeSteps) {
2246
2620
  switch (action.type) {
2247
2621
  case 'click': {
@@ -2255,6 +2629,9 @@ async function executeBatchAction(session, action, detail, includeSteps) {
2255
2629
  name: action.name,
2256
2630
  text: action.text,
2257
2631
  contextText: action.contextText,
2632
+ promptText: action.promptText,
2633
+ sectionText: action.sectionText,
2634
+ itemText: action.itemText,
2258
2635
  value: action.value,
2259
2636
  checked: action.checked,
2260
2637
  disabled: action.disabled,
@@ -2286,6 +2663,9 @@ async function executeBatchAction(session, action, detail, includeSteps) {
2286
2663
  name: action.waitFor.name,
2287
2664
  text: action.waitFor.text,
2288
2665
  contextText: action.waitFor.contextText,
2666
+ promptText: action.waitFor.promptText,
2667
+ sectionText: action.waitFor.sectionText,
2668
+ itemText: action.waitFor.itemText,
2289
2669
  value: action.waitFor.value,
2290
2670
  checked: action.waitFor.checked,
2291
2671
  disabled: action.waitFor.disabled,
@@ -2442,6 +2822,9 @@ async function executeBatchAction(session, action, detail, includeSteps) {
2442
2822
  name: action.name,
2443
2823
  text: action.text,
2444
2824
  contextText: action.contextText,
2825
+ promptText: action.promptText,
2826
+ sectionText: action.sectionText,
2827
+ itemText: action.itemText,
2445
2828
  value: action.value,
2446
2829
  checked: action.checked,
2447
2830
  disabled: action.disabled,
@@ -2479,6 +2862,20 @@ async function executeBatchAction(session, action, detail, includeSteps) {
2479
2862
  const resolvedFields = resolveFillFieldInputs(session, action.fields);
2480
2863
  if (!resolvedFields.ok)
2481
2864
  throw new Error(resolvedFields.error);
2865
+ if (!includeSteps) {
2866
+ const batched = await tryBatchedResolvedFields(session, resolvedFields.fields, detail);
2867
+ if (batched.ok) {
2868
+ return {
2869
+ summary: `Filled ${resolvedFields.fields.length} field(s) in one proxy batch.`,
2870
+ compact: {
2871
+ fieldCount: resolvedFields.fields.length,
2872
+ execution: 'batched',
2873
+ finalSource: batched.finalSource,
2874
+ final: batched.final,
2875
+ },
2876
+ };
2877
+ }
2878
+ }
2482
2879
  const steps = [];
2483
2880
  for (let index = 0; index < resolvedFields.fields.length; index++) {
2484
2881
  const field = resolvedFields.fields[index];
@@ -2673,10 +3070,10 @@ function sectionContext(root, node) {
2673
3070
  }
2674
3071
  return undefined;
2675
3072
  }
2676
- function nodeContextText(root, node) {
2677
- return [promptContext(root, node), sectionContext(root, node)].filter(Boolean).join(' | ') || undefined;
3073
+ function nodeContextText(context) {
3074
+ return [context?.prompt, context?.section, context?.item].filter(Boolean).join(' | ') || undefined;
2678
3075
  }
2679
- function nodeMatchesFilter(node, filter, contextText) {
3076
+ function nodeMatchesFilter(node, filter, context) {
2680
3077
  if (filter.id && nodeIdForPath(node.path) !== filter.id)
2681
3078
  return false;
2682
3079
  if (filter.role && node.role !== filter.role)
@@ -2688,7 +3085,13 @@ function nodeMatchesFilter(node, filter, contextText) {
2688
3085
  if (filter.text &&
2689
3086
  !textMatches(`${node.name ?? ''} ${node.value ?? ''} ${node.validation?.error ?? ''} ${node.validation?.description ?? ''}`.trim(), filter.text))
2690
3087
  return false;
2691
- if (!textMatches(contextText, filter.contextText))
3088
+ if (!textMatches(nodeContextText(context), filter.contextText))
3089
+ return false;
3090
+ if (!textMatches(context?.prompt, filter.promptText))
3091
+ return false;
3092
+ if (!textMatches(context?.section, filter.sectionText))
3093
+ return false;
3094
+ if (!textMatches(context?.item, filter.itemText))
2692
3095
  return false;
2693
3096
  if (filter.checked !== undefined && node.state?.checked !== filter.checked)
2694
3097
  return false;
@@ -2711,8 +3114,10 @@ function nodeMatchesFilter(node, filter, contextText) {
2711
3114
  export function findNodes(node, filter) {
2712
3115
  const matches = [];
2713
3116
  function walk(n) {
2714
- const contextText = filter.contextText ? nodeContextText(node, n) : undefined;
2715
- if (nodeMatchesFilter(n, filter, contextText) && hasNodeFilter(filter))
3117
+ const context = filter.contextText || filter.promptText || filter.sectionText || filter.itemText
3118
+ ? nodeContextForNode(node, n)
3119
+ : undefined;
3120
+ if (nodeMatchesFilter(n, filter, context) && hasNodeFilter(filter))
2716
3121
  matches.push(n);
2717
3122
  for (const child of n.children)
2718
3123
  walk(child);
@@ -2755,14 +3160,13 @@ function formatNode(node, root, viewport) {
2755
3160
  : Math.round(Math.min(Math.max(node.bounds.y + node.bounds.height / 2, 0), viewport.height));
2756
3161
  const revealDeltaX = Math.round(node.bounds.x + node.bounds.width / 2 - viewport.width / 2);
2757
3162
  const revealDeltaY = Math.round(node.bounds.y + node.bounds.height / 2 - viewport.height / 2);
2758
- const prompt = promptContext(root, node);
2759
- const section = sectionContext(root, node);
3163
+ const context = nodeContextForNode(root, node);
2760
3164
  return {
2761
3165
  id: nodeIdForPath(node.path),
2762
3166
  role: node.role,
2763
3167
  name: node.name,
2764
3168
  ...(node.value ? { value: node.value } : {}),
2765
- ...(prompt || section ? { context: { ...(prompt ? { prompt } : {}), ...(section ? { section } : {}) } } : {}),
3169
+ ...(context ? { context } : {}),
2766
3170
  bounds: node.bounds,
2767
3171
  visibleBounds: {
2768
3172
  x: visibleLeft,