@geometra/mcp 1.18.1 → 1.19.1

This diff shows the differences between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/dist/server.js CHANGED
@@ -1,78 +1,150 @@
1
1
  import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
2
  import { z } from 'zod';
3
- import { connect, disconnect, getSession, sendClick, sendType, sendKey, sendFileUpload, sendListboxPick, sendSelectOption, sendWheel, buildA11yTree, buildCompactUiIndex, buildPageModel, buildUiDelta, hasUiDelta, summarizeCompactIndex, summarizePageModel, summarizeUiDelta, } from './session.js';
3
+ import { formatConnectFailureMessage, isHttpUrl, normalizeConnectTarget } from './connect-utils.js';
4
+ import { connect, connectThroughProxy, disconnect, getSession, sendClick, sendType, sendKey, sendFileUpload, sendListboxPick, sendSelectOption, sendWheel, buildA11yTree, buildCompactUiIndex, buildPageModel, expandPageSection, buildUiDelta, hasUiDelta, nodeIdForPath, summarizeCompactIndex, summarizePageModel, summarizeUiDelta, } from './session.js';
4
5
  export function createServer() {
5
- const server = new McpServer({ name: 'geometra', version: '0.1.0' }, { capabilities: { tools: {} } });
6
+ const server = new McpServer({ name: 'geometra', version: '1.19.1' }, { capabilities: { tools: {} } });
6
7
  // ── connect ──────────────────────────────────────────────────
7
- server.tool('geometra_connect', `Connect to a running Geometra server over WebSocket. This replaces Playwright/browser automation you get direct access to the UI's pixel-exact geometry as JSON.
8
+ server.tool('geometra_connect', `Connect to a Geometra WebSocket peer, or start \`geometra-proxy\` automatically for a normal web page.
8
9
 
9
- Call this first before using any other geometra tools. The peer must be listening (native Geometra server, or \`geometra-proxy\` for real web pages). File upload / wheel / native \`<select>\` require \`@geometra/proxy\`; native Textura servers return an error for those messages.`, {
10
- url: z.string().describe('WebSocket URL of the Geometra server (e.g. ws://localhost:3100)'),
11
- }, async ({ url }) => {
10
+ **Prefer \`pageUrl\` for job sites and SPAs:** pass \`https://…\` and this server spawns geometra-proxy on an ephemeral local port and connects you do **not** need a separate terminal or a \`ws://\` URL (fewer IDE approval steps for the human).
11
+
12
+ Use \`url\` (ws://…) only when a Geometra/native server or an already-running proxy is listening. If you accidentally pass \`https://…\` in \`url\`, MCP treats it like \`pageUrl\` and starts the proxy for you.
13
+
14
+ Chromium opens **visible** by default unless \`headless: true\`. File upload / wheel / native \`<select>\` need the proxy path (\`pageUrl\` or ws to proxy).`, {
15
+ url: z
16
+ .string()
17
+ .optional()
18
+ .describe('WebSocket URL when a server is already running (e.g. ws://127.0.0.1:3200 or ws://localhost:3100). If you pass http(s) here by mistake, MCP will treat it as a page URL and start geometra-proxy.'),
19
+ pageUrl: z
20
+ .string()
21
+ .url()
22
+ .refine(isHttpUrl, 'pageUrl must use http:// or https://')
23
+ .optional()
24
+ .describe('HTTP(S) page to open. MCP starts geometra-proxy and connects automatically. Use this instead of url for most web apply flows.'),
25
+ port: z
26
+ .number()
27
+ .int()
28
+ .positive()
29
+ .max(65535)
30
+ .optional()
31
+ .describe('Preferred local port for spawned proxy (default: ephemeral OS-assigned port).'),
32
+ headless: z
33
+ .boolean()
34
+ .optional()
35
+ .describe('Run Chromium headless (default false = visible window).'),
36
+ width: z.number().int().positive().optional().describe('Viewport width for spawned proxy.'),
37
+ height: z.number().int().positive().optional().describe('Viewport height for spawned proxy.'),
38
+ slowMo: z
39
+ .number()
40
+ .int()
41
+ .nonnegative()
42
+ .optional()
43
+ .describe('Playwright slowMo (ms) on spawned proxy for easier visual following.'),
44
+ }, async (input) => {
45
+ const normalized = normalizeConnectTarget({ url: input.url, pageUrl: input.pageUrl });
46
+ if (!normalized.ok)
47
+ return err(normalized.error);
48
+ const target = normalized.value;
12
49
  try {
13
- const session = await connect(url);
50
+ if (target.kind === 'proxy') {
51
+ const session = await connectThroughProxy({
52
+ pageUrl: target.pageUrl,
53
+ port: input.port,
54
+ headless: input.headless,
55
+ width: input.width,
56
+ height: input.height,
57
+ slowMo: input.slowMo,
58
+ });
59
+ const summary = compactSessionSummary(session);
60
+ const inferred = target.autoCoercedFromUrl ? ' inferred from url input' : '';
61
+ return ok(`Started geometra-proxy and connected at ${session.url} (page: ${target.pageUrl}${inferred}). UI state:\n${summary}`);
62
+ }
63
+ const session = await connect(target.wsUrl);
14
64
  const summary = compactSessionSummary(session);
15
- return ok(`Connected to ${url}. UI state:\n${summary}`);
65
+ return ok(`Connected to ${target.wsUrl}. UI state:\n${summary}`);
16
66
  }
17
67
  catch (e) {
18
- return err(`Failed to connect: ${e.message}`);
68
+ return err(`Failed to connect: ${formatConnectFailureMessage(e, target)}`);
19
69
  }
20
70
  });
21
71
  // ── query ────────────────────────────────────────────────────
22
- server.tool('geometra_query', `Find elements in the current Geometra UI by role, name, or text content. Returns matching elements with their exact pixel bounds {x, y, width, height}, role, name, and tree path.
72
+ server.tool('geometra_query', `Find elements in the current Geometra UI by stable id, role, name, or text content. Returns matching elements with their exact pixel bounds {x, y, width, height}, role, name, and tree path.
23
73
 
24
74
  This is the Geometra equivalent of Playwright's locator — but instant, structured, and with no browser. Use the returned bounds to click elements or assert on layout.`, {
75
+ id: z.string().optional().describe('Stable node id from geometra_snapshot or geometra_expand_section'),
25
76
  role: z.string().optional().describe('ARIA role to match (e.g. "button", "textbox", "text", "heading", "listitem")'),
26
77
  name: z.string().optional().describe('Accessible name to match (exact or substring)'),
27
78
  text: z.string().optional().describe('Text content to search for (substring match)'),
28
- }, async ({ role, name, text }) => {
79
+ }, async ({ id, role, name, text }) => {
29
80
  const session = getSession();
30
81
  if (!session?.tree || !session?.layout)
31
82
  return err('Not connected. Call geometra_connect first.');
32
83
  const a11y = buildA11yTree(session.tree, session.layout);
33
- const matches = findNodes(a11y, { role, name, text });
84
+ const matches = findNodes(a11y, { id, role, name, text });
34
85
  if (matches.length === 0) {
35
- return ok(`No elements found matching ${JSON.stringify({ role, name, text })}`);
86
+ return ok(`No elements found matching ${JSON.stringify({ id, role, name, text })}`);
36
87
  }
37
88
  const result = matches.map(formatNode);
38
89
  return ok(JSON.stringify(result, null, 2));
39
90
  });
40
91
  // ── page model ────────────────────────────────────────────────
41
- server.tool('geometra_page_model', `Get a higher-level webpage model instead of a raw node dump. Extracts common structures such as landmarks, forms, dialogs, and lists, with short previews of fields/actions/items.
92
+ server.tool('geometra_page_model', `Get a higher-level webpage summary instead of a raw node dump. Returns stable section ids, page archetypes, summary counts, top-level landmarks/forms/dialogs/lists, and a few primary actions.
42
93
 
43
- Use this first on normal HTML pages when you want to understand the page shape with fewer tokens than a full snapshot.`, {
44
- maxFieldsPerForm: z
94
+ Use this first on normal HTML pages when you want to understand the page shape with fewer tokens than a full snapshot. Then call geometra_expand_section on a returned section id when you need details.`, {
95
+ maxPrimaryActions: z
45
96
  .number()
46
97
  .int()
47
98
  .min(1)
48
- .max(40)
99
+ .max(12)
49
100
  .optional()
50
- .default(12)
51
- .describe('Cap returned fields per form (default 12).'),
52
- maxActionsPerContainer: z
101
+ .default(6)
102
+ .describe('Cap top-level primary actions (default 6).'),
103
+ maxSectionsPerKind: z
53
104
  .number()
54
105
  .int()
55
106
  .min(1)
56
- .max(20)
107
+ .max(16)
57
108
  .optional()
58
109
  .default(8)
59
- .describe('Cap returned actions per form/dialog (default 8).'),
60
- maxItemsPerList: z
61
- .number()
62
- .int()
63
- .min(1)
64
- .max(20)
65
- .optional()
66
- .default(5)
67
- .describe('Cap list item preview strings (default 5).'),
68
- }, async ({ maxFieldsPerForm, maxActionsPerContainer, maxItemsPerList }) => {
110
+ .describe('Cap returned landmarks/forms/dialogs/lists per kind (default 8).'),
111
+ }, async ({ maxPrimaryActions, maxSectionsPerKind }) => {
69
112
  const session = getSession();
70
113
  if (!session?.tree || !session?.layout)
71
114
  return err('Not connected. Call geometra_connect first.');
72
115
  const a11y = buildA11yTree(session.tree, session.layout);
73
- const model = buildPageModel(a11y, { maxFieldsPerForm, maxActionsPerContainer, maxItemsPerList });
116
+ const model = buildPageModel(a11y, { maxPrimaryActions, maxSectionsPerKind });
74
117
  return ok(JSON.stringify(model));
75
118
  });
119
+ server.tool('geometra_expand_section', `Expand one section from geometra_page_model by stable id. Returns richer on-demand details such as headings, fields, actions, nested lists, list items, and text preview.
120
+
121
+ Use this after geometra_page_model when you know which form/dialog/list/landmark you want to inspect more closely. Per-item bounds are omitted by default to save tokens; set includeBounds=true if you need them immediately.`, {
122
+ id: z.string().describe('Section id from geometra_page_model, e.g. fm:1.0 or ls:2.1'),
123
+ maxHeadings: z.number().int().min(1).max(20).optional().default(6).describe('Cap heading rows'),
124
+ maxFields: z.number().int().min(1).max(40).optional().default(18).describe('Cap field rows'),
125
+ maxActions: z.number().int().min(1).max(30).optional().default(12).describe('Cap action rows'),
126
+ maxLists: z.number().int().min(0).max(20).optional().default(8).describe('Cap nested lists'),
127
+ maxItems: z.number().int().min(0).max(50).optional().default(20).describe('Cap list items'),
128
+ maxTextPreview: z.number().int().min(0).max(20).optional().default(6).describe('Cap text preview lines'),
129
+ includeBounds: z.boolean().optional().default(false).describe('Include bounds for fields/actions/headings/items'),
130
+ }, async ({ id, maxHeadings, maxFields, maxActions, maxLists, maxItems, maxTextPreview, includeBounds }) => {
131
+ const session = getSession();
132
+ if (!session?.tree || !session?.layout)
133
+ return err('Not connected. Call geometra_connect first.');
134
+ const a11y = buildA11yTree(session.tree, session.layout);
135
+ const detail = expandPageSection(a11y, id, {
136
+ maxHeadings,
137
+ maxFields,
138
+ maxActions,
139
+ maxLists,
140
+ maxItems,
141
+ maxTextPreview,
142
+ includeBounds,
143
+ });
144
+ if (!detail)
145
+ return err(`No expandable section found for id ${id}`);
146
+ return ok(JSON.stringify(detail));
147
+ });
76
148
  // ── click ────────────────────────────────────────────────────
77
149
  server.tool('geometra_click', `Click an element in the Geometra UI. Provide either the element's bounds (from geometra_query) or raw x,y coordinates. The click is dispatched server-side via the geometry protocol — no browser, no simulated DOM events.
78
150
 
@@ -84,8 +156,8 @@ After clicking, returns a compact semantic delta when possible (dialogs/forms/li
84
156
  if (!session)
85
157
  return err('Not connected. Call geometra_connect first.');
86
158
  const before = sessionA11y(session);
87
- await sendClick(session, x, y);
88
- const summary = postActionSummary(session, before);
159
+ const wait = await sendClick(session, x, y);
160
+ const summary = postActionSummary(session, before, wait);
89
161
  return ok(`Clicked at (${x}, ${y}).\n${summary}`);
90
162
  });
91
163
  // ── type ─────────────────────────────────────────────────────
@@ -98,8 +170,8 @@ Each character is sent as a key event through the geometry protocol. Returns a c
98
170
  if (!session)
99
171
  return err('Not connected. Call geometra_connect first.');
100
172
  const before = sessionA11y(session);
101
- await sendType(session, text);
102
- const summary = postActionSummary(session, before);
173
+ const wait = await sendType(session, text);
174
+ const summary = postActionSummary(session, before, wait);
103
175
  return ok(`Typed "${text}".\n${summary}`);
104
176
  });
105
177
  // ── key ──────────────────────────────────────────────────────
@@ -114,8 +186,8 @@ Each character is sent as a key event through the geometry protocol. Returns a c
114
186
  if (!session)
115
187
  return err('Not connected. Call geometra_connect first.');
116
188
  const before = sessionA11y(session);
117
- await sendKey(session, key, { shift, ctrl, meta, alt });
118
- const summary = postActionSummary(session, before);
189
+ const wait = await sendKey(session, key, { shift, ctrl, meta, alt });
190
+ const summary = postActionSummary(session, before, wait);
119
191
  return ok(`Pressed ${formatKeyCombo(key, { shift, ctrl, meta, alt })}.\n${summary}`);
120
192
  });
121
193
  // ── upload files (proxy) ───────────────────────────────────────
@@ -137,12 +209,12 @@ Strategies: **auto** (default) tries chooser click if x,y given, else hidden \`i
137
209
  return err('Not connected. Call geometra_connect first.');
138
210
  const before = sessionA11y(session);
139
211
  try {
140
- await sendFileUpload(session, paths, {
212
+ const wait = await sendFileUpload(session, paths, {
141
213
  click: x !== undefined && y !== undefined ? { x, y } : undefined,
142
214
  strategy,
143
215
  drop: dropX !== undefined && dropY !== undefined ? { x: dropX, y: dropY } : undefined,
144
216
  });
145
- const summary = postActionSummary(session, before);
217
+ const summary = postActionSummary(session, before, wait);
146
218
  return ok(`Uploaded ${paths.length} file(s).\n${summary}`);
147
219
  }
148
220
  catch (e) {
@@ -162,11 +234,11 @@ Optional openX,openY clicks the combobox first if the list is not open. Uses sub
162
234
  return err('Not connected. Call geometra_connect first.');
163
235
  const before = sessionA11y(session);
164
236
  try {
165
- await sendListboxPick(session, label, {
237
+ const wait = await sendListboxPick(session, label, {
166
238
  exact,
167
239
  open: openX !== undefined && openY !== undefined ? { x: openX, y: openY } : undefined,
168
240
  });
169
- const summary = postActionSummary(session, before);
241
+ const summary = postActionSummary(session, before, wait);
170
242
  return ok(`Picked listbox option "${label}".\n${summary}`);
171
243
  }
172
244
  catch (e) {
@@ -191,8 +263,8 @@ Custom React/Vue dropdowns are not supported — open them with geometra_click a
191
263
  }
192
264
  const before = sessionA11y(session);
193
265
  try {
194
- await sendSelectOption(session, x, y, { value, label, index });
195
- const summary = postActionSummary(session, before);
266
+ const wait = await sendSelectOption(session, x, y, { value, label, index });
267
+ const summary = postActionSummary(session, before, wait);
196
268
  return ok(`Selected option.\n${summary}`);
197
269
  }
198
270
  catch (e) {
@@ -211,8 +283,8 @@ Custom React/Vue dropdowns are not supported — open them with geometra_click a
211
283
  return err('Not connected. Call geometra_connect first.');
212
284
  const before = sessionA11y(session);
213
285
  try {
214
- await sendWheel(session, deltaY, { deltaX, x, y });
215
- const summary = postActionSummary(session, before);
286
+ const wait = await sendWheel(session, deltaY, { deltaX, x, y });
287
+ const summary = postActionSummary(session, before, wait);
216
288
  return ok(`Wheel delta (${deltaX ?? 0}, ${deltaY}).\n${summary}`);
217
289
  }
218
290
  catch (e) {
@@ -222,7 +294,7 @@ Custom React/Vue dropdowns are not supported — open them with geometra_click a
222
294
  // ── snapshot ─────────────────────────────────────────────────
223
295
  server.tool('geometra_snapshot', `Get the current UI as JSON. Default **compact** view: flat list of viewport-visible actionable nodes (links, buttons, inputs, headings, landmarks, text leaves, focusable elements) with bounds and tree paths — far fewer tokens than a full nested tree. Use **full** for complete nested a11y + every wrapper when debugging layout.
224
296
 
225
- JSON is minified in compact view to save tokens. For a webpage-shaped overview (forms, dialogs, lists, landmarks), use geometra_page_model.`, {
297
+ JSON is minified in compact view to save tokens. For a summary-first overview, use geometra_page_model, then geometra_expand_section for just the part you want.`, {
226
298
  view: z
227
299
  .enum(['compact', 'full'])
228
300
  .optional()
@@ -287,17 +359,21 @@ function sessionOverviewFromA11y(a11y) {
287
359
  const keyNodes = nodes.length > 0 ? `Key nodes:\n${summarizeCompactIndex(nodes, 18)}` : '';
288
360
  return [pageSummary, keyNodes].filter(Boolean).join('\n');
289
361
  }
290
- function postActionSummary(session, before) {
362
+ function postActionSummary(session, before, wait) {
291
363
  const after = sessionA11y(session);
364
+ const notes = [];
365
+ if (wait?.status === 'timed_out') {
366
+ notes.push(`No frame or patch arrived within ${wait.timeoutMs}ms after the action. The action may still have succeeded if it did not change geometry or semantics.`);
367
+ }
292
368
  if (!after)
293
- return 'No UI update received';
369
+ return [...notes, 'No UI update received'].filter(Boolean).join('\n');
294
370
  if (before) {
295
371
  const delta = buildUiDelta(before, after);
296
372
  if (hasUiDelta(delta)) {
297
- return `Changes:\n${summarizeUiDelta(delta)}`;
373
+ return [...notes, `Changes:\n${summarizeUiDelta(delta)}`].filter(Boolean).join('\n');
298
374
  }
299
375
  }
300
- return `Current UI:\n${sessionOverviewFromA11y(after)}`;
376
+ return [...notes, `Current UI:\n${sessionOverviewFromA11y(after)}`].filter(Boolean).join('\n');
301
377
  }
302
378
  function ok(text) {
303
379
  return { content: [{ type: 'text', text }] };
@@ -309,13 +385,15 @@ function findNodes(node, filter) {
309
385
  const matches = [];
310
386
  function walk(n) {
311
387
  let match = true;
388
+ if (filter.id && nodeIdForPath(n.path) !== filter.id)
389
+ match = false;
312
390
  if (filter.role && n.role !== filter.role)
313
391
  match = false;
314
392
  if (filter.name && (!n.name || !n.name.includes(filter.name)))
315
393
  match = false;
316
394
  if (filter.text && (!n.name || !n.name.includes(filter.text)))
317
395
  match = false;
318
- if (match && (filter.role || filter.name || filter.text))
396
+ if (match && (filter.id || filter.role || filter.name || filter.text))
319
397
  matches.push(n);
320
398
  for (const child of n.children)
321
399
  walk(child);
@@ -325,6 +403,7 @@ function findNodes(node, filter) {
325
403
  }
326
404
  function formatNode(node) {
327
405
  return {
406
+ id: nodeIdForPath(node.path),
328
407
  role: node.role,
329
408
  name: node.name,
330
409
  bounds: node.bounds,
package/dist/session.d.ts CHANGED
@@ -1,3 +1,4 @@
1
+ import type { ChildProcess } from 'node:child_process';
1
2
  import WebSocket from 'ws';
2
3
  /**
3
4
  * Parsed accessibility node from the UI tree + computed layout.
@@ -24,6 +25,7 @@ export interface A11yNode {
24
25
  }
25
26
  /** Flat, viewport-filtered index for token-efficient agent context (see `buildCompactUiIndex`). */
26
27
  export interface CompactUiNode {
28
+ id: string;
27
29
  role: string;
28
30
  name?: string;
29
31
  state?: A11yNode['state'];
@@ -36,8 +38,10 @@ export interface CompactUiNode {
36
38
  path: number[];
37
39
  focusable: boolean;
38
40
  }
39
- /** Higher-level webpage structures extracted from the a11y tree. */
40
- export interface PageLandmark {
41
+ export type PageSectionKind = 'landmark' | 'form' | 'dialog' | 'list';
42
+ export type PageArchetype = 'shell' | 'form' | 'dialog' | 'results' | 'content' | 'dashboard';
43
+ interface PageSectionSummaryBase {
44
+ id: string;
41
45
  role: string;
42
46
  name?: string;
43
47
  bounds: {
@@ -46,9 +50,12 @@ export interface PageLandmark {
46
50
  width: number;
47
51
  height: number;
48
52
  };
49
- path: number[];
50
53
  }
51
- export interface PageFieldModel {
54
+ /** Higher-level webpage structures extracted from the a11y tree. */
55
+ export interface PageLandmark extends PageSectionSummaryBase {
56
+ }
57
+ export interface PagePrimaryAction {
58
+ id: string;
52
59
  role: string;
53
60
  name?: string;
54
61
  state?: A11yNode['state'];
@@ -58,47 +65,85 @@ export interface PageFieldModel {
58
65
  width: number;
59
66
  height: number;
60
67
  };
61
- path: number[];
62
68
  }
63
- export interface PageActionModel {
69
+ export interface PageFormModel extends PageSectionSummaryBase {
70
+ fieldCount: number;
71
+ actionCount: number;
72
+ }
73
+ export interface PageDialogModel extends PageSectionSummaryBase {
74
+ fieldCount: number;
75
+ actionCount: number;
76
+ }
77
+ export interface PageListModel extends PageSectionSummaryBase {
78
+ itemCount: number;
79
+ }
80
+ export interface PageModel {
81
+ viewport: {
82
+ width: number;
83
+ height: number;
84
+ };
85
+ archetypes: PageArchetype[];
86
+ summary: {
87
+ landmarkCount: number;
88
+ formCount: number;
89
+ dialogCount: number;
90
+ listCount: number;
91
+ focusableCount: number;
92
+ };
93
+ primaryActions: PagePrimaryAction[];
94
+ landmarks: PageLandmark[];
95
+ forms: PageFormModel[];
96
+ dialogs: PageDialogModel[];
97
+ lists: PageListModel[];
98
+ }
99
+ export interface PageHeadingModel {
100
+ id: string;
101
+ name: string;
102
+ bounds?: {
103
+ x: number;
104
+ y: number;
105
+ width: number;
106
+ height: number;
107
+ };
108
+ }
109
+ export interface PageFieldModel {
110
+ id: string;
64
111
  role: string;
65
112
  name?: string;
66
113
  state?: A11yNode['state'];
67
- bounds: {
114
+ bounds?: {
68
115
  x: number;
69
116
  y: number;
70
117
  width: number;
71
118
  height: number;
72
119
  };
73
- path: number[];
74
120
  }
75
- export interface PageFormModel {
121
+ export interface PageActionModel {
122
+ id: string;
123
+ role: string;
76
124
  name?: string;
77
- bounds: {
125
+ state?: A11yNode['state'];
126
+ bounds?: {
78
127
  x: number;
79
128
  y: number;
80
129
  width: number;
81
130
  height: number;
82
131
  };
83
- path: number[];
84
- fieldCount: number;
85
- actionCount: number;
86
- fields: PageFieldModel[];
87
- actions: PageActionModel[];
88
132
  }
89
- export interface PageDialogModel {
133
+ export interface PageListItemModel {
134
+ id: string;
90
135
  name?: string;
91
- bounds: {
136
+ bounds?: {
92
137
  x: number;
93
138
  y: number;
94
139
  width: number;
95
140
  height: number;
96
141
  };
97
- path: number[];
98
- actionCount: number;
99
- actions: PageActionModel[];
100
142
  }
101
- export interface PageListModel {
143
+ export interface PageSectionDetail {
144
+ id: string;
145
+ kind: PageSectionKind;
146
+ role: string;
102
147
  name?: string;
103
148
  bounds: {
104
149
  x: number;
@@ -106,19 +151,19 @@ export interface PageListModel {
106
151
  width: number;
107
152
  height: number;
108
153
  };
109
- path: number[];
110
- itemCount: number;
111
- itemsPreview: string[];
112
- }
113
- export interface PageModel {
114
- viewport: {
115
- width: number;
116
- height: number;
154
+ summary: {
155
+ headingCount: number;
156
+ fieldCount: number;
157
+ actionCount: number;
158
+ listCount: number;
159
+ itemCount: number;
117
160
  };
118
- landmarks: PageLandmark[];
119
- forms: PageFormModel[];
120
- dialogs: PageDialogModel[];
161
+ headings: PageHeadingModel[];
162
+ fields: PageFieldModel[];
163
+ actions: PageActionModel[];
121
164
  lists: PageListModel[];
165
+ items: PageListItemModel[];
166
+ textPreview: string[];
122
167
  }
123
168
  export interface UiNodeUpdate {
124
169
  before: CompactUiNode;
@@ -126,8 +171,8 @@ export interface UiNodeUpdate {
126
171
  changes: string[];
127
172
  }
128
173
  export interface UiListCountChange {
174
+ id: string;
129
175
  name?: string;
130
- path: number[];
131
176
  beforeCount: number;
132
177
  afterCount: number;
133
178
  }
@@ -147,22 +192,40 @@ export interface Session {
147
192
  layout: Record<string, unknown> | null;
148
193
  tree: Record<string, unknown> | null;
149
194
  url: string;
195
+ /** Present when this session owns a child geometra-proxy process (pageUrl connect). */
196
+ proxyChild?: ChildProcess;
197
+ }
198
+ export interface UpdateWaitResult {
199
+ status: 'updated' | 'timed_out';
200
+ timeoutMs: number;
150
201
  }
151
202
  /**
152
203
  * Connect to a running Geometra server. Waits for the first frame so that
153
204
  * layout/tree state is available immediately after connection.
154
205
  */
155
206
  export declare function connect(url: string): Promise<Session>;
207
+ /**
208
+ * Start geometra-proxy for `pageUrl`, connect to its WebSocket, and attach the child
209
+ * process to the session so disconnect / reconnect can clean it up.
210
+ */
211
+ export declare function connectThroughProxy(options: {
212
+ pageUrl: string;
213
+ port?: number;
214
+ headless?: boolean;
215
+ width?: number;
216
+ height?: number;
217
+ slowMo?: number;
218
+ }): Promise<Session>;
156
219
  export declare function getSession(): Session | null;
157
220
  export declare function disconnect(): void;
158
221
  /**
159
222
  * Send a click event at (x, y) and wait for the next frame/patch response.
160
223
  */
161
- export declare function sendClick(session: Session, x: number, y: number): Promise<void>;
224
+ export declare function sendClick(session: Session, x: number, y: number): Promise<UpdateWaitResult>;
162
225
  /**
163
226
  * Send a sequence of key events to type text into the focused element.
164
227
  */
165
- export declare function sendType(session: Session, text: string): Promise<void>;
228
+ export declare function sendType(session: Session, text: string): Promise<UpdateWaitResult>;
166
229
  /**
167
230
  * Send a special key (Enter, Tab, Escape, etc.)
168
231
  */
@@ -171,7 +234,7 @@ export declare function sendKey(session: Session, key: string, modifiers?: {
171
234
  ctrl?: boolean;
172
235
  meta?: boolean;
173
236
  alt?: boolean;
174
- }): Promise<void>;
237
+ }): Promise<UpdateWaitResult>;
175
238
  /**
176
239
  * Attach local file(s). Paths must exist on the machine running `@geometra/proxy` (not the MCP host).
177
240
  * Optional `x`,`y` click opens a file chooser; omit to use the first `input[type=file]` in any frame.
@@ -186,7 +249,7 @@ export declare function sendFileUpload(session: Session, paths: string[], opts?:
186
249
  x: number;
187
250
  y: number;
188
251
  };
189
- }): Promise<void>;
252
+ }): Promise<UpdateWaitResult>;
190
253
  /** ARIA `role=option` listbox (e.g. React Select). Optional click opens the list. */
191
254
  export declare function sendListboxPick(session: Session, label: string, opts?: {
192
255
  exact?: boolean;
@@ -194,25 +257,26 @@ export declare function sendListboxPick(session: Session, label: string, opts?:
194
257
  x: number;
195
258
  y: number;
196
259
  };
197
- }): Promise<void>;
260
+ }): Promise<UpdateWaitResult>;
198
261
  /** Native `<select>` only: click the control center, then pick by value, label text, or zero-based index. */
199
262
  export declare function sendSelectOption(session: Session, x: number, y: number, option: {
200
263
  value?: string;
201
264
  label?: string;
202
265
  index?: number;
203
- }): Promise<void>;
266
+ }): Promise<UpdateWaitResult>;
204
267
  /** Mouse wheel / scroll. Optional `x`,`y` move pointer before scrolling. */
205
268
  export declare function sendWheel(session: Session, deltaY: number, opts?: {
206
269
  deltaX?: number;
207
270
  x?: number;
208
271
  y?: number;
209
- }): Promise<void>;
272
+ }): Promise<UpdateWaitResult>;
210
273
  /**
211
274
  * Build a flat accessibility tree from the raw UI tree + layout.
212
275
  * This is a standalone reimplementation that works with raw JSON —
213
276
  * no dependency on @geometra/core.
214
277
  */
215
278
  export declare function buildA11yTree(tree: Record<string, unknown>, layout: Record<string, unknown>): A11yNode;
279
+ export declare function nodeIdForPath(path: number[]): string;
216
280
  /**
217
281
  * Flat list of actionable / semantic nodes in the viewport, sorted with focusable first
218
282
  * then top-to-bottom reading order. Intended to minimize LLM tokens vs a full nested tree.
@@ -227,14 +291,25 @@ export declare function buildCompactUiIndex(root: A11yNode, options?: {
227
291
  };
228
292
  export declare function summarizeCompactIndex(nodes: CompactUiNode[], maxLines?: number): string;
229
293
  /**
230
- * Build a compact, webpage-shaped model from the accessibility tree:
231
- * landmarks, dialogs, forms, and lists with short previews.
294
+ * Build a summary-first, stable-ID webpage model from the accessibility tree.
295
+ * Use {@link expandPageSection} to fetch details for a specific section on demand.
232
296
  */
233
297
  export declare function buildPageModel(root: A11yNode, options?: {
234
- maxFieldsPerForm?: number;
235
- maxActionsPerContainer?: number;
236
- maxItemsPerList?: number;
298
+ maxPrimaryActions?: number;
299
+ maxSectionsPerKind?: number;
237
300
  }): PageModel;
301
+ /**
302
+ * Expand a page-model section by stable ID into richer, on-demand details.
303
+ */
304
+ export declare function expandPageSection(root: A11yNode, id: string, options?: {
305
+ maxHeadings?: number;
306
+ maxFields?: number;
307
+ maxActions?: number;
308
+ maxLists?: number;
309
+ maxItems?: number;
310
+ maxTextPreview?: number;
311
+ includeBounds?: boolean;
312
+ }): PageSectionDetail | null;
238
313
  export declare function summarizePageModel(model: PageModel, maxLines?: number): string;
239
314
  /**
240
315
  * Compare two accessibility trees at the compact viewport layer plus a few
@@ -245,3 +320,4 @@ export declare function buildUiDelta(before: A11yNode, after: A11yNode, options?
245
320
  }): UiDelta;
246
321
  export declare function hasUiDelta(delta: UiDelta): boolean;
247
322
  export declare function summarizeUiDelta(delta: UiDelta, maxLines?: number): string;
323
+ export {};