@geometra/mcp 1.19.11 → 1.19.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -19,12 +19,15 @@ Geometra proxy: Chromium → DOM geometry → same WebSocket as native →
19
19
  | Tool | Description |
20
20
  |---|---|
21
21
  | `geometra_connect` | Connect with `url` (ws://…) **or** `pageUrl` (https://…) to auto-start geometra-proxy; `url: "https://…"` is auto-coerced onto the proxy path |
22
- | `geometra_query` | Find elements by stable id, role, name, text content, current value, or semantic state such as `invalid`, `required`, or `busy` |
22
+ | `geometra_query` | Find elements by stable id, role, name, text content, ancestor/prompt context, current value, or semantic state such as `invalid`, `required`, or `busy` |
23
23
  | `geometra_wait_for` | Wait for a semantic condition instead of guessing sleeps (`busy`, `disabled`, alerts, values, etc.) |
24
+ | `geometra_form_schema` | Compact, fill-oriented form schema with stable field ids and collapsed radio/button groups |
25
+ | `geometra_fill_form` | Fill a form from `valuesById` / `valuesByLabel` in one MCP call; preferred low-token happy path for standard forms |
24
26
  | `geometra_fill_fields` | Fill labeled text/choice/toggle/file fields in one MCP call; can return final-only status for the smallest responses |
25
27
  | `geometra_run_actions` | Execute a batch of high-level actions in one MCP round trip and get one consolidated result, with optional final-only output |
26
28
  | `geometra_page_model` | Summary-first webpage model: archetypes, stable section ids, counts, top-level sections, primary actions |
27
- | `geometra_expand_section` | Expand one form/dialog/list/landmark from `geometra_page_model` on demand |
29
+ | `geometra_expand_section` | Expand one form/dialog/list/landmark from `geometra_page_model` on demand, with paging/filtering for long sections |
30
+ | `geometra_reveal` | Scroll until a matching node is visible instead of guessing wheel deltas |
28
31
  | `geometra_click` | Click an element by coordinates |
29
32
  | `geometra_type` | Type text into the focused element |
30
33
  | `geometra_key` | Send special keys (Enter, Tab, Escape, arrows) |
@@ -274,19 +277,15 @@ With `python3 -m http.server 8080` in `demos/proxy-mcp-sample` and `npx geometra
274
277
  Agent: geometra_connect({ url: "ws://127.0.0.1:3200" })
275
278
  → Connected. UI includes textbox "Email", button "Save", …
276
279
 
277
- Agent: geometra_page_model({})
278
- → {"viewport":{"width":1024,"height":768},"archetypes":["shell","form"],"summary":{...},"forms":[{"id":"fm:1.0","fieldCount":3,"actionCount":1}], ...}
280
+ Agent: geometra_form_schema({})
281
+ → {"forms":[{"formId":"fm:1.0","fields":[{"id":"ff:1.0.0","label":"Email"}, ...]}]}
279
282
 
280
- Agent: geometra_expand_section({ id: "fm:1.0" })
281
- → {"id":"fm:1.0","kind":"form","fields":[{"id":"n:1.0.0","name":"Email"}, ...], "actions":[...]}
282
-
283
- Agent: geometra_query({ role: "textbox", name: "Email" })
284
- → bounds for the email field (viewport coordinates)
285
-
286
- Agent: geometra_click({ x: <center-x>, y: <center-y> })
287
- → Focuses the input
288
-
289
- Agent: geometra_type({ text: "hello@example.com" })
283
+ Agent: geometra_fill_form({
284
+ formId: "fm:1.0",
285
+ valuesByLabel: { "Email": "hello@example.com" },
286
+ failOnInvalid: true
287
+ })
288
+ → {"completed":true,"successCount":1,"errorCount":0,"final":{"invalidCount":0,...}}
290
289
 
291
290
  Agent: geometra_query({ role: "button", name: "Save" })
292
291
  → Click center to submit the sample form; status text updates in the DOM
@@ -298,20 +297,22 @@ Agent: geometra_query({ role: "button", name: "Save" })
298
297
  2. It receives the computed layout (`{ x, y, width, height }` for every node) and the UI tree (`kind`, `semantic`, `props`, `handlers`, `children`).
299
298
  3. It builds an accessibility tree from that data — roles, names, focusable state, bounds.
300
299
  4. **`geometra_snapshot`** defaults to a **compact** flat list of viewport-visible actionable nodes (minified JSON) to reduce LLM tokens; use `view: "full"` for the complete nested tree.
301
- 5. **`geometra_page_model`** is summary-first: page archetypes, stable section ids, counts, top-level landmarks/forms/dialogs/lists, and a few primary actions. It is designed to be cheaper than dumping full previews for every section.
302
- 6. **`geometra_expand_section`** fetches richer details only for the section you care about (fields, actions, headings, nested lists, list items, text preview).
303
- 7. After interactions, action tools return a **semantic delta** when possible (dialogs opened/closed, forms appeared/removed, list counts changed, named/focusable nodes added/removed/updated). If nothing meaningful changed, they fall back to a short current-UI overview.
304
- 8. Tools expose query, click, type, snapshot, page-model, and section-expansion operations over this structured data.
305
- 9. After each interaction, the peer sends updated geometry (full `frame` or `patch`) — the MCP tools interpret that into compact summaries.
300
+ 5. **`geometra_form_schema`** is the compact form-specific path: stable field ids, required/invalid state, current values, and collapsed choice groups without layout-heavy section detail.
301
+ 6. **`geometra_fill_form`** turns a compact values object into semantic field operations server-side, so the model does not need to emit one tool call per field.
302
+ 7. **`geometra_page_model`** is still the right summary-first path for non-form exploration: page archetypes, stable section ids, counts, top-level landmarks/forms/dialogs/lists, and a few primary actions.
303
+ 8. **`geometra_expand_section`** fetches richer details only for the section you care about (fields, actions, headings, nested lists, list items, text preview).
304
+ 9. After interactions, action tools return a **semantic delta** when possible (dialogs opened/closed, forms appeared/removed, list counts changed, named/focusable nodes added/removed/updated). If nothing meaningful changed, they fall back to a short current-UI overview.
305
+ 10. After each interaction, the peer sends updated geometry (full `frame` or `patch`) — the MCP tools interpret that into compact summaries.
306
306
 
307
307
  ## Long Forms
308
308
 
309
309
  For long application flows, prefer one of these patterns:
310
310
 
311
- 1. `geometra_page_model`
312
- 2. `geometra_expand_section`
313
- 3. `geometra_fill_fields` for obvious field entry
311
+ 1. `geometra_form_schema`
312
+ 2. `geometra_fill_form`
313
+ 3. `geometra_reveal` for far-below-fold targets such as submit buttons
314
314
  4. `geometra_run_actions` when you need mixed navigation + waits + field entry
315
+ 5. `geometra_page_model` + `geometra_expand_section` when you are still exploring the page rather than filling it
315
316
 
316
317
  Typical batch:
317
318
 
@@ -333,6 +334,30 @@ For the smallest long-form responses, prefer:
333
334
  1. `detail: "minimal"` for structured step metadata instead of narrated deltas
334
335
  2. `includeSteps: false` when you only need aggregate success/error counts plus the final validation/state payload
335
336
 
337
+ Typical low-token form fill:
338
+
339
+ ```json
340
+ {
341
+ "formId": "fm:1.0",
342
+ "valuesById": {
343
+ "ff:1.0.0": "Taylor Applicant",
344
+ "ff:1.0.1": "taylor@example.com",
345
+ "ff:1.0.2": "Germany",
346
+ "ff:1.0.3": "No"
347
+ },
348
+ "failOnInvalid": true,
349
+ "includeSteps": false,
350
+ "detail": "minimal"
351
+ }
352
+ ```
353
+
354
+ For long single-page forms:
355
+
356
+ 1. Use `geometra_expand_section` with `fieldOffset` / `actionOffset` to page through large forms instead of taking a full snapshot.
357
+ 2. Add `onlyRequiredFields: true` or `onlyInvalidFields: true` when you want the actionable subset.
358
+ 3. Use `contextText` in `geometra_query` / `geometra_wait_for` to disambiguate repeated `Yes` / `No` controls by question text.
359
+ 4. Use `geometra_reveal` instead of manual wheel loops when the next target is offscreen.
360
+
336
361
  Typical field fill:
337
362
 
338
363
  ```json
@@ -1,4 +1,4 @@
1
- import { existsSync, mkdirSync, mkdtempSync, rmSync, symlinkSync, writeFileSync } from 'node:fs';
1
+ import { existsSync, mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs';
2
2
  import { createRequire } from 'node:module';
3
3
  import { tmpdir } from 'node:os';
4
4
  import path from 'node:path';
@@ -78,7 +78,22 @@ describe('proxy ready helpers', () => {
78
78
  const packageDir = path.join(scopeDir, 'proxy');
79
79
  const probePath = path.join(tempRoot, 'probe.cjs');
80
80
  mkdirSync(scopeDir, { recursive: true });
81
- symlinkSync(path.resolve(process.cwd(), 'packages/proxy'), packageDir, 'dir');
81
+ mkdirSync(path.join(packageDir, 'src'), { recursive: true });
82
+ writeFileSync(path.join(packageDir, 'package.json'), JSON.stringify({
83
+ name: '@geometra/proxy',
84
+ version: '0.0.0-test',
85
+ type: 'module',
86
+ }));
87
+ writeFileSync(path.join(packageDir, 'tsconfig.build.json'), JSON.stringify({
88
+ extends: path.resolve(process.cwd(), 'tsconfig.base.json'),
89
+ compilerOptions: {
90
+ outDir: 'dist',
91
+ rootDir: 'src',
92
+ noEmit: false,
93
+ },
94
+ include: ['src'],
95
+ }));
96
+ writeFileSync(path.join(packageDir, 'src', 'index.ts'), 'console.log("proxy");\n');
82
97
  writeFileSync(probePath, 'module.exports = {}');
83
98
  const customRequire = createRequire(probePath);
84
99
  const scriptPath = resolveProxyScriptPathWith(customRequire);
@@ -89,6 +104,33 @@ describe('proxy ready helpers', () => {
89
104
  rmSync(tempRoot, { recursive: true, force: true });
90
105
  }
91
106
  });
107
+ it('prefers the current workspace proxy dist over a bundled nested dependency in source checkouts', () => {
108
+ const tempRoot = mkdtempSync(path.join(tmpdir(), 'geometra-proxy-workspace-prefer-'));
109
+ try {
110
+ const workspaceDistDir = path.join(tempRoot, 'packages', 'proxy', 'dist');
111
+ const bundledProxyDir = path.join(tempRoot, 'mcp', 'node_modules', '@geometra', 'proxy');
112
+ const bundledDistDir = path.join(bundledProxyDir, 'dist');
113
+ const mcpDistDir = path.join(tempRoot, 'mcp', 'dist');
114
+ const probePath = path.join(mcpDistDir, 'proxy-spawn.cjs');
115
+ mkdirSync(workspaceDistDir, { recursive: true });
116
+ mkdirSync(bundledDistDir, { recursive: true });
117
+ mkdirSync(mcpDistDir, { recursive: true });
118
+ writeFileSync(path.join(workspaceDistDir, 'index.js'), 'export const source = "workspace";\n');
119
+ writeFileSync(path.join(bundledDistDir, 'index.js'), 'export const source = "bundled";\n');
120
+ writeFileSync(path.join(bundledProxyDir, 'package.json'), JSON.stringify({
121
+ name: '@geometra/proxy',
122
+ version: '0.0.0-test',
123
+ type: 'module',
124
+ }));
125
+ writeFileSync(probePath, 'module.exports = {};\n');
126
+ const customRequire = createRequire(probePath);
127
+ const scriptPath = resolveProxyScriptPathWith(customRequire, mcpDistDir);
128
+ expect(scriptPath).toBe(path.join(workspaceDistDir, 'index.js'));
129
+ }
130
+ finally {
131
+ rmSync(tempRoot, { recursive: true, force: true });
132
+ }
133
+ });
92
134
  it('falls back to the packaged sibling proxy dist when package exports are stale', () => {
93
135
  const tempRoot = mkdtempSync(path.join(tmpdir(), 'geometra-proxy-stale-exports-'));
94
136
  try {
@@ -7,7 +7,7 @@ function node(role, name, options) {
7
7
  ...(options?.state ? { state: options.state } : {}),
8
8
  ...(options?.validation ? { validation: options.validation } : {}),
9
9
  ...(options?.meta ? { meta: options.meta } : {}),
10
- bounds: { x: 0, y: 0, width: 120, height: 40 },
10
+ bounds: options?.bounds ?? { x: 0, y: 0, width: 120, height: 40 },
11
11
  path: options?.path ?? [],
12
12
  children: options?.children ?? [],
13
13
  focusable: role !== 'group',
@@ -23,6 +23,9 @@ const mockState = vi.hoisted(() => ({
23
23
  url: 'ws://127.0.0.1:3200',
24
24
  updateRevision: 1,
25
25
  },
26
+ formSchemas: [],
27
+ connect: vi.fn(),
28
+ connectThroughProxy: vi.fn(),
26
29
  sendClick: vi.fn(async () => ({ status: 'updated', timeoutMs: 2000 })),
27
30
  sendType: vi.fn(async () => ({ status: 'updated', timeoutMs: 2000 })),
28
31
  sendKey: vi.fn(async () => ({ status: 'updated', timeoutMs: 2000 })),
@@ -36,8 +39,8 @@ const mockState = vi.hoisted(() => ({
36
39
  waitForUiCondition: vi.fn(async () => true),
37
40
  }));
38
41
  vi.mock('../session.js', () => ({
39
- connect: vi.fn(),
40
- connectThroughProxy: vi.fn(),
42
+ connect: mockState.connect,
43
+ connectThroughProxy: mockState.connectThroughProxy,
41
44
  disconnect: vi.fn(),
42
45
  getSession: vi.fn(() => mockState.session),
43
46
  sendClick: mockState.sendClick,
@@ -62,6 +65,7 @@ vi.mock('../session.js', () => ({
62
65
  dialogs: [],
63
66
  lists: [],
64
67
  })),
68
+ buildFormSchemas: vi.fn(() => mockState.formSchemas),
65
69
  expandPageSection: vi.fn(() => null),
66
70
  buildUiDelta: vi.fn(() => ({})),
67
71
  hasUiDelta: vi.fn(() => false),
@@ -79,6 +83,9 @@ function getToolHandler(name) {
79
83
  describe('batch MCP result shaping', () => {
80
84
  beforeEach(() => {
81
85
  vi.clearAllMocks();
86
+ mockState.connect.mockResolvedValue(mockState.session);
87
+ mockState.connectThroughProxy.mockResolvedValue(mockState.session);
88
+ mockState.formSchemas = [];
82
89
  mockState.currentA11yRoot = node('group', undefined, {
83
90
  meta: { pageUrl: 'https://jobs.example.com/application', scrollX: 0, scrollY: 420 },
84
91
  children: [
@@ -203,4 +210,213 @@ describe('batch MCP result shaping', () => {
203
210
  expect(final.invalidFields.length).toBe(4);
204
211
  expect(final.alerts.length).toBe(1);
205
212
  });
213
+ it('returns a compact structured connect payload by default', async () => {
214
+ const handler = getToolHandler('geometra_connect');
215
+ const result = await handler({
216
+ pageUrl: 'https://jobs.example.com/application',
217
+ headless: true,
218
+ });
219
+ const payload = JSON.parse(result.content[0].text);
220
+ expect(payload).toMatchObject({
221
+ connected: true,
222
+ transport: 'proxy',
223
+ wsUrl: 'ws://127.0.0.1:3200',
224
+ pageUrl: 'https://jobs.example.com/application',
225
+ });
226
+ expect(payload).not.toHaveProperty('currentUi');
227
+ });
228
+ it('returns compact form schemas without requiring section expansion', async () => {
229
+ const handler = getToolHandler('geometra_form_schema');
230
+ mockState.formSchemas = [
231
+ {
232
+ formId: 'fm:0',
233
+ name: 'Application',
234
+ fieldCount: 4,
235
+ requiredCount: 3,
236
+ invalidCount: 0,
237
+ fields: [
238
+ { id: 'ff:0.0', kind: 'text', label: 'Full name', required: true },
239
+ { id: 'ff:0.1', kind: 'choice', label: 'Preferred location', required: true },
240
+ { id: 'ff:0.2', kind: 'choice', label: 'Are you legally authorized to work in Germany?', options: ['Yes', 'No'], optionCount: 2 },
241
+ { id: 'ff:0.3', kind: 'toggle', label: 'Share my profile for future roles', controlType: 'checkbox' },
242
+ ],
243
+ },
244
+ ];
245
+ const result = await handler({ maxFields: 20 });
246
+ const payload = JSON.parse(result.content[0].text);
247
+ expect(payload.forms).toEqual([
248
+ expect.objectContaining({
249
+ formId: 'fm:0',
250
+ fieldCount: 4,
251
+ requiredCount: 3,
252
+ invalidCount: 0,
253
+ }),
254
+ ]);
255
+ });
256
+ it('fills a form from ids and labels without echoing long essay content', async () => {
257
+ const longAnswer = 'B'.repeat(220);
258
+ const handler = getToolHandler('geometra_fill_form');
259
+ mockState.formSchemas = [
260
+ {
261
+ formId: 'fm:0',
262
+ name: 'Application',
263
+ fieldCount: 4,
264
+ requiredCount: 3,
265
+ invalidCount: 0,
266
+ fields: [
267
+ { id: 'ff:0.0', kind: 'text', label: 'Full name', required: true },
268
+ { id: 'ff:0.1', kind: 'choice', label: 'Are you legally authorized to work in Germany?', options: ['Yes', 'No'], optionCount: 2 },
269
+ { id: 'ff:0.2', kind: 'toggle', label: 'Share my profile for future roles', controlType: 'checkbox' },
270
+ { id: 'ff:0.3', kind: 'text', label: 'Why Geometra?' },
271
+ ],
272
+ },
273
+ ];
274
+ mockState.currentA11yRoot = node('group', undefined, {
275
+ meta: { pageUrl: 'https://jobs.example.com/application', scrollX: 0, scrollY: 640 },
276
+ children: [
277
+ node('textbox', 'Full name', { value: 'Taylor Applicant', path: [0] }),
278
+ node('textbox', 'Why Geometra?', { value: longAnswer, path: [1] }),
279
+ node('checkbox', 'Share my profile for future roles', {
280
+ path: [2],
281
+ state: { checked: true },
282
+ }),
283
+ ],
284
+ });
285
+ const result = await handler({
286
+ valuesById: {
287
+ 'ff:0.0': 'Taylor Applicant',
288
+ },
289
+ valuesByLabel: {
290
+ 'Are you legally authorized to work in Germany?': true,
291
+ 'Share my profile for future roles': true,
292
+ 'Why Geometra?': longAnswer,
293
+ },
294
+ includeSteps: true,
295
+ detail: 'minimal',
296
+ });
297
+ const text = result.content[0].text;
298
+ const payload = JSON.parse(text);
299
+ const steps = payload.steps;
300
+ expect(text).not.toContain(longAnswer);
301
+ expect(mockState.sendFieldChoice).toHaveBeenCalledWith(mockState.session, 'Are you legally authorized to work in Germany?', 'Yes', { exact: undefined, query: undefined }, undefined);
302
+ expect(payload).toMatchObject({
303
+ completed: true,
304
+ formId: 'fm:0',
305
+ requestedValueCount: 4,
306
+ fieldCount: 4,
307
+ successCount: 4,
308
+ errorCount: 0,
309
+ });
310
+ expect(steps[3]).toMatchObject({
311
+ kind: 'text',
312
+ fieldLabel: 'Why Geometra?',
313
+ valueLength: 220,
314
+ readback: { role: 'textbox', valueLength: 220 },
315
+ });
316
+ });
317
+ });
318
+ describe('query and reveal tools', () => {
319
+ beforeEach(() => {
320
+ vi.clearAllMocks();
321
+ });
322
+ it('lets query disambiguate repeated controls by context text', async () => {
323
+ const handler = getToolHandler('geometra_query');
324
+ mockState.currentA11yRoot = node('group', undefined, {
325
+ meta: { pageUrl: 'https://jobs.example.com/application', scrollX: 0, scrollY: 900 },
326
+ children: [
327
+ node('form', 'Application', {
328
+ path: [0],
329
+ children: [
330
+ node('group', undefined, {
331
+ path: [0, 0],
332
+ children: [
333
+ node('text', 'Are you legally authorized to work here?', { path: [0, 0, 0] }),
334
+ node('button', 'Yes', { path: [0, 0, 1] }),
335
+ node('button', 'No', { path: [0, 0, 2] }),
336
+ ],
337
+ }),
338
+ node('group', undefined, {
339
+ path: [0, 1],
340
+ children: [
341
+ node('text', 'Will you require sponsorship?', { path: [0, 1, 0] }),
342
+ node('button', 'Yes', { path: [0, 1, 1] }),
343
+ node('button', 'No', { path: [0, 1, 2] }),
344
+ ],
345
+ }),
346
+ ],
347
+ }),
348
+ ],
349
+ });
350
+ const result = await handler({
351
+ role: 'button',
352
+ name: 'Yes',
353
+ contextText: 'sponsorship',
354
+ });
355
+ const payload = JSON.parse(result.content[0].text);
356
+ expect(payload).toHaveLength(1);
357
+ expect(payload[0]).toMatchObject({
358
+ role: 'button',
359
+ name: 'Yes',
360
+ context: {
361
+ prompt: 'Will you require sponsorship?',
362
+ section: 'Application',
363
+ },
364
+ });
365
+ });
366
+ it('reveals an offscreen target with semantic scrolling instead of requiring manual wheels', async () => {
367
+ const handler = getToolHandler('geometra_reveal');
368
+ mockState.currentA11yRoot = node('group', undefined, {
369
+ bounds: { x: 0, y: 0, width: 1280, height: 800 },
370
+ meta: { pageUrl: 'https://jobs.example.com/application', scrollX: 0, scrollY: 0 },
371
+ children: [
372
+ node('form', 'Application', {
373
+ bounds: { x: 20, y: -200, width: 760, height: 1900 },
374
+ path: [0],
375
+ children: [
376
+ node('button', 'Submit application', {
377
+ bounds: { x: 60, y: 1540, width: 180, height: 40 },
378
+ path: [0, 0],
379
+ }),
380
+ ],
381
+ }),
382
+ ],
383
+ });
384
+ mockState.sendWheel.mockImplementationOnce(async () => {
385
+ mockState.currentA11yRoot = node('group', undefined, {
386
+ bounds: { x: 0, y: 0, width: 1280, height: 800 },
387
+ meta: { pageUrl: 'https://jobs.example.com/application', scrollX: 0, scrollY: 1220 },
388
+ children: [
389
+ node('form', 'Application', {
390
+ bounds: { x: 20, y: -1420, width: 760, height: 1900 },
391
+ path: [0],
392
+ children: [
393
+ node('button', 'Submit application', {
394
+ bounds: { x: 60, y: 320, width: 180, height: 40 },
395
+ path: [0, 0],
396
+ }),
397
+ ],
398
+ }),
399
+ ],
400
+ });
401
+ return { status: 'updated', timeoutMs: 2500 };
402
+ });
403
+ const result = await handler({
404
+ role: 'button',
405
+ name: 'Submit application',
406
+ maxSteps: 3,
407
+ fullyVisible: true,
408
+ timeoutMs: 2500,
409
+ });
410
+ const payload = JSON.parse(result.content[0].text);
411
+ expect(mockState.sendWheel).toHaveBeenCalledWith(mockState.session, expect.any(Number), expect.objectContaining({ x: expect.any(Number), y: expect.any(Number) }), 2500);
412
+ expect(payload).toMatchObject({
413
+ revealed: true,
414
+ attempts: 1,
415
+ target: {
416
+ role: 'button',
417
+ name: 'Submit application',
418
+ visibility: { fullyVisible: true },
419
+ },
420
+ });
421
+ });
206
422
  });