@geometra/mcp 1.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +164 -0
- package/dist/__tests__/session-model.test.d.ts +1 -0
- package/dist/__tests__/session-model.test.js +128 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +12 -0
- package/dist/server.d.ts +2 -0
- package/dist/server.js +352 -0
- package/dist/session.d.ts +247 -0
- package/dist/session.js +777 -0
- package/package.json +41 -0
package/README.md
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
# @geometra/mcp
|
|
2
|
+
|
|
3
|
+
MCP server for [Geometra](https://github.com/razroo/geometra) — interact with running Geometra apps via the geometry protocol over WebSocket. For **native** Geometra apps there is no browser in the loop. For **any existing website**, pair this MCP server with [`@geometra/proxy`](../packages/proxy/README.md) (headless Chromium) so the same tools speak the same GEOM v1 wire format.
|
|
4
|
+
|
|
5
|
+
## What this does
|
|
6
|
+
|
|
7
|
+
Connects Claude Code, Codex, or any MCP-compatible AI agent to a WebSocket endpoint that streams `frame` / `patch` messages. The agent gets structured `{ x, y, width, height }` geometry and semantic metadata for every UI node — not screenshots, not a vision model.
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
Playwright + vision: screenshot → model → guess coordinates → click → repeat
|
|
11
|
+
Native Geometra: WebSocket → JSON geometry (no browser on the agent path)
|
|
12
|
+
Geometra proxy: Headless Chromium → DOM geometry → same WebSocket as native → MCP tools unchanged
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Tools
|
|
16
|
+
|
|
17
|
+
| Tool | Description |
|
|
18
|
+
|---|---|
|
|
19
|
+
| `geometra_connect` | Connect to a running Geometra server |
|
|
20
|
+
| `geometra_query` | Find elements by role, name, or text content |
|
|
21
|
+
| `geometra_page_model` | Higher-level webpage model: landmarks, forms, dialogs, lists, short previews |
|
|
22
|
+
| `geometra_click` | Click an element by coordinates |
|
|
23
|
+
| `geometra_type` | Type text into the focused element |
|
|
24
|
+
| `geometra_key` | Send special keys (Enter, Tab, Escape, arrows) |
|
|
25
|
+
| `geometra_upload_files` | Attach files: auto / hidden input / native chooser / synthetic drop (`@geometra/proxy` only) |
|
|
26
|
+
| `geometra_pick_listbox_option` | Pick `role=option` (React Select, Headless UI, etc.; `@geometra/proxy` only) |
|
|
27
|
+
| `geometra_select_option` | Choose an option on a native `<select>` (`@geometra/proxy` only) |
|
|
28
|
+
| `geometra_wheel` | Mouse wheel / scroll (`@geometra/proxy` only) |
|
|
29
|
+
| `geometra_snapshot` | Default **compact**: flat viewport-visible actionable nodes (minified JSON). `view=full` for nested tree |
|
|
30
|
+
| `geometra_layout` | Raw computed geometry for every node |
|
|
31
|
+
| `geometra_disconnect` | Close the connection |
|
|
32
|
+
|
|
33
|
+
## Setup
|
|
34
|
+
|
|
35
|
+
### Claude Code
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
claude mcp add geometra -- npx @geometra/mcp
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### Claude Desktop
|
|
42
|
+
|
|
43
|
+
Add to `claude_desktop_config.json`:
|
|
44
|
+
|
|
45
|
+
```json
|
|
46
|
+
{
|
|
47
|
+
"mcpServers": {
|
|
48
|
+
"geometra": {
|
|
49
|
+
"command": "npx",
|
|
50
|
+
"args": ["@geometra/mcp"]
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### From source (this repo)
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
cd mcp
|
|
60
|
+
npm install
|
|
61
|
+
npm run build
|
|
62
|
+
claude mcp add geometra -- node ./dist/index.js
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Usage
|
|
66
|
+
|
|
67
|
+
### Native Geometra server
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
cd demos/server-client
|
|
71
|
+
npm run server # starts on ws://localhost:3100
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Any web app (Geometra proxy)
|
|
75
|
+
|
|
76
|
+
In one terminal, serve or open a page (example uses the repo sample):
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
cd demos/proxy-mcp-sample
|
|
80
|
+
python3 -m http.server 8080
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
In another terminal (from repo root after `npm install` / `bun install` and `bun run build`):
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
npx geometra-proxy http://localhost:8080 --port 3200
|
|
87
|
+
# Requires Chromium: npx playwright install chromium
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Point MCP at `ws://127.0.0.1:3200` instead of a native Geometra server. The proxy translates clicks and keyboard messages into Playwright actions and streams updated geometry.
|
|
91
|
+
|
|
92
|
+
Then in Claude Code (either backend):
|
|
93
|
+
|
|
94
|
+
```
|
|
95
|
+
> Connect to my Geometra app at ws://localhost:3100 and tell me what's on screen
|
|
96
|
+
|
|
97
|
+
> Give me the page model first, then find the main form
|
|
98
|
+
|
|
99
|
+
> Click the "Submit" button
|
|
100
|
+
|
|
101
|
+
> Type "hello@example.com" into the email input
|
|
102
|
+
|
|
103
|
+
> Take a snapshot and check if the dialog is visible
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Example: agent testing a signup form (native server)
|
|
107
|
+
|
|
108
|
+
```
|
|
109
|
+
Agent: geometra_connect({ url: "ws://localhost:3100" })
|
|
110
|
+
→ Connected. UI: button "Sign Up", textbox "Email", textbox "Password"
|
|
111
|
+
|
|
112
|
+
Agent: geometra_query({ role: "textbox", name: "Email" })
|
|
113
|
+
→ [{ role: "textbox", name: "Email", bounds: {x:100, y:200, width:300, height:40}, center: {x:250, y:220} }]
|
|
114
|
+
|
|
115
|
+
Agent: geometra_click({ x: 250, y: 220 })
|
|
116
|
+
→ Clicked. Email input focused.
|
|
117
|
+
|
|
118
|
+
Agent: geometra_type({ text: "test@example.com" })
|
|
119
|
+
→ Typed. Email field updated.
|
|
120
|
+
|
|
121
|
+
Agent: geometra_query({ role: "button", name: "Sign Up" })
|
|
122
|
+
→ [{ role: "button", name: "Sign Up", bounds: {x:100, y:350, width:120, height:44}, center: {x:160, y:372} }]
|
|
123
|
+
|
|
124
|
+
Agent: geometra_click({ x: 160, y: 372 })
|
|
125
|
+
→ Clicked. Success message visible.
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
No screenshots and no vision model in the loop. JSON in, JSON out.
|
|
129
|
+
|
|
130
|
+
## Example: static HTML via `@geometra/proxy`
|
|
131
|
+
|
|
132
|
+
With `python3 -m http.server 8080` in `demos/proxy-mcp-sample` and `npx geometra-proxy http://localhost:8080 --port 3200` running:
|
|
133
|
+
|
|
134
|
+
```
|
|
135
|
+
Agent: geometra_connect({ url: "ws://127.0.0.1:3200" })
|
|
136
|
+
→ Connected. UI includes textbox "Email", button "Save", …
|
|
137
|
+
|
|
138
|
+
Agent: geometra_page_model({})
|
|
139
|
+
→ {"viewport":{"width":1024,"height":768},"landmarks":[...],"forms":[...],"dialogs":[],"lists":[]}
|
|
140
|
+
|
|
141
|
+
Agent: geometra_query({ role: "textbox", name: "Email" })
|
|
142
|
+
→ bounds for the email field (viewport coordinates)
|
|
143
|
+
|
|
144
|
+
Agent: geometra_click({ x: <center-x>, y: <center-y> })
|
|
145
|
+
→ Focuses the input
|
|
146
|
+
|
|
147
|
+
Agent: geometra_type({ text: "hello@example.com" })
|
|
148
|
+
|
|
149
|
+
Agent: geometra_query({ role: "button", name: "Save" })
|
|
150
|
+
→ Click center to submit the sample form; status text updates in the DOM
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## How it works
|
|
154
|
+
|
|
155
|
+
1. The MCP server connects to a WebSocket peer that speaks GEOM v1 (`frame` with `layout` + `tree`, optional `patch` updates).
|
|
156
|
+
2. It receives the computed layout (`{ x, y, width, height }` for every node) and the UI tree (`kind`, `semantic`, `props`, `handlers`, `children`).
|
|
157
|
+
3. It builds an accessibility tree from that data — roles, names, focusable state, bounds.
|
|
158
|
+
4. **`geometra_snapshot`** defaults to a **compact** flat list of viewport-visible actionable nodes (minified JSON) to reduce LLM tokens; use `view: "full"` for the complete nested tree.
|
|
159
|
+
5. **`geometra_page_model`** extracts higher-level webpage structure (landmarks, forms, dialogs, lists) so agents can reason about normal HTML pages without pulling a full tree first.
|
|
160
|
+
6. After interactions, action tools return a **semantic delta** when possible (dialogs opened/closed, forms appeared/removed, list counts changed, named/focusable nodes added/removed/updated). If nothing meaningful changed, they fall back to a short current-UI overview.
|
|
161
|
+
7. Tools expose query, click, type, snapshot, and page-model operations over this structured data.
|
|
162
|
+
8. After each interaction, the peer sends updated geometry (full `frame` or `patch`) — the MCP tools interpret that into compact summaries.
|
|
163
|
+
|
|
164
|
+
With a **native** Geometra server, layout comes from Textura/Yoga. With **`@geometra/proxy`**, layout comes from the browser’s computed DOM geometry; the MCP layer is the same.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { buildPageModel, buildUiDelta, hasUiDelta, summarizeUiDelta, } from '../session.js';
|
|
3
|
+
/**
 * Test fixture factory for an accessibility-tree node.
 *
 * `name` and `state` keys are only present when a truthy value is supplied;
 * `path`, `children` and `focusable` fall back to `[]`, `[]` and `false`.
 *
 * @param {string} role - Role of the node.
 * @param {string | undefined} name - Optional accessible name.
 * @param {{ x: number, y: number, width: number, height: number }} bounds - Node rectangle.
 * @param {{ state?: object, path?: number[], children?: object[], focusable?: boolean }} [options]
 * @returns {object} The assembled node.
 */
function node(role, name, bounds, options) {
    // Keys are added in the same order the fixture has always used so that
    // serialized output stays stable.
    const fixture = { role };
    if (name) {
        fixture.name = name;
    }
    if (options?.state) {
        fixture.state = options.state;
    }
    fixture.bounds = bounds;
    fixture.path = options?.path ?? [];
    fixture.children = options?.children ?? [];
    fixture.focusable = options?.focusable ?? false;
    return fixture;
}
|
|
14
|
+
// buildPageModel: a small page with a nav, a main region holding one form and
// one list, checked for landmarks, form field/action extraction and list previews.
describe('buildPageModel', () => {
    it('extracts landmarks, forms, and lists from a typical webpage tree', () => {
        const rect = (x, y, width, height) => ({ x, y, width, height });

        // Build the fixture bottom-up so each region is readable on its own.
        const applicationForm = node('form', 'Job application', rect(40, 120, 520, 280), {
            path: [1, 0],
            children: [
                node('textbox', 'Full name', rect(60, 160, 300, 36), { path: [1, 0, 0] }),
                node('textbox', 'Email', rect(60, 208, 300, 36), { path: [1, 0, 1] }),
                node('button', 'Submit application', rect(60, 264, 180, 40), {
                    path: [1, 0, 2],
                    focusable: true,
                }),
            ],
        });
        const openRoles = node('list', 'Open roles', rect(600, 120, 360, 280), {
            path: [1, 1],
            children: [
                node('listitem', 'Designer', rect(620, 148, 320, 32), { path: [1, 1, 0] }),
                node('listitem', 'Engineer', rect(620, 188, 320, 32), { path: [1, 1, 1] }),
            ],
        });
        const primaryNav = node('navigation', 'Primary nav', rect(0, 0, 220, 80), { path: [0] });
        const mainRegion = node('main', undefined, rect(0, 80, 1024, 688), {
            path: [1],
            children: [applicationForm, openRoles],
        });
        const tree = node('group', undefined, rect(0, 0, 1024, 768), {
            children: [primaryNav, mainRegion],
        });

        const model = buildPageModel(tree);
        const firstForm = model.forms[0];
        const firstList = model.lists[0];

        expect(model.viewport).toEqual({ width: 1024, height: 768 });
        expect(model.landmarks.map(({ role }) => role)).toEqual(['navigation', 'main', 'form']);
        expect(model.forms).toHaveLength(1);
        expect(firstForm).toMatchObject({
            name: 'Job application',
            fieldCount: 2,
            actionCount: 1,
        });
        expect(firstForm?.fields.map(({ name }) => name)).toEqual(['Full name', 'Email']);
        expect(firstForm?.actions.map(({ name }) => name)).toEqual(['Submit application']);
        expect(firstList).toMatchObject({
            name: 'Open roles',
            itemCount: 2,
            itemsPreview: ['Designer', 'Engineer'],
        });
    });
});
|
|
62
|
+
// buildUiDelta: compares a page before and after an action in which the Save
// button becomes disabled, the Results list gains a row, and a dialog appears.
describe('buildUiDelta', () => {
    it('captures opened dialogs, state changes, and list count changes', () => {
        const rect = (x, y, width, height) => ({ x, y, width, height });
        const viewport = () => rect(0, 0, 1024, 768);

        // Rows are stacked 40px apart starting at y=144.
        const resultRow = (index) => node('listitem', `Row ${index + 1}`, rect(60, 144 + index * 40, 360, 32), {
            path: [0, 1, index],
        });
        const resultsList = (rowCount, height) => node('list', 'Results', rect(40, 120, 400, height), {
            path: [0, 1],
            children: Array.from({ length: rowCount }, (_, index) => resultRow(index)),
        });
        const saveButton = (extraOptions) => node('button', 'Save', rect(40, 40, 120, 40), {
            path: [0, 0],
            focusable: true,
            ...extraOptions,
        });
        const page = (mainChildren) => node('group', undefined, viewport(), {
            children: [
                node('main', undefined, viewport(), {
                    path: [0],
                    children: mainChildren,
                }),
            ],
        });

        const before = page([saveButton(), resultsList(2, 240)]);

        const saveCompleteDialog = node('dialog', 'Save complete', rect(520, 80, 280, 180), {
            path: [0, 2],
            children: [
                node('button', 'Close', rect(620, 200, 100, 36), {
                    path: [0, 2, 0],
                    focusable: true,
                }),
            ],
        });
        const after = page([
            saveButton({ state: { disabled: true } }),
            resultsList(3, 280),
            saveCompleteDialog,
        ]);

        const delta = buildUiDelta(before, after);
        expect(hasUiDelta(delta)).toBe(true);
        expect(delta.dialogsOpened).toHaveLength(1);
        expect(delta.dialogsOpened[0]?.name).toBe('Save complete');
        expect(delta.listCountsChanged).toEqual([
            { name: 'Results', path: [0, 1], beforeCount: 2, afterCount: 3 },
        ]);
        const saveWasDisabled = delta.updated.some((update) => {
            const mentionsDisabled = update.changes.some((change) => change.includes('disabled'));
            return update.after.name === 'Save' && mentionsDisabled;
        });
        expect(saveWasDisabled).toBe(true);

        const summary = summarizeUiDelta(delta);
        expect(summary).toContain('+ dialog "Save complete" opened');
        expect(summary).toContain('~ list "Results" items 2 -> 3');
        expect(summary).toContain('~ button "Save": disabled unset -> true');
    });
});
|
package/dist/index.d.ts
ADDED
package/dist/index.js
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
3
|
+
import { createServer } from './server.js';
|
|
4
|
+
/**
 * Entry point: creates the Geometra MCP server and attaches it to a stdio
 * transport. Resolves once `server.connect` resolves.
 *
 * @returns {Promise<void>}
 */
async function main() {
    await createServer().connect(new StdioServerTransport());
}
|
|
9
|
+
// Start the server; if startup rejects, log the failure to stderr and exit
// with a non-zero status.
const reportStartupFailure = (startupError) => {
    console.error('geometra-mcp: failed to start', startupError);
    process.exit(1);
};
main().catch(reportStartupFailure);
|
package/dist/server.d.ts
ADDED
package/dist/server.js
ADDED
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
2
|
+
import { z } from 'zod';
|
|
3
|
+
import { connect, disconnect, getSession, sendClick, sendType, sendKey, sendFileUpload, sendListboxPick, sendSelectOption, sendWheel, buildA11yTree, buildCompactUiIndex, buildPageModel, buildUiDelta, hasUiDelta, summarizeCompactIndex, summarizePageModel, summarizeUiDelta, } from './session.js';
|
|
4
|
+
/**
 * Build the Geometra MCP server and register every `geometra_*` tool on it.
 *
 * The returned server is not attached to any transport; the caller is expected
 * to call `server.connect(transport)`.
 *
 * Read-only tools (query, page model, snapshot, layout) require a session that
 * already has both a tree and a layout. Action tools (click, type, key, upload,
 * listbox pick, select, wheel) only require a session object; they capture the
 * accessibility tree before the action and report a delta or overview after it.
 *
 * All action tools share one code path (`runSessionAction`), so a failure while
 * sending — including click/type/key — is reported as an `isError` tool result
 * instead of escaping the handler.
 *
 * @returns {McpServer} The configured server instance.
 */
export function createServer() {
    const NOT_CONNECTED = 'Not connected. Call geometra_connect first.';
    /**
     * Turn any thrown value into a message string. Falls back to `String(value)`
     * when the value has no string `message` (e.g. a thrown string).
     */
    function describeError(thrown) {
        return typeof thrown?.message === 'string' ? thrown.message : String(thrown);
    }
    /**
     * Run one UI action against `session` and build the tool result.
     *
     * @param {object} session - Active session returned by `getSession()`.
     * @param {() => Promise<unknown>} send - Performs the action.
     * @param {string} headline - First line of the success message.
     */
    async function runSessionAction(session, send, headline) {
        // Snapshot taken before the action so postActionSummary can diff against it.
        const before = sessionA11y(session);
        try {
            await send();
            const summary = postActionSummary(session, before);
            return ok(`${headline}\n${summary}`);
        }
        catch (e) {
            return err(describeError(e));
        }
    }
    const server = new McpServer({ name: 'geometra', version: '0.1.0' }, { capabilities: { tools: {} } });
    // ── connect ──────────────────────────────────────────────────
    server.tool('geometra_connect', `Connect to a running Geometra server over WebSocket. This replaces Playwright/browser automation — you get direct access to the UI's pixel-exact geometry as JSON.

Call this first before using any other geometra tools. The peer must be listening (native Geometra server, or \`geometra-proxy\` for real web pages). File upload / wheel / native \`<select>\` require \`@geometra/proxy\`; native Textura servers return an error for those messages.`, {
        url: z.string().describe('WebSocket URL of the Geometra server (e.g. ws://localhost:3100)'),
    }, async ({ url }) => {
        try {
            const session = await connect(url);
            const summary = compactSessionSummary(session);
            return ok(`Connected to ${url}. UI state:\n${summary}`);
        }
        catch (e) {
            return err(`Failed to connect: ${describeError(e)}`);
        }
    });
    // ── query ────────────────────────────────────────────────────
    server.tool('geometra_query', `Find elements in the current Geometra UI by role, name, or text content. Returns matching elements with their exact pixel bounds {x, y, width, height}, role, name, and tree path.

This is the Geometra equivalent of Playwright's locator — but instant, structured, and with no browser. Use the returned bounds to click elements or assert on layout.`, {
        role: z.string().optional().describe('ARIA role to match (e.g. "button", "textbox", "text", "heading", "listitem")'),
        name: z.string().optional().describe('Accessible name to match (exact or substring)'),
        text: z.string().optional().describe('Text content to search for (substring match)'),
    }, async ({ role, name, text }) => {
        const session = getSession();
        if (!session?.tree || !session?.layout)
            return err(NOT_CONNECTED);
        const a11y = buildA11yTree(session.tree, session.layout);
        const matches = findNodes(a11y, { role, name, text });
        if (matches.length === 0) {
            return ok(`No elements found matching ${JSON.stringify({ role, name, text })}`);
        }
        const result = matches.map(formatNode);
        return ok(JSON.stringify(result, null, 2));
    });
    // ── page model ────────────────────────────────────────────────
    server.tool('geometra_page_model', `Get a higher-level webpage model instead of a raw node dump. Extracts common structures such as landmarks, forms, dialogs, and lists, with short previews of fields/actions/items.

Use this first on normal HTML pages when you want to understand the page shape with fewer tokens than a full snapshot.`, {
        maxFieldsPerForm: z
            .number()
            .int()
            .min(1)
            .max(40)
            .optional()
            .default(12)
            .describe('Cap returned fields per form (default 12).'),
        maxActionsPerContainer: z
            .number()
            .int()
            .min(1)
            .max(20)
            .optional()
            .default(8)
            .describe('Cap returned actions per form/dialog (default 8).'),
        maxItemsPerList: z
            .number()
            .int()
            .min(1)
            .max(20)
            .optional()
            .default(5)
            .describe('Cap list item preview strings (default 5).'),
    }, async ({ maxFieldsPerForm, maxActionsPerContainer, maxItemsPerList }) => {
        const session = getSession();
        if (!session?.tree || !session?.layout)
            return err(NOT_CONNECTED);
        const a11y = buildA11yTree(session.tree, session.layout);
        const model = buildPageModel(a11y, { maxFieldsPerForm, maxActionsPerContainer, maxItemsPerList });
        return ok(JSON.stringify(model));
    });
    // ── click ────────────────────────────────────────────────────
    server.tool('geometra_click', `Click an element in the Geometra UI. Provide either the element's bounds (from geometra_query) or raw x,y coordinates. The click is dispatched server-side via the geometry protocol — no browser, no simulated DOM events.

After clicking, returns a compact semantic delta when possible (dialogs/forms/lists/nodes changed). If nothing meaningful changed, returns a short current-UI overview.`, {
        x: z.number().describe('X coordinate to click (use center of element bounds from geometra_query)'),
        y: z.number().describe('Y coordinate to click'),
    }, async ({ x, y }) => {
        const session = getSession();
        if (!session)
            return err(NOT_CONNECTED);
        return runSessionAction(session, () => sendClick(session, x, y), `Clicked at (${x}, ${y}).`);
    });
    // ── type ─────────────────────────────────────────────────────
    server.tool('geometra_type', `Type text into the currently focused element. First click a textbox/input with geometra_click to focus it, then use this to type.

Each character is sent as a key event through the geometry protocol. Returns a compact semantic delta when possible, otherwise a short current-UI overview.`, {
        text: z.string().describe('Text to type into the focused element'),
    }, async ({ text }) => {
        const session = getSession();
        if (!session)
            return err(NOT_CONNECTED);
        return runSessionAction(session, () => sendType(session, text), `Typed "${text}".`);
    });
    // ── key ──────────────────────────────────────────────────────
    server.tool('geometra_key', `Send a special key press (Enter, Tab, Escape, ArrowDown, etc.) to the Geometra UI. Useful for form submission, focus navigation, and keyboard shortcuts.`, {
        key: z.string().describe('Key to press (e.g. "Enter", "Tab", "Escape", "ArrowDown", "Backspace")'),
        shift: z.boolean().optional().describe('Hold Shift'),
        ctrl: z.boolean().optional().describe('Hold Ctrl'),
        meta: z.boolean().optional().describe('Hold Meta/Cmd'),
        alt: z.boolean().optional().describe('Hold Alt'),
    }, async ({ key, shift, ctrl, meta, alt }) => {
        const session = getSession();
        if (!session)
            return err(NOT_CONNECTED);
        const modifiers = { shift, ctrl, meta, alt };
        return runSessionAction(session, () => sendKey(session, key, modifiers), `Pressed ${formatKeyCombo(key, modifiers)}.`);
    });
    // ── upload files (proxy) ───────────────────────────────────────
    server.tool('geometra_upload_files', `Attach local files to a file input. Requires \`@geometra/proxy\` (paths exist on the proxy host).

Strategies: **auto** (default) tries chooser click if x,y given, else hidden \`input[type=file]\`, else first visible file input. **hidden** targets hidden inputs directly. **drop** needs dropX,dropY for drag-target zones. **chooser** requires x,y.`, {
        paths: z.array(z.string()).min(1).describe('Absolute paths on the proxy machine, e.g. /Users/you/resume.pdf'),
        x: z.number().optional().describe('Click X to trigger native file chooser'),
        y: z.number().optional().describe('Click Y to trigger native file chooser'),
        strategy: z
            .enum(['auto', 'chooser', 'hidden', 'drop'])
            .optional()
            .describe('Upload strategy (default auto)'),
        dropX: z.number().optional().describe('Drop target X (viewport) for strategy drop'),
        dropY: z.number().optional().describe('Drop target Y (viewport) for strategy drop'),
    }, async ({ paths, x, y, strategy, dropX, dropY }) => {
        const session = getSession();
        if (!session)
            return err(NOT_CONNECTED);
        // A point is only forwarded when both of its coordinates were supplied.
        const uploadOptions = {
            click: x !== undefined && y !== undefined ? { x, y } : undefined,
            strategy,
            drop: dropX !== undefined && dropY !== undefined ? { x: dropX, y: dropY } : undefined,
        };
        return runSessionAction(session, () => sendFileUpload(session, paths, uploadOptions), `Uploaded ${paths.length} file(s).`);
    });
    server.tool('geometra_pick_listbox_option', `Pick a visible \`role=option\` (Headless UI, React Select, Radix, etc.). Requires \`@geometra/proxy\`.

Optional openX,openY clicks the combobox first if the list is not open. Uses substring name match unless exact=true.`, {
        label: z.string().describe('Accessible name of the option (visible text or aria-label)'),
        exact: z.boolean().optional().describe('Exact name match'),
        openX: z.number().optional().describe('Click to open dropdown'),
        openY: z.number().optional().describe('Click to open dropdown'),
    }, async ({ label, exact, openX, openY }) => {
        const session = getSession();
        if (!session)
            return err(NOT_CONNECTED);
        const pickOptions = {
            exact,
            open: openX !== undefined && openY !== undefined ? { x: openX, y: openY } : undefined,
        };
        return runSessionAction(session, () => sendListboxPick(session, label, pickOptions), `Picked listbox option "${label}".`);
    });
    // ── select option (proxy, native <select>) ─────────────────────
    server.tool('geometra_select_option', `Set a native HTML \`<select>\` after clicking its center (x,y from geometra_query). Requires \`@geometra/proxy\`.

Custom React/Vue dropdowns are not supported — open them with geometra_click and pick options by snapshot instead.`, {
        x: z.number().describe('X coordinate (e.g. center of the select from geometra_query)'),
        y: z.number().describe('Y coordinate'),
        value: z.string().optional().describe('Option value= attribute'),
        label: z.string().optional().describe('Visible option label (substring match)'),
        index: z.number().int().min(0).optional().describe('Zero-based option index'),
    }, async ({ x, y, value, label, index }) => {
        const session = getSession();
        if (!session)
            return err(NOT_CONNECTED);
        // The connection check comes first; only then is the selector validated.
        if (value === undefined && label === undefined && index === undefined) {
            return err('Provide at least one of value, label, or index');
        }
        return runSessionAction(session, () => sendSelectOption(session, x, y, { value, label, index }), 'Selected option.');
    });
    // ── wheel / scroll (proxy) ─────────────────────────────────────
    server.tool('geometra_wheel', `Scroll the page or an element under the pointer using the mouse wheel. Requires \`@geometra/proxy\` (e.g. virtualized lists, long application forms).`, {
        deltaY: z.number().describe('Vertical scroll delta (positive scrolls down, typical step ~100)'),
        deltaX: z.number().optional().describe('Horizontal scroll delta'),
        x: z.number().optional().describe('Move pointer to X before scrolling'),
        y: z.number().optional().describe('Move pointer to Y before scrolling'),
    }, async ({ deltaY, deltaX, x, y }) => {
        const session = getSession();
        if (!session)
            return err(NOT_CONNECTED);
        return runSessionAction(session, () => sendWheel(session, deltaY, { deltaX, x, y }), `Wheel delta (${deltaX ?? 0}, ${deltaY}).`);
    });
    // ── snapshot ─────────────────────────────────────────────────
    server.tool('geometra_snapshot', `Get the current UI as JSON. Default **compact** view: flat list of viewport-visible actionable nodes (links, buttons, inputs, headings, landmarks, text leaves, focusable elements) with bounds and tree paths — far fewer tokens than a full nested tree. Use **full** for complete nested a11y + every wrapper when debugging layout.

JSON is minified in compact view to save tokens. For a webpage-shaped overview (forms, dialogs, lists, landmarks), use geometra_page_model.`, {
        view: z
            .enum(['compact', 'full'])
            .optional()
            .default('compact')
            .describe('compact (default): token-efficient flat index. full: nested tree, every node.'),
        maxNodes: z
            .number()
            .int()
            .min(20)
            .max(800)
            .optional()
            .default(400)
            .describe('Max rows in compact view (default 400).'),
    }, async ({ view, maxNodes }) => {
        const session = getSession();
        if (!session?.tree || !session?.layout)
            return err(NOT_CONNECTED);
        const a11y = buildA11yTree(session.tree, session.layout);
        if (view === 'full') {
            // Full view is pretty-printed; compact view below is minified.
            return ok(JSON.stringify(a11y, null, 2));
        }
        const { nodes, truncated } = buildCompactUiIndex(a11y, { maxNodes });
        const payload = {
            view: 'compact',
            viewport: { width: a11y.bounds.width, height: a11y.bounds.height },
            nodes,
            truncated,
        };
        return ok(JSON.stringify(payload));
    });
    // ── layout ───────────────────────────────────────────────────
    server.tool('geometra_layout', `Get the raw computed layout geometry — the exact {x, y, width, height} for every node in the UI tree. This is the lowest-level view, useful for pixel-precise assertions in tests.

For a token-efficient semantic view, use geometra_snapshot (default compact). For the complete nested tree, geometra_snapshot with view=full.`, {}, async () => {
        const session = getSession();
        if (!session?.layout)
            return err(NOT_CONNECTED);
        return ok(JSON.stringify(session.layout, null, 2));
    });
    // ── disconnect ───────────────────────────────────────────────
    server.tool('geometra_disconnect', `Disconnect from the Geometra server and clean up the WebSocket connection.`, {}, async () => {
        disconnect();
        return ok('Disconnected.');
    });
    return server;
}
|
|
272
|
+
// ── Helpers ──────────────────────────────────────────────────────
|
|
273
|
+
/**
 * Short text overview of the session's current UI.
 *
 * @param {object} session - Session whose tree/layout are summarized.
 * @returns {string} The overview, or 'No UI update received' when
 *   `sessionA11y` yields nothing for this session.
 */
function compactSessionSummary(session) {
    const a11y = sessionA11y(session);
    return a11y ? sessionOverviewFromA11y(a11y) : 'No UI update received';
}
|
|
279
|
+
/**
 * Build the accessibility tree for a session, if it has data to build from.
 *
 * @param {{ tree?: unknown, layout?: unknown }} session
 * @returns {object | null} Result of `buildA11yTree(tree, layout)`, or `null`
 *   when either `tree` or `layout` is missing.
 */
function sessionA11y(session) {
    return session.tree && session.layout
        ? buildA11yTree(session.tree, session.layout)
        : null;
}
|
|
284
|
+
/**
 * Compose the text overview shown after connecting or when an action produced
 * no delta: the page-model summary followed by a "Key nodes" section.
 *
 * Empty sections are dropped; the rest are joined with newlines.
 *
 * @param {object} a11y - Accessibility tree root.
 * @returns {string}
 */
function sessionOverviewFromA11y(a11y) {
    const sections = [summarizePageModel(buildPageModel(a11y), 8)];
    const compactIndex = buildCompactUiIndex(a11y, { maxNodes: 32 });
    if (compactIndex.nodes.length > 0) {
        sections.push(`Key nodes:\n${summarizeCompactIndex(compactIndex.nodes, 18)}`);
    }
    return sections.filter(Boolean).join('\n');
}
|
|
290
|
+
/**
 * Describe the UI after an action.
 *
 * @param {object} session - Session to read the post-action state from.
 * @param {object | null} before - Accessibility tree captured before the action, if any.
 * @returns {string} 'No UI update received' when there is no post-action tree;
 *   a "Changes:" delta summary when `before` is available and the delta is
 *   non-empty; otherwise a "Current UI:" overview.
 */
function postActionSummary(session, before) {
    const after = sessionA11y(session);
    if (!after) {
        return 'No UI update received';
    }
    // Only report a delta when one exists; otherwise fall through to the overview.
    const describeChanges = (delta) => (hasUiDelta(delta) ? `Changes:\n${summarizeUiDelta(delta)}` : null);
    const changes = before ? describeChanges(buildUiDelta(before, after)) : null;
    return changes ?? `Current UI:\n${sessionOverviewFromA11y(after)}`;
}
|
|
302
|
+
/**
 * Wrap text as a successful tool result with a single text content part.
 *
 * @param {string} text
 * @returns {{ content: Array<{ type: 'text', text: string }> }}
 */
function ok(text) {
    const textPart = { type: 'text', text };
    return { content: [textPart] };
}
|
|
305
|
+
/**
 * Wrap text as a failed tool result: a single text content part plus
 * `isError: true`.
 *
 * @param {string} text
 * @returns {{ content: Array<{ type: 'text', text: string }>, isError: true }}
 */
function err(text) {
    const failure = { content: [{ type: 'text', text }] };
    failure.isError = true;
    return failure;
}
|
|
308
|
+
/**
 * Collect every node in the tree that satisfies all supplied filters, in
 * preorder (parent before children, children left to right).
 *
 * - `role` must equal the node's role exactly.
 * - `name` and `text` are both substring matches against the node's `name`.
 * - If no filter is supplied (all empty/undefined) nothing matches.
 *
 * @param {object} node - Root of the accessibility tree.
 * @param {{ role?: string, name?: string, text?: string }} filter
 * @returns {object[]} Matching nodes (the original objects, not copies).
 */
function findNodes(node, filter) {
    const hasCriteria = Boolean(filter.role || filter.name || filter.text);
    const nameContains = (candidate, fragment) => Boolean(candidate.name && candidate.name.includes(fragment));
    const satisfiesFilter = (candidate) => {
        if (filter.role && candidate.role !== filter.role) {
            return false;
        }
        if (filter.name && !nameContains(candidate, filter.name)) {
            return false;
        }
        if (filter.text && !nameContains(candidate, filter.text)) {
            return false;
        }
        return true;
    };
    const found = [];
    // Explicit stack; children are pushed right-to-left so they pop left-to-right.
    const pending = [node];
    while (pending.length > 0) {
        const current = pending.pop();
        if (hasCriteria && satisfiesFilter(current)) {
            found.push(current);
        }
        const kids = [...current.children];
        for (let i = kids.length - 1; i >= 0; i -= 1) {
            pending.push(kids[i]);
        }
    }
    return found;
}
|
|
326
|
+
/**
 * Project an accessibility node into the flat shape returned by query results.
 *
 * Adds a `center` point (bounds midpoint, each coordinate rounded with
 * `Math.round`) and omits `state` when it is missing or has no keys.
 * Children are not included.
 *
 * @param {object} node - Node with `role`, `name`, `bounds`, `focusable`, `path` and optional `state`.
 * @returns {object} `{ role, name, bounds, center, focusable, [state], path }`.
 */
function formatNode(node) {
    const formatted = {
        role: node.role,
        name: node.name,
        bounds: node.bounds,
        center: {
            x: Math.round(node.bounds.x + node.bounds.width / 2),
            y: Math.round(node.bounds.y + node.bounds.height / 2),
        },
        focusable: node.focusable,
    };
    // `state` sits between `focusable` and `path` so serialized key order is stable.
    if (node.state && Object.keys(node.state).length > 0) {
        formatted.state = node.state;
    }
    formatted.path = node.path;
    return formatted;
}
|
|
340
|
+
/**
 * Render a key press with its held modifiers, e.g. `Ctrl+Shift+Tab`.
 *
 * Modifiers always appear in the order Ctrl, Cmd (meta), Alt, Shift, followed
 * by the key itself.
 *
 * @param {string} key - The key that was pressed.
 * @param {{ ctrl?: boolean, meta?: boolean, alt?: boolean, shift?: boolean }} [mods]
 * @returns {string}
 */
function formatKeyCombo(key, mods) {
    const modifierLabels = [
        ['ctrl', 'Ctrl'],
        ['meta', 'Cmd'],
        ['alt', 'Alt'],
        ['shift', 'Shift'],
    ];
    const held = modifierLabels
        .filter(([flag]) => mods?.[flag])
        .map(([, label]) => label);
    return [...held, key].join('+');
}
|