@yuzc-001/grasp 0.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/LICENSE +21 -0
  2. package/README.md +327 -0
  3. package/README.zh-CN.md +324 -0
  4. package/examples/README.md +31 -0
  5. package/examples/claude-desktop.json +8 -0
  6. package/examples/codex-config.toml +4 -0
  7. package/grasp.skill +0 -0
  8. package/index.js +87 -0
  9. package/package.json +48 -0
  10. package/scripts/grasp_openclaw_ctl.sh +122 -0
  11. package/scripts/run-search-benchmark.mjs +287 -0
  12. package/scripts/update-star-history.mjs +274 -0
  13. package/skill/SKILL.md +61 -0
  14. package/skill/references/tools.md +306 -0
  15. package/src/cli/auto-configure.js +116 -0
  16. package/src/cli/cmd-connect.js +148 -0
  17. package/src/cli/cmd-explain.js +42 -0
  18. package/src/cli/cmd-logs.js +55 -0
  19. package/src/cli/cmd-status.js +119 -0
  20. package/src/cli/config.js +27 -0
  21. package/src/cli/detect-chrome.js +58 -0
  22. package/src/grasp/handoff/events.js +67 -0
  23. package/src/grasp/handoff/persist.js +48 -0
  24. package/src/grasp/handoff/state.js +28 -0
  25. package/src/grasp/page/capture.js +34 -0
  26. package/src/grasp/page/state.js +273 -0
  27. package/src/grasp/verify/evidence.js +40 -0
  28. package/src/grasp/verify/pipeline.js +52 -0
  29. package/src/layer1-bridge/chrome.js +416 -0
  30. package/src/layer1-bridge/webmcp.js +143 -0
  31. package/src/layer2-perception/hints.js +284 -0
  32. package/src/layer3-action/actions.js +400 -0
  33. package/src/runtime/browser-instance.js +65 -0
  34. package/src/runtime/truth/model.js +94 -0
  35. package/src/runtime/truth/snapshot.js +51 -0
  36. package/src/server/affordances.js +47 -0
  37. package/src/server/audit.js +122 -0
  38. package/src/server/boss-fast-path.js +164 -0
  39. package/src/server/boundary-guard.js +53 -0
  40. package/src/server/content.js +97 -0
  41. package/src/server/continuity.js +256 -0
  42. package/src/server/engine-selection.js +29 -0
  43. package/src/server/entry-orchestrator.js +115 -0
  44. package/src/server/error-codes.js +7 -0
  45. package/src/server/explain-share-card.js +113 -0
  46. package/src/server/fast-path-router.js +134 -0
  47. package/src/server/form-runtime.js +602 -0
  48. package/src/server/form-tasks.js +254 -0
  49. package/src/server/gateway-response.js +62 -0
  50. package/src/server/index.js +22 -0
  51. package/src/server/observe.js +52 -0
  52. package/src/server/page-projection.js +31 -0
  53. package/src/server/page-state.js +27 -0
  54. package/src/server/postconditions.js +128 -0
  55. package/src/server/prompt-assembly.js +148 -0
  56. package/src/server/responses.js +44 -0
  57. package/src/server/route-boundary.js +174 -0
  58. package/src/server/route-policy.js +168 -0
  59. package/src/server/runtime-confirmation.js +87 -0
  60. package/src/server/runtime-status.js +7 -0
  61. package/src/server/share-artifacts.js +284 -0
  62. package/src/server/state.js +132 -0
  63. package/src/server/structured-extraction.js +131 -0
  64. package/src/server/surface-prompts.js +166 -0
  65. package/src/server/task-frame.js +11 -0
  66. package/src/server/tasks/search-task.js +321 -0
  67. package/src/server/tools.actions.js +1361 -0
  68. package/src/server/tools.form.js +526 -0
  69. package/src/server/tools.gateway.js +757 -0
  70. package/src/server/tools.handoff.js +210 -0
  71. package/src/server/tools.js +20 -0
  72. package/src/server/tools.legacy.js +983 -0
  73. package/src/server/tools.strategy.js +250 -0
  74. package/src/server/tools.task-surface.js +66 -0
  75. package/src/server/tools.workspace.js +873 -0
  76. package/src/server/workspace-runtime.js +1138 -0
  77. package/src/server/workspace-tasks.js +735 -0
  78. package/start-chrome.bat +84 -0
@@ -0,0 +1,983 @@
1
+ import { z } from 'zod';
2
+
3
+ import { getActivePage, navigateTo, getTabs, switchTab, newTab, closeTab } from '../layer1-bridge/chrome.js';
4
+ import { callTool } from '../layer1-bridge/webmcp.js';
5
+ import { buildHintMap, rebindHintCandidate } from '../layer2-perception/hints.js';
6
+ import { clickByHintId, typeByHintId, scroll, watchElement, pressKey, hoverByHintId } from '../layer3-action/actions.js';
7
+ import { errorResponse, imageResponse, textResponse } from './responses.js';
8
+ import { describeMode, syncPageState } from './state.js';
9
+ import { rankAffordances } from './affordances.js';
10
+ import { extractMainContent, waitUntilStable } from './content.js';
11
+ import { audit, readLogs } from './audit.js';
12
+ import { verifyTypeResult, verifyGenericAction } from './postconditions.js';
13
+ import { normalizeFormField, summarizeFormFields, buildFormVerification } from './form-tasks.js';
14
+ import { runSearchTaskTool } from './tasks/search-task.js';
15
+ import { TYPE_FAILED } from './error-codes.js';
16
+ import { runVerifiedAction } from '../grasp/verify/pipeline.js';
17
+ import {
18
+ requestHandoff,
19
+ markHandoffInProgress,
20
+ markAwaitingReacquisition,
21
+ markResumedUnverified,
22
+ markResumeVerified,
23
+ clearHandoff,
24
+ } from '../grasp/handoff/events.js';
25
+ import { capturePageEvidence } from '../grasp/verify/evidence.js';
26
+ import { readHandoffState, writeHandoffState } from '../grasp/handoff/persist.js';
27
+
28
// Substrings that flag an element label as a high-risk action. The `click`
// tool compares them case-insensitively against the element's label while
// safe mode is on and asks for `confirm_click` instead of clicking.
const HIGH_RISK_KEYWORDS = [
  // Chinese labels
  '发送',
  '提交',
  '删除',
  '支付',
  '确认',
  '清空',
  '注销',
  '退出',
  '解绑',
  '重置',
  // English labels
  'send',
  'submit',
  'delete',
  'pay',
  'confirm',
  'clear',
  'logout',
  'unsubscribe',
  'reset',
  'remove',
];
32
+
33
/**
 * Read the `data-grasp-id` attribute of the element that currently holds
 * focus in the page.
 *
 * @param {object} page - Page handle exposing `evaluate()`; the callback runs
 *   wherever `evaluate()` executes it and reads the global `document`.
 * @returns {Promise<string|null>} The attribute value, or `null` when no
 *   element is focused or the focused element has no such attribute.
 */
async function getActiveHintId(page) {
  const readFocusedHintId = () => {
    const focused = document.activeElement;
    if (focused === null || focused === undefined) {
      return null;
    }
    const hintId = focused.getAttribute('data-grasp-id');
    return hintId ?? null;
  };
  return page.evaluate(readFocusedHintId);
}
36
+
37
/**
 * Build the `rebuildHints` callback handed to the hint-based actions.
 *
 * @param {object} page - Active page handle forwarded to `syncPageState`.
 * @param {object} state - Shared server state; `state.hintMap` is read both
 *   before and after the forced sync.
 * @returns {(hintId: string) => Promise<*>} Async callback that force-syncs
 *   page state and resolves to the result of `rebindHintCandidate`, or to
 *   `null` when `hintId` was not in the hint map before the sync.
 */
function createRebuildHints(page, state) {
  async function rebuildHints(hintId) {
    // Look the hint up first: the search must run against the hint map as it
    // was before the forced sync below.
    const staleHint = state.hintMap.find(({ id }) => id === hintId);
    await syncPageState(page, state, { force: true });
    return staleHint ? rebindHintCandidate(staleHint, state.hintMap) : null;
  }
  return rebuildHints;
}
45
+
46
/**
 * Wrap a failure message in an error response that carries structured
 * recovery metadata.
 *
 * Each metadata field comes from `verdict` when it is present and non-null;
 * otherwise it falls back to `TYPE_FAILED`, `true`, `'retry'` and
 * `{ hint_id: normalizedHintId }` respectively.
 *
 * @param {string} message - Text passed through to `errorResponse`.
 * @param {string} normalizedHintId - Value used for the default evidence.
 * @param {object} [verdict] - Optional verification result supplying
 *   `error_code`, `retryable`, `suggested_next_step` and `evidence`.
 * @returns {*} Whatever `errorResponse(message, meta)` returns.
 */
function buildStructuredError(message, normalizedHintId, verdict) {
  const details = verdict ?? {};
  return errorResponse(message, {
    error_code: details.error_code ?? TYPE_FAILED,
    retryable: details.retryable ?? true,
    suggested_next_step: details.suggested_next_step ?? 'retry',
    evidence: details.evidence ?? { hint_id: normalizedHintId },
  });
}
55
+
56
+ export function registerTools(server, state) {
57
+ server.registerTool(
58
+ 'navigate',
59
+ {
60
+ description: 'Navigate the browser to a URL. Auto-detects WebMCP support on arrival.',
61
+ inputSchema: { url: z.string().url().describe('Full URL to navigate to') },
62
+ },
63
+ async ({ url }) => {
64
+ try {
65
+ const page = await navigateTo(url);
66
+ audit('navigate', url);
67
+ await syncPageState(page, state, { force: true });
68
+ const title = await page.title();
69
+
70
+ if (state.webmcp?.available) {
71
+ return textResponse([
72
+ `Navigated to: ${url}`,
73
+ `Page title: ${title}`,
74
+ `WebMCP detected - ${state.webmcp.tools.length} native tool(s): ${state.webmcp.tools.map((tool) => tool.name).join(', ')}`,
75
+ 'Use call_webmcp_tool to invoke them directly.',
76
+ ]);
77
+ }
78
+
79
+ return textResponse([
80
+ `Navigated to: ${url}`,
81
+ `Page title: ${title}`,
82
+ `CDP mode - ${state.hintMap.length} interactive elements found.`,
83
+ 'Use get_hint_map to see the full element map.',
84
+ ]);
85
+ } catch (err) {
86
+ return errorResponse(`Navigation failed: ${err.message}`);
87
+ }
88
+ }
89
+ );
90
+
91
+ server.registerTool(
92
+ 'get_status',
93
+ {
94
+ description: 'Get current Grasp engine status: Chrome connection, current page, execution mode.',
95
+ inputSchema: {},
96
+ },
97
+ async () => {
98
+ try {
99
+ const page = await getActivePage();
100
+ await syncPageState(page, state);
101
+ const title = await page.title();
102
+ const { mode, detail } = describeMode(state);
103
+
104
+ state.handoff = await readHandoffState();
105
+ const handoff = state.handoff ?? { state: 'idle' };
106
+ const pageState = state.pageState ?? {};
107
+ return textResponse([
108
+ 'Grasp is connected',
109
+ '',
110
+ `Page: ${title}`,
111
+ `URL: ${page.url()}`,
112
+ `Mode: ${mode}`,
113
+ ` ${detail}`,
114
+ `Hint Map: ${state.hintMap.length} elements cached`,
115
+ `Page role: ${pageState.currentRole ?? 'unknown'}`,
116
+ `Grasp confidence: ${pageState.graspConfidence ?? 'unknown'}`,
117
+ `Reacquired: ${pageState.reacquired ? 'yes' : 'no'}`,
118
+ `Handoff: ${handoff.state}`,
119
+ ...(handoff.reason ? [` reason: ${handoff.reason}`] : []),
120
+ ]);
121
+ } catch (err) {
122
+ return errorResponse(`Grasp is NOT connected.\n${err.message}`);
123
+ }
124
+ }
125
+ );
126
+
127
+ server.registerTool(
128
+ 'request_handoff',
129
+ {
130
+ description: 'Mark that the current task/page requires a human step before the agent can continue.',
131
+ inputSchema: {
132
+ reason: z.string().describe('Why human help is required, e.g. login_required, captcha_required'),
133
+ note: z.string().optional().describe('Optional note for the human/operator'),
134
+ },
135
+ },
136
+ async ({ reason, note }) => {
137
+ state.handoff = requestHandoff(await readHandoffState(), reason, note ?? null);
138
+ await writeHandoffState(state.handoff);
139
+ await audit('handoff_request', `${reason}${note ? ` :: ${note}` : ''}`);
140
+ return textResponse([
141
+ `Handoff requested: ${reason}`,
142
+ ...(note ? [`Note: ${note}`] : []),
143
+ 'State: handoff_required',
144
+ ], { handoff: state.handoff });
145
+ }
146
+ );
147
+
148
// Tool: mark_handoff_in_progress
// Records that a human is currently performing the required browser step.
// Input: optional `note`. Output: text summary plus the updated handoff record.
server.registerTool(
  'mark_handoff_in_progress',
  {
    description: 'Mark that a human is currently performing the required browser step.',
    inputSchema: {
      note: z.string().optional().describe('Optional note about the in-progress human step'),
    },
  },
  async ({ note } = {}) => {
    // Transition from the persisted record and write the result back, as the
    // other handoff tools do. get_status replaces state.handoff with
    // readHandoffState(), so a transition kept only in memory would be lost
    // on the next status call.
    state.handoff = markHandoffInProgress(await readHandoffState(), note ?? null);
    await writeHandoffState(state.handoff);
    await audit('handoff_progress', note ?? 'in progress');
    return textResponse([
      'Handoff is now in progress.',
      ...(note ? [`Note: ${note}`] : []),
      'State: handoff_in_progress',
    ], { handoff: state.handoff });
  }
);
166
+
167
+ server.registerTool(
168
+ 'mark_handoff_done',
169
+ {
170
+ description: 'Mark that the human step is done and Grasp should now reacquire the page state.',
171
+ inputSchema: {
172
+ note: z.string().optional().describe('Optional note left by the human/operator'),
173
+ },
174
+ },
175
+ async ({ note } = {}) => {
176
+ state.handoff = markAwaitingReacquisition(await readHandoffState(), note ?? null);
177
+ await writeHandoffState(state.handoff);
178
+ await audit('handoff_done', note ?? 'awaiting reacquisition');
179
+ return textResponse([
180
+ 'Human step marked done.',
181
+ ...(note ? [`Note: ${note}`] : []),
182
+ 'State: awaiting_reacquisition',
183
+ 'Next: call resume_after_handoff to reacquire page state.',
184
+ ], { handoff: state.handoff });
185
+ }
186
+ );
187
+
188
// Tool: resume_after_handoff
// Force-syncs page state after a human step, captures page evidence, and
// moves the handoff to a resumed state: verified when `verify` is true and
// the synced page state reports `reacquired`, unverified otherwise.
// Input: optional `verify` (defaults to true) and optional `note`.
// Output: text summary plus the updated handoff record and the evidence.
server.registerTool(
  'resume_after_handoff',
  {
    description: 'Reacquire page state after a human step, then mark the handoff as resumed (verified or unverified).',
    inputSchema: {
      verify: z.boolean().optional().describe('Whether to require visible reacquisition evidence before marking verified (default: true)'),
      note: z.string().optional().describe('Optional note about the resumed state'),
    },
  },
  async ({ verify = true, note } = {}) => {
    const page = await getActivePage();
    await syncPageState(page, state, { force: true });
    const evidence = await capturePageEvidence(page, state, {
      action: 'resume_after_handoff',
      details: {
        pageIdentity: state.pageState?.pageIdentity ?? null,
      },
    });

    // Transition from the persisted record rather than the in-memory copy,
    // matching request_handoff / mark_handoff_done / clear_handoff.
    const persistedHandoff = await readHandoffState();
    const markResumed = verify && state.pageState?.reacquired
      ? markResumeVerified
      : markResumedUnverified;
    state.handoff = markResumed(persistedHandoff, evidence, note ?? null);

    // Persist the resumed state; otherwise get_status and clear_handoff,
    // which both read via readHandoffState(), would never see it.
    await writeHandoffState(state.handoff);

    await audit('handoff_resume', `${state.handoff.state}${note ? ` :: ${note}` : ''}`);
    return textResponse([
      `Resume state: ${state.handoff.state}`,
      `Page role: ${state.pageState?.currentRole ?? 'unknown'}`,
      `Grasp confidence: ${state.pageState?.graspConfidence ?? 'unknown'}`,
      `Reacquired: ${state.pageState?.reacquired ? 'yes' : 'no'}`,
    ], { handoff: state.handoff, evidence });
  }
);
222
+
223
+ server.registerTool(
224
+ 'clear_handoff',
225
+ {
226
+ description: 'Clear the current handoff state and return to idle.',
227
+ inputSchema: {},
228
+ },
229
+ async () => {
230
+ state.handoff = clearHandoff(await readHandoffState());
231
+ await writeHandoffState(state.handoff);
232
+ await audit('handoff_clear', 'idle');
233
+ return textResponse('Handoff cleared. State: idle', { handoff: state.handoff });
234
+ }
235
+ );
236
+
237
+ server.registerTool(
238
+ 'get_page_summary',
239
+ {
240
+ description: 'Get a summary of the current page: title, URL, mode, and visible text content.',
241
+ inputSchema: {},
242
+ },
243
+ async () => {
244
+ const page = await getActivePage();
245
+ await syncPageState(page, state);
246
+
247
+ const text = await page.evaluate(() =>
248
+ document.body?.innerText?.replace(/\s+/g, ' ').trim().slice(0, 2000) ?? ''
249
+ );
250
+ const { summary } = describeMode(state);
251
+
252
+ return textResponse([
253
+ `Title: ${await page.title()}`,
254
+ `URL: ${page.url()}`,
255
+ `Mode: ${summary}`,
256
+ '',
257
+ 'Visible content (truncated):',
258
+ text,
259
+ ]);
260
+ }
261
+ );
262
+
263
+ server.registerTool(
264
+ 'wait_until_stable',
265
+ {
266
+ description: 'Wait until the page stops changing before reading content.',
267
+ inputSchema: {
268
+ checks: z.number().int().min(1).optional().describe('Number of consecutive stable snapshots required'),
269
+ interval: z.number().min(0).optional().describe('Polling interval in milliseconds'),
270
+ timeout: z.number().min(0).optional().describe('Maximum wait time in milliseconds'),
271
+ },
272
+ },
273
+ async ({ checks, interval, timeout } = {}) => {
274
+ const page = await getActivePage();
275
+ const result = await waitUntilStable(page, {
276
+ stableChecks: checks,
277
+ interval,
278
+ timeout,
279
+ });
280
+ return textResponse(
281
+ [
282
+ result.stable ? 'Page stabilized.' : 'Page did not stabilize in time.',
283
+ `Captured ${result.attempts} snapshots.`,
284
+ ],
285
+ {
286
+ stable: result.stable,
287
+ attempts: result.attempts,
288
+ snapshot: result.snapshot,
289
+ }
290
+ );
291
+ }
292
+ );
293
+
294
+ server.registerTool(
295
+ 'extract_main_content',
296
+ {
297
+ description: 'Extract the main textual content for the current page.',
298
+ inputSchema: {},
299
+ },
300
+ async () => {
301
+ const page = await getActivePage();
302
+ await syncPageState(page, state, { force: true });
303
+ const content = await extractMainContent(page);
304
+ return textResponse(
305
+ content.text,
306
+ {
307
+ title: content.title,
308
+ }
309
+ );
310
+ }
311
+ );
312
+
313
+ server.registerTool(
314
+ 'get_hint_map',
315
+ {
316
+ description: "Get the Hint Map of interactive elements. Each element gets a short ID like [B1], [I2], [L3]. Use these IDs with click and type.",
317
+ inputSchema: {
318
+ filter: z.string().optional().describe('Optional keyword to filter elements by label (case-insensitive). E.g. "发送" returns only elements whose label contains "发送".'),
319
+ },
320
+ },
321
+ async ({ filter } = {}) => {
322
+ const page = await getActivePage();
323
+ await syncPageState(page, state);
324
+ const hints = await buildHintMap(page, state.hintRegistry, state.hintCounters);
325
+ state.hintMap = hints;
326
+
327
+ if (hints.length === 0) {
328
+ return textResponse('No interactive elements found in the current viewport.');
329
+ }
330
+
331
+ const keyword = filter?.trim().toLowerCase();
332
+ const filtered = keyword
333
+ ? hints.filter((h) => h.label.toLowerCase().includes(keyword))
334
+ : hints;
335
+
336
+ if (filtered.length === 0) {
337
+ return textResponse(`No elements matching "${filter}". Total elements: ${hints.length}. Call get_hint_map without filter to see all.`);
338
+ }
339
+
340
+ const lines = filtered.map((hint) => `[${hint.id}] ${hint.label} (${hint.type}, pos:${hint.x},${hint.y})`);
341
+ const header = keyword
342
+ ? `Found ${filtered.length} elements matching "${filter}" (${hints.length} total):`
343
+ : `Found ${hints.length} interactive elements:`;
344
+
345
+ const hintChars = lines.join('\n').length;
346
+ const rawSize = await page.evaluate(() => document.documentElement.outerHTML.length);
347
+ let efficiency = '';
348
+ if (rawSize > 0 && hintChars < rawSize) {
349
+ const savedPct = Math.round((1 - hintChars / rawSize) * 100);
350
+ efficiency = `\n\nToken efficiency: ~${savedPct}% saved vs raw HTML`
351
+ + ` (hint map: ${(hintChars / 1000).toFixed(1)}K chars,`
352
+ + ` raw DOM: ${(rawSize / 1000).toFixed(1)}K chars)`;
353
+ }
354
+ return textResponse(`${header}\n\n${lines.join('\n')}${efficiency}`);
355
+ }
356
+ );
357
+
358
+ server.registerTool(
359
+ 'search_affordances',
360
+ {
361
+ description: 'Rank search-friendly hints (inputs/buttons) from the current hint map.',
362
+ inputSchema: {},
363
+ },
364
+ async () => {
365
+ const page = await getActivePage();
366
+ await syncPageState(page, state, { force: true });
367
+ const ranking = rankAffordances({ hints: state.hintMap });
368
+ return textResponse(
369
+ `Search affordance candidates: ${ranking.search_input.length}`,
370
+ {
371
+ search_input: ranking.search_input,
372
+ command_button: ranking.command_button,
373
+ }
374
+ );
375
+ }
376
+ );
377
+
378
+ server.registerTool(
379
+ 'search_task',
380
+ {
381
+ description: 'Run a verified search task with bounded recovery.',
382
+ inputSchema: {
383
+ query: z.string().describe('Query text to run through search workflow'),
384
+ max_attempts: z.number().int().min(1).optional().describe('Maximum attempts before giving up'),
385
+ },
386
+ },
387
+ async ({ query, max_attempts = 3 }) => {
388
+ return runSearchTaskTool({ state, query, max_attempts });
389
+ }
390
+ );
391
+
392
+ server.registerTool(
393
+ 'click',
394
+ {
395
+ description: "Click an element by its Hint Map ID (e.g. 'B1'). Call get_hint_map first if you don't have IDs.",
396
+ inputSchema: { hint_id: z.string().describe('Hint Map ID like B1, I2, L3') },
397
+ },
398
+ async ({ hint_id }) => {
399
+ const page = await getActivePage();
400
+ const normalizedHintId = hint_id.toUpperCase();
401
+ const prevDomRevision = state.pageState?.domRevision ?? 0;
402
+ const prevUrl = state.lastUrl;
403
+ const prevActiveId = await getActiveHintId(page);
404
+ const rebuildHints = createRebuildHints(page, state);
405
+
406
+ try {
407
+ if (state.safeMode) {
408
+ const label = await page.evaluate((id) => {
409
+ const el = document.querySelector(`[data-grasp-id="${id}"]`);
410
+ if (!el) return '';
411
+ return el.getAttribute('aria-label') || el.innerText?.trim() || '';
412
+ }, normalizedHintId);
413
+ if (HIGH_RISK_KEYWORDS.some(k => label.toLowerCase().includes(k.toLowerCase()))) {
414
+ return textResponse([
415
+ `High-risk action detected: [${normalizedHintId}] "${label}"`,
416
+ 'To proceed, call confirm_click with the same hint_id.',
417
+ 'To disable safe mode globally, set GRASP_SAFE_MODE=false in environment.',
418
+ ]);
419
+ }
420
+ }
421
+
422
+ return runVerifiedAction({
423
+ action: 'click',
424
+ page,
425
+ state,
426
+ baseEvidence: { hint_id: normalizedHintId },
427
+ execute: async () => {
428
+ const result = await clickByHintId(page, normalizedHintId, { rebuildHints });
429
+ await syncPageState(page, state, { force: true });
430
+ return result;
431
+ },
432
+ verify: async () => verifyGenericAction({
433
+ page,
434
+ hintId: normalizedHintId,
435
+ prevDomRevision,
436
+ prevUrl,
437
+ prevActiveId,
438
+ newDomRevision: state.pageState.domRevision,
439
+ }),
440
+ onFailure: async (failure) => {
441
+ await audit('click_failed', `[${normalizedHintId}] ${failure.error_code}`);
442
+ return buildStructuredError(
443
+ `Click verification failed for [${normalizedHintId}]`,
444
+ normalizedHintId,
445
+ failure
446
+ );
447
+ },
448
+ onSuccess: async ({ executionResult, evidence }) => {
449
+ audit('click', `[${normalizedHintId}] "${executionResult.label}"`);
450
+ const urlAfter = page.url();
451
+ const nav = urlAfter !== prevUrl ? `\nNavigated to: ${urlAfter}` : '';
452
+ return textResponse(
453
+ `Clicked [${normalizedHintId}]: "${executionResult.label}"${nav}\nPage now has ${state.hintMap.length} elements. Call get_hint_map to see updated state.`,
454
+ { evidence }
455
+ );
456
+ },
457
+ });
458
+ } catch (err) {
459
+ await audit('click_failed', `[${normalizedHintId}] ${err.message}`);
460
+ return buildStructuredError(`Click failed: ${err.message}`, normalizedHintId);
461
+ }
462
+ }
463
+ );
464
+
465
+ server.registerTool(
466
+ 'confirm_click',
467
+ {
468
+ description: "Force-click a high-risk element, bypassing safe mode. Use only after explicitly confirming the action is intended.",
469
+ inputSchema: { hint_id: z.string().describe('Hint Map ID to force-click, e.g. B1') },
470
+ },
471
+ async ({ hint_id }) => {
472
+ const page = await getActivePage();
473
+ const normalizedHintId = hint_id.toUpperCase();
474
+ const rebuildHints = createRebuildHints(page, state);
475
+ const prevDomRevision = state.pageState?.domRevision ?? 0;
476
+ const prevUrl = state.lastUrl;
477
+ const prevActiveId = await getActiveHintId(page);
478
+
479
+ try {
480
+ const result = await clickByHintId(page, normalizedHintId, { rebuildHints });
481
+ await syncPageState(page, state, { force: true });
482
+ const verification = await verifyGenericAction({
483
+ page,
484
+ hintId: normalizedHintId,
485
+ prevDomRevision,
486
+ prevUrl,
487
+ prevActiveId,
488
+ newDomRevision: state.pageState.domRevision,
489
+ });
490
+
491
+ if (!verification.ok) {
492
+ await audit('confirm_click_failed', `[${normalizedHintId}] ${verification.error_code}`);
493
+ return buildStructuredError(
494
+ `Confirm click verification failed for [${normalizedHintId}]`,
495
+ normalizedHintId,
496
+ verification
497
+ );
498
+ }
499
+
500
+ await audit('confirm_click', `[${normalizedHintId}] "${result.label}"`);
501
+ const urlAfter = page.url();
502
+ const nav = urlAfter !== prevUrl ? `\nNavigated to: ${urlAfter}` : '';
503
+ return textResponse(
504
+ `Force-clicked [${normalizedHintId}]: "${result.label}"${nav}\nPage now has ${state.hintMap.length} elements.`
505
+ );
506
+ } catch (err) {
507
+ await audit('confirm_click_failed', `[${normalizedHintId}] ${err.message}`);
508
+ return buildStructuredError(`confirm_click failed: ${err.message}`, normalizedHintId);
509
+ }
510
+ }
511
+ );
512
+
513
+ server.registerTool(
514
+ 'type',
515
+ {
516
+ description: 'Type text into an input field by its Hint Map ID. Clears existing content first.',
517
+ inputSchema: {
518
+ hint_id: z.string().describe('Hint Map ID of the input field, e.g. I1'),
519
+ text: z.string().describe('Text to type'),
520
+ press_enter: z.boolean().optional().describe('Press Enter after typing (default: false)'),
521
+ },
522
+ },
523
+ async ({ hint_id, text, press_enter = false }) => {
524
+ const page = await getActivePage();
525
+ const normalizedHintId = hint_id.toUpperCase();
526
+ const prevDomRevision = state.pageState?.domRevision ?? 0;
527
+ const prevUrl = page.url();
528
+ const rebuildHints = createRebuildHints(page, state);
529
+
530
+ try {
531
+ await typeByHintId(page, normalizedHintId, text, press_enter, { rebuildHints });
532
+ await syncPageState(page, state, { force: true });
533
+ const newDomRevision = state.pageState?.domRevision ?? prevDomRevision;
534
+ const verdict = await verifyTypeResult({
535
+ page,
536
+ expectedText: text,
537
+ allowPageChange: press_enter,
538
+ prevUrl,
539
+ prevDomRevision,
540
+ newDomRevision,
541
+ });
542
+ if (!verdict.ok) {
543
+ await audit('type_failed', `[${normalizedHintId}] ${verdict.error_code}`);
544
+ await syncPageState(page, state, { force: true });
545
+ return buildStructuredError(
546
+ `Type verification failed for [${normalizedHintId}]`,
547
+ normalizedHintId,
548
+ verdict
549
+ );
550
+ }
551
+
552
+ await audit('type', `[${normalizedHintId}] "${text.slice(0, 20)}${text.length > 20 ? '...' : ''}"`);
553
+ await syncPageState(page, state, { force: true });
554
+
555
+ return textResponse(
556
+ `Typed "${text}" into [${normalizedHintId}]${press_enter ? ' and pressed Enter' : ''}.`
557
+ );
558
+ } catch (err) {
559
+ await audit('type_failed', `[${normalizedHintId}] ${err.message}`);
560
+ await syncPageState(page, state, { force: true });
561
+ return buildStructuredError(
562
+ `Type failed: ${err.message}`,
563
+ normalizedHintId,
564
+ {
565
+ error_code: TYPE_FAILED,
566
+ retryable: true,
567
+ suggested_next_step: 'retry',
568
+ evidence: { hint_id: normalizedHintId, reason: err.message },
569
+ }
570
+ );
571
+ }
572
+ }
573
+ );
574
+
575
+ server.registerTool(
576
+ 'screenshot',
577
+ {
578
+ description: 'Take a screenshot of the current browser viewport.',
579
+ inputSchema: {},
580
+ },
581
+ async () => {
582
+ const page = await getActivePage();
583
+ // Wait for body to have actual content before screenshotting (prevents thin-line bug)
584
+ await page.waitForFunction(
585
+ () => document.body && document.body.getBoundingClientRect().height > 100,
586
+ { timeout: 3000 }
587
+ ).catch(() => {});
588
+ const base64 = await page.screenshot({ encoding: 'base64', fullPage: false });
589
+ return imageResponse(base64);
590
+ }
591
+ );
592
+
593
+ server.registerTool(
594
+ 'scroll',
595
+ {
596
+ description: 'Scroll the page up or down to reveal more content.',
597
+ inputSchema: {
598
+ direction: z.enum(['up', 'down']).describe('Scroll direction'),
599
+ amount: z.number().optional().describe('Pixels to scroll (default: 600)'),
600
+ },
601
+ },
602
+ async ({ direction, amount = 600 }) => {
603
+ const page = await getActivePage();
604
+ await scroll(page, direction, amount);
605
+ audit('scroll', `${direction} ${amount}px`);
606
+ await syncPageState(page, state, { force: true });
607
+
608
+ return textResponse(`Scrolled ${direction} by ${amount}px. ${state.hintMap.length} elements now visible.`);
609
+ }
610
+ );
611
+
612
+ server.registerTool(
613
+ 'watch_element',
614
+ {
615
+ description: 'Watch a CSS selector for DOM changes. Waits up to 30 seconds.',
616
+ inputSchema: {
617
+ selector: z.string().describe('CSS selector to watch'),
618
+ condition: z.enum(['appears', 'disappears', 'changes']).describe('Condition to wait for'),
619
+ },
620
+ },
621
+ async ({ selector, condition }) => {
622
+ const page = await getActivePage();
623
+ const result = await watchElement(page, selector, condition);
624
+
625
+ if (result.timeout) {
626
+ return textResponse(`watch_element timed out after 30s waiting for "${selector}" to ${condition}.`);
627
+ }
628
+
629
+ return textResponse(
630
+ `Condition met: "${selector}" ${condition}.${result.text ? `\nContent: "${result.text}"` : ''}`
631
+ );
632
+ }
633
+ );
634
+
635
+ server.registerTool(
636
+ 'call_webmcp_tool',
637
+ {
638
+ description: 'Call a native WebMCP tool exposed by the current page. Only available in WebMCP mode.',
639
+ inputSchema: {
640
+ tool_name: z.string().describe('Name of the WebMCP tool to call'),
641
+ args: z.record(z.any()).optional().describe('Arguments to pass to the tool'),
642
+ },
643
+ },
644
+ async ({ tool_name, args = {} }) => {
645
+ const page = await getActivePage();
646
+ await syncPageState(page, state);
647
+
648
+ if (!state.webmcp?.available) {
649
+ return errorResponse('WebMCP not available. Use CDP tools instead (get_hint_map -> click/type).');
650
+ }
651
+
652
+ try {
653
+ const result = await callTool(page, state.webmcp, tool_name, args);
654
+
655
+ return textResponse([
656
+ `WebMCP tool "${tool_name}" result:`,
657
+ '',
658
+ typeof result === 'string' ? result : JSON.stringify(result, null, 2),
659
+ ]);
660
+ } catch (err) {
661
+ await syncPageState(page, state, { force: true });
662
+ return errorResponse(
663
+ `WebMCP call failed: ${err.message}\nWebMCP status after re-probe: ${state.webmcp?.available ? 'still available' : 'unavailable - use CDP tools instead'}`
664
+ );
665
+ }
666
+ }
667
+ );
668
+
669
+ server.registerTool(
670
+ 'get_tabs',
671
+ {
672
+ description: 'List all open browser tabs with their index, title, and URL.',
673
+ inputSchema: {},
674
+ },
675
+ async () => {
676
+ try {
677
+ const tabs = await getTabs();
678
+ const lines = tabs.map((t) => `[${t.index}] ${t.title || '(no title)'} ${t.url}`);
679
+ return textResponse(`${tabs.length} open tabs:\n\n${lines.join('\n')}`);
680
+ } catch (err) {
681
+ return errorResponse(`get_tabs failed: ${err.message}`);
682
+ }
683
+ }
684
+ );
685
+
686
+ server.registerTool(
687
+ 'switch_tab',
688
+ {
689
+ description: 'Switch to a tab by its index (from get_tabs).',
690
+ inputSchema: { index: z.number().int().describe('Tab index from get_tabs') },
691
+ },
692
+ async ({ index }) => {
693
+ try {
694
+ const page = await switchTab(index);
695
+ await syncPageState(page, state, { force: true });
696
+ return textResponse(`Switched to tab [${index}]: ${page.url()}`);
697
+ } catch (err) {
698
+ return errorResponse(`switch_tab failed: ${err.message}`);
699
+ }
700
+ }
701
+ );
702
+
703
+ server.registerTool(
704
+ 'new_tab',
705
+ {
706
+ description: 'Open a URL in a new browser tab and switch to it.',
707
+ inputSchema: { url: z.string().url().describe('URL to open in new tab') },
708
+ },
709
+ async ({ url }) => {
710
+ try {
711
+ const page = await newTab(url);
712
+ await syncPageState(page, state, { force: true });
713
+ const title = await page.title();
714
+ return textResponse(`Opened new tab: ${title}\nURL: ${url}`);
715
+ } catch (err) {
716
+ return errorResponse(`new_tab failed: ${err.message}`);
717
+ }
718
+ }
719
+ );
720
+
721
+ server.registerTool(
722
+ 'close_tab',
723
+ {
724
+ description: 'Close a tab by its index (from get_tabs).',
725
+ inputSchema: { index: z.number().int().describe('Tab index to close') },
726
+ },
727
+ async ({ index }) => {
728
+ try {
729
+ await closeTab(index);
730
+ return textResponse(`Closed tab [${index}].`);
731
+ } catch (err) {
732
+ return errorResponse(`close_tab failed: ${err.message}`);
733
+ }
734
+ }
735
+ );
736
+
737
+ server.registerTool(
738
+ 'get_logs',
739
+ {
740
+ description: 'Read recent Grasp audit log entries. Shows the last N operations performed.',
741
+ inputSchema: {
742
+ lines: z.number().int().optional().describe('Number of recent log lines to return (default: 50)'),
743
+ },
744
+ },
745
+ async ({ lines = 50 } = {}) => {
746
+ const entries = await readLogs(lines);
747
+ if (entries.length === 0) {
748
+ return textResponse('No audit log entries yet. Log is written to ~/.grasp/audit.log');
749
+ }
750
+ return textResponse(`Last ${entries.length} operations:\n\n${entries.join('\n')}`);
751
+ }
752
+ );
753
+
754
// Tool: press_key
// Sends a key or shortcut to the active page via pressKey(), forces a page
// state resync, then runs verifyGenericAction() against the state captured
// before the key press.
//
// Input:  key (string) - key or shortcut, e.g. "Enter", "Control+Enter".
// Output: a text response on success; a structured error when verification
//         reports !ok or when any step inside the try block throws.
// Every outcome is recorded through audit(), and each audit call is awaited so
// no promise is left floating.
server.registerTool(
  'press_key',
  {
    description: 'Press a keyboard key or shortcut. Examples: "Enter", "Escape", "Tab", "Control+Enter", "Control+a".',
    inputSchema: { key: z.string().describe('Key or shortcut, e.g. "Enter", "Control+Enter"') },
  },
  async ({ key }) => {
    const page = await getActivePage();

    // Capture the pre-action state; these values are handed to the verifier
    // together with the post-action DOM revision.
    const prevDomRevision = state.pageState?.domRevision ?? 0;
    const prevUrl = state.lastUrl;
    const prevActiveId = await getActiveHintId(page);

    try {
      await pressKey(page, key);
      await syncPageState(page, state, { force: true });
      const verification = await verifyGenericAction({
        page,
        hintId: null, // a key press is not tied to a specific hint element
        prevDomRevision,
        prevUrl,
        prevActiveId,
        newDomRevision: state.pageState.domRevision,
      });

      if (!verification.ok) {
        await audit('press_key_failed', `[${key}] ${verification.error_code}`);
        return buildStructuredError(
          `Press key verification failed for ${key}`,
          key,
          verification
        );
      }

      await audit('press_key', key);
      return textResponse(`Pressed: ${key}`);
    } catch (err) {
      // Include the failure reason in the audit entry, as the hover tool does.
      await audit('press_key_failed', `[${key}] ${err.message}`);
      return buildStructuredError(`press_key failed: ${err.message}`, key);
    }
  }
);
795
+
796
// Tool: get_form_fields
// Collects visible form controls on the active page, grouped per <form>
// element, plus a trailing group for controls that are not inside any <form>.
// Each field is described in the page context, then passed through
// normalizeFormField / summarizeFormFields / buildFormVerification to build a
// plain-text report.
//
// Input:  none.
// Output: a text response with one section per group, a "Summary:" section and
//         a "Verification:" section; or a hint message when no fields exist.
server.registerTool(
  'get_form_fields',
  {
    description: 'Identify form fields on the current page grouped by form. Returns field IDs that can be used directly with type and click.',
    inputSchema: {},
  },
  async () => {
    const page = await getActivePage();
    await syncPageState(page, state);

    // Everything inside this callback runs in the page, so it may only use
    // DOM APIs and values defined within the callback itself.
    const groups = await page.evaluate(() => {
      const FIELD_SELECTOR = 'input, textarea, select, button';
      const FIELD_TYPES_SKIP = new Set(['hidden']);

      function getHintId(el) {
        return el.getAttribute('data-grasp-id') || null;
      }

      // Text of the <label for="..."> that points at this element, or ''.
      function getLabelForText(el) {
        const id = el.getAttribute('id');
        if (!id) return '';
        // Escape the id before embedding it in the selector: an id containing
        // a quote or backslash would otherwise produce an invalid or
        // mismatching selector and abort the whole evaluation.
        const lbl = document.querySelector(`label[for="${CSS.escape(id)}"]`);
        return lbl?.textContent?.trim() ?? '';
      }

      // Concatenated text of the elements referenced by aria-labelledby, or ''.
      function getLabelledByText(el) {
        const labelledBy = el.getAttribute('aria-labelledby');
        if (!labelledBy) return '';
        return labelledBy.trim().split(/\s+/)
          .map((id) => document.getElementById(id)?.textContent?.trim() ?? '')
          .filter(Boolean)
          .join(' ');
      }

      // Raw, serializable description of one control; label resolution is
      // left to the caller outside the page.
      function describeField(el) {
        const tag = el.tagName.toLowerCase();
        const type = el.getAttribute('type') || (tag === 'select' ? 'select' : tag === 'textarea' ? 'textarea' : tag);
        const required = el.required || el.getAttribute('required') !== null;
        return {
          hint_id: getHintId(el),
          tag,
          type,
          required,
          value: 'value' in el ? el.value : null,
          checked: 'checked' in el ? el.checked : null,
          ariaLabelledByText: getLabelledByText(el),
          ariaLabel: el.getAttribute('aria-label')?.trim() ?? '',
          labelForText: getLabelForText(el),
          placeholder: el.getAttribute('placeholder')?.trim() ?? '',
          name: el.getAttribute('name')?.trim() ?? '',
        };
      }

      // Controls under `root` that are not type="hidden" and have a non-empty
      // bounding box.
      function collectFields(root) {
        return [...root.querySelectorAll(FIELD_SELECTOR)].filter((el) => {
          const type = el.getAttribute('type') || '';
          if (FIELD_TYPES_SKIP.has(type)) return false;
          const rect = el.getBoundingClientRect();
          return rect.width > 0 && rect.height > 0;
        });
      }

      const result = [];

      // 1. One group per <form> that has at least one visible field. The
      //    number in the header is the form's position among all forms on the
      //    page, so skipped empty forms leave gaps.
      const forms = [...document.querySelectorAll('form')];
      forms.forEach((form, i) => {
        const fields = collectFields(form);
        if (fields.length === 0) return;
        const action = form.getAttribute('action') || 'no action';
        result.push({
          header: `Form ${i + 1} (action="${action}"):`,
          fields: fields.map(describeField),
        });
      });

      // 2. Fallback: visible controls that are not inside any <form>.
      const orphans = collectFields(document).filter((el) => !el.closest('form'));
      if (orphans.length > 0) {
        result.push({
          header: 'Ungrouped fields (no <form> wrapper):',
          fields: orphans.map(describeField),
        });
      }

      return result;
    });

    if (groups.length === 0) {
      return textResponse('No form fields found on the current page. Call get_hint_map to see all interactive elements.');
    }

    const normalizedGroups = groups.map((group) => ({
      header: group.header,
      fields: group.fields.map((field) => {
        const normalized = normalizeFormField(field);
        const idStr = normalized.hint_id ? `[${normalized.hint_id}]` : '(no hint id — call get_hint_map first)';
        return ` ${idStr} ${normalized.label} (${normalized.type}${normalized.required ? ', required' : ''})`;
      }),
    }));

    // Flatten once; both the summary and the verification use the raw fields.
    const allFields = groups.flatMap((group) => group.fields);
    const summary = summarizeFormFields(allFields);
    const verification = buildFormVerification(allFields);

    const lines = [
      ...normalizedGroups.flatMap((g) => [g.header, ...g.fields, '']),
      'Summary:',
      ...summary.lines.map((line) => ` ${line}`),
      'Verification:',
      ` completion_status: ${verification.completion_status}`,
      ` missing_required: ${verification.summary.missing_required}`,
      ` risky_pending: ${verification.summary.risky_pending}`,
      ` unresolved: ${verification.summary.unresolved}`,
    ];
    return textResponse(lines.join('\n').trimEnd());
  }
);
922
+
923
// Tool: hover
// Hovers the element identified by a Hint Map ID via hoverByHintId(), forces a
// page state resync, then runs verifyGenericAction() against the state
// captured before the hover.
//
// Input:  hint_id (string) - Hint Map ID, e.g. B1, L3; upper-cased before use.
// Output: a text response naming the hovered element (plus the new URL when it
//         differs from the previously recorded one) and the current hint
//         count; or a structured error when verification reports !ok or any
//         step inside the try block throws.
// Every outcome is recorded through audit(), and each audit call is awaited so
// no promise is left floating.
server.registerTool(
  'hover',
  {
    description: 'Hover over an element by Hint Map ID to trigger dropdown menus or tooltips.',
    inputSchema: { hint_id: z.string().describe('Hint Map ID to hover over, e.g. B1, L3') },
  },
  async ({ hint_id }) => {
    const page = await getActivePage();
    const normalizedHintId = hint_id.toUpperCase();
    // Passed to hoverByHintId as its `rebuildHints` option.
    const rebuildHints = createRebuildHints(page, state);

    // Capture the pre-action state; these values are handed to the verifier
    // together with the post-action DOM revision.
    const prevDomRevision = state.pageState?.domRevision ?? 0;
    const prevUrl = state.lastUrl;
    const prevActiveId = await getActiveHintId(page);

    try {
      const result = await hoverByHintId(page, normalizedHintId, { rebuildHints });
      await syncPageState(page, state, { force: true });
      const verification = await verifyGenericAction({
        page,
        hintId: normalizedHintId,
        prevDomRevision,
        prevUrl,
        prevActiveId,
        newDomRevision: state.pageState.domRevision,
      });

      if (!verification.ok) {
        await audit('hover_failed', `[${normalizedHintId}] ${verification.error_code}`);
        return buildStructuredError(
          `Hover verification failed for [${normalizedHintId}]`,
          normalizedHintId,
          verification
        );
      }

      await audit('hover', `[${normalizedHintId}] "${result.label}"`);
      const urlAfter = page.url();
      const nav = urlAfter !== prevUrl ? `\nNavigated to: ${urlAfter}` : '';
      return textResponse(
        `Hovered over [${normalizedHintId}]: "${result.label}".${nav}\n${state.hintMap.length} elements now visible.`
      );
    } catch (err) {
      await audit('hover_failed', `[${normalizedHintId}] ${err.message}`);
      try {
        await syncPageState(page, state, { force: true });
      } catch {
        // Best-effort resync only: if it fails too, the original hover error
        // is the one worth reporting, so it must not be masked here.
      }
      return buildStructuredError(
        `hover failed: ${err.message}`,
        normalizedHintId,
        {
          // NOTE(review): TYPE_FAILED is reused for hover failures; confirm
          // whether a hover-specific error code should be used instead.
          error_code: TYPE_FAILED,
          retryable: true,
          suggested_next_step: 'retry',
          evidence: {
            hint_id: normalizedHintId,
            reason: err.message,
          },
        }
      );
    }
  }
);
983
+ }