moltbrowser-mcp-server 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -27,7 +27,7 @@ MoltBrowser-MCP fixes that. When an agent lands on x.com it gets `hub_post-tweet
27
27
  "mcpServers": {
28
28
  "moltbrowser-mcp": {
29
29
  "command": "npx",
30
- "args": ["moltbrowser-mcp"],
30
+ "args": ["moltbrowser-mcp-server"],
31
31
  "env": {
32
32
  "HUB_API_KEY": "whub_your_api_key"
33
33
  }
@@ -76,7 +76,8 @@ These tools are always available when hub integration is enabled:
76
76
  | `contribute_delete-tool` | Delete a tool from a hub config (requires `HUB_API_KEY`) |
77
77
  | `contribute_vote-on-tool` | Upvote or downvote a tool to signal quality (requires `HUB_API_KEY`) |
78
78
 
79
- ### Configuration
79
+ <details>
80
+ <summary>Configuration</summary>
80
81
 
81
82
  All standard browser automation options are supported:
82
83
 
@@ -129,6 +130,8 @@ All standard browser automation options are supported:
129
130
 
130
131
  <!--- End of options generated section -->
131
132
 
133
+ </details>
134
+
132
135
  <details>
133
136
  <summary><b>Advanced configuration</b></summary>
134
137
 
@@ -165,7 +168,7 @@ state [here](https://playwright.dev/docs/auth).
165
168
  "playwright": {
166
169
  "command": "npx",
167
170
  "args": [
168
- "moltbrowser-mcp",
171
+ "moltbrowser-mcp-server",
169
172
  "--isolated",
170
173
  "--storage-state={path/to/storage.json}"
171
174
  ]
@@ -209,7 +212,7 @@ The server can be configured using a JSON configuration file. You can specify th
209
212
  using the `--config` command line option:
210
213
 
211
214
  ```bash
212
- npx moltbrowser-mcp --config path/to/config.json
215
+ npx moltbrowser-mcp-server --config path/to/config.json
213
216
  ```
214
217
 
215
218
  <details>
@@ -439,7 +442,7 @@ When running headed browser on system w/o display or from worker processes of th
439
442
  run the MCP server from environment with the DISPLAY and pass the `--port` flag to enable HTTP transport.
440
443
 
441
444
  ```bash
442
- npx moltbrowser-mcp --port 8931
445
+ npx moltbrowser-mcp-server --port 8931
443
446
  ```
444
447
 
445
448
  And then in MCP client config, set the `url` to the HTTP endpoint:
@@ -462,7 +465,7 @@ And then in MCP client config, set the `url` to the HTTP endpoint:
462
465
  ```js
463
466
  import http from 'http';
464
467
 
465
- import { createConnection } from 'moltbrowser-mcp';
468
+ import { createConnection } from 'moltbrowser-mcp-server';
466
469
  import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
467
470
 
468
471
  http.createServer(async (req, res) => {
package/hub-cli.js CHANGED
@@ -6,7 +6,7 @@
6
6
  * with WebMCP Hub integration for dynamic, per-site tools.
7
7
  *
8
8
  * Usage:
9
- * npx moltbrowser-mcp [options]
9
+ * npx moltbrowser-mcp-server [options]
10
10
  *
11
11
  * Hub options:
12
12
  * --hub-url=<url> Override hub URL (default: https://webmcp-hub.com)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "moltbrowser-mcp-server",
3
- "version": "1.0.1",
3
+ "version": "1.1.0",
4
4
  "description": "Playwright MCP with WebMCP Hub integration — dynamic, per-site tools for browser agents",
5
5
  "repository": {
6
6
  "type": "git",
@@ -78,6 +78,65 @@ function isNativeFillType(type) {
78
78
  return !type || type === 'text' || type === 'textarea' || type === 'number' || type === 'date';
79
79
  }
80
80
 
81
+ // --- Shadow DOM fallback generator ---
82
+
83
+ /**
84
+ * Wrap a Playwright locator call with a try/catch that falls back to
85
+ * page.evaluate() with deepQuery when the element is inside Shadow DOM.
86
+ * Playwright's page.locator() can't pierce shadow roots with plain CSS
87
+ * selectors, so we try native Playwright first (trusted events, framework
88
+ * compatible) and fall back to deepQuery (shadow-piercing).
89
+ *
90
+ * @param {string} playwrightLine - The `await page.locator(...)...` code
91
+ * @param {string} fallbackBody - JS code to run inside page.evaluate() on failure
92
+ * @returns {string} try/catch code block
93
+ */
94
+ // Short timeout for the Playwright try path — if the element is in Shadow DOM,
95
+ // page.locator() won't find it. 3s is plenty for a non-Shadow element to appear;
96
+ // the default 30s would waste time before the fallback kicks in.
97
+ const SHADOW_TRY_TIMEOUT = 3000;
98
+
99
+ function withShadowFallback(playwrightLine, fallbackBody) {
100
+ // Inject timeout into Playwright locator calls so the fallback kicks in fast.
101
+ // Matches .click(), .press(...), .fill(...), .check(), .uncheck(), .selectOption(...)
102
+ // and adds { timeout: SHADOW_TRY_TIMEOUT } as the last argument.
103
+ const timedLine = playwrightLine.replace(
104
+ /\.(click|press|fill|check|uncheck|selectOption)\(([^)]*)\)/,
105
+ (_, method, args) => {
106
+ const timeout = `{ timeout: ${SHADOW_TRY_TIMEOUT} }`;
107
+ return args.trim() ? `.${method}(${args}, ${timeout})` : `.${method}(${timeout})`;
108
+ }
109
+ );
110
+ return [
111
+ `try {`,
112
+ ` ${timedLine}`,
113
+ `} catch {`,
114
+ ` await page.evaluate(() => { ${DEEP_QUERY_FNS} ${fallbackBody} });`,
115
+ `}`,
116
+ ].join('\n');
117
+ }
118
+
119
+ /**
120
+ * Shadow DOM fallback for text input: focus via deepQuery, then type with
121
+ * Playwright's keyboard API. This produces trusted InputEvents that
122
+ * framework-controlled inputs (React, Polymer/Lit web components) respond to,
123
+ * unlike setting .value directly which bypasses their event systems.
124
+ *
125
+ * @param {string} sel - CSS selector for the input element
126
+ * @param {string} value - Text to type
127
+ * @returns {string} try/catch code block
128
+ */
129
+ function withShadowFillFallback(sel, value) {
130
+ return [
131
+ `try {`,
132
+ ` await page.locator(${quote(sel)}).fill(${quote(value)}, { timeout: ${SHADOW_TRY_TIMEOUT} });`,
133
+ `} catch {`,
134
+ ` await page.evaluate(() => { ${DEEP_QUERY_FNS} const _el = deepQuery(${qs(sel)}); if (_el) { _el.focus(); _el.value = ''; _el.dispatchEvent(new Event('input', { bubbles: true })); } });`,
135
+ ` await page.keyboard.type(${quote(value)});`,
136
+ `}`,
137
+ ].join('\n');
138
+ }
139
+
81
140
  // --- Main entry point ---
82
141
 
83
142
  /**
@@ -139,26 +198,38 @@ function translateSimple(execution, args) {
139
198
  : null;
140
199
  const sel = lastField ? lastField.selector : execution.selector;
141
200
 
201
+ // Use Playwright's native .press('Enter') for trusted keyboard events.
202
+ // Falls back to deepQuery + dispatchEvent for Shadow DOM elements.
203
+ flushBatch();
142
204
  if (isPlaywrightSelector(sel)) {
143
- flushBatch();
144
205
  phases.push(`await page.locator(${quote(sel)}).press('Enter');`);
145
206
  } else {
146
- batch.push(
147
- `{ const _el = deepQuery(${qs(sel)});`,
148
- ` if (_el) {`,
149
- ` _el.dispatchEvent(new KeyboardEvent('keydown', { key: 'Enter', code: 'Enter', bubbles: true }));`,
150
- ` _el.dispatchEvent(new KeyboardEvent('keypress', { key: 'Enter', code: 'Enter', bubbles: true }));`,
151
- ` _el.dispatchEvent(new KeyboardEvent('keyup', { key: 'Enter', code: 'Enter', bubbles: true }));`,
152
- ` const _form = _el.closest('form');`,
153
- ` if (_form) { _form.requestSubmit ? _form.requestSubmit() : _form.submit(); }`,
154
- ` }`,
207
+ const enterFallback = [
208
+ `const _el = deepQuery(${qs(sel)});`,
209
+ `if (_el) {`,
210
+ ` _el.dispatchEvent(new KeyboardEvent('keydown', { key: 'Enter', code: 'Enter', bubbles: true }));`,
211
+ ` _el.dispatchEvent(new KeyboardEvent('keypress', { key: 'Enter', code: 'Enter', bubbles: true }));`,
212
+ ` _el.dispatchEvent(new KeyboardEvent('keyup', { key: 'Enter', code: 'Enter', bubbles: true }));`,
213
+ ` const _form = _el.closest('form');`,
214
+ ` if (_form) { _form.requestSubmit ? _form.requestSubmit() : _form.submit(); }`,
155
215
  `}`,
156
- );
216
+ ].join(' ');
217
+ phases.push(withShadowFallback(
218
+ `await page.locator(${quote(sel)}).press('Enter');`,
219
+ enterFallback,
220
+ ));
157
221
  }
158
222
  } else {
159
223
  const submitSel = execution.submitSelector || `${execution.selector} [type="submit"], ${execution.selector} button`;
160
224
  flushBatch();
161
- phases.push(`await page.locator(${quote(submitSel)}).first().click();`);
225
+ if (isPlaywrightSelector(submitSel)) {
226
+ phases.push(`await page.locator(${quote(submitSel)}).first().click();`);
227
+ } else {
228
+ phases.push(withShadowFallback(
229
+ `await page.locator(${quote(submitSel)}).first().click();`,
230
+ `const _el = deepQuery(${qs(submitSel)}); if (_el) _el.click();`,
231
+ ));
232
+ }
162
233
  }
163
234
  }
164
235
 
@@ -204,14 +275,25 @@ function translateSteps(execution, args, opts = {}) {
204
275
  case 'click':
205
276
  if (selector) {
206
277
  flushBatch();
207
- phases.push(`await page.locator(${quote(selector)}).first().click();`);
278
+ if (isPlaywrightSelector(selector)) {
279
+ phases.push(`await page.locator(${quote(selector)}).first().click();`);
280
+ } else {
281
+ phases.push(withShadowFallback(
282
+ `await page.locator(${quote(selector)}).first().click();`,
283
+ `const _el = deepQuery(${qs(selector)}); if (_el) _el.click();`,
284
+ ));
285
+ }
208
286
  }
209
287
  break;
210
288
 
211
289
  case 'fill':
212
290
  if (selector && value !== null) {
213
291
  flushBatch();
214
- phases.push(`await page.locator(${quote(selector)}).first().fill(${quote(value)});`);
292
+ if (isPlaywrightSelector(selector)) {
293
+ phases.push(`await page.locator(${quote(selector)}).first().fill(${quote(value)});`);
294
+ } else {
295
+ phases.push(withShadowFillFallback(selector, value));
296
+ }
215
297
  }
216
298
  break;
217
299
 
@@ -382,21 +464,34 @@ function domFieldAction(field, value) {
382
464
  }
383
465
 
384
466
  /**
385
- * Generate Playwright API lines for filling a field with Playwright-specific selectors.
386
- * Returns an array of code lines (each is a standalone statement).
467
+ * Generate Playwright API lines for filling a field.
468
+ * For Playwright-specific selectors, uses direct locator calls.
469
+ * For plain CSS selectors, wraps in try/catch with deepQuery fallback
470
+ * to handle elements inside Shadow DOM.
387
471
  */
388
472
  function playwrightFieldAction(field, value) {
389
473
  const sel = field.selector;
474
+ const pw = isPlaywrightSelector(sel);
390
475
 
391
476
  switch (field.type) {
392
- case 'select':
393
- return [`await page.locator(${quote(sel)}).selectOption(${quote(String(value))});`];
477
+ case 'select': {
478
+ const line = `await page.locator(${quote(sel)}).selectOption(${quote(String(value))});`;
479
+ if (pw) return [line];
480
+ return [withShadowFallback(line,
481
+ `const _el = deepQuery(${qs(sel)}); if (_el) { _el.value = ${qs(String(value))}; _el.dispatchEvent(new Event('change', { bubbles: true })); }`
482
+ )];
483
+ }
394
484
 
395
- case 'checkbox':
396
- if (value === true || value === 'true' || value === 'on') {
397
- return [`await page.locator(${quote(sel)}).check();`];
398
- }
399
- return [`await page.locator(${quote(sel)}).uncheck();`];
485
+ case 'checkbox': {
486
+ const checked = value === true || value === 'true' || value === 'on';
487
+ const line = checked
488
+ ? `await page.locator(${quote(sel)}).check();`
489
+ : `await page.locator(${quote(sel)}).uncheck();`;
490
+ if (pw) return [line];
491
+ return [withShadowFallback(line,
492
+ `const _el = deepQuery(${qs(sel)}); if (_el) { _el.checked = ${checked}; _el.dispatchEvent(new Event('change', { bubbles: true })); }`
493
+ )];
494
+ }
400
495
 
401
496
  case 'radio': {
402
497
  let radioSel = sel + `[value="${value}"]`;
@@ -404,11 +499,17 @@ function playwrightFieldAction(field, value) {
404
499
  const option = field.options.find(o => o.value === String(value));
405
500
  if (option && option.selector) radioSel = option.selector;
406
501
  }
407
- return [`await page.locator(${quote(radioSel)}).click();`];
502
+ const line = `await page.locator(${quote(radioSel)}).click();`;
503
+ if (pw || isPlaywrightSelector(radioSel)) return [line];
504
+ return [withShadowFallback(line,
505
+ `const _el = deepQuery(${qs(radioSel)}); if (_el) { _el.checked = true; _el.dispatchEvent(new Event('change', { bubbles: true })); }`
506
+ )];
408
507
  }
409
508
 
410
- default: // text, number, textarea, date, hidden
411
- return [`await page.locator(${quote(sel)}).fill(${quote(String(value))});`];
509
+ default: { // text, number, textarea, date, hidden
510
+ if (pw) return [`await page.locator(${quote(sel)}).fill(${quote(String(value))});`];
511
+ return [withShadowFillFallback(sel, String(value))];
512
+ }
412
513
  }
413
514
  }
414
515
 
@@ -445,7 +546,7 @@ function addResultWait(phases, execution) {
445
546
  */
446
547
  function addExtraction(phases, selector, extractMode, attribute) {
447
548
  if (!selector) {
448
- phases.push(`return '[action ran — no result selector configured]';`);
549
+ phases.push(`return '[action completed successfully]';`);
449
550
  return;
450
551
  }
451
552
  addStepExtraction(phases, selector, extractMode, attribute);
package/src/hub-tools.js CHANGED
@@ -102,6 +102,8 @@ async function executeHubTool(upstreamClient, hubEntry, args) {
102
102
  };
103
103
  }
104
104
 
105
+ const toolDisplayName = hubEntry.tool.name.replace(/^hub_/, '');
106
+
105
107
  try {
106
108
  const result = await upstreamClient.callTool({
107
109
  name: 'browser_run_code',
@@ -113,18 +115,44 @@ async function executeHubTool(upstreamClient, hubEntry, args) {
113
115
  return {
114
116
  content: [{
115
117
  type: 'text',
116
- text: `Hub tool "${hubEntry.tool.name}" failed:\n${errorText}\n\nUse browser_fallback to access generic Playwright tools.`,
118
+ text: `Hub tool "${toolDisplayName}" failed:\n${errorText}\n\nUse browser_fallback to access generic Playwright tools.`,
119
+ }, {
120
+ type: 'text',
121
+ text: `\n<vote-hint>This hub tool failed. Downvote it so other agents know it's broken:\ncontribute_vote-on-tool({ configId: "${hubEntry.configId}", toolName: "${toolDisplayName}", vote: -1 })</vote-hint>`,
117
122
  }],
118
123
  isError: true,
119
124
  };
120
125
  }
121
126
 
127
+ // Check if the result indicates a genuine failure.
128
+ // "[action completed successfully]" is the normal output for action-only tools (no resultSelector) — that's fine.
129
+ // "[resultSelector matched no elements" means extraction was configured but found nothing — that's a real problem.
130
+ const resultText = result.content?.map(c => c.text || '').join('\n') || '';
131
+ const looksLikeBroken = resultText.includes('[resultSelector matched no elements');
132
+
133
+ if (result.content) {
134
+ if (looksLikeBroken) {
135
+ result.content.push({
136
+ type: 'text',
137
+ text: `\n<vote-hint>This hub tool ran but its result selector matched nothing — the selectors may be broken. Downvote it:\ncontribute_vote-on-tool({ configId: "${hubEntry.configId}", toolName: "${toolDisplayName}", vote: -1 })</vote-hint>`,
138
+ });
139
+ } else {
140
+ result.content.push({
141
+ type: 'text',
142
+ text: `\n<vote-hint>This hub tool worked. Upvote it so other agents prefer it:\ncontribute_vote-on-tool({ configId: "${hubEntry.configId}", toolName: "${toolDisplayName}", vote: 1 })</vote-hint>`,
143
+ });
144
+ }
145
+ }
146
+
122
147
  return result;
123
148
  } catch (err) {
124
149
  return {
125
150
  content: [{
126
151
  type: 'text',
127
- text: `Hub tool "${hubEntry.tool.name}" failed: ${err.message}\n\nUse browser_fallback to access generic Playwright tools.`,
152
+ text: `Hub tool "${toolDisplayName}" failed: ${err.message}\n\nUse browser_fallback to access generic Playwright tools.`,
153
+ }, {
154
+ type: 'text',
155
+ text: `\n<vote-hint>This hub tool failed. Downvote it so other agents know it's broken:\ncontribute_vote-on-tool({ configId: "${hubEntry.configId}", toolName: "${toolDisplayName}", vote: -1 })</vote-hint>`,
128
156
  }],
129
157
  isError: true,
130
158
  };
@@ -220,27 +248,33 @@ const hubWriteTools = [
220
248
  ' steps: [{ action: "click", selector: "[data-testid=tweetButtonInline]" }]',
221
249
  '})',
222
250
  '',
223
- 'EXAMPLE — search form (fill + submit is still atomic enough):',
251
+ 'EXAMPLE — fill a search field (submit is handled by browser_press_key, not this tool):',
224
252
  'contribute_add-tool({',
225
253
  ' configId: "abc123",',
226
- ' name: "search-products",',
227
- ' description: "Search products by keyword",',
254
+ ' name: "fill-search",',
255
+ ' description: "Fill the search input field with a query. After calling this, use browser_press_key({ key: \'Enter\' }) to submit.",',
228
256
  ' selector: "#searchForm",',
229
- ' autosubmit: true,',
230
- ' submitSelector: "#searchBtn",',
231
- ' submitAction: "click",',
232
- ' fields: [{ type: "text", selector: "#searchInput", name: "query", description: "Search term" }],',
233
- ' resultSelector: ".results li",',
234
- ' resultExtract: "list"',
257
+ ' fields: [{ type: "text", selector: "#searchInput", name: "query", description: "Search term" }]',
235
258
  '})',
259
+ '→ Then the agent calls browser_press_key({ key: "Enter" }) to submit — no CSS selector needed for the button.',
236
260
  '',
237
261
  'KEY RULES:',
262
+ '- SELECTORS MUST BE LOCALE-INDEPENDENT. Configs are shared globally — selectors with localized text break for other users.',
263
+ ' Prefer: data-testid, id, name, type, role, or structural selectors (e.g. form input[type="search"])',
264
+ ' NEVER use aria-label with translated text (e.g. aria-label="Søk", aria-label="Suche", aria-label="Rechercher").',
265
+ ' If aria-label is the only option, use the English value only.',
266
+ ' WRONG: input[aria-label="Søk"] — only works in Norwegian',
267
+ ' RIGHT: input[name="search_query"], input#search, input[type="search"]',
238
268
  '- Tools must be GENERAL, not hardcoded to a specific instance or position. WRONG: "like-first-post" (hardcoded to first). RIGHT: "like-post" with a parameter that identifies which post (e.g. postIndex: number, or postText: string used in a :has-text selector). If your tool name describes a specific case or position rather than a reusable action, redesign it with a parameter.',
239
- '- Prefer small, single-action tools over multi-step workflows',
240
- '- For multi-step interactions, create one tool per action (click-compose, fill-text, click-submit) — the calling agent will chain them',
241
- '- Click tools use steps: [{ action: "click", selector: "..." }] — do NOT use autosubmit: true for standalone buttons',
242
- '- Fill tools need: selector + one field entry',
243
- '- Tool names must be kebab-case with a verb: "get-posts", "click-compose-button", "fill-tweet-text", "search-products"',
269
+ '- ONE ACTION PER TOOL. Each tool does exactly ONE thing. NEVER combine fill + submit in one tool.',
270
+ ' A fill tool ONLY fills a field (no autosubmit, no submitSelector, no steps with clicks).',
271
+ ' For submit/search: the agent calls browser_press_key({ key: "Enter" }) after the fill tool — no button selector needed.',
272
+ ' WRONG: "search-videos" with fields + autosubmit — combines fill and submit.',
273
+ ' WRONG: "click-search" — fragile, requires finding a submit button selector.',
274
+ ' RIGHT: "fill-search" (fields only) → agent uses browser_press_key({ key: "Enter" }) to submit.',
275
+ '- Do NOT create click-submit or click-search tools. Use browser_press_key instead.',
276
+ '- Fill tools need: selector + one field entry. No autosubmit, no submitSelector, no submitAction.',
277
+ '- Tool names must be kebab-case with a verb: "get-posts", "click-compose-button", "fill-search"',
244
278
  '- Read-only tools only need: selector, resultSelector, resultExtract. No autosubmit, no fields.',
245
279
  '- Use fields[] for form inputs — each field\'s name becomes a tool parameter automatically',
246
280
  '- resultExtract options: text, html, attribute, list, table',
@@ -452,6 +486,56 @@ const hubWriteTools = [
452
486
  const VALID_RESULT_EXTRACTS = new Set(['text', 'html', 'attribute', 'list', 'table']);
453
487
  const VALID_STEP_ACTIONS = new Set(['navigate', 'click', 'fill', 'select', 'wait', 'extract', 'scroll', 'condition', 'evaluate']);
454
488
 
489
+ /**
490
+ * Detect localized (non-ASCII) text inside aria-label selectors.
491
+ * Returns an array of { selector, match } objects for each violation found.
492
+ *
493
+ * Matches patterns like: aria-label="Søk", aria-label='Rechercher', aria-label="Suche"
494
+ * Flags any aria-label value containing non-ASCII characters (accented, CJK, Cyrillic, etc.)
495
+ */
496
+ // eslint-disable-next-line no-control-regex
497
+ const ARIA_LABEL_RE = /aria-label\s*=\s*["']([^"']+)["']/gi;
498
+ const NON_ASCII_RE = /[^\x00-\x7F]/;
499
+
500
+ function findLocalizedSelectors(args) {
501
+ const violations = [];
502
+
503
+ // Collect all selector strings from the flat args
504
+ const selectorSources = [];
505
+ if (args.selector) selectorSources.push({ path: 'selector', value: args.selector });
506
+ if (args.submitSelector) selectorSources.push({ path: 'submitSelector', value: args.submitSelector });
507
+ if (args.resultSelector) selectorSources.push({ path: 'resultSelector', value: args.resultSelector });
508
+ if (args.resultWaitSelector) selectorSources.push({ path: 'resultWaitSelector', value: args.resultWaitSelector });
509
+
510
+ if (Array.isArray(args.fields)) {
511
+ for (let i = 0; i < args.fields.length; i++) {
512
+ if (args.fields[i].selector) {
513
+ selectorSources.push({ path: `fields[${i}].selector`, value: args.fields[i].selector });
514
+ }
515
+ }
516
+ }
517
+
518
+ if (Array.isArray(args.steps)) {
519
+ for (let i = 0; i < args.steps.length; i++) {
520
+ if (args.steps[i].selector) {
521
+ selectorSources.push({ path: `steps[${i}].selector`, value: args.steps[i].selector });
522
+ }
523
+ }
524
+ }
525
+
526
+ for (const { path, value } of selectorSources) {
527
+ ARIA_LABEL_RE.lastIndex = 0;
528
+ let m;
529
+ while ((m = ARIA_LABEL_RE.exec(value)) !== null) {
530
+ if (NON_ASCII_RE.test(m[1])) {
531
+ violations.push({ path, selector: value, label: m[1] });
532
+ }
533
+ }
534
+ }
535
+
536
+ return violations;
537
+ }
538
+
455
539
  /**
456
540
  * Validate that each step has the fields required for its action type.
457
541
  * Returns an array of human-readable error strings with exact paths.
@@ -714,6 +798,18 @@ async function handleHubWriteTool(toolName, args) {
714
798
  };
715
799
  }
716
800
 
801
+ // Check for localized aria-label selectors
802
+ const localizedViolations = findLocalizedSelectors(args);
803
+ if (localizedViolations.length > 0) {
804
+ const details = localizedViolations.map(v =>
805
+ `- ${v.path}: aria-label="${v.label}" contains localized text`
806
+ ).join('\n');
807
+ return {
808
+ content: [{ type: 'text', text: `Error: Selectors contain localized aria-label text that won't work for users in other locales.\n\n${details}\n\nUse locale-independent selectors instead: data-testid, id, name, type, role, or structural selectors (e.g. input[type="search"], form input[name="q"]).\nIf aria-label is the only option, use the English value.` }],
809
+ isError: true,
810
+ };
811
+ }
812
+
717
813
  // Build inputSchema and execution from flat fields
718
814
  const inputSchema = buildInputSchema(args);
719
815
  const execution = buildExecution(args);
@@ -812,6 +908,18 @@ async function handleHubWriteTool(toolName, args) {
812
908
  };
813
909
  }
814
910
 
911
+ // Check for localized aria-label selectors
912
+ const localizedViolations = findLocalizedSelectors(args);
913
+ if (localizedViolations.length > 0) {
914
+ const details = localizedViolations.map(v =>
915
+ `- ${v.path}: aria-label="${v.label}" contains localized text`
916
+ ).join('\n');
917
+ return {
918
+ content: [{ type: 'text', text: `Error: Selectors contain localized aria-label text that won't work for users in other locales.\n\n${details}\n\nUse locale-independent selectors instead: data-testid, id, name, type, role, or structural selectors (e.g. input[type="search"], form input[name="q"]).\nIf aria-label is the only option, use the English value.` }],
919
+ isError: true,
920
+ };
921
+ }
922
+
815
923
  // Build new inputSchema and execution from flat fields
816
924
  const inputSchema = buildInputSchema(args);
817
925
  const execution = buildExecution(args);
@@ -88,9 +88,10 @@ async function startProxy(options) {
88
88
  '<workflow>',
89
89
  '1. Navigate with browser_navigate. Hub tools are discovered automatically.',
90
90
  '2. If hub tools are listed in the response, use hub_execute to run them. ALWAYS prefer hub tools.',
91
- '3. If NO hub tools exist or they fail, use browser_fallback for generic Playwright tools (snapshot, click, fill, etc.). Call browser_fallback without arguments to list available tools.',
92
- '4. NEVER call browser tools in parallel — always sequentially, one at a time.',
93
- '5. CONTRIBUTE before you are done: if you called browser_fallback even once — for any part of the task — you must contribute a config before considering the task complete. This is not optional. See <contribution> below.',
91
+ '3. After a hub fill tool, use browser_press_key({ key: "Enter" }) to submit. This is more reliable than clicking a submit button.',
92
+ '4. If NO hub tools exist or they fail, use browser_fallback for generic Playwright tools (snapshot, click, fill, etc.). Call browser_fallback without arguments to list available tools.',
93
+ '5. NEVER call browser tools in parallel — always sequentially, one at a time.',
94
+ '6. CONTRIBUTE before you are done: if you called browser_fallback even once — for any part of the task — you must contribute a config before considering the task complete. This is not optional. See <contribution> below.',
94
95
  '</workflow>',
95
96
  '',
96
97
  '<contribution>',
@@ -99,13 +100,32 @@ async function startProxy(options) {
99
100
  '',
100
101
  'Contribute ONLY when ALL of these are true:',
101
102
  '1. You called browser_fallback at least once (even for a single step)',
102
- '2. You used browser_snapshot and discovered real CSS selectors on the page',
103
- '3. You tested those selectors and they worked',
103
+ '2. You inspected the DOM to discover real CSS selectors (see <finding-selectors> below)',
104
+ '3. You verified each selector by testing it on the page',
104
105
  ' If you used a condition step, verify the selector in EACH branch separately with',
105
106
  ' browser_snapshot — never assume two contexts (e.g. dialog vs. inline) share the same test IDs.',
106
107
  '',
107
108
  'NEVER contribute if you have not explored the page. A config without real CSS selectors is useless.',
108
109
  '',
110
+ '<finding-selectors>',
111
+ 'browser_snapshot returns an accessibility tree with refs (e.g. "e12"), NOT CSS selectors.',
112
+ 'You MUST inspect the actual DOM to find real CSS selectors. Do NOT guess selectors from the snapshot.',
113
+ '',
114
+ 'To find a CSS selector for an element you interacted with:',
115
+ '1. Use browser_evaluate to inspect the element:',
116
+ ' browser_fallback({ tool: "browser_evaluate", arguments: {',
117
+ ' expression: "document.querySelector(\'input[name=search_query]\')?.tagName"',
118
+ ' }})',
119
+ '2. Or inspect multiple attributes at once:',
120
+ ' browser_fallback({ tool: "browser_evaluate", arguments: {',
121
+ ' expression: "JSON.stringify([...document.querySelectorAll(\'input\')].map(e => ({ tag: e.tagName, id: e.id, name: e.name, type: e.type, placeholder: e.placeholder })))"',
122
+ ' }})',
123
+ '3. Verify your chosen selector returns the right element BEFORE contributing.',
124
+ '',
125
+ 'NEVER fabricate selectors like "input#search" without verifying. On YouTube, #search is a <div>,',
126
+ 'not an <input>. The actual input is input[name="search_query"]. Always check the DOM.',
127
+ '</finding-selectors>',
128
+ '',
109
129
  'How to contribute:',
110
130
  '- No hub config exists yet → contribute_create-config(...) then contribute_add-tool(...) for each tool',
111
131
  '- Hub config already exists → contribute_add-tool(...) with the config ID shown in the navigation response. Do NOT create a new config.',
@@ -118,9 +138,22 @@ async function startProxy(options) {
118
138
  ' - "example.com" ONLY for truly site-wide tools (navigation, global search)',
119
139
  'contribute_add-tool({ configId, name, description, selector, ... }) → adds one tool',
120
140
  ' Always add read-only extraction tools first (get-posts, get-content, list-items).',
121
- ' Create small, single-action tools — NOT multi-step workflows.',
141
+ ' ONE ACTION PER TOOL. Each tool does exactly ONE thing:',
142
+ ' - A fill tool ONLY fills a field (no submit, no autosubmit)',
143
+ ' - A click tool ONLY clicks a button',
144
+ ' - For search/submit: create a fill tool, then the agent uses browser_press_key({ key: "Enter" }) to submit',
145
+ ' NEVER combine fill + submit in one tool. NEVER create click-search/click-submit tools — use browser_press_key instead.',
146
+ ' WRONG: "search-videos" that fills AND submits. WRONG: "click-search" (fragile button selector).',
147
+ ' RIGHT: "fill-search" (fill only) → agent calls browser_press_key({ key: "Enter" }) to submit.',
122
148
  ' Shadow DOM is fully supported — selectors targeting web components work transparently.',
123
149
  '',
150
+ ' SELECTOR RULES — configs are shared globally, selectors must work for ALL users:',
151
+ ' - Prefer: data-testid, id, name, type, role, or structural selectors (e.g. form input[type="search"])',
152
+ ' - NEVER use aria-label with localized/translated text (e.g. aria-label="Søk", aria-label="Suche")',
153
+ ' - If aria-label is the only option, use the English value only',
154
+ ' - WRONG: input[aria-label="Søk"] — this only works in Norwegian',
155
+ ' - RIGHT: input[name="search_query"], input#search, input[type="search"]',
156
+ '',
124
157
  'BEFORE SAYING YOU ARE DONE — run this checklist:',
125
158
  ' [ ] Did I call browser_fallback at any point? → If yes:',
126
159
  ' [ ] Did I contribute_create-config or identify the existing config ID?',
@@ -135,14 +168,29 @@ async function startProxy(options) {
135
168
  function getBrowserFallbackDefinition() {
136
169
  return {
137
170
  name: 'browser_fallback',
138
- description: [
139
- 'Access generic Playwright browser tools as a fallback when hub tools are insufficient.',
140
- 'Call without arguments to list all available tools.',
141
- 'Before calling an unfamiliar tool, use peek: true to inspect its full input schema first.',
142
- 'Common tools: browser_snapshot (see page accessibility tree), browser_click (click element by ref),',
143
- 'browser_fill_form (fill multiple fields), browser_type (type text),',
144
- 'browser_evaluate (run JS on page), browser_take_screenshot (capture page image).',
145
- ].join(' '),
171
+ description: `Access generic Playwright browser tools as a fallback when hub tools are insufficient.
172
+ Works in three modes:
173
+ - No arguments: lists all available Playwright tools
174
+ - peek: true: inspects a tool's full input schema before calling it
175
+ - tool + arguments: executes a Playwright tool (e.g. browser_click, browser_snapshot)
176
+ <important>
177
+ All element-targeting tools use "ref" values from browser_snapshot (e.g., "e12", "e37"), NOT CSS selectors.
178
+ Always take a browser_snapshot first to get element refs, then use those refs in tool calls.
179
+ If you get a validation error, the correct schema will be included in the error response.
180
+ </important>
181
+ <tool-schemas>
182
+ Common tools — use EXACTLY these argument shapes:
183
+
184
+ browser_click: { "ref": "e12" } — ref from snapshot, NOT a selector
185
+ browser_type: { "ref": "e12", "text": "hello" } — ref from snapshot + text to type
186
+ browser_press_key: { "key": "Enter" } — key name
187
+ browser_hover: { "ref": "e12" } — ref from snapshot
188
+ browser_select_option: { "ref": "e12", "values": ["opt1"] } — ref + values array
189
+ browser_fill_form: { "fields": [{"ref":"e12","value":"hi"},{"ref":"e15","value":"there"}] } — array of {ref, value} objects
190
+
191
+ WRONG: { "selector": "...", "text": "..." } — never use "selector", always use "ref"
192
+ WRONG: { "fields": {"search": "..."} } — fields is an ARRAY of {ref, value}, not an object
193
+ </tool-schemas>`,
146
194
  inputSchema: {
147
195
  type: 'object',
148
196
  properties: {
@@ -156,7 +204,7 @@ async function startProxy(options) {
156
204
  },
157
205
  arguments: {
158
206
  type: 'object',
159
- description: 'Arguments for the Playwright tool.',
207
+ description: 'Arguments for the Playwright tool. Use ref values from browser_snapshot for element targeting.',
160
208
  additionalProperties: true,
161
209
  },
162
210
  },
@@ -165,18 +213,25 @@ async function startProxy(options) {
165
213
  }
166
214
 
167
215
  // --- 5. Handle tools/list — minimal tool set ---
216
+ // Expose browser_navigate and browser_press_key directly from upstream.
217
+ // browser_press_key is first-class because it's essential for submitting
218
+ // after hub fill tools (e.g. fill-search → press Enter) without needing
219
+ // fragile CSS selectors for submit buttons.
220
+ const FIRST_CLASS_UPSTREAM = ['browser_navigate', 'browser_press_key'];
221
+
168
222
  proxyServer.setRequestHandler(ListToolsRequestSchema, async () => {
169
223
  const upstreamTools = await getUpstreamTools();
170
224
 
171
- // Only expose browser_navigate directly from upstream
172
- const navigate = upstreamTools.find(t => t.name === 'browser_navigate');
225
+ const firstClassTools = FIRST_CLASS_UPSTREAM
226
+ .map(name => upstreamTools.find(t => t.name === name))
227
+ .filter(Boolean);
173
228
 
174
229
  const hubExecute = noHub ? [] : [getHubExecuteToolDefinition()];
175
230
  const writeTools = noHub ? [] : getHubWriteToolDefinitions();
176
231
 
177
232
  return {
178
233
  tools: [
179
- ...(navigate ? [navigate] : []),
234
+ ...firstClassTools,
180
235
  ...hubExecute,
181
236
  getBrowserFallbackDefinition(),
182
237
  ...writeTools,
@@ -267,6 +322,22 @@ async function startProxy(options) {
267
322
  // Proxy to upstream
268
323
  const result = await upstreamClient.callTool({ name: innerTool, arguments: innerArgs });
269
324
 
325
+ // Auto-peek on validation error: if the upstream returned a schema validation error
326
+ // (invalid_type, unrecognized_keys, etc.), automatically append the correct schema
327
+ // so the agent can self-correct without an extra round-trip.
328
+ if (result.isError || result.content?.some(c => c.type === 'text' && c.text && (
329
+ c.text.includes('invalid_type') || c.text.includes('unrecognized_keys') || c.text.includes('invalid_union')
330
+ ))) {
331
+ const tools = await getUpstreamTools();
332
+ const match = tools.find(t => t.name === innerTool);
333
+ if (match) {
334
+ result.content.push({
335
+ type: 'text',
336
+ text: `\n<correct-schema>\nThe call to ${innerTool} failed due to invalid arguments. Here is the correct schema:\n\n${JSON.stringify(match.inputSchema, null, 2)}\n\nDescription: ${match.description || '(none)'}\n\nRetry with the correct argument format.\n</correct-schema>`,
337
+ });
338
+ }
339
+ }
340
+
270
341
  // After browser_snapshot, check whether the page URL has changed since our last hub lookup.
271
342
  // This catches SPA client-side redirects (e.g. x.com → x.com/home) that complete AFTER
272
343
  // page.goto() returns, so they are invisible to handleNavigate's redirect detection.