npm - moltbrowser-mcp-server - Versions diffs - 1.0.1 → 1.1.0 - Mend

moltbrowser-mcp-server 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/README.md +9 -6
package/hub-cli.js +1 -1
package/package.json +1 -1
package/src/execution-translator.js +128 -27
package/src/hub-tools.js +124 -16
package/src/proxy-server.js +89 -18

package/README.md CHANGED Viewed

@@ -27,7 +27,7 @@ MoltBrowser-MCP fixes that. When an agent lands on x.com it gets `hub_post-tweet
   "mcpServers": {
     "moltbrowser-mcp": {
       "command": "npx",
-      "args": ["moltbrowser-mcp"],
+      "args": ["moltbrowser-mcp-server"],
       "env": {
         "HUB_API_KEY": "whub_your_api_key"
       }
@@ -76,7 +76,8 @@ These tools are always available when hub integration is enabled:
 | `contribute_delete-tool` | Delete a tool from a hub config (requires `HUB_API_KEY`) |
 | `contribute_vote-on-tool` | Upvote or downvote a tool to signal quality (requires `HUB_API_KEY`) |
-### Configuration
+<details>
+<summary>Configuration</summary>
 All standard browser automation options are supported:
@@ -129,6 +130,8 @@ All standard browser automation options are supported:
 <!--- End of options generated section -->
+</details>
 <details>
 <summary><b>Advanced configuration</b></summary>
@@ -165,7 +168,7 @@ state [here](https://playwright.dev/docs/auth).
     "playwright": {
       "command": "npx",
       "args": [
-        "moltbrowser-mcp",
+        "moltbrowser-mcp-server",
         "--isolated",
         "--storage-state={path/to/storage.json}"
       ]
@@ -209,7 +212,7 @@ The server can be configured using a JSON configuration file. You can specify th
 using the `--config` command line option:
 ```bash
-npx moltbrowser-mcp --config path/to/config.json
+npx moltbrowser-mcp-server --config path/to/config.json
 ```
 <details>
@@ -439,7 +442,7 @@ When running headed browser on system w/o display or from worker processes of th
 run the MCP server from environment with the DISPLAY and pass the `--port` flag to enable HTTP transport.
 ```bash
-npx moltbrowser-mcp --port 8931
+npx moltbrowser-mcp-server --port 8931
 ```
 And then in MCP client config, set the `url` to the HTTP endpoint:
@@ -462,7 +465,7 @@ And then in MCP client config, set the `url` to the HTTP endpoint:
 ```js
 import http from 'http';
-import { createConnection } from 'moltbrowser-mcp';
+import { createConnection } from 'moltbrowser-mcp-server';
 import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
 http.createServer(async (req, res) => {

package/hub-cli.js CHANGED Viewed

@@ -6,7 +6,7 @@
  * with WebMCP Hub integration for dynamic, per-site tools.
  *
  * Usage:
- *   npx moltbrowser-mcp [options]
+ *   npx moltbrowser-mcp-server [options]
  *
  * Hub options:
  *   --hub-url=<url>        Override hub URL (default: https://webmcp-hub.com)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "moltbrowser-mcp-server",
-  "version": "1.0.1",
+  "version": "1.1.0",
   "description": "Playwright MCP with WebMCP Hub integration — dynamic, per-site tools for browser agents",
   "repository": {
     "type": "git",

package/src/execution-translator.js CHANGED Viewed

@@ -78,6 +78,65 @@ function isNativeFillType(type) {
   return !type || type === 'text' || type === 'textarea' || type === 'number' || type === 'date';
 }
+// --- Shadow DOM fallback generator ---
+// Short timeout for the Playwright try path — if the element is in Shadow DOM,
+// page.locator() won't find it. 3s is plenty for a non-Shadow element to appear;
+// the default 30s would waste time before the fallback kicks in.
+const SHADOW_TRY_TIMEOUT = 3000;
+/**
+ * Wrap a generated Playwright locator statement in a try/catch whose catch
+ * branch runs `fallbackBody` inside page.evaluate(), prefixed with
+ * DEEP_QUERY_FNS so the fallback can use deepQuery.
+ *
+ * The native Playwright call is tried first (trusted events, framework
+ * compatible); the deepQuery fallback is meant for elements the locator
+ * could not reach, such as ones inside Shadow DOM.
+ * NOTE(review): Playwright's CSS engine pierces open shadow roots by default,
+ * so the fallback presumably matters mostly for other cases — confirm.
+ *
+ * The generated catch has no binding and does not inspect the error: any
+ * failure of the Playwright call, not only a timeout, triggers the fallback.
+ *
+ * @param {string} playwrightLine - The `await page.locator(...)...` statement.
+ *   If it ends in .click/.press/.fill/.check/.uncheck/.selectOption(...), a
+ *   `{ timeout: SHADOW_TRY_TIMEOUT }` option is appended as the last argument;
+ *   otherwise the line is emitted unchanged.
+ * @param {string} fallbackBody - JS code to run inside page.evaluate() on failure
+ * @returns {string} try/catch code block
+ */
+function withShadowFallback(playwrightLine, fallbackBody) {
+  const timeoutArg = `{ timeout: ${SHADOW_TRY_TIMEOUT} }`;
+  // Rewrite only the action call that terminates the statement. The argument
+  // capture is greedy and anchored to the end of the line, so a ")" inside a
+  // quoted argument (e.g. selectOption("Other (specify)")) cannot truncate the
+  // match and corrupt the generated code.
+  const timedLine = playwrightLine.replace(
+    /\.(click|press|fill|check|uncheck|selectOption)\((.*)\)(;?\s*)$/s,
+    (_, method, args, tail) => {
+      const callArgs = args.trim() ? `${args}, ${timeoutArg}` : timeoutArg;
+      return `.${method}(${callArgs})${tail}`;
+    }
+  );
+  return [
+    `try {`,
+    `  ${timedLine}`,
+    `} catch {`,
+    `  await page.evaluate(() => { ${DEEP_QUERY_FNS} ${fallbackBody} });`,
+    `}`,
+  ].join('\n');
+}
+/**
+ * Generate a try/catch block that fills a text input.
+ *
+ * The try path uses Playwright's locator.fill() with the short
+ * SHADOW_TRY_TIMEOUT. The catch path (intended for Shadow DOM) focuses the
+ * element via deepQuery, clears its value, dispatches an `input` event, and
+ * then types with Playwright's keyboard API. Keyboard typing produces trusted
+ * InputEvents that framework-controlled inputs (React, Polymer/Lit web
+ * components) respond to, unlike setting .value directly which bypasses
+ * their event systems.
+ *
+ * The generated fallback only types when deepQuery actually found (and
+ * focused) the element. Otherwise the keystrokes would be sent to whatever
+ * element happens to hold focus, which can trigger page keyboard shortcuts
+ * or fill an unrelated field.
+ *
+ * @param {string} sel - CSS selector for the input element
+ * @param {string} value - Text to type
+ * @returns {string} try/catch code block
+ */
+function withShadowFillFallback(sel, value) {
+  return [
+    `try {`,
+    `  await page.locator(${quote(sel)}).fill(${quote(value)}, { timeout: ${SHADOW_TRY_TIMEOUT} });`,
+    `} catch {`,
+    // page.evaluate() reports whether the target was found so the typing step can be skipped.
+    `  const _found = await page.evaluate(() => { ${DEEP_QUERY_FNS} const _el = deepQuery(${qs(sel)}); if (!_el) return false; _el.focus(); _el.value = ''; _el.dispatchEvent(new Event('input', { bubbles: true })); return true; });`,
+    `  if (_found) await page.keyboard.type(${quote(value)});`,
+    `}`,
+  ].join('\n');
+}
 // --- Main entry point ---
 /**
@@ -139,26 +198,38 @@ function translateSimple(execution, args) {
         : null;
       const sel = lastField ? lastField.selector : execution.selector;
+      // Use Playwright's native .press('Enter') for trusted keyboard events.
+      // Falls back to deepQuery + dispatchEvent for Shadow DOM elements.
+      flushBatch();
       if (isPlaywrightSelector(sel)) {
-        flushBatch();
         phases.push(`await page.locator(${quote(sel)}).press('Enter');`);
       } else {
-        batch.push(
-          `{ const _el = deepQuery(${qs(sel)});`,
-          `  if (_el) {`,
-          `    _el.dispatchEvent(new KeyboardEvent('keydown', { key: 'Enter', code: 'Enter', bubbles: true }));`,
-          `    _el.dispatchEvent(new KeyboardEvent('keypress', { key: 'Enter', code: 'Enter', bubbles: true }));`,
-          `    _el.dispatchEvent(new KeyboardEvent('keyup', { key: 'Enter', code: 'Enter', bubbles: true }));`,
-          `    const _form = _el.closest('form');`,
-          `    if (_form) { _form.requestSubmit ? _form.requestSubmit() : _form.submit(); }`,
-          `  }`,
+        const enterFallback = [
+          `const _el = deepQuery(${qs(sel)});`,
+          `if (_el) {`,
+          `  _el.dispatchEvent(new KeyboardEvent('keydown', { key: 'Enter', code: 'Enter', bubbles: true }));`,
+          `  _el.dispatchEvent(new KeyboardEvent('keypress', { key: 'Enter', code: 'Enter', bubbles: true }));`,
+          `  _el.dispatchEvent(new KeyboardEvent('keyup', { key: 'Enter', code: 'Enter', bubbles: true }));`,
+          `  const _form = _el.closest('form');`,
+          `  if (_form) { _form.requestSubmit ? _form.requestSubmit() : _form.submit(); }`,
           `}`,
-        );
+        ].join(' ');
+        phases.push(withShadowFallback(
+          `await page.locator(${quote(sel)}).press('Enter');`,
+          enterFallback,
+        ));
       }
     } else {
       const submitSel = execution.submitSelector || `${execution.selector} [type="submit"], ${execution.selector} button`;
       flushBatch();
-      phases.push(`await page.locator(${quote(submitSel)}).first().click();`);
+      if (isPlaywrightSelector(submitSel)) {
+        phases.push(`await page.locator(${quote(submitSel)}).first().click();`);
+      } else {
+        phases.push(withShadowFallback(
+          `await page.locator(${quote(submitSel)}).first().click();`,
+          `const _el = deepQuery(${qs(submitSel)}); if (_el) _el.click();`,
+        ));
+      }
     }
   }
@@ -204,14 +275,25 @@ function translateSteps(execution, args, opts = {}) {
       case 'click':
         if (selector) {
           flushBatch();
-          phases.push(`await page.locator(${quote(selector)}).first().click();`);
+          if (isPlaywrightSelector(selector)) {
+            phases.push(`await page.locator(${quote(selector)}).first().click();`);
+          } else {
+            phases.push(withShadowFallback(
+              `await page.locator(${quote(selector)}).first().click();`,
+              `const _el = deepQuery(${qs(selector)}); if (_el) _el.click();`,
+            ));
+          }
         }
         break;
       case 'fill':
         if (selector && value !== null) {
           flushBatch();
-          phases.push(`await page.locator(${quote(selector)}).first().fill(${quote(value)});`);
+          if (isPlaywrightSelector(selector)) {
+            phases.push(`await page.locator(${quote(selector)}).first().fill(${quote(value)});`);
+          } else {
+            phases.push(withShadowFillFallback(selector, value));
+          }
         }
         break;
@@ -382,21 +464,34 @@ function domFieldAction(field, value) {
 }
 /**
- * Generate Playwright API lines for filling a field with Playwright-specific selectors.
- * Returns an array of code lines (each is a standalone statement).
+ * Generate Playwright API lines for filling a field.
+ * For Playwright-specific selectors, uses direct locator calls.
+ * For plain CSS selectors, wraps in try/catch with deepQuery fallback
+ * to handle elements inside Shadow DOM.
  */
 function playwrightFieldAction(field, value) {
   const sel = field.selector;
+  const pw = isPlaywrightSelector(sel);
   switch (field.type) {
-    case 'select':
-      return [`await page.locator(${quote(sel)}).selectOption(${quote(String(value))});`];
+    case 'select': {
+      const line = `await page.locator(${quote(sel)}).selectOption(${quote(String(value))});`;
+      if (pw) return [line];
+      return [withShadowFallback(line,
+        `const _el = deepQuery(${qs(sel)}); if (_el) { _el.value = ${qs(String(value))}; _el.dispatchEvent(new Event('change', { bubbles: true })); }`
+      )];
+    }
-    case 'checkbox':
-      if (value === true || value === 'true' || value === 'on') {
-        return [`await page.locator(${quote(sel)}).check();`];
-      }
-      return [`await page.locator(${quote(sel)}).uncheck();`];
+    case 'checkbox': {
+      const checked = value === true || value === 'true' || value === 'on';
+      const line = checked
+        ? `await page.locator(${quote(sel)}).check();`
+        : `await page.locator(${quote(sel)}).uncheck();`;
+      if (pw) return [line];
+      return [withShadowFallback(line,
+        `const _el = deepQuery(${qs(sel)}); if (_el) { _el.checked = ${checked}; _el.dispatchEvent(new Event('change', { bubbles: true })); }`
+      )];
+    }
     case 'radio': {
       let radioSel = sel + `[value="${value}"]`;
@@ -404,11 +499,17 @@ function playwrightFieldAction(field, value) {
         const option = field.options.find(o => o.value === String(value));
         if (option && option.selector) radioSel = option.selector;
       }
-      return [`await page.locator(${quote(radioSel)}).click();`];
+      const line = `await page.locator(${quote(radioSel)}).click();`;
+      if (pw || isPlaywrightSelector(radioSel)) return [line];
+      return [withShadowFallback(line,
+        `const _el = deepQuery(${qs(radioSel)}); if (_el) { _el.checked = true; _el.dispatchEvent(new Event('change', { bubbles: true })); }`
+      )];
     }
-    default: // text, number, textarea, date, hidden
-      return [`await page.locator(${quote(sel)}).fill(${quote(String(value))});`];
+    default: { // text, number, textarea, date, hidden
+      if (pw) return [`await page.locator(${quote(sel)}).fill(${quote(String(value))});`];
+      return [withShadowFillFallback(sel, String(value))];
+    }
   }
 }
@@ -445,7 +546,7 @@ function addResultWait(phases, execution) {
  */
 function addExtraction(phases, selector, extractMode, attribute) {
   if (!selector) {
-    phases.push(`return '[action ran — no result selector configured]';`);
+    phases.push(`return '[action completed successfully]';`);
     return;
   }
   addStepExtraction(phases, selector, extractMode, attribute);

package/src/hub-tools.js CHANGED Viewed

@@ -102,6 +102,8 @@ async function executeHubTool(upstreamClient, hubEntry, args) {
     };
   }
+  const toolDisplayName = hubEntry.tool.name.replace(/^hub_/, '');
   try {
     const result = await upstreamClient.callTool({
       name: 'browser_run_code',
@@ -113,18 +115,44 @@ async function executeHubTool(upstreamClient, hubEntry, args) {
       return {
         content: [{
           type: 'text',
-          text: `Hub tool "${hubEntry.tool.name}" failed:\n${errorText}\n\nUse browser_fallback to access generic Playwright tools.`,
+          text: `Hub tool "${toolDisplayName}" failed:\n${errorText}\n\nUse browser_fallback to access generic Playwright tools.`,
+        }, {
+          type: 'text',
+          text: `\n<vote-hint>This hub tool failed. Downvote it so other agents know it's broken:\ncontribute_vote-on-tool({ configId: "${hubEntry.configId}", toolName: "${toolDisplayName}", vote: -1 })</vote-hint>`,
         }],
         isError: true,
       };
     }
+    // Check if the result indicates a genuine failure.
+    // "[action completed successfully]" is the normal output for action-only tools (no resultSelector) — that's fine.
+    // "[resultSelector matched no elements" means extraction was configured but found nothing — that's a real problem.
+    const resultText = result.content?.map(c => c.text || '').join('\n') || '';
+    const looksLikeBroken = resultText.includes('[resultSelector matched no elements');
+    if (result.content) {
+      if (looksLikeBroken) {
+        result.content.push({
+          type: 'text',
+          text: `\n<vote-hint>This hub tool ran but its result selector matched nothing — the selectors may be broken. Downvote it:\ncontribute_vote-on-tool({ configId: "${hubEntry.configId}", toolName: "${toolDisplayName}", vote: -1 })</vote-hint>`,
+        });
+      } else {
+        result.content.push({
+          type: 'text',
+          text: `\n<vote-hint>This hub tool worked. Upvote it so other agents prefer it:\ncontribute_vote-on-tool({ configId: "${hubEntry.configId}", toolName: "${toolDisplayName}", vote: 1 })</vote-hint>`,
+        });
+      }
+    }
     return result;
   } catch (err) {
     return {
       content: [{
         type: 'text',
-        text: `Hub tool "${hubEntry.tool.name}" failed: ${err.message}\n\nUse browser_fallback to access generic Playwright tools.`,
+        text: `Hub tool "${toolDisplayName}" failed: ${err.message}\n\nUse browser_fallback to access generic Playwright tools.`,
+      }, {
+        type: 'text',
+        text: `\n<vote-hint>This hub tool failed. Downvote it so other agents know it's broken:\ncontribute_vote-on-tool({ configId: "${hubEntry.configId}", toolName: "${toolDisplayName}", vote: -1 })</vote-hint>`,
       }],
       isError: true,
     };
@@ -220,27 +248,33 @@ const hubWriteTools = [
       '  steps: [{ action: "click", selector: "[data-testid=tweetButtonInline]" }]',
       '})',
       '',
-      'EXAMPLE — search form (fill + submit is still atomic enough):',
+      'EXAMPLE — fill a search field (submit is handled by browser_press_key, not this tool):',
       'contribute_add-tool({',
       '  configId: "abc123",',
-      '  name: "search-products",',
-      '  description: "Search products by keyword",',
+      '  name: "fill-search",',
+      '  description: "Fill the search input field with a query. After calling this, use browser_press_key({ key: \'Enter\' }) to submit.",',
       '  selector: "#searchForm",',
-      '  autosubmit: true,',
-      '  submitSelector: "#searchBtn",',
-      '  submitAction: "click",',
-      '  fields: [{ type: "text", selector: "#searchInput", name: "query", description: "Search term" }],',
-      '  resultSelector: ".results li",',
-      '  resultExtract: "list"',
+      '  fields: [{ type: "text", selector: "#searchInput", name: "query", description: "Search term" }]',
       '})',
+      '→ Then the agent calls browser_press_key({ key: "Enter" }) to submit — no CSS selector needed for the button.',
       '',
       'KEY RULES:',
+      '- SELECTORS MUST BE LOCALE-INDEPENDENT. Configs are shared globally — selectors with localized text break for other users.',
+      '  Prefer: data-testid, id, name, type, role, or structural selectors (e.g. form input[type="search"])',
+      '  NEVER use aria-label with translated text (e.g. aria-label="Søk", aria-label="Suche", aria-label="Rechercher").',
+      '  If aria-label is the only option, use the English value only.',
+      '  WRONG: input[aria-label="Søk"]  — only works in Norwegian',
+      '  RIGHT: input[name="search_query"], input#search, input[type="search"]',
       '- Tools must be GENERAL, not hardcoded to a specific instance or position. WRONG: "like-first-post" (hardcoded to first). RIGHT: "like-post" with a parameter that identifies which post (e.g. postIndex: number, or postText: string used in a :has-text selector). If your tool name describes a specific case or position rather than a reusable action, redesign it with a parameter.',
-      '- Prefer small, single-action tools over multi-step workflows',
-      '- For multi-step interactions, create one tool per action (click-compose, fill-text, click-submit) — the calling agent will chain them',
-      '- Click tools use steps: [{ action: "click", selector: "..." }] — do NOT use autosubmit: true for standalone buttons',
-      '- Fill tools need: selector + one field entry',
-      '- Tool names must be kebab-case with a verb: "get-posts", "click-compose-button", "fill-tweet-text", "search-products"',
+      '- ONE ACTION PER TOOL. Each tool does exactly ONE thing. NEVER combine fill + submit in one tool.',
+      '  A fill tool ONLY fills a field (no autosubmit, no submitSelector, no steps with clicks).',
+      '  For submit/search: the agent calls browser_press_key({ key: "Enter" }) after the fill tool — no button selector needed.',
+      '  WRONG: "search-videos" with fields + autosubmit — combines fill and submit.',
+      '  WRONG: "click-search" — fragile, requires finding a submit button selector.',
+      '  RIGHT: "fill-search" (fields only) → agent uses browser_press_key({ key: "Enter" }) to submit.',
+      '- Do NOT create click-submit or click-search tools. Use browser_press_key instead.',
+      '- Fill tools need: selector + one field entry. No autosubmit, no submitSelector, no submitAction.',
+      '- Tool names must be kebab-case with a verb: "get-posts", "click-compose-button", "fill-search"',
       '- Read-only tools only need: selector, resultSelector, resultExtract. No autosubmit, no fields.',
       '- Use fields[] for form inputs — each field\'s name becomes a tool parameter automatically',
       '- resultExtract options: text, html, attribute, list, table',
@@ -452,6 +486,56 @@ const hubWriteTools = [
 const VALID_RESULT_EXTRACTS = new Set(['text', 'html', 'attribute', 'list', 'table']);
 const VALID_STEP_ACTIONS = new Set(['navigate', 'click', 'fill', 'select', 'wait', 'extract', 'scroll', 'condition', 'evaluate']);
+// Matches an aria-label attribute selector and captures its value. Covers:
+//   - every CSS attribute operator (=, ~=, |=, ^=, $=, *=)
+//   - double-quoted values (group 1), single-quoted values (group 2) and
+//     unquoted values (group 3)
+// Quoted values end only at the matching quote, so an apostrophe inside a
+// double-quoted label (aria-label="L'été") does not cut the value short.
+const ARIA_LABEL_RE = /aria-label\s*[~|^$*]?=\s*(?:"([^"]*)"|'([^']*)'|([^\]\s"']+))/gi;
+// Any character outside the 7-bit ASCII range (accented, CJK, Cyrillic, etc.).
+// eslint-disable-next-line no-control-regex
+const NON_ASCII_RE = /[^\x00-\x7F]/;
+/**
+ * Detect localized (non-ASCII) text inside aria-label selectors.
+ *
+ * Scans `selector`, `submitSelector`, `resultSelector`, `resultWaitSelector`,
+ * and the `selector` of every entry in `fields[]` and `steps[]`. Entries that
+ * are null/undefined or have no selector are skipped.
+ *
+ * Matches patterns like: aria-label="Søk", aria-label='Rechercher',
+ * aria-label*="Suche", aria-label=Søk.
+ *
+ * @param {object} args - Flat tool arguments as passed to the write tools
+ * @returns {Array<{path: string, selector: *, label: string}>} one entry per
+ *   offending aria-label: `path` is where the selector was found (e.g.
+ *   "fields[0].selector"), `selector` is the original selector value, and
+ *   `label` is the aria-label value containing non-ASCII characters
+ */
+function findLocalizedSelectors(args) {
+  // Collect all selector strings from the flat args
+  const selectorSources = [];
+  for (const key of ['selector', 'submitSelector', 'resultSelector', 'resultWaitSelector']) {
+    if (args[key]) selectorSources.push({ path: key, value: args[key] });
+  }
+  for (const listKey of ['fields', 'steps']) {
+    const list = args[listKey];
+    if (!Array.isArray(list)) continue;
+    list.forEach((entry, i) => {
+      // Optional chaining: malformed (null) entries must not crash validation.
+      if (entry?.selector) {
+        selectorSources.push({ path: `${listKey}[${i}].selector`, value: entry.selector });
+      }
+    });
+  }
+  const violations = [];
+  for (const { path, value } of selectorSources) {
+    // matchAll works on a private copy of the regex, so the shared /g regex
+    // carries no lastIndex state between calls.
+    for (const m of String(value).matchAll(ARIA_LABEL_RE)) {
+      const label = m[1] ?? m[2] ?? m[3];
+      if (NON_ASCII_RE.test(label)) {
+        violations.push({ path, selector: value, label });
+      }
+    }
+  }
+  return violations;
+}
 /**
  * Validate that each step has the fields required for its action type.
  * Returns an array of human-readable error strings with exact paths.
@@ -714,6 +798,18 @@ async function handleHubWriteTool(toolName, args) {
         };
       }
+      // Check for localized aria-label selectors
+      const localizedViolations = findLocalizedSelectors(args);
+      if (localizedViolations.length > 0) {
+        const details = localizedViolations.map(v =>
+          `- ${v.path}: aria-label="${v.label}" contains localized text`
+        ).join('\n');
+        return {
+          content: [{ type: 'text', text: `Error: Selectors contain localized aria-label text that won't work for users in other locales.\n\n${details}\n\nUse locale-independent selectors instead: data-testid, id, name, type, role, or structural selectors (e.g. input[type="search"], form input[name="q"]).\nIf aria-label is the only option, use the English value.` }],
+          isError: true,
+        };
+      }
       // Build inputSchema and execution from flat fields
       const inputSchema = buildInputSchema(args);
       const execution = buildExecution(args);
@@ -812,6 +908,18 @@ async function handleHubWriteTool(toolName, args) {
         };
       }
+      // Check for localized aria-label selectors
+      const localizedViolations = findLocalizedSelectors(args);
+      if (localizedViolations.length > 0) {
+        const details = localizedViolations.map(v =>
+          `- ${v.path}: aria-label="${v.label}" contains localized text`
+        ).join('\n');
+        return {
+          content: [{ type: 'text', text: `Error: Selectors contain localized aria-label text that won't work for users in other locales.\n\n${details}\n\nUse locale-independent selectors instead: data-testid, id, name, type, role, or structural selectors (e.g. input[type="search"], form input[name="q"]).\nIf aria-label is the only option, use the English value.` }],
+          isError: true,
+        };
+      }
       // Build new inputSchema and execution from flat fields
       const inputSchema = buildInputSchema(args);
       const execution = buildExecution(args);

package/src/proxy-server.js CHANGED Viewed

@@ -88,9 +88,10 @@ async function startProxy(options) {
         '<workflow>',
         '1. Navigate with browser_navigate. Hub tools are discovered automatically.',
         '2. If hub tools are listed in the response, use hub_execute to run them. ALWAYS prefer hub tools.',
-        '3. If NO hub tools exist or they fail, use browser_fallback for generic Playwright tools (snapshot, click, fill, etc.). Call browser_fallback without arguments to list available tools.',
-        '4. NEVER call browser tools in parallel — always sequentially, one at a time.',
-        '5. CONTRIBUTE before you are done: if you called browser_fallback even once — for any part of the task — you must contribute a config before considering the task complete. This is not optional. See <contribution> below.',
+        '3. After a hub fill tool, use browser_press_key({ key: "Enter" }) to submit. This is more reliable than clicking a submit button.',
+        '4. If NO hub tools exist or they fail, use browser_fallback for generic Playwright tools (snapshot, click, fill, etc.). Call browser_fallback without arguments to list available tools.',
+        '5. NEVER call browser tools in parallel — always sequentially, one at a time.',
+        '6. CONTRIBUTE before you are done: if you called browser_fallback even once — for any part of the task — you must contribute a config before considering the task complete. This is not optional. See <contribution> below.',
         '</workflow>',
         '',
         '<contribution>',
@@ -99,13 +100,32 @@ async function startProxy(options) {
         '',
         'Contribute ONLY when ALL of these are true:',
         '1. You called browser_fallback at least once (even for a single step)',
-        '2. You used browser_snapshot and discovered real CSS selectors on the page',
-        '3. You tested those selectors and they worked',
+        '2. You inspected the DOM to discover real CSS selectors (see <finding-selectors> below)',
+        '3. You verified each selector by testing it on the page',
         '   If you used a condition step, verify the selector in EACH branch separately with',
         '   browser_snapshot — never assume two contexts (e.g. dialog vs. inline) share the same test IDs.',
         '',
         'NEVER contribute if you have not explored the page. A config without real CSS selectors is useless.',
         '',
+        '<finding-selectors>',
+        'browser_snapshot returns an accessibility tree with refs (e.g. "e12"), NOT CSS selectors.',
+        'You MUST inspect the actual DOM to find real CSS selectors. Do NOT guess selectors from the snapshot.',
+        '',
+        'To find a CSS selector for an element you interacted with:',
+        '1. Use browser_evaluate to inspect the element:',
+        '   browser_fallback({ tool: "browser_evaluate", arguments: {',
+        '     expression: "document.querySelector(\'input[name=search_query]\')?.tagName"',
+        '   }})',
+        '2. Or inspect multiple attributes at once:',
+        '   browser_fallback({ tool: "browser_evaluate", arguments: {',
+        '     expression: "JSON.stringify([...document.querySelectorAll(\'input\')].map(e => ({ tag: e.tagName, id: e.id, name: e.name, type: e.type, placeholder: e.placeholder })))"',
+        '   }})',
+        '3. Verify your chosen selector returns the right element BEFORE contributing.',
+        '',
+        'NEVER fabricate selectors like "input#search" without verifying. On YouTube, #search is a <div>,',
+        'not an <input>. The actual input is input[name="search_query"]. Always check the DOM.',
+        '</finding-selectors>',
+        '',
         'How to contribute:',
         '- No hub config exists yet → contribute_create-config(...) then contribute_add-tool(...) for each tool',
         '- Hub config already exists → contribute_add-tool(...) with the config ID shown in the navigation response. Do NOT create a new config.',
@@ -118,9 +138,22 @@ async function startProxy(options) {
         '   - "example.com" ONLY for truly site-wide tools (navigation, global search)',
         'contribute_add-tool({ configId, name, description, selector, ... }) → adds one tool',
         '   Always add read-only extraction tools first (get-posts, get-content, list-items).',
-        '   Create small, single-action tools — NOT multi-step workflows.',
+        '   ONE ACTION PER TOOL. Each tool does exactly ONE thing:',
+        '     - A fill tool ONLY fills a field (no submit, no autosubmit)',
+        '     - A click tool ONLY clicks a button',
+        '     - For search/submit: create a fill tool, then the agent uses browser_press_key({ key: "Enter" }) to submit',
+        '   NEVER combine fill + submit in one tool. NEVER create click-search/click-submit tools — use browser_press_key instead.',
+        '   WRONG: "search-videos" that fills AND submits. WRONG: "click-search" (fragile button selector).',
+        '   RIGHT: "fill-search" (fill only) → agent calls browser_press_key({ key: "Enter" }) to submit.',
         '   Shadow DOM is fully supported — selectors targeting web components work transparently.',
         '',
+        '   SELECTOR RULES — configs are shared globally, selectors must work for ALL users:',
+        '   - Prefer: data-testid, id, name, type, role, or structural selectors (e.g. form input[type="search"])',
+        '   - NEVER use aria-label with localized/translated text (e.g. aria-label="Søk", aria-label="Suche")',
+        '   - If aria-label is the only option, use the English value only',
+        '   - WRONG: input[aria-label="Søk"] — this only works in Norwegian',
+        '   - RIGHT: input[name="search_query"], input#search, input[type="search"]',
+        '',
         'BEFORE SAYING YOU ARE DONE — run this checklist:',
         '  [ ] Did I call browser_fallback at any point? → If yes:',
         '  [ ] Did I contribute_create-config or identify the existing config ID?',
@@ -135,14 +168,29 @@ async function startProxy(options) {
   function getBrowserFallbackDefinition() {
     return {
       name: 'browser_fallback',
-      description: [
-        'Access generic Playwright browser tools as a fallback when hub tools are insufficient.',
-        'Call without arguments to list all available tools.',
-        'Before calling an unfamiliar tool, use peek: true to inspect its full input schema first.',
-        'Common tools: browser_snapshot (see page accessibility tree), browser_click (click element by ref),',
-        'browser_fill_form (fill multiple fields), browser_type (type text),',
-        'browser_evaluate (run JS on page), browser_take_screenshot (capture page image).',
-      ].join(' '),
+      description: `Access generic Playwright browser tools as a fallback when hub tools are insufficient.
+Works in three modes:
+- No arguments: lists all available Playwright tools
+- peek: true: inspects a tool's full input schema before calling it
+- tool + arguments: executes a Playwright tool (e.g. browser_click, browser_snapshot)
+<important>
+All element-targeting tools use "ref" values from browser_snapshot (e.g., "e12", "e37"), NOT CSS selectors.
+Always take a browser_snapshot first to get element refs, then use those refs in tool calls.
+If you get a validation error, the correct schema will be included in the error response.
+</important>
+<tool-schemas>
+Common tools — use EXACTLY these argument shapes:
+browser_click:         { "ref": "e12" }                         — ref from snapshot, NOT a selector
+browser_type:          { "ref": "e12", "text": "hello" }        — ref from snapshot + text to type
+browser_press_key:     { "key": "Enter" }                       — key name
+browser_hover:         { "ref": "e12" }                         — ref from snapshot
+browser_select_option: { "ref": "e12", "values": ["opt1"] }     — ref + values array
+browser_fill_form:     { "fields": [{"ref":"e12","value":"hi"},{"ref":"e15","value":"there"}] }  — array of {ref, value} objects
+WRONG: { "selector": "...", "text": "..." }   — never use "selector", always use "ref"
+WRONG: { "fields": {"search": "..."} }        — fields is an ARRAY of {ref, value}, not an object
+</tool-schemas>`,
       inputSchema: {
         type: 'object',
         properties: {
@@ -156,7 +204,7 @@ async function startProxy(options) {
           },
           arguments: {
             type: 'object',
-            description: 'Arguments for the Playwright tool.',
+            description: 'Arguments for the Playwright tool. Use ref values from browser_snapshot for element targeting.',
             additionalProperties: true,
           },
         },
@@ -165,18 +213,25 @@ async function startProxy(options) {
   }
   // --- 5. Handle tools/list — minimal tool set ---
+  // Expose browser_navigate and browser_press_key directly from upstream.
+  // browser_press_key is first-class because it's essential for submitting
+  // after hub fill tools (e.g. fill-search → press Enter) without needing
+  // fragile CSS selectors for submit buttons.
+  const FIRST_CLASS_UPSTREAM = ['browser_navigate', 'browser_press_key'];
   proxyServer.setRequestHandler(ListToolsRequestSchema, async () => {
     const upstreamTools = await getUpstreamTools();
-    // Only expose browser_navigate directly from upstream
-    const navigate = upstreamTools.find(t => t.name === 'browser_navigate');
+    const firstClassTools = FIRST_CLASS_UPSTREAM
+      .map(name => upstreamTools.find(t => t.name === name))
+      .filter(Boolean);
     const hubExecute = noHub ? [] : [getHubExecuteToolDefinition()];
     const writeTools = noHub ? [] : getHubWriteToolDefinitions();
     return {
       tools: [
-        ...(navigate ? [navigate] : []),
+        ...firstClassTools,
         ...hubExecute,
         getBrowserFallbackDefinition(),
         ...writeTools,
@@ -267,6 +322,22 @@ async function startProxy(options) {
     // Proxy to upstream
     const result = await upstreamClient.callTool({ name: innerTool, arguments: innerArgs });
+    // Auto-peek on validation error: if the upstream returned a schema validation error
+    // (invalid_type, unrecognized_keys, etc.), automatically append the correct schema
+    // so the agent can self-correct without an extra round-trip.
+    if (result.isError || result.content?.some(c => c.type === 'text' && c.text && (
+      c.text.includes('invalid_type') || c.text.includes('unrecognized_keys') || c.text.includes('invalid_union')
+    ))) {
+      const tools = await getUpstreamTools();
+      const match = tools.find(t => t.name === innerTool);
+      if (match) {
+        result.content.push({
+          type: 'text',
+          text: `\n<correct-schema>\nThe call to ${innerTool} failed due to invalid arguments. Here is the correct schema:\n\n${JSON.stringify(match.inputSchema, null, 2)}\n\nDescription: ${match.description || '(none)'}\n\nRetry with the correct argument format.\n</correct-schema>`,
+        });
+      }
+    }
     // After browser_snapshot, check whether the page URL has changed since our last hub lookup.
     // This catches SPA client-side redirects (e.g. x.com → x.com/home) that complete AFTER
     // page.goto() returns, so they are invisible to handleNavigate's redirect detection.