moltbrowser-mcp-server 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -6
- package/hub-cli.js +1 -1
- package/package.json +1 -1
- package/src/execution-translator.js +128 -27
- package/src/hub-tools.js +124 -16
- package/src/proxy-server.js +89 -18
package/README.md
CHANGED
|
@@ -27,7 +27,7 @@ MoltBrowser-MCP fixes that. When an agent lands on x.com it gets `hub_post-tweet
|
|
|
27
27
|
"mcpServers": {
|
|
28
28
|
"moltbrowser-mcp": {
|
|
29
29
|
"command": "npx",
|
|
30
|
-
"args": ["moltbrowser-mcp"],
|
|
30
|
+
"args": ["moltbrowser-mcp-server"],
|
|
31
31
|
"env": {
|
|
32
32
|
"HUB_API_KEY": "whub_your_api_key"
|
|
33
33
|
}
|
|
@@ -76,7 +76,8 @@ These tools are always available when hub integration is enabled:
|
|
|
76
76
|
| `contribute_delete-tool` | Delete a tool from a hub config (requires `HUB_API_KEY`) |
|
|
77
77
|
| `contribute_vote-on-tool` | Upvote or downvote a tool to signal quality (requires `HUB_API_KEY`) |
|
|
78
78
|
|
|
79
|
-
|
|
79
|
+
<details>
|
|
80
|
+
<summary>Configuration</summary>
|
|
80
81
|
|
|
81
82
|
All standard browser automation options are supported:
|
|
82
83
|
|
|
@@ -129,6 +130,8 @@ All standard browser automation options are supported:
|
|
|
129
130
|
|
|
130
131
|
<!--- End of options generated section -->
|
|
131
132
|
|
|
133
|
+
</details>
|
|
134
|
+
|
|
132
135
|
<details>
|
|
133
136
|
<summary><b>Advanced configuration</b></summary>
|
|
134
137
|
|
|
@@ -165,7 +168,7 @@ state [here](https://playwright.dev/docs/auth).
|
|
|
165
168
|
"playwright": {
|
|
166
169
|
"command": "npx",
|
|
167
170
|
"args": [
|
|
168
|
-
"moltbrowser-mcp",
|
|
171
|
+
"moltbrowser-mcp-server",
|
|
169
172
|
"--isolated",
|
|
170
173
|
"--storage-state={path/to/storage.json}"
|
|
171
174
|
]
|
|
@@ -209,7 +212,7 @@ The server can be configured using a JSON configuration file. You can specify th
|
|
|
209
212
|
using the `--config` command line option:
|
|
210
213
|
|
|
211
214
|
```bash
|
|
212
|
-
npx moltbrowser-mcp --config path/to/config.json
|
|
215
|
+
npx moltbrowser-mcp-server --config path/to/config.json
|
|
213
216
|
```
|
|
214
217
|
|
|
215
218
|
<details>
|
|
@@ -439,7 +442,7 @@ When running headed browser on system w/o display or from worker processes of th
|
|
|
439
442
|
run the MCP server from an environment that has DISPLAY set and pass the `--port` flag to enable HTTP transport.
|
|
440
443
|
|
|
441
444
|
```bash
|
|
442
|
-
npx moltbrowser-mcp --port 8931
|
|
445
|
+
npx moltbrowser-mcp-server --port 8931
|
|
443
446
|
```
|
|
444
447
|
|
|
445
448
|
And then in MCP client config, set the `url` to the HTTP endpoint:
|
|
@@ -462,7 +465,7 @@ And then in MCP client config, set the `url` to the HTTP endpoint:
|
|
|
462
465
|
```js
|
|
463
466
|
import http from 'http';
|
|
464
467
|
|
|
465
|
-
import { createConnection } from 'moltbrowser-mcp';
|
|
468
|
+
import { createConnection } from 'moltbrowser-mcp-server';
|
|
466
469
|
import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
|
|
467
470
|
|
|
468
471
|
http.createServer(async (req, res) => {
|
package/hub-cli.js
CHANGED
package/package.json
CHANGED
|
@@ -78,6 +78,65 @@ function isNativeFillType(type) {
|
|
|
78
78
|
return !type || type === 'text' || type === 'textarea' || type === 'number' || type === 'date';
|
|
79
79
|
}
|
|
80
80
|
|
|
81
|
+
// --- Shadow DOM fallback generator ---
|
|
82
|
+
|
|
83
|
+
// Short timeout for the Playwright try path, so the fallback kicks in quickly
// when the locator cannot reach the element. 3s is plenty for a reachable
// element to appear; the default 30s would waste time before the fallback runs.
const SHADOW_TRY_TIMEOUT = 3000;

// Locator methods that receive an injected `{ timeout }` options argument.
const TIMED_LOCATOR_METHODS = ['click', 'press', 'fill', 'check', 'uncheck', 'selectOption'];

/**
 * Locate the first timed locator call (`.click(...)`, `.fill(...)`, ...) in a
 * line of generated code, ignoring anything inside string literals and
 * honouring nested parentheses in the argument list.
 *
 * @param {string} line - Generated Playwright code
 * @returns {{ argsStart: number, argsEnd: number } | null} Index range of the
 *   argument text (exclusive of the parentheses), or null when no complete
 *   call is found.
 */
function findTimedCall(line) {
  let quoteChar = null;
  let argsStart = -1;
  let depth = 0;

  for (let i = 0; i < line.length; i++) {
    const ch = line[i];

    if (quoteChar) {
      // Inside a string literal: skip escaped characters, watch for the closer.
      if (ch === '\\') i++;
      else if (ch === quoteChar) quoteChar = null;
      continue;
    }
    if (ch === '"' || ch === "'" || ch === '`') {
      quoteChar = ch;
      continue;
    }

    if (argsStart === -1) {
      if (ch !== '.') continue;
      const method = TIMED_LOCATOR_METHODS.find(name => line.startsWith(`${name}(`, i + 1));
      if (method) {
        argsStart = i + 1 + method.length + 1;
        depth = 1;
        i = argsStart - 1; // resume scanning at the first argument character
      }
      continue;
    }

    if (ch === '(') {
      depth++;
    } else if (ch === ')') {
      depth--;
      if (depth === 0) return { argsStart, argsEnd: i };
    }
  }
  return null;
}

/**
 * Wrap a Playwright locator call in a try/catch that falls back to
 * page.evaluate() with deepQuery when the locator call fails (for example
 * because the element lives inside Shadow DOM the locator did not reach).
 * The native Playwright call is tried first (trusted events, framework
 * compatible); deepQuery is the shadow-piercing fallback.
 *
 * `{ timeout: SHADOW_TRY_TIMEOUT }` is appended as the last argument of the
 * first .click()/.press()/.fill()/.check()/.uncheck()/.selectOption() call in
 * `playwrightLine`. The call is located with a string-aware scan so that
 * arguments containing `)` (e.g. `selectOption("Option (1)")`) or selectors
 * that merely contain text such as `.fill(` are not corrupted. If no such
 * call is found the line is emitted unchanged.
 *
 * @param {string} playwrightLine - The `await page.locator(...)...` code
 * @param {string} fallbackBody - JS code to run inside page.evaluate() on failure
 * @returns {string} try/catch code block
 */
function withShadowFallback(playwrightLine, fallbackBody) {
  const timeout = `{ timeout: ${SHADOW_TRY_TIMEOUT} }`;
  const call = findTimedCall(playwrightLine);

  let timedLine = playwrightLine;
  if (call) {
    const args = playwrightLine.slice(call.argsStart, call.argsEnd);
    const timedArgs = args.trim() ? `${args}, ${timeout}` : timeout;
    timedLine = playwrightLine.slice(0, call.argsStart) + timedArgs + playwrightLine.slice(call.argsEnd);
  }

  return [
    `try {`,
    ` ${timedLine}`,
    `} catch {`,
    ` await page.evaluate(() => { ${DEEP_QUERY_FNS} ${fallbackBody} });`,
    `}`,
  ].join('\n');
}
|
|
118
|
+
|
|
119
|
+
/**
 * Generate a try/catch block that fills a text input, with a Shadow DOM
 * fallback.
 *
 * The try path uses Playwright's locator.fill() with the short
 * SHADOW_TRY_TIMEOUT. On failure, the catch path looks the element up with
 * deepQuery inside page.evaluate(), focuses it, clears its value (dispatching
 * a bubbling `input` event), and then types the text with Playwright's
 * keyboard API so that the page receives real keyboard input rather than a
 * direct `.value` assignment.
 *
 * If deepQuery does not find the element either, nothing is typed: sending
 * keystrokes without having focused the target would put the text into
 * whatever element currently holds focus.
 *
 * @param {string} sel - CSS selector for the input element
 * @param {string} value - Text to type
 * @returns {string} try/catch code block
 */
function withShadowFillFallback(sel, value) {
  // Runs in the page; returns true only when the target was found and focused.
  const focusAndClear = [
    DEEP_QUERY_FNS,
    `const _el = deepQuery(${qs(sel)});`,
    `if (!_el) return false;`,
    `_el.focus();`,
    `_el.value = '';`,
    `_el.dispatchEvent(new Event('input', { bubbles: true }));`,
    `return true;`,
  ].join(' ');

  return [
    `try {`,
    ` await page.locator(${quote(sel)}).fill(${quote(value)}, { timeout: ${SHADOW_TRY_TIMEOUT} });`,
    `} catch {`,
    ` const _focused = await page.evaluate(() => { ${focusAndClear} });`,
    ` if (_focused) await page.keyboard.type(${quote(value)});`,
    `}`,
  ].join('\n');
}
|
|
139
|
+
|
|
81
140
|
// --- Main entry point ---
|
|
82
141
|
|
|
83
142
|
/**
|
|
@@ -139,26 +198,38 @@ function translateSimple(execution, args) {
|
|
|
139
198
|
: null;
|
|
140
199
|
const sel = lastField ? lastField.selector : execution.selector;
|
|
141
200
|
|
|
201
|
+
// Use Playwright's native .press('Enter') for trusted keyboard events.
|
|
202
|
+
// Falls back to deepQuery + dispatchEvent for Shadow DOM elements.
|
|
203
|
+
flushBatch();
|
|
142
204
|
if (isPlaywrightSelector(sel)) {
|
|
143
|
-
flushBatch();
|
|
144
205
|
phases.push(`await page.locator(${quote(sel)}).press('Enter');`);
|
|
145
206
|
} else {
|
|
146
|
-
|
|
147
|
-
`
|
|
148
|
-
`
|
|
149
|
-
`
|
|
150
|
-
`
|
|
151
|
-
`
|
|
152
|
-
`
|
|
153
|
-
`
|
|
154
|
-
` }`,
|
|
207
|
+
const enterFallback = [
|
|
208
|
+
`const _el = deepQuery(${qs(sel)});`,
|
|
209
|
+
`if (_el) {`,
|
|
210
|
+
` _el.dispatchEvent(new KeyboardEvent('keydown', { key: 'Enter', code: 'Enter', bubbles: true }));`,
|
|
211
|
+
` _el.dispatchEvent(new KeyboardEvent('keypress', { key: 'Enter', code: 'Enter', bubbles: true }));`,
|
|
212
|
+
` _el.dispatchEvent(new KeyboardEvent('keyup', { key: 'Enter', code: 'Enter', bubbles: true }));`,
|
|
213
|
+
` const _form = _el.closest('form');`,
|
|
214
|
+
` if (_form) { _form.requestSubmit ? _form.requestSubmit() : _form.submit(); }`,
|
|
155
215
|
`}`,
|
|
156
|
-
);
|
|
216
|
+
].join(' ');
|
|
217
|
+
phases.push(withShadowFallback(
|
|
218
|
+
`await page.locator(${quote(sel)}).press('Enter');`,
|
|
219
|
+
enterFallback,
|
|
220
|
+
));
|
|
157
221
|
}
|
|
158
222
|
} else {
|
|
159
223
|
const submitSel = execution.submitSelector || `${execution.selector} [type="submit"], ${execution.selector} button`;
|
|
160
224
|
flushBatch();
|
|
161
|
-
|
|
225
|
+
if (isPlaywrightSelector(submitSel)) {
|
|
226
|
+
phases.push(`await page.locator(${quote(submitSel)}).first().click();`);
|
|
227
|
+
} else {
|
|
228
|
+
phases.push(withShadowFallback(
|
|
229
|
+
`await page.locator(${quote(submitSel)}).first().click();`,
|
|
230
|
+
`const _el = deepQuery(${qs(submitSel)}); if (_el) _el.click();`,
|
|
231
|
+
));
|
|
232
|
+
}
|
|
162
233
|
}
|
|
163
234
|
}
|
|
164
235
|
|
|
@@ -204,14 +275,25 @@ function translateSteps(execution, args, opts = {}) {
|
|
|
204
275
|
case 'click':
|
|
205
276
|
if (selector) {
|
|
206
277
|
flushBatch();
|
|
207
|
-
|
|
278
|
+
if (isPlaywrightSelector(selector)) {
|
|
279
|
+
phases.push(`await page.locator(${quote(selector)}).first().click();`);
|
|
280
|
+
} else {
|
|
281
|
+
phases.push(withShadowFallback(
|
|
282
|
+
`await page.locator(${quote(selector)}).first().click();`,
|
|
283
|
+
`const _el = deepQuery(${qs(selector)}); if (_el) _el.click();`,
|
|
284
|
+
));
|
|
285
|
+
}
|
|
208
286
|
}
|
|
209
287
|
break;
|
|
210
288
|
|
|
211
289
|
case 'fill':
|
|
212
290
|
if (selector && value !== null) {
|
|
213
291
|
flushBatch();
|
|
214
|
-
|
|
292
|
+
if (isPlaywrightSelector(selector)) {
|
|
293
|
+
phases.push(`await page.locator(${quote(selector)}).first().fill(${quote(value)});`);
|
|
294
|
+
} else {
|
|
295
|
+
phases.push(withShadowFillFallback(selector, value));
|
|
296
|
+
}
|
|
215
297
|
}
|
|
216
298
|
break;
|
|
217
299
|
|
|
@@ -382,21 +464,34 @@ function domFieldAction(field, value) {
|
|
|
382
464
|
}
|
|
383
465
|
|
|
384
466
|
/**
|
|
385
|
-
* Generate Playwright API lines for filling a field
|
|
386
|
-
*
|
|
467
|
+
* Generate Playwright API lines for filling a field.
|
|
468
|
+
* For Playwright-specific selectors, uses direct locator calls.
|
|
469
|
+
* For plain CSS selectors, wraps in try/catch with deepQuery fallback
|
|
470
|
+
* to handle elements inside Shadow DOM.
|
|
387
471
|
*/
|
|
388
472
|
function playwrightFieldAction(field, value) {
|
|
389
473
|
const sel = field.selector;
|
|
474
|
+
const pw = isPlaywrightSelector(sel);
|
|
390
475
|
|
|
391
476
|
switch (field.type) {
|
|
392
|
-
case 'select':
|
|
393
|
-
|
|
477
|
+
case 'select': {
|
|
478
|
+
const line = `await page.locator(${quote(sel)}).selectOption(${quote(String(value))});`;
|
|
479
|
+
if (pw) return [line];
|
|
480
|
+
return [withShadowFallback(line,
|
|
481
|
+
`const _el = deepQuery(${qs(sel)}); if (_el) { _el.value = ${qs(String(value))}; _el.dispatchEvent(new Event('change', { bubbles: true })); }`
|
|
482
|
+
)];
|
|
483
|
+
}
|
|
394
484
|
|
|
395
|
-
case 'checkbox':
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
485
|
+
case 'checkbox': {
|
|
486
|
+
const checked = value === true || value === 'true' || value === 'on';
|
|
487
|
+
const line = checked
|
|
488
|
+
? `await page.locator(${quote(sel)}).check();`
|
|
489
|
+
: `await page.locator(${quote(sel)}).uncheck();`;
|
|
490
|
+
if (pw) return [line];
|
|
491
|
+
return [withShadowFallback(line,
|
|
492
|
+
`const _el = deepQuery(${qs(sel)}); if (_el) { _el.checked = ${checked}; _el.dispatchEvent(new Event('change', { bubbles: true })); }`
|
|
493
|
+
)];
|
|
494
|
+
}
|
|
400
495
|
|
|
401
496
|
case 'radio': {
|
|
402
497
|
let radioSel = sel + `[value="${value}"]`;
|
|
@@ -404,11 +499,17 @@ function playwrightFieldAction(field, value) {
|
|
|
404
499
|
const option = field.options.find(o => o.value === String(value));
|
|
405
500
|
if (option && option.selector) radioSel = option.selector;
|
|
406
501
|
}
|
|
407
|
-
|
|
502
|
+
const line = `await page.locator(${quote(radioSel)}).click();`;
|
|
503
|
+
if (pw || isPlaywrightSelector(radioSel)) return [line];
|
|
504
|
+
return [withShadowFallback(line,
|
|
505
|
+
`const _el = deepQuery(${qs(radioSel)}); if (_el) { _el.checked = true; _el.dispatchEvent(new Event('change', { bubbles: true })); }`
|
|
506
|
+
)];
|
|
408
507
|
}
|
|
409
508
|
|
|
410
|
-
default: // text, number, textarea, date, hidden
|
|
411
|
-
return [`await page.locator(${quote(sel)}).fill(${quote(String(value))});`];
|
|
509
|
+
default: { // text, number, textarea, date, hidden
|
|
510
|
+
if (pw) return [`await page.locator(${quote(sel)}).fill(${quote(String(value))});`];
|
|
511
|
+
return [withShadowFillFallback(sel, String(value))];
|
|
512
|
+
}
|
|
412
513
|
}
|
|
413
514
|
}
|
|
414
515
|
|
|
@@ -445,7 +546,7 @@ function addResultWait(phases, execution) {
|
|
|
445
546
|
*/
|
|
446
547
|
function addExtraction(phases, selector, extractMode, attribute) {
|
|
447
548
|
if (!selector) {
|
|
448
|
-
phases.push(`return '[action
|
|
549
|
+
phases.push(`return '[action completed successfully]';`);
|
|
449
550
|
return;
|
|
450
551
|
}
|
|
451
552
|
addStepExtraction(phases, selector, extractMode, attribute);
|
package/src/hub-tools.js
CHANGED
|
@@ -102,6 +102,8 @@ async function executeHubTool(upstreamClient, hubEntry, args) {
|
|
|
102
102
|
};
|
|
103
103
|
}
|
|
104
104
|
|
|
105
|
+
const toolDisplayName = hubEntry.tool.name.replace(/^hub_/, '');
|
|
106
|
+
|
|
105
107
|
try {
|
|
106
108
|
const result = await upstreamClient.callTool({
|
|
107
109
|
name: 'browser_run_code',
|
|
@@ -113,18 +115,44 @@ async function executeHubTool(upstreamClient, hubEntry, args) {
|
|
|
113
115
|
return {
|
|
114
116
|
content: [{
|
|
115
117
|
type: 'text',
|
|
116
|
-
text: `Hub tool "${
|
|
118
|
+
text: `Hub tool "${toolDisplayName}" failed:\n${errorText}\n\nUse browser_fallback to access generic Playwright tools.`,
|
|
119
|
+
}, {
|
|
120
|
+
type: 'text',
|
|
121
|
+
text: `\n<vote-hint>This hub tool failed. Downvote it so other agents know it's broken:\ncontribute_vote-on-tool({ configId: "${hubEntry.configId}", toolName: "${toolDisplayName}", vote: -1 })</vote-hint>`,
|
|
117
122
|
}],
|
|
118
123
|
isError: true,
|
|
119
124
|
};
|
|
120
125
|
}
|
|
121
126
|
|
|
127
|
+
// Check if the result indicates a genuine failure.
|
|
128
|
+
// "[action completed successfully]" is the normal output for action-only tools (no resultSelector) — that's fine.
|
|
129
|
+
// "[resultSelector matched no elements" means extraction was configured but found nothing — that's a real problem.
|
|
130
|
+
const resultText = result.content?.map(c => c.text || '').join('\n') || '';
|
|
131
|
+
const looksLikeBroken = resultText.includes('[resultSelector matched no elements');
|
|
132
|
+
|
|
133
|
+
if (result.content) {
|
|
134
|
+
if (looksLikeBroken) {
|
|
135
|
+
result.content.push({
|
|
136
|
+
type: 'text',
|
|
137
|
+
text: `\n<vote-hint>This hub tool ran but its result selector matched nothing — the selectors may be broken. Downvote it:\ncontribute_vote-on-tool({ configId: "${hubEntry.configId}", toolName: "${toolDisplayName}", vote: -1 })</vote-hint>`,
|
|
138
|
+
});
|
|
139
|
+
} else {
|
|
140
|
+
result.content.push({
|
|
141
|
+
type: 'text',
|
|
142
|
+
text: `\n<vote-hint>This hub tool worked. Upvote it so other agents prefer it:\ncontribute_vote-on-tool({ configId: "${hubEntry.configId}", toolName: "${toolDisplayName}", vote: 1 })</vote-hint>`,
|
|
143
|
+
});
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
|
|
122
147
|
return result;
|
|
123
148
|
} catch (err) {
|
|
124
149
|
return {
|
|
125
150
|
content: [{
|
|
126
151
|
type: 'text',
|
|
127
|
-
text: `Hub tool "${
|
|
152
|
+
text: `Hub tool "${toolDisplayName}" failed: ${err.message}\n\nUse browser_fallback to access generic Playwright tools.`,
|
|
153
|
+
}, {
|
|
154
|
+
type: 'text',
|
|
155
|
+
text: `\n<vote-hint>This hub tool failed. Downvote it so other agents know it's broken:\ncontribute_vote-on-tool({ configId: "${hubEntry.configId}", toolName: "${toolDisplayName}", vote: -1 })</vote-hint>`,
|
|
128
156
|
}],
|
|
129
157
|
isError: true,
|
|
130
158
|
};
|
|
@@ -220,27 +248,33 @@ const hubWriteTools = [
|
|
|
220
248
|
' steps: [{ action: "click", selector: "[data-testid=tweetButtonInline]" }]',
|
|
221
249
|
'})',
|
|
222
250
|
'',
|
|
223
|
-
'EXAMPLE — search
|
|
251
|
+
'EXAMPLE — fill a search field (submit is handled by browser_press_key, not this tool):',
|
|
224
252
|
'contribute_add-tool({',
|
|
225
253
|
' configId: "abc123",',
|
|
226
|
-
' name: "search
|
|
227
|
-
' description: "
|
|
254
|
+
' name: "fill-search",',
|
|
255
|
+
' description: "Fill the search input field with a query. After calling this, use browser_press_key({ key: \'Enter\' }) to submit.",',
|
|
228
256
|
' selector: "#searchForm",',
|
|
229
|
-
'
|
|
230
|
-
' submitSelector: "#searchBtn",',
|
|
231
|
-
' submitAction: "click",',
|
|
232
|
-
' fields: [{ type: "text", selector: "#searchInput", name: "query", description: "Search term" }],',
|
|
233
|
-
' resultSelector: ".results li",',
|
|
234
|
-
' resultExtract: "list"',
|
|
257
|
+
' fields: [{ type: "text", selector: "#searchInput", name: "query", description: "Search term" }]',
|
|
235
258
|
'})',
|
|
259
|
+
'→ Then the agent calls browser_press_key({ key: "Enter" }) to submit — no CSS selector needed for the button.',
|
|
236
260
|
'',
|
|
237
261
|
'KEY RULES:',
|
|
262
|
+
'- SELECTORS MUST BE LOCALE-INDEPENDENT. Configs are shared globally — selectors with localized text break for other users.',
|
|
263
|
+
' Prefer: data-testid, id, name, type, role, or structural selectors (e.g. form input[type="search"])',
|
|
264
|
+
' NEVER use aria-label with translated text (e.g. aria-label="Søk", aria-label="Suche", aria-label="Rechercher").',
|
|
265
|
+
' If aria-label is the only option, use the English value only.',
|
|
266
|
+
' WRONG: input[aria-label="Søk"] — only works in Norwegian',
|
|
267
|
+
' RIGHT: input[name="search_query"], input#search, input[type="search"]',
|
|
238
268
|
'- Tools must be GENERAL, not hardcoded to a specific instance or position. WRONG: "like-first-post" (hardcoded to first). RIGHT: "like-post" with a parameter that identifies which post (e.g. postIndex: number, or postText: string used in a :has-text selector). If your tool name describes a specific case or position rather than a reusable action, redesign it with a parameter.',
|
|
239
|
-
'-
|
|
240
|
-
'
|
|
241
|
-
'
|
|
242
|
-
'-
|
|
243
|
-
'-
|
|
269
|
+
'- ONE ACTION PER TOOL. Each tool does exactly ONE thing. NEVER combine fill + submit in one tool.',
|
|
270
|
+
' A fill tool ONLY fills a field (no autosubmit, no submitSelector, no steps with clicks).',
|
|
271
|
+
' For submit/search: the agent calls browser_press_key({ key: "Enter" }) after the fill tool — no button selector needed.',
|
|
272
|
+
' WRONG: "search-videos" with fields + autosubmit — combines fill and submit.',
|
|
273
|
+
' WRONG: "click-search" — fragile, requires finding a submit button selector.',
|
|
274
|
+
' RIGHT: "fill-search" (fields only) → agent uses browser_press_key({ key: "Enter" }) to submit.',
|
|
275
|
+
'- Do NOT create click-submit or click-search tools. Use browser_press_key instead.',
|
|
276
|
+
'- Fill tools need: selector + one field entry. No autosubmit, no submitSelector, no submitAction.',
|
|
277
|
+
'- Tool names must be kebab-case with a verb: "get-posts", "click-compose-button", "fill-search"',
|
|
244
278
|
'- Read-only tools only need: selector, resultSelector, resultExtract. No autosubmit, no fields.',
|
|
245
279
|
'- Use fields[] for form inputs — each field\'s name becomes a tool parameter automatically',
|
|
246
280
|
'- resultExtract options: text, html, attribute, list, table',
|
|
@@ -452,6 +486,56 @@ const hubWriteTools = [
|
|
|
452
486
|
const VALID_RESULT_EXTRACTS = new Set(['text', 'html', 'attribute', 'list', 'table']);
|
|
453
487
|
const VALID_STEP_ACTIONS = new Set(['navigate', 'click', 'fill', 'select', 'wait', 'extract', 'scroll', 'condition', 'evaluate']);
|
|
454
488
|
|
|
489
|
+
// Matches an aria-label attribute selector and captures its value:
//   group 1 = double-quoted value, group 2 = single-quoted value,
//   group 3 = unquoted value.
// Accepts the attribute operators =, ~=, |=, ^=, $= and *=.
const ARIA_LABEL_RE = /aria-label\s*[~|^$*]?=\s*(?:"([^"]*)"|'([^']*)'|([^\s\]"']+))/gi;
// eslint-disable-next-line no-control-regex
const NON_ASCII_RE = /[^\x00-\x7F]/;

/**
 * Detect localized (non-ASCII) text inside aria-label selectors.
 *
 * Scans `selector`, `submitSelector`, `resultSelector`, `resultWaitSelector`,
 * and the `selector` of every entry in `fields` and `steps`. Entries that are
 * null/undefined or have no selector are skipped.
 *
 * Matches patterns like: aria-label="Søk", aria-label='Rechercher',
 * aria-label*="Öffnen". Flags any aria-label value containing non-ASCII
 * characters (accented, CJK, Cyrillic, etc.). A quoted value is read up to its
 * own closing quote, so an apostrophe inside a double-quoted label does not
 * truncate it.
 *
 * @param {object} args - Flat tool arguments
 * @returns {Array<{ path: string, selector: string, label: string }>} One
 *   entry per offending aria-label: the argument path, the full selector, and
 *   the label text that contains non-ASCII characters.
 */
function findLocalizedSelectors(args) {
  const source = args ?? {};
  const selectorSources = [];
  const collect = (path, value) => {
    if (value) selectorSources.push({ path, value });
  };

  // Top-level selector arguments first, then per-field and per-step selectors.
  for (const key of ['selector', 'submitSelector', 'resultSelector', 'resultWaitSelector']) {
    collect(key, source[key]);
  }
  for (const key of ['fields', 'steps']) {
    if (!Array.isArray(source[key])) continue;
    source[key].forEach((entry, i) => collect(`${key}[${i}].selector`, entry?.selector));
  }

  const violations = [];
  for (const { path, value } of selectorSources) {
    // matchAll iterates over a copy of the regex, so no lastIndex bookkeeping is needed.
    for (const m of String(value).matchAll(ARIA_LABEL_RE)) {
      const label = m[1] ?? m[2] ?? m[3];
      if (NON_ASCII_RE.test(label)) {
        violations.push({ path, selector: value, label });
      }
    }
  }

  return violations;
}
|
|
538
|
+
|
|
455
539
|
/**
|
|
456
540
|
* Validate that each step has the fields required for its action type.
|
|
457
541
|
* Returns an array of human-readable error strings with exact paths.
|
|
@@ -714,6 +798,18 @@ async function handleHubWriteTool(toolName, args) {
|
|
|
714
798
|
};
|
|
715
799
|
}
|
|
716
800
|
|
|
801
|
+
// Check for localized aria-label selectors
|
|
802
|
+
const localizedViolations = findLocalizedSelectors(args);
|
|
803
|
+
if (localizedViolations.length > 0) {
|
|
804
|
+
const details = localizedViolations.map(v =>
|
|
805
|
+
`- ${v.path}: aria-label="${v.label}" contains localized text`
|
|
806
|
+
).join('\n');
|
|
807
|
+
return {
|
|
808
|
+
content: [{ type: 'text', text: `Error: Selectors contain localized aria-label text that won't work for users in other locales.\n\n${details}\n\nUse locale-independent selectors instead: data-testid, id, name, type, role, or structural selectors (e.g. input[type="search"], form input[name="q"]).\nIf aria-label is the only option, use the English value.` }],
|
|
809
|
+
isError: true,
|
|
810
|
+
};
|
|
811
|
+
}
|
|
812
|
+
|
|
717
813
|
// Build inputSchema and execution from flat fields
|
|
718
814
|
const inputSchema = buildInputSchema(args);
|
|
719
815
|
const execution = buildExecution(args);
|
|
@@ -812,6 +908,18 @@ async function handleHubWriteTool(toolName, args) {
|
|
|
812
908
|
};
|
|
813
909
|
}
|
|
814
910
|
|
|
911
|
+
// Check for localized aria-label selectors
|
|
912
|
+
const localizedViolations = findLocalizedSelectors(args);
|
|
913
|
+
if (localizedViolations.length > 0) {
|
|
914
|
+
const details = localizedViolations.map(v =>
|
|
915
|
+
`- ${v.path}: aria-label="${v.label}" contains localized text`
|
|
916
|
+
).join('\n');
|
|
917
|
+
return {
|
|
918
|
+
content: [{ type: 'text', text: `Error: Selectors contain localized aria-label text that won't work for users in other locales.\n\n${details}\n\nUse locale-independent selectors instead: data-testid, id, name, type, role, or structural selectors (e.g. input[type="search"], form input[name="q"]).\nIf aria-label is the only option, use the English value.` }],
|
|
919
|
+
isError: true,
|
|
920
|
+
};
|
|
921
|
+
}
|
|
922
|
+
|
|
815
923
|
// Build new inputSchema and execution from flat fields
|
|
816
924
|
const inputSchema = buildInputSchema(args);
|
|
817
925
|
const execution = buildExecution(args);
|
package/src/proxy-server.js
CHANGED
|
@@ -88,9 +88,10 @@ async function startProxy(options) {
|
|
|
88
88
|
'<workflow>',
|
|
89
89
|
'1. Navigate with browser_navigate. Hub tools are discovered automatically.',
|
|
90
90
|
'2. If hub tools are listed in the response, use hub_execute to run them. ALWAYS prefer hub tools.',
|
|
91
|
-
'3.
|
|
92
|
-
'4.
|
|
93
|
-
'5.
|
|
91
|
+
'3. After a hub fill tool, use browser_press_key({ key: "Enter" }) to submit. This is more reliable than clicking a submit button.',
|
|
92
|
+
'4. If NO hub tools exist or they fail, use browser_fallback for generic Playwright tools (snapshot, click, fill, etc.). Call browser_fallback without arguments to list available tools.',
|
|
93
|
+
'5. NEVER call browser tools in parallel — always sequentially, one at a time.',
|
|
94
|
+
'6. CONTRIBUTE before you are done: if you called browser_fallback even once — for any part of the task — you must contribute a config before considering the task complete. This is not optional. See <contribution> below.',
|
|
94
95
|
'</workflow>',
|
|
95
96
|
'',
|
|
96
97
|
'<contribution>',
|
|
@@ -99,13 +100,32 @@ async function startProxy(options) {
|
|
|
99
100
|
'',
|
|
100
101
|
'Contribute ONLY when ALL of these are true:',
|
|
101
102
|
'1. You called browser_fallback at least once (even for a single step)',
|
|
102
|
-
'2. You
|
|
103
|
-
'3. You
|
|
103
|
+
'2. You inspected the DOM to discover real CSS selectors (see <finding-selectors> below)',
|
|
104
|
+
'3. You verified each selector by testing it on the page',
|
|
104
105
|
' If you used a condition step, verify the selector in EACH branch separately with',
|
|
105
106
|
' browser_snapshot — never assume two contexts (e.g. dialog vs. inline) share the same test IDs.',
|
|
106
107
|
'',
|
|
107
108
|
'NEVER contribute if you have not explored the page. A config without real CSS selectors is useless.',
|
|
108
109
|
'',
|
|
110
|
+
'<finding-selectors>',
|
|
111
|
+
'browser_snapshot returns an accessibility tree with refs (e.g. "e12"), NOT CSS selectors.',
|
|
112
|
+
'You MUST inspect the actual DOM to find real CSS selectors. Do NOT guess selectors from the snapshot.',
|
|
113
|
+
'',
|
|
114
|
+
'To find a CSS selector for an element you interacted with:',
|
|
115
|
+
'1. Use browser_evaluate to inspect the element:',
|
|
116
|
+
' browser_fallback({ tool: "browser_evaluate", arguments: {',
|
|
117
|
+
' expression: "document.querySelector(\'input[name=search_query]\')?.tagName"',
|
|
118
|
+
' }})',
|
|
119
|
+
'2. Or inspect multiple attributes at once:',
|
|
120
|
+
' browser_fallback({ tool: "browser_evaluate", arguments: {',
|
|
121
|
+
' expression: "JSON.stringify([...document.querySelectorAll(\'input\')].map(e => ({ tag: e.tagName, id: e.id, name: e.name, type: e.type, placeholder: e.placeholder })))"',
|
|
122
|
+
' }})',
|
|
123
|
+
'3. Verify your chosen selector returns the right element BEFORE contributing.',
|
|
124
|
+
'',
|
|
125
|
+
'NEVER fabricate selectors like "input#search" without verifying. On YouTube, #search is a <div>,',
|
|
126
|
+
'not an <input>. The actual input is input[name="search_query"]. Always check the DOM.',
|
|
127
|
+
'</finding-selectors>',
|
|
128
|
+
'',
|
|
109
129
|
'How to contribute:',
|
|
110
130
|
'- No hub config exists yet → contribute_create-config(...) then contribute_add-tool(...) for each tool',
|
|
111
131
|
'- Hub config already exists → contribute_add-tool(...) with the config ID shown in the navigation response. Do NOT create a new config.',
|
|
@@ -118,9 +138,22 @@ async function startProxy(options) {
|
|
|
118
138
|
' - "example.com" ONLY for truly site-wide tools (navigation, global search)',
|
|
119
139
|
'contribute_add-tool({ configId, name, description, selector, ... }) → adds one tool',
|
|
120
140
|
' Always add read-only extraction tools first (get-posts, get-content, list-items).',
|
|
121
|
-
'
|
|
141
|
+
' ONE ACTION PER TOOL. Each tool does exactly ONE thing:',
|
|
142
|
+
' - A fill tool ONLY fills a field (no submit, no autosubmit)',
|
|
143
|
+
' - A click tool ONLY clicks a button',
|
|
144
|
+
' - For search/submit: create a fill tool, then the agent uses browser_press_key({ key: "Enter" }) to submit',
|
|
145
|
+
' NEVER combine fill + submit in one tool. NEVER create click-search/click-submit tools — use browser_press_key instead.',
|
|
146
|
+
' WRONG: "search-videos" that fills AND submits. WRONG: "click-search" (fragile button selector).',
|
|
147
|
+
' RIGHT: "fill-search" (fill only) → agent calls browser_press_key({ key: "Enter" }) to submit.',
|
|
122
148
|
' Shadow DOM is fully supported — selectors targeting web components work transparently.',
|
|
123
149
|
'',
|
|
150
|
+
' SELECTOR RULES — configs are shared globally, selectors must work for ALL users:',
|
|
151
|
+
' - Prefer: data-testid, id, name, type, role, or structural selectors (e.g. form input[type="search"])',
|
|
152
|
+
' - NEVER use aria-label with localized/translated text (e.g. aria-label="Søk", aria-label="Suche")',
|
|
153
|
+
' - If aria-label is the only option, use the English value only',
|
|
154
|
+
' - WRONG: input[aria-label="Søk"] — this only works in Norwegian',
|
|
155
|
+
' - RIGHT: input[name="search_query"], input#search, input[type="search"]',
|
|
156
|
+
'',
|
|
124
157
|
'BEFORE SAYING YOU ARE DONE — run this checklist:',
|
|
125
158
|
' [ ] Did I call browser_fallback at any point? → If yes:',
|
|
126
159
|
' [ ] Did I contribute_create-config or identify the existing config ID?',
|
|
@@ -135,14 +168,29 @@ async function startProxy(options) {
|
|
|
135
168
|
function getBrowserFallbackDefinition() {
|
|
136
169
|
return {
|
|
137
170
|
name: 'browser_fallback',
|
|
138
|
-
description:
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
171
|
+
description: `Access generic Playwright browser tools as a fallback when hub tools are insufficient.
|
|
172
|
+
Works in three modes:
|
|
173
|
+
- No arguments: lists all available Playwright tools
|
|
174
|
+
- peek: true: inspects a tool's full input schema before calling it
|
|
175
|
+
- tool + arguments: executes a Playwright tool (e.g. browser_click, browser_snapshot)
|
|
176
|
+
<important>
|
|
177
|
+
All element-targeting tools use "ref" values from browser_snapshot (e.g., "e12", "e37"), NOT CSS selectors.
|
|
178
|
+
Always take a browser_snapshot first to get element refs, then use those refs in tool calls.
|
|
179
|
+
If you get a validation error, the correct schema will be included in the error response.
|
|
180
|
+
</important>
|
|
181
|
+
<tool-schemas>
|
|
182
|
+
Common tools — use EXACTLY these argument shapes:
|
|
183
|
+
|
|
184
|
+
browser_click: { "ref": "e12" } — ref from snapshot, NOT a selector
|
|
185
|
+
browser_type: { "ref": "e12", "text": "hello" } — ref from snapshot + text to type
|
|
186
|
+
browser_press_key: { "key": "Enter" } — key name
|
|
187
|
+
browser_hover: { "ref": "e12" } — ref from snapshot
|
|
188
|
+
browser_select_option: { "ref": "e12", "values": ["opt1"] } — ref + values array
|
|
189
|
+
browser_fill_form: { "fields": [{"ref":"e12","value":"hi"},{"ref":"e15","value":"there"}] } — array of {ref, value} objects
|
|
190
|
+
|
|
191
|
+
WRONG: { "selector": "...", "text": "..." } — never use "selector", always use "ref"
|
|
192
|
+
WRONG: { "fields": {"search": "..."} } — fields is an ARRAY of {ref, value}, not an object
|
|
193
|
+
</tool-schemas>`,
|
|
146
194
|
inputSchema: {
|
|
147
195
|
type: 'object',
|
|
148
196
|
properties: {
|
|
@@ -156,7 +204,7 @@ async function startProxy(options) {
|
|
|
156
204
|
},
|
|
157
205
|
arguments: {
|
|
158
206
|
type: 'object',
|
|
159
|
-
description: 'Arguments for the Playwright tool.',
|
|
207
|
+
description: 'Arguments for the Playwright tool. Use ref values from browser_snapshot for element targeting.',
|
|
160
208
|
additionalProperties: true,
|
|
161
209
|
},
|
|
162
210
|
},
|
|
@@ -165,18 +213,25 @@ async function startProxy(options) {
|
|
|
165
213
|
}
|
|
166
214
|
|
|
167
215
|
// --- 5. Handle tools/list — minimal tool set ---
|
|
216
|
+
// Expose browser_navigate and browser_press_key directly from upstream.
|
|
217
|
+
// browser_press_key is first-class because it's essential for submitting
|
|
218
|
+
// after hub fill tools (e.g. fill-search → press Enter) without needing
|
|
219
|
+
// fragile CSS selectors for submit buttons.
|
|
220
|
+
const FIRST_CLASS_UPSTREAM = ['browser_navigate', 'browser_press_key'];
|
|
221
|
+
|
|
168
222
|
proxyServer.setRequestHandler(ListToolsRequestSchema, async () => {
|
|
169
223
|
const upstreamTools = await getUpstreamTools();
|
|
170
224
|
|
|
171
|
-
|
|
172
|
-
|
|
225
|
+
const firstClassTools = FIRST_CLASS_UPSTREAM
|
|
226
|
+
.map(name => upstreamTools.find(t => t.name === name))
|
|
227
|
+
.filter(Boolean);
|
|
173
228
|
|
|
174
229
|
const hubExecute = noHub ? [] : [getHubExecuteToolDefinition()];
|
|
175
230
|
const writeTools = noHub ? [] : getHubWriteToolDefinitions();
|
|
176
231
|
|
|
177
232
|
return {
|
|
178
233
|
tools: [
|
|
179
|
-
...
|
|
234
|
+
...firstClassTools,
|
|
180
235
|
...hubExecute,
|
|
181
236
|
getBrowserFallbackDefinition(),
|
|
182
237
|
...writeTools,
|
|
@@ -267,6 +322,22 @@ async function startProxy(options) {
|
|
|
267
322
|
// Proxy to upstream
|
|
268
323
|
const result = await upstreamClient.callTool({ name: innerTool, arguments: innerArgs });
|
|
269
324
|
|
|
325
|
+
// Auto-peek on validation error: if the upstream returned a schema validation error
|
|
326
|
+
// (invalid_type, unrecognized_keys, etc.), automatically append the correct schema
|
|
327
|
+
// so the agent can self-correct without an extra round-trip.
|
|
328
|
+
if (result.isError || result.content?.some(c => c.type === 'text' && c.text && (
|
|
329
|
+
c.text.includes('invalid_type') || c.text.includes('unrecognized_keys') || c.text.includes('invalid_union')
|
|
330
|
+
))) {
|
|
331
|
+
const tools = await getUpstreamTools();
|
|
332
|
+
const match = tools.find(t => t.name === innerTool);
|
|
333
|
+
if (match) {
|
|
334
|
+
result.content.push({
|
|
335
|
+
type: 'text',
|
|
336
|
+
text: `\n<correct-schema>\nThe call to ${innerTool} failed due to invalid arguments. Here is the correct schema:\n\n${JSON.stringify(match.inputSchema, null, 2)}\n\nDescription: ${match.description || '(none)'}\n\nRetry with the correct argument format.\n</correct-schema>`,
|
|
337
|
+
});
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
|
|
270
341
|
// After browser_snapshot, check whether the page URL has changed since our last hub lookup.
|
|
271
342
|
// This catches SPA client-side redirects (e.g. x.com → x.com/home) that complete AFTER
|
|
272
343
|
// page.goto() returns, so they are invisible to handleNavigate's redirect detection.
|