mcpbrowser 0.3.3 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/actions/click-element.js +140 -23
- package/src/actions/close-tab.js +121 -27
- package/src/actions/fetch-page.js +125 -18
- package/src/actions/get-current-html.js +106 -13
- package/src/actions/type-text.js +134 -21
- package/src/core/responses.js +130 -0
- package/src/mcp-browser.js +24 -121
- package/tests/actions/click-element.test.js +8 -2
- package/tests/actions/close-tab.test.js +29 -27
- package/tests/actions/fetch-page.test.js +17 -14
- package/tests/actions/get-current-html.test.js +13 -7
- package/tests/actions/type-text.test.js +8 -2
- package/tests/core/auth.test.js +2 -1
- package/tests/core/responses.test.js +220 -0
- package/tests/demo-type-safety.js +163 -0
- package/tests/run-all.js +2 -1
- package/tests/tool-selection/run-tool-selection-tests.js +44 -0
- package/tests/tool-selection/tool-selection-tests.json +113 -0
- package/tests/tool-selection/tool-selection.test.js +472 -0
- package/tests/verify-nextsteps.test.js +95 -0
- package/tests/verify-structured-output.test.js +223 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mcpbrowser",
|
|
3
|
-
"version": "0.3.3",
|
|
3
|
+
"version": "0.3.4",
|
|
4
4
|
"mcpName": "io.github.cherchyk/mcpbrowser",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"description": "MCP browser server - fetch web pages using real Chrome/Edge browser. Handles authentication, SSO, CAPTCHAs, and anti-bot protection. Browser automation for AI assistants.",
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
"mcp": "node src/mcp-browser.js",
|
|
13
13
|
"test": "node tests/run-all.js",
|
|
14
14
|
"test:unit": "node tests/run-unit.js",
|
|
15
|
-
"test:
|
|
15
|
+
"test:descriptions": "node tests/tool-selection/run-tool-selection-tests.js"
|
|
16
16
|
},
|
|
17
17
|
"keywords": [
|
|
18
18
|
"mcp",
|
|
package/src/actions/click-element.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* click.js - Click element action
|
|
2
|
+
* click-element.js - Click element action
|
|
3
3
|
*
|
|
4
4
|
* This function handles two distinct use cases:
|
|
5
5
|
*
|
|
@@ -26,6 +26,105 @@
|
|
|
26
26
|
|
|
27
27
|
import { getBrowser, domainPages } from '../core/browser.js';
|
|
28
28
|
import { extractAndProcessHtml, waitForPageStability } from '../core/page.js';
|
|
29
|
+
import { MCPResponse, ErrorResponse } from '../core/responses.js';
|
|
30
|
+
|
|
31
|
+
/**
|
|
32
|
+
* @typedef {import('@modelcontextprotocol/sdk/types.js').Tool} Tool
|
|
33
|
+
*/
|
|
34
|
+
|
|
35
|
+
// ============================================================================
|
|
36
|
+
// RESPONSE CLASS
|
|
37
|
+
// ============================================================================
|
|
38
|
+
|
|
39
|
+
/**
|
|
40
|
+
* Response for successful click_element operations
|
|
41
|
+
*/
|
|
42
|
+
export class ClickElementSuccessResponse extends MCPResponse {
|
|
43
|
+
/**
|
|
44
|
+
* @param {string} currentUrl - URL after click
|
|
45
|
+
* @param {string} message - Success message
|
|
46
|
+
* @param {string|null} html - Page HTML if returnHtml was true
|
|
47
|
+
* @param {string[]} nextSteps - Suggested next actions
|
|
48
|
+
*/
|
|
49
|
+
constructor(currentUrl, message, html, nextSteps) {
|
|
50
|
+
super(nextSteps);
|
|
51
|
+
|
|
52
|
+
if (typeof currentUrl !== 'string') {
|
|
53
|
+
throw new TypeError('currentUrl must be a string');
|
|
54
|
+
}
|
|
55
|
+
if (typeof message !== 'string') {
|
|
56
|
+
throw new TypeError('message must be a string');
|
|
57
|
+
}
|
|
58
|
+
if (html !== null && typeof html !== 'string') {
|
|
59
|
+
throw new TypeError('html must be a string or null');
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
this.currentUrl = currentUrl;
|
|
63
|
+
this.message = message;
|
|
64
|
+
this.html = html;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
_getAdditionalFields() {
|
|
68
|
+
return {
|
|
69
|
+
currentUrl: this.currentUrl,
|
|
70
|
+
message: this.message,
|
|
71
|
+
html: this.html
|
|
72
|
+
};
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
getTextSummary() {
|
|
76
|
+
return this.message || "Element clicked successfully";
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
// ============================================================================
|
|
81
|
+
// TOOL DEFINITION
|
|
82
|
+
// ============================================================================
|
|
83
|
+
|
|
84
|
+
/**
|
|
85
|
+
* @type {Tool}
|
|
86
|
+
*/
|
|
87
|
+
export const CLICK_ELEMENT_TOOL = {
|
|
88
|
+
name: "click_element",
|
|
89
|
+
title: "Click Element",
|
|
90
|
+
description: "**BROWSER INTERACTION** - Clicks elements on browser-loaded pages. Use this for navigation (clicking links/buttons), form submission, and any user interaction that requires clicking.\n\nWorks with any clickable element including buttons, links, or elements with onclick handlers. Can target by CSS selector or text content. Waits for page stability and returns updated HTML by default.\n\n**PREREQUISITE**: Page MUST be loaded with fetch_webpage first. This tool operates on an already-loaded page in the browser.",
|
|
91
|
+
inputSchema: {
|
|
92
|
+
type: "object",
|
|
93
|
+
properties: {
|
|
94
|
+
url: { type: "string", description: "The URL of the page (must match a previously fetched page)" },
|
|
95
|
+
selector: { type: "string", description: "CSS selector for the element to click (e.g., '#submit-btn', '.login-button')" },
|
|
96
|
+
text: { type: "string", description: "Text content to search for if selector is not provided (e.g., 'Sign In', 'Submit')" },
|
|
97
|
+
waitForElementTimeout: { type: "number", description: "Maximum time to wait for element in milliseconds", default: 1000 },
|
|
98
|
+
returnHtml: { type: "boolean", description: "Whether to wait for stability and return HTML after clicking. Set to false for fast form interactions (checkboxes, radio buttons).", default: true },
|
|
99
|
+
removeUnnecessaryHTML: { type: "boolean", description: "Remove Unnecessary HTML for size reduction by 90%. Only used when returnHtml is true.", default: true },
|
|
100
|
+
postClickWait: { type: "number", description: "Milliseconds to wait after click for SPAs to render dynamic content.", default: 1000 }
|
|
101
|
+
},
|
|
102
|
+
required: ["url"],
|
|
103
|
+
additionalProperties: false,
|
|
104
|
+
},
|
|
105
|
+
outputSchema: {
|
|
106
|
+
type: "object",
|
|
107
|
+
properties: {
|
|
108
|
+
currentUrl: { type: "string", description: "URL after click" },
|
|
109
|
+
message: { type: "string", description: "Success message" },
|
|
110
|
+
html: {
|
|
111
|
+
type: ["string", "null"],
|
|
112
|
+
description: "Page HTML if returnHtml was true, null otherwise"
|
|
113
|
+
},
|
|
114
|
+
nextSteps: {
|
|
115
|
+
type: "array",
|
|
116
|
+
items: { type: "string" },
|
|
117
|
+
description: "Suggested next actions"
|
|
118
|
+
}
|
|
119
|
+
},
|
|
120
|
+
required: ["currentUrl", "message", "html", "nextSteps"],
|
|
121
|
+
additionalProperties: false
|
|
122
|
+
}
|
|
123
|
+
};
|
|
124
|
+
|
|
125
|
+
// ============================================================================
|
|
126
|
+
// ACTION FUNCTION
|
|
127
|
+
// ============================================================================
|
|
29
128
|
|
|
30
129
|
/**
|
|
31
130
|
* Click on an element on the page
|
|
@@ -73,10 +172,12 @@ export async function clickElement({ url, selector, text, waitForElementTimeout
|
|
|
73
172
|
let page = domainPages.get(hostname);
|
|
74
173
|
|
|
75
174
|
if (!page || page.isClosed()) {
|
|
76
|
-
return
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
175
|
+
return new ErrorResponse(
|
|
176
|
+
`No open page found for ${hostname}. Please fetch the page first using fetch_webpage.`,
|
|
177
|
+
[
|
|
178
|
+
"Use fetch_webpage to load the page first"
|
|
179
|
+
]
|
|
180
|
+
);
|
|
80
181
|
}
|
|
81
182
|
|
|
82
183
|
try {
|
|
@@ -113,10 +214,14 @@ export async function clickElement({ url, selector, text, waitForElementTimeout
|
|
|
113
214
|
}
|
|
114
215
|
|
|
115
216
|
if (!elementHandle || !elementHandle.asElement()) {
|
|
116
|
-
return
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
217
|
+
return new ErrorResponse(
|
|
218
|
+
selector ? `Element not found: ${selector}` : `Element with text "${text}" not found`,
|
|
219
|
+
[
|
|
220
|
+
"Use get_current_html to verify page content",
|
|
221
|
+
"Try a different selector or text",
|
|
222
|
+
"Check if the element is visible on the page"
|
|
223
|
+
]
|
|
224
|
+
);
|
|
120
225
|
}
|
|
121
226
|
|
|
122
227
|
// Scroll element into view and click
|
|
@@ -142,13 +247,17 @@ export async function clickElement({ url, selector, text, waitForElementTimeout
|
|
|
142
247
|
const currentUrl = page.url();
|
|
143
248
|
const html = await extractAndProcessHtml(page, removeUnnecessaryHTML);
|
|
144
249
|
|
|
145
|
-
return
|
|
146
|
-
success: true,
|
|
147
|
-
message: selector ? `Clicked element: ${selector}` : `Clicked element with text: "${text}"`,
|
|
250
|
+
return new ClickElementSuccessResponse(
|
|
148
251
|
currentUrl,
|
|
252
|
+
selector ? `Clicked element: ${selector}` : `Clicked element with text: "${text}"`,
|
|
149
253
|
html,
|
|
150
|
-
|
|
151
|
-
|
|
254
|
+
[
|
|
255
|
+
"Use click_element again to navigate further",
|
|
256
|
+
"Use type_text to fill forms if needed",
|
|
257
|
+
"Use get_current_html to refresh page state",
|
|
258
|
+
"Use close_tab when finished"
|
|
259
|
+
]
|
|
260
|
+
);
|
|
152
261
|
} else {
|
|
153
262
|
// Wait for page to stabilize even for fast clicks (ensures JS has finished)
|
|
154
263
|
await waitForPageStability(page);
|
|
@@ -160,17 +269,25 @@ export async function clickElement({ url, selector, text, waitForElementTimeout
|
|
|
160
269
|
|
|
161
270
|
const currentUrl = page.url();
|
|
162
271
|
|
|
163
|
-
return
|
|
164
|
-
success: true,
|
|
165
|
-
message: selector ? `Clicked element: ${selector}` : `Clicked element with text: "${text}"`,
|
|
272
|
+
return new ClickElementSuccessResponse(
|
|
166
273
|
currentUrl,
|
|
167
|
-
|
|
168
|
-
|
|
274
|
+
selector ? `Clicked element: ${selector}` : `Clicked element with text: "${text}"`,
|
|
275
|
+
null,
|
|
276
|
+
[
|
|
277
|
+
"Use get_current_html to see updated page state",
|
|
278
|
+
"Use click_element or type_text for more interactions",
|
|
279
|
+
"Use close_tab when finished"
|
|
280
|
+
]
|
|
281
|
+
);
|
|
169
282
|
}
|
|
170
283
|
} catch (err) {
|
|
171
|
-
return
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
284
|
+
return new ErrorResponse(
|
|
285
|
+
`Failed to click element: ${err.message}`,
|
|
286
|
+
[
|
|
287
|
+
"Use get_current_html to check current page state",
|
|
288
|
+
"Verify the selector or text is correct",
|
|
289
|
+
"Try fetch_webpage to reload if page is stale"
|
|
290
|
+
]
|
|
291
|
+
);
|
|
175
292
|
}
|
|
176
293
|
}
|
package/src/actions/close-tab.js
CHANGED
|
@@ -1,8 +1,91 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Close a tab for a specific domain
|
|
2
|
+
* close-tab.js - Close a tab for a specific domain
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
import { domainPages } from '../core/browser.js';
|
|
6
|
+
import { MCPResponse, ErrorResponse } from '../core/responses.js';
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* @typedef {import('@modelcontextprotocol/sdk/types.js').Tool} Tool
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
// ============================================================================
|
|
13
|
+
// RESPONSE CLASS
|
|
14
|
+
// ============================================================================
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* Response for successful close_tab operations
|
|
18
|
+
*/
|
|
19
|
+
export class CloseTabSuccessResponse extends MCPResponse {
|
|
20
|
+
/**
|
|
21
|
+
* @param {string} message - Success message
|
|
22
|
+
* @param {string} hostname - Hostname that was closed
|
|
23
|
+
* @param {string[]} nextSteps - Suggested next actions
|
|
24
|
+
*/
|
|
25
|
+
constructor(message, hostname, nextSteps) {
|
|
26
|
+
super(nextSteps);
|
|
27
|
+
|
|
28
|
+
if (typeof message !== 'string') {
|
|
29
|
+
throw new TypeError('message must be a string');
|
|
30
|
+
}
|
|
31
|
+
if (typeof hostname !== 'string') {
|
|
32
|
+
throw new TypeError('hostname must be a string');
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
this.message = message;
|
|
36
|
+
this.hostname = hostname;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
_getAdditionalFields() {
|
|
40
|
+
return {
|
|
41
|
+
message: this.message,
|
|
42
|
+
hostname: this.hostname
|
|
43
|
+
};
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
getTextSummary() {
|
|
47
|
+
return this.message || `Closed tab for: ${this.hostname}`;
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
// ============================================================================
|
|
52
|
+
// TOOL DEFINITION
|
|
53
|
+
// ============================================================================
|
|
54
|
+
|
|
55
|
+
/**
|
|
56
|
+
* @type {Tool}
|
|
57
|
+
*/
|
|
58
|
+
export const CLOSE_TAB_TOOL = {
|
|
59
|
+
name: "close_tab",
|
|
60
|
+
title: "Close Tab",
|
|
61
|
+
description: "**BROWSER MANAGEMENT** - Closes the browser tab for the given URL's hostname. This removes the page from the tab pool and forces a fresh session on the next visit to that hostname. Useful for memory management or when you need to clear session state. Note: Uses exact hostname match (www.example.com and example.com are treated as different tabs).",
|
|
62
|
+
inputSchema: {
|
|
63
|
+
type: "object",
|
|
64
|
+
properties: {
|
|
65
|
+
url: { type: "string", description: "The URL whose hostname tab should be closed" }
|
|
66
|
+
},
|
|
67
|
+
required: ["url"],
|
|
68
|
+
additionalProperties: false
|
|
69
|
+
},
|
|
70
|
+
outputSchema: {
|
|
71
|
+
type: "object",
|
|
72
|
+
properties: {
|
|
73
|
+
message: { type: "string", description: "Success message" },
|
|
74
|
+
hostname: { type: "string", description: "Hostname that was closed" },
|
|
75
|
+
nextSteps: {
|
|
76
|
+
type: "array",
|
|
77
|
+
items: { type: "string" },
|
|
78
|
+
description: "Suggested next actions"
|
|
79
|
+
}
|
|
80
|
+
},
|
|
81
|
+
required: ["message", "hostname", "nextSteps"],
|
|
82
|
+
additionalProperties: false
|
|
83
|
+
}
|
|
84
|
+
};
|
|
85
|
+
|
|
86
|
+
// ============================================================================
|
|
87
|
+
// ACTION FUNCTION
|
|
88
|
+
// ============================================================================
|
|
6
89
|
|
|
7
90
|
/**
|
|
8
91
|
* Closes the browser tab for the given URL's hostname and removes it from the tab pool.
|
|
@@ -15,10 +98,12 @@ export async function closeTab({ url }) {
|
|
|
15
98
|
try {
|
|
16
99
|
// Validate URL
|
|
17
100
|
if (!url || typeof url !== 'string') {
|
|
18
|
-
return
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
101
|
+
return new ErrorResponse(
|
|
102
|
+
'Invalid or missing URL parameter',
|
|
103
|
+
[
|
|
104
|
+
"Provide a valid URL parameter"
|
|
105
|
+
]
|
|
106
|
+
);
|
|
22
107
|
}
|
|
23
108
|
|
|
24
109
|
// Extract hostname from URL
|
|
@@ -26,10 +111,12 @@ export async function closeTab({ url }) {
|
|
|
26
111
|
try {
|
|
27
112
|
hostname = new URL(url).hostname;
|
|
28
113
|
} catch {
|
|
29
|
-
return
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
114
|
+
return new ErrorResponse(
|
|
115
|
+
'Invalid URL format',
|
|
116
|
+
[
|
|
117
|
+
"Provide a valid URL with protocol (e.g., https://example.com)"
|
|
118
|
+
]
|
|
119
|
+
);
|
|
33
120
|
}
|
|
34
121
|
|
|
35
122
|
// Check if we have a tab for this hostname
|
|
@@ -50,12 +137,13 @@ export async function closeTab({ url }) {
|
|
|
50
137
|
}
|
|
51
138
|
|
|
52
139
|
if (!foundHostname) {
|
|
53
|
-
return
|
|
54
|
-
|
|
140
|
+
return new CloseTabSuccessResponse(
|
|
141
|
+
'No open tab found for this hostname',
|
|
55
142
|
hostname,
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
143
|
+
[
|
|
144
|
+
"Use fetch_webpage to open a new page if needed"
|
|
145
|
+
]
|
|
146
|
+
);
|
|
59
147
|
}
|
|
60
148
|
|
|
61
149
|
// Found the page by URL - use that hostname
|
|
@@ -68,12 +156,13 @@ export async function closeTab({ url }) {
|
|
|
68
156
|
// Check if page is already closed
|
|
69
157
|
if (page.isClosed()) {
|
|
70
158
|
domainPages.delete(hostname);
|
|
71
|
-
return
|
|
72
|
-
|
|
159
|
+
return new CloseTabSuccessResponse(
|
|
160
|
+
'Tab was already closed',
|
|
73
161
|
hostname,
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
162
|
+
[
|
|
163
|
+
"Use fetch_webpage to open a new page if needed"
|
|
164
|
+
]
|
|
165
|
+
);
|
|
77
166
|
}
|
|
78
167
|
|
|
79
168
|
// Close the page
|
|
@@ -84,17 +173,22 @@ export async function closeTab({ url }) {
|
|
|
84
173
|
|
|
85
174
|
console.error(`[MCPBrowser] Closed tab for hostname: ${hostname}`);
|
|
86
175
|
|
|
87
|
-
return
|
|
88
|
-
|
|
176
|
+
return new CloseTabSuccessResponse(
|
|
177
|
+
`Successfully closed tab for ${hostname}`,
|
|
89
178
|
hostname,
|
|
90
|
-
|
|
91
|
-
|
|
179
|
+
[
|
|
180
|
+
"Use fetch_webpage to open a new page if needed"
|
|
181
|
+
]
|
|
182
|
+
);
|
|
92
183
|
|
|
93
184
|
} catch (error) {
|
|
94
185
|
console.error(`[MCPBrowser] Error closing tab:`, error);
|
|
95
|
-
return
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
186
|
+
return new ErrorResponse(
|
|
187
|
+
error.message,
|
|
188
|
+
[
|
|
189
|
+
"Check if the URL is correct",
|
|
190
|
+
"Verify a page exists for this hostname"
|
|
191
|
+
]
|
|
192
|
+
);
|
|
99
193
|
}
|
|
100
194
|
}
|
|
package/src/actions/fetch-page.js
CHANGED
|
@@ -1,11 +1,96 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* fetch.js - Main page fetching functionality
|
|
2
|
+
* fetch-page.js - Main page fetching functionality
|
|
3
3
|
* Handles web page fetching with authentication flows and tab reuse
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
6
|
import { getBrowser, domainPages } from '../core/browser.js';
|
|
7
7
|
import { getOrCreatePage, navigateToUrl, extractAndProcessHtml, waitForPageStability } from '../core/page.js';
|
|
8
8
|
import { detectRedirectType, waitForAutoAuth, waitForManualAuth } from '../core/auth.js';
|
|
9
|
+
import { MCPResponse, ErrorResponse } from '../core/responses.js';
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* @typedef {import('@modelcontextprotocol/sdk/types.js').Tool} Tool
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
// ============================================================================
|
|
16
|
+
// RESPONSE CLASS
|
|
17
|
+
// ============================================================================
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* Response for successful fetch_webpage operations
|
|
21
|
+
*/
|
|
22
|
+
export class FetchPageSuccessResponse extends MCPResponse {
|
|
23
|
+
/**
|
|
24
|
+
* @param {string} currentUrl - Final URL after redirects
|
|
25
|
+
* @param {string} html - Page HTML content
|
|
26
|
+
* @param {string[]} nextSteps - Suggested next actions
|
|
27
|
+
*/
|
|
28
|
+
constructor(currentUrl, html, nextSteps) {
|
|
29
|
+
super(nextSteps);
|
|
30
|
+
|
|
31
|
+
if (typeof currentUrl !== 'string') {
|
|
32
|
+
throw new TypeError('currentUrl must be a string');
|
|
33
|
+
}
|
|
34
|
+
if (typeof html !== 'string') {
|
|
35
|
+
throw new TypeError('html must be a string');
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
this.currentUrl = currentUrl;
|
|
39
|
+
this.html = html;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
_getAdditionalFields() {
|
|
43
|
+
return {
|
|
44
|
+
currentUrl: this.currentUrl,
|
|
45
|
+
html: this.html
|
|
46
|
+
};
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
getTextSummary() {
|
|
50
|
+
return `Successfully fetched: ${this.currentUrl}`;
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
// ============================================================================
|
|
55
|
+
// TOOL DEFINITION
|
|
56
|
+
// ============================================================================
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* @type {Tool}
|
|
60
|
+
*/
|
|
61
|
+
export const FETCH_WEBPAGE_TOOL = {
|
|
62
|
+
name: "fetch_webpage",
|
|
63
|
+
title: "Fetch Web Page",
|
|
64
|
+
description: "Fetches web pages using Chrome/Edge browser with full JavaScript rendering and authentication support. **REQUIRED for corporate/enterprise sites, any page requiring login/SSO, anti-bot/CAPTCHA pages, and JavaScript-heavy applications.** Use this as the DEFAULT for all webpage fetching - it handles simple HTML pages too. Opens browser for user authentication when needed. Never use generic HTTP fetch for pages that might require authentication.",
|
|
65
|
+
inputSchema: {
|
|
66
|
+
type: "object",
|
|
67
|
+
properties: {
|
|
68
|
+
url: { type: "string", description: "The URL to fetch" },
|
|
69
|
+
removeUnnecessaryHTML: { type: "boolean", description: "Remove Unnecessary HTML for size reduction by 90%.", default: true },
|
|
70
|
+
postLoadWait: { type: "number", description: "Milliseconds to wait after page load for SPAs to render dynamic content.", default: 1000 }
|
|
71
|
+
},
|
|
72
|
+
required: ["url"],
|
|
73
|
+
additionalProperties: false
|
|
74
|
+
},
|
|
75
|
+
outputSchema: {
|
|
76
|
+
type: "object",
|
|
77
|
+
properties: {
|
|
78
|
+
currentUrl: { type: "string", description: "Final URL after any redirects" },
|
|
79
|
+
html: { type: "string", description: "Page HTML content" },
|
|
80
|
+
nextSteps: {
|
|
81
|
+
type: "array",
|
|
82
|
+
items: { type: "string" },
|
|
83
|
+
description: "Suggested next actions"
|
|
84
|
+
}
|
|
85
|
+
},
|
|
86
|
+
required: ["currentUrl", "html", "nextSteps"],
|
|
87
|
+
additionalProperties: false
|
|
88
|
+
}
|
|
89
|
+
};
|
|
90
|
+
|
|
91
|
+
// ============================================================================
|
|
92
|
+
// ACTION FUNCTION
|
|
93
|
+
// ============================================================================
|
|
9
94
|
|
|
10
95
|
/**
|
|
11
96
|
* Fetch a web page using Chrome browser, with support for authentication flows and tab reuse.
|
|
@@ -19,16 +104,25 @@ import { detectRedirectType, waitForAutoAuth, waitForManualAuth } from '../core/
|
|
|
19
104
|
* @returns {Promise<Object>} Result object with success status, URL, HTML content, or error details
|
|
20
105
|
*/
|
|
21
106
|
export async function fetchPage({ url, removeUnnecessaryHTML = true, postLoadWait = 1000 }) {
|
|
107
|
+
// Handle missing URL with environment variable fallback
|
|
108
|
+
if (!url) {
|
|
109
|
+
const fallbackUrl = process.env.DEFAULT_FETCH_URL || process.env.MCP_DEFAULT_FETCH_URL;
|
|
110
|
+
if (fallbackUrl) {
|
|
111
|
+
url = fallbackUrl;
|
|
112
|
+
} else {
|
|
113
|
+
return new ErrorResponse(
|
|
114
|
+
"Missing url parameter and no DEFAULT_FETCH_URL/MCP_DEFAULT_FETCH_URL configured",
|
|
115
|
+
["Set DEFAULT_FETCH_URL or MCP_DEFAULT_FETCH_URL environment variable", "Provide url parameter in the request"]
|
|
116
|
+
);
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
|
|
22
120
|
// Hardcoded smart defaults - use 'domcontentloaded' for fastest loading
|
|
23
121
|
// (waits for HTML parsed, not all resources loaded - much faster for SPAs)
|
|
24
122
|
const waitUntil = "domcontentloaded";
|
|
25
123
|
const navigationTimeout = 30000;
|
|
26
124
|
const authCompletionTimeout = 600000;
|
|
27
125
|
const reuseLastKeptPage = true;
|
|
28
|
-
|
|
29
|
-
if (!url) {
|
|
30
|
-
throw new Error("url parameter is required");
|
|
31
|
-
}
|
|
32
126
|
|
|
33
127
|
// Parse hostname for domain-based tab reuse
|
|
34
128
|
let hostname;
|
|
@@ -88,12 +182,14 @@ export async function fetchPage({ url, removeUnnecessaryHTML = true, postLoadWai
|
|
|
88
182
|
const manualAuthResult = await waitForManualAuth(page, redirectInfo.hostname, redirectInfo.originalBase, authCompletionTimeout);
|
|
89
183
|
|
|
90
184
|
if (!manualAuthResult.success) {
|
|
91
|
-
return
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
185
|
+
return new ErrorResponse(
|
|
186
|
+
manualAuthResult.error,
|
|
187
|
+
[
|
|
188
|
+
"Complete authentication in the browser window",
|
|
189
|
+
"Call fetch_webpage again with the same URL to retry",
|
|
190
|
+
"Use close_tab to reset the session if authentication fails"
|
|
191
|
+
]
|
|
192
|
+
);
|
|
97
193
|
}
|
|
98
194
|
|
|
99
195
|
// Update hostname if changed
|
|
@@ -116,14 +212,25 @@ export async function fetchPage({ url, removeUnnecessaryHTML = true, postLoadWai
|
|
|
116
212
|
// Extract and process HTML
|
|
117
213
|
const processedHtml = await extractAndProcessHtml(page, removeUnnecessaryHTML);
|
|
118
214
|
|
|
119
|
-
return
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
215
|
+
return new FetchPageSuccessResponse(
|
|
216
|
+
page.url(),
|
|
217
|
+
processedHtml,
|
|
218
|
+
[
|
|
219
|
+
"Use click_element to interact with buttons/links on the page",
|
|
220
|
+
"Use type_text to fill in form fields",
|
|
221
|
+
"Use get_current_html to re-check page state after interactions",
|
|
222
|
+
"Use close_tab when finished to free browser resources"
|
|
223
|
+
]
|
|
224
|
+
);
|
|
124
225
|
} catch (err) {
|
|
125
|
-
|
|
126
|
-
|
|
226
|
+
return new ErrorResponse(
|
|
227
|
+
err.message || String(err),
|
|
228
|
+
[
|
|
229
|
+
"Complete authentication in the browser if prompted",
|
|
230
|
+
"Call fetch_webpage again with the same URL to retry",
|
|
231
|
+
"Use close_tab to reset the session if needed"
|
|
232
|
+
]
|
|
233
|
+
);
|
|
127
234
|
} finally {
|
|
128
235
|
// Tab always stays open - domain-aware reuse handles cleanup
|
|
129
236
|
}
|