@mcp-browser-kit/server 1.0.9 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.js +177 -34
- package/dist/main.js.map +1 -1
- package/package.json +3 -1
package/dist/main.js
CHANGED
|
@@ -6513,17 +6513,17 @@ var startTRpcServer = () => {
|
|
|
6513
6513
|
}
|
|
6514
6514
|
});
|
|
6515
6515
|
wss.on("connection", (ws) => {
|
|
6516
|
-
console.log(`\u2795\u2795 Connection (${wss.clients.size})`);
|
|
6517
6516
|
ws.once("close", () => {
|
|
6518
|
-
console.log(`\u2796\u2796 Connection (${wss.clients.size})`);
|
|
6519
6517
|
});
|
|
6520
6518
|
});
|
|
6521
|
-
|
|
6522
|
-
process.on("SIGTERM", () => {
|
|
6523
|
-
console.log("SIGTERM");
|
|
6519
|
+
const shutdown = () => {
|
|
6524
6520
|
handler.broadcastReconnectNotification();
|
|
6525
|
-
wss.close()
|
|
6526
|
-
|
|
6521
|
+
wss.close(() => {
|
|
6522
|
+
process.exit(0);
|
|
6523
|
+
});
|
|
6524
|
+
};
|
|
6525
|
+
process.on("SIGTERM", shutdown);
|
|
6526
|
+
process.on("SIGINT", shutdown);
|
|
6527
6527
|
};
|
|
6528
6528
|
|
|
6529
6529
|
// ../../node_modules/@modelcontextprotocol/sdk/dist/esm/types.js
|
|
@@ -9474,9 +9474,12 @@ server.tool(
|
|
|
9474
9474
|
"getTabs",
|
|
9475
9475
|
[
|
|
9476
9476
|
combinationDescription,
|
|
9477
|
-
"
|
|
9478
|
-
"
|
|
9479
|
-
"
|
|
9477
|
+
"\u26A0\uFE0F CRITICAL FIRST STEP - ALWAYS START HERE BEFORE ANY OTHER TOOLS!",
|
|
9478
|
+
"* This tool MUST be called first to obtain the list of open browser tabs.",
|
|
9479
|
+
"* Each tab includes a unique ID that is required for all subsequent tool operations.",
|
|
9480
|
+
"* Note which tab is active (marked with 'active: true') as this is essential information.",
|
|
9481
|
+
"* The tabId from this list is required for captureActiveTab and all other interactions.",
|
|
9482
|
+
"* Workflow: 1) getTabs \u2192 2) captureActiveTab \u2192 3) interact with elements"
|
|
9480
9483
|
].join("\n"),
|
|
9481
9484
|
{},
|
|
9482
9485
|
async () => {
|
|
@@ -9491,15 +9494,80 @@ server.tool(
|
|
|
9491
9494
|
};
|
|
9492
9495
|
}
|
|
9493
9496
|
);
|
|
9497
|
+
server.tool(
|
|
9498
|
+
"captureActiveTab",
|
|
9499
|
+
[
|
|
9500
|
+
combinationDescription,
|
|
9501
|
+
"\u26A0\uFE0F SECOND REQUIRED STEP - AFTER getTabs AND BEFORE ANY INTERACTION!",
|
|
9502
|
+
"* SEQUENCE: First call getTabs to get tabId \u2192 Then use captureActiveTab with that tabId",
|
|
9503
|
+
"* IMPORTANT: This tool ONLY works for the ACTIVE tab (marked with 'active: true' in getTabs results).",
|
|
9504
|
+
"* If you need to work with an INACTIVE tab, use the Readable tools instead (getReadableElements + clickOnReadableElement).",
|
|
9505
|
+
"* ALWAYS capture a screenshot before attempting interaction with the active tab.",
|
|
9506
|
+
"* FOR ACTIVE TAB, VISIBLE ELEMENTS: Use coordinate-based Viewable tools (clickOnViewableElement, fillTextToViewableElement).",
|
|
9507
|
+
"* FOR INACTIVE TABS or HIDDEN ELEMENTS: Use Readable tools (getReadableElements + clickOnReadableElement/fillTextToReadableElement).",
|
|
9508
|
+
"* DECISION RULE: Active tab visible elements \u2192 Viewable tools. Inactive tabs or hidden elements \u2192 Readable tools.",
|
|
9509
|
+
"* Returns visual context showing exactly where form fields, buttons, and other UI elements are located.",
|
|
9510
|
+
"* After any page change or navigation, YOU MUST capture a new screenshot before further interactions."
|
|
9511
|
+
].join("\n"),
|
|
9512
|
+
{},
|
|
9513
|
+
async () => {
|
|
9514
|
+
const screenshot = await rpcClient.defer("captureActiveTab");
|
|
9515
|
+
return {
|
|
9516
|
+
content: [
|
|
9517
|
+
{
|
|
9518
|
+
type: "text",
|
|
9519
|
+
text: `Screenshot size [${screenshot.width}x${screenshot.height}] - Use these dimensions to calculate exact pixel coordinates for clicking and text entry`
|
|
9520
|
+
},
|
|
9521
|
+
{
|
|
9522
|
+
type: "image",
|
|
9523
|
+
mimeType: screenshot.mimeType,
|
|
9524
|
+
data: screenshot.data
|
|
9525
|
+
}
|
|
9526
|
+
]
|
|
9527
|
+
};
|
|
9528
|
+
}
|
|
9529
|
+
);
|
|
9530
|
+
server.tool(
|
|
9531
|
+
"getInnerText",
|
|
9532
|
+
[
|
|
9533
|
+
combinationDescription,
|
|
9534
|
+
"\u26A0\uFE0F FASTEST & MOST EFFICIENT TEXT EXTRACTION TOOL",
|
|
9535
|
+
"* PREFERRED FIRST CHOICE for any task that only needs to read text (no interaction required).",
|
|
9536
|
+
"* Much faster and more efficient than capturing screenshots for text-only operations.",
|
|
9537
|
+
"* Extracts all readable text content from the specified tab in a single call.",
|
|
9538
|
+
"* Ideal for: content analysis, information extraction, summarization, and search tasks.",
|
|
9539
|
+
"* Perfect for generating suggestions, answering questions, or analyzing page content.",
|
|
9540
|
+
"* Use this BEFORE screenshot capture when you only need to understand text context.",
|
|
9541
|
+
"* Works on any tab using tabId from getTabs, not just active tabs.",
|
|
9542
|
+
"* WARNING: This text extraction cannot be used for direct element interaction."
|
|
9543
|
+
].join("\n"),
|
|
9544
|
+
{
|
|
9545
|
+
tabId: z.string().describe("Tab ID to extract text from")
|
|
9546
|
+
},
|
|
9547
|
+
async ({ tabId }) => {
|
|
9548
|
+
const innerText = await rpcClient.defer("getInnerText", tabId);
|
|
9549
|
+
return {
|
|
9550
|
+
content: [
|
|
9551
|
+
{
|
|
9552
|
+
type: "text",
|
|
9553
|
+
text: `InnerText: ${JSON.stringify(innerText)}`
|
|
9554
|
+
}
|
|
9555
|
+
]
|
|
9556
|
+
};
|
|
9557
|
+
}
|
|
9558
|
+
);
|
|
9494
9559
|
server.tool(
|
|
9495
9560
|
"getReadableElements",
|
|
9496
9561
|
[
|
|
9497
9562
|
combinationDescription,
|
|
9498
|
-
"
|
|
9499
|
-
"
|
|
9563
|
+
"* Returns an indexed list of all interactive elements in the format: [index, HTML tag, accessible text].",
|
|
9564
|
+
"* This creates a map of elements you can interact with programmatically.",
|
|
9565
|
+
"* The element indexes can be used with clickOnReadableElement and fillTextToReadableElement.",
|
|
9566
|
+
"* Ideal for forms, navigation menus, and interactive page components.",
|
|
9567
|
+
"* Use with tabId from getTabs to target specific tabs."
|
|
9500
9568
|
].join("\n"),
|
|
9501
9569
|
{
|
|
9502
|
-
tabId: z.string().describe("Tab ID to
|
|
9570
|
+
tabId: z.string().describe("Tab ID to extract elements from")
|
|
9503
9571
|
},
|
|
9504
9572
|
async ({ tabId }) => {
|
|
9505
9573
|
const elements = await rpcClient.defer("getReadableElements", tabId);
|
|
@@ -9514,19 +9582,88 @@ server.tool(
|
|
|
9514
9582
|
}
|
|
9515
9583
|
);
|
|
9516
9584
|
server.tool(
|
|
9517
|
-
"
|
|
9585
|
+
"clickOnViewableElement",
|
|
9586
|
+
[
|
|
9587
|
+
combinationDescription,
|
|
9588
|
+
"\u26A0\uFE0F PREFERRED TOOL - Use this FIRST for ANY element visible in the screenshot from captureActiveTab!",
|
|
9589
|
+
"* Works on the ACTIVE tab for any element you can SEE in the viewport screenshot.",
|
|
9590
|
+
"* FIRST CHOICE: Always prefer this tool over clickOnReadableElement when the target is visible.",
|
|
9591
|
+
"* Simulates a mouse click at the exact (x,y) coordinates specified.",
|
|
9592
|
+
"* Use captureActiveTab \u2192 identify target element \u2192 determine its CENTER coordinates \u2192 use this tool.",
|
|
9593
|
+
"* Calculate the center by finding the midpoint of the element's width and height.",
|
|
9594
|
+
"* For buttons and links, always aim for the center to ensure proper click registration.",
|
|
9595
|
+
"* If this tool fails or element is outside viewport, THEN try clickOnReadableElement as a fallback.",
|
|
9596
|
+
"* After clicking, capture another screenshot to verify the action succeeded."
|
|
9597
|
+
].join("\n"),
|
|
9598
|
+
{
|
|
9599
|
+
tabId: z.string().describe("Tab ID of the active tab"),
|
|
9600
|
+
x: z.number().describe("X coordinate (pixels) of the element to click"),
|
|
9601
|
+
y: z.number().describe("Y coordinate (pixels) of the element to click")
|
|
9602
|
+
},
|
|
9603
|
+
async ({ tabId, x, y }) => {
|
|
9604
|
+
await rpcClient.defer("clickOnViewableElement", tabId, x, y);
|
|
9605
|
+
return {
|
|
9606
|
+
content: [
|
|
9607
|
+
{
|
|
9608
|
+
type: "text",
|
|
9609
|
+
text: "Done"
|
|
9610
|
+
}
|
|
9611
|
+
]
|
|
9612
|
+
};
|
|
9613
|
+
}
|
|
9614
|
+
);
|
|
9615
|
+
server.tool(
|
|
9616
|
+
"fillTextToViewableElement",
|
|
9617
|
+
[
|
|
9618
|
+
combinationDescription,
|
|
9619
|
+
"\u26A0\uFE0F PREFERRED TOOL - Use this FIRST for ANY input field visible in the screenshot from captureActiveTab!",
|
|
9620
|
+
"* Works on the ACTIVE tab for any input field you can SEE in the viewport screenshot.",
|
|
9621
|
+
"* FIRST CHOICE: Always prefer this tool over fillTextToReadableElement when the input field is visible.",
|
|
9622
|
+
"* Sets text value for an input element at the specified (x,y) coordinates.",
|
|
9623
|
+
"* Use captureActiveTab \u2192 identify input field \u2192 determine its CENTER coordinates \u2192 use this tool.",
|
|
9624
|
+
"* Calculate the center by finding the midpoint of the input field's width and height.",
|
|
9625
|
+
"* Clicking on the center ensures the field is properly selected before text entry.",
|
|
9626
|
+
"* If this tool fails or input field is outside viewport, THEN try fillTextToReadableElement as a fallback.",
|
|
9627
|
+
"* For multi-step forms, fill all inputs before submitting the form."
|
|
9628
|
+
].join("\n"),
|
|
9629
|
+
{
|
|
9630
|
+
tabId: z.string().describe("Tab ID of the active tab"),
|
|
9631
|
+
x: z.number().describe("X coordinate (pixels) of the input element"),
|
|
9632
|
+
y: z.number().describe("Y coordinate (pixels) of the input element"),
|
|
9633
|
+
value: z.string().describe("Text to enter into the input field")
|
|
9634
|
+
},
|
|
9635
|
+
async ({ tabId, x, y, value }) => {
|
|
9636
|
+
await rpcClient.defer("fillTextToViewableElement", tabId, x, y, value);
|
|
9637
|
+
return {
|
|
9638
|
+
content: [
|
|
9639
|
+
{
|
|
9640
|
+
type: "text",
|
|
9641
|
+
text: "Done"
|
|
9642
|
+
}
|
|
9643
|
+
]
|
|
9644
|
+
};
|
|
9645
|
+
}
|
|
9646
|
+
);
|
|
9647
|
+
server.tool(
|
|
9648
|
+
"fillTextToReadableElement",
|
|
9518
9649
|
[
|
|
9519
9650
|
combinationDescription,
|
|
9520
|
-
"
|
|
9521
|
-
"
|
|
9651
|
+
"\u26A0\uFE0F FALLBACK TOOL - Only use when fillTextToViewableElement cannot help!",
|
|
9652
|
+
"* Use this tool ONLY if fillTextToViewableElement failed or the input field is not visible.",
|
|
9653
|
+
"* Acts as a direct fallback when coordinate-based interaction with visible elements doesn't work.",
|
|
9654
|
+
"* Sets text value for an input element identified by its index from getReadableElements.",
|
|
9655
|
+
"* Works on any tab, not just the active one.",
|
|
9656
|
+
"* Run getReadableElements first to obtain the correct element index.",
|
|
9657
|
+
"* Use when form fields are not visible without scrolling or are in iframes/embedded content.",
|
|
9658
|
+
"* Also effective for cases where coordinate-based interaction failed or is unreliable."
|
|
9522
9659
|
].join("\n"),
|
|
9523
9660
|
{
|
|
9524
|
-
tabId: z.string().describe("Tab ID to
|
|
9525
|
-
index: z.number().describe("
|
|
9526
|
-
value: z.string().describe("
|
|
9661
|
+
tabId: z.string().describe("Tab ID to target"),
|
|
9662
|
+
index: z.number().describe("Element index from getReadableElements"),
|
|
9663
|
+
value: z.string().describe("Text to enter into the input field")
|
|
9527
9664
|
},
|
|
9528
9665
|
async ({ tabId, index, value }) => {
|
|
9529
|
-
await rpcClient.defer("
|
|
9666
|
+
await rpcClient.defer("fillTextToReadableElement", tabId, index, value);
|
|
9530
9667
|
return {
|
|
9531
9668
|
content: [
|
|
9532
9669
|
{
|
|
@@ -9538,20 +9675,24 @@ server.tool(
|
|
|
9538
9675
|
}
|
|
9539
9676
|
);
|
|
9540
9677
|
server.tool(
|
|
9541
|
-
"
|
|
9678
|
+
"clickOnReadableElement",
|
|
9542
9679
|
[
|
|
9543
9680
|
combinationDescription,
|
|
9544
|
-
"
|
|
9545
|
-
"
|
|
9546
|
-
"
|
|
9547
|
-
"
|
|
9681
|
+
"\u26A0\uFE0F FALLBACK TOOL - Only use when clickOnViewableElement cannot help!",
|
|
9682
|
+
"* Use this tool ONLY if clickOnViewableElement failed or the target element is not visible.",
|
|
9683
|
+
"* Acts as a direct fallback when coordinate-based clicking on visible elements doesn't work.",
|
|
9684
|
+
"* Clicks on an element identified by its index from getReadableElements.",
|
|
9685
|
+
"* Works on any tab, not just the active one.",
|
|
9686
|
+
"* Run getReadableElements first to obtain the correct element index.",
|
|
9687
|
+
"* Use when buttons/links are not visible without scrolling or are in iframes/embedded content.",
|
|
9688
|
+
"* Also effective for cases where coordinate-based clicking failed or is unreliable."
|
|
9548
9689
|
].join("\n"),
|
|
9549
9690
|
{
|
|
9550
|
-
tabId: z.string().describe("Tab ID to
|
|
9551
|
-
index: z.number().describe("
|
|
9691
|
+
tabId: z.string().describe("Tab ID to target"),
|
|
9692
|
+
index: z.number().describe("Element index from getReadableElements")
|
|
9552
9693
|
},
|
|
9553
9694
|
async ({ tabId, index }) => {
|
|
9554
|
-
await rpcClient.defer("
|
|
9695
|
+
await rpcClient.defer("clickOnReadableElement", tabId, index);
|
|
9555
9696
|
return {
|
|
9556
9697
|
content: [
|
|
9557
9698
|
{
|
|
@@ -9566,14 +9707,16 @@ server.tool(
|
|
|
9566
9707
|
"invokeJsFn",
|
|
9567
9708
|
[
|
|
9568
9709
|
combinationDescription,
|
|
9569
|
-
"
|
|
9570
|
-
"
|
|
9571
|
-
"
|
|
9572
|
-
"
|
|
9710
|
+
"\u26A0\uFE0F USE THIS TOOL AS A LAST RESORT ONLY.",
|
|
9711
|
+
"* Executes custom JavaScript code directly in the page context.",
|
|
9712
|
+
"* Only use when standard tools (clicking, text input) cannot accomplish the task.",
|
|
9713
|
+
"* The JavaScript function body must be self-contained and return a serializable value.",
|
|
9714
|
+
"* Useful for complex interactions, custom data extraction, or handling dynamic elements.",
|
|
9715
|
+
"* Example: scrolling, accessing hidden elements, or interacting with complex widgets."
|
|
9573
9716
|
].join("\n"),
|
|
9574
9717
|
{
|
|
9575
|
-
tabId: z.string().describe("Tab ID to
|
|
9576
|
-
fnBodyCode: z.string().describe("
|
|
9718
|
+
tabId: z.string().describe("Tab ID to run JavaScript in"),
|
|
9719
|
+
fnBodyCode: z.string().describe("JavaScript function body to execute in page context")
|
|
9577
9720
|
},
|
|
9578
9721
|
async ({ tabId, fnBodyCode }) => {
|
|
9579
9722
|
const result = await rpcClient.defer("invokeJsFn", tabId, fnBodyCode);
|