@mcp-browser-kit/server 2.0.0 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.js +152 -30
- package/dist/main.js.map +1 -1
- package/package.json +3 -1
package/dist/main.js
CHANGED
|
@@ -6524,6 +6524,7 @@ var startTRpcServer = () => {
|
|
|
6524
6524
|
};
|
|
6525
6525
|
process.on("SIGTERM", shutdown);
|
|
6526
6526
|
process.on("SIGINT", shutdown);
|
|
6527
|
+
process.stdin.on("close", shutdown);
|
|
6527
6528
|
};
|
|
6528
6529
|
|
|
6529
6530
|
// ../../node_modules/@modelcontextprotocol/sdk/dist/esm/types.js
|
|
@@ -9474,9 +9475,12 @@ server.tool(
|
|
|
9474
9475
|
"getTabs",
|
|
9475
9476
|
[
|
|
9476
9477
|
combinationDescription,
|
|
9477
|
-
"
|
|
9478
|
-
"
|
|
9479
|
-
"
|
|
9478
|
+
"\u26A0\uFE0F CRITICAL FIRST STEP - ALWAYS START HERE BEFORE ANY OTHER TOOLS!",
|
|
9479
|
+
"* This tool MUST be called first to obtain the list of open browser tabs.",
|
|
9480
|
+
"* Each tab includes a unique ID that is required for all subsequent tool operations.",
|
|
9481
|
+
"* Note which tab is active (marked with 'active: true') as this is essential information.",
|
|
9482
|
+
"* The tabId from this list is required for captureActiveTab and all other interactions.",
|
|
9483
|
+
"* Workflow: 1) getTabs \u2192 2) captureActiveTab \u2192 3) interact with elements"
|
|
9480
9484
|
].join("\n"),
|
|
9481
9485
|
{},
|
|
9482
9486
|
async () => {
|
|
@@ -9491,15 +9495,55 @@ server.tool(
|
|
|
9491
9495
|
};
|
|
9492
9496
|
}
|
|
9493
9497
|
);
|
|
9498
|
+
server.tool(
|
|
9499
|
+
"captureActiveTab",
|
|
9500
|
+
[
|
|
9501
|
+
combinationDescription,
|
|
9502
|
+
"\u26A0\uFE0F SECOND REQUIRED STEP - AFTER getTabs AND BEFORE ANY INTERACTION!",
|
|
9503
|
+
"* SEQUENCE: First call getTabs to get tabId \u2192 Then use captureActiveTab with that tabId",
|
|
9504
|
+
"* IMPORTANT: This tool ONLY works for the ACTIVE tab (marked with 'active: true' in getTabs results).",
|
|
9505
|
+
"* If you need to work with an INACTIVE tab, use the Readable tools instead (getReadableElements + clickOnReadableElement).",
|
|
9506
|
+
"* ALWAYS capture a screenshot before attempting interaction with the active tab.",
|
|
9507
|
+
"* FOR ACTIVE TAB, VISIBLE ELEMENTS: Use coordinate-based Viewable tools (clickOnViewableElement, fillTextToViewableElement).",
|
|
9508
|
+
"* FOR INACTIVE TABS or HIDDEN ELEMENTS: Use Readable tools (getReadableElements + clickOnReadableElement/fillTextToReadableElement).",
|
|
9509
|
+
"* DECISION RULE: Active tab visible elements \u2192 Viewable tools. Inactive tabs or hidden elements \u2192 Readable tools.",
|
|
9510
|
+
"* Returns visual context showing exactly where form fields, buttons, and other UI elements are located.",
|
|
9511
|
+
"* After any page change or navigation, YOU MUST capture a new screenshot before further interactions."
|
|
9512
|
+
].join("\n"),
|
|
9513
|
+
{},
|
|
9514
|
+
async () => {
|
|
9515
|
+
const screenshot = await rpcClient.defer("captureActiveTab");
|
|
9516
|
+
return {
|
|
9517
|
+
content: [
|
|
9518
|
+
{
|
|
9519
|
+
type: "text",
|
|
9520
|
+
text: `Screenshot size [${screenshot.width}x${screenshot.height}] - Use these dimensions to calculate exact pixel coordinates for clicking and text entry`
|
|
9521
|
+
},
|
|
9522
|
+
{
|
|
9523
|
+
type: "image",
|
|
9524
|
+
mimeType: screenshot.mimeType,
|
|
9525
|
+
data: screenshot.data
|
|
9526
|
+
}
|
|
9527
|
+
]
|
|
9528
|
+
};
|
|
9529
|
+
}
|
|
9530
|
+
);
|
|
9494
9531
|
server.tool(
|
|
9495
9532
|
"getInnerText",
|
|
9496
9533
|
[
|
|
9497
9534
|
combinationDescription,
|
|
9498
|
-
"
|
|
9499
|
-
"
|
|
9535
|
+
"\u26A0\uFE0F FASTEST & MOST EFFICIENT TEXT EXTRACTION TOOL",
|
|
9536
|
+
"* PREFERRED FIRST CHOICE for any task that only needs to read text (no interaction required).",
|
|
9537
|
+
"* Much faster and more efficient than capturing screenshots for text-only operations.",
|
|
9538
|
+
"* Extracts all readable text content from the specified tab in a single call.",
|
|
9539
|
+
"* Ideal for: content analysis, information extraction, summarization, and search tasks.",
|
|
9540
|
+
"* Perfect for generating suggestions, answering questions, or analyzing page content.",
|
|
9541
|
+
"* Use this BEFORE screenshot capture when you only need to understand text context.",
|
|
9542
|
+
"* Works on any tab using tabId from getTabs, not just active tabs.",
|
|
9543
|
+
"* WARNING: This text extraction cannot be used for direct element interaction."
|
|
9500
9544
|
].join("\n"),
|
|
9501
9545
|
{
|
|
9502
|
-
tabId: z.string().describe("Tab ID to
|
|
9546
|
+
tabId: z.string().describe("Tab ID to extract text from")
|
|
9503
9547
|
},
|
|
9504
9548
|
async ({ tabId }) => {
|
|
9505
9549
|
const innerText = await rpcClient.defer("getInnerText", tabId);
|
|
@@ -9517,11 +9561,14 @@ server.tool(
|
|
|
9517
9561
|
"getReadableElements",
|
|
9518
9562
|
[
|
|
9519
9563
|
combinationDescription,
|
|
9520
|
-
"
|
|
9521
|
-
"
|
|
9564
|
+
"* Returns an indexed list of all interactive elements in the format: [index, HTML tag, accessible text].",
|
|
9565
|
+
"* This creates a map of elements you can interact with programmatically.",
|
|
9566
|
+
"* The element indexes can be used with clickOnReadableElement and fillTextToReadableElement.",
|
|
9567
|
+
"* Ideal for forms, navigation menus, and interactive page components.",
|
|
9568
|
+
"* Use with tabId from getTabs to target specific tabs."
|
|
9522
9569
|
].join("\n"),
|
|
9523
9570
|
{
|
|
9524
|
-
tabId: z.string().describe("Tab ID to
|
|
9571
|
+
tabId: z.string().describe("Tab ID to extract elements from")
|
|
9525
9572
|
},
|
|
9526
9573
|
async ({ tabId }) => {
|
|
9527
9574
|
const elements = await rpcClient.defer("getReadableElements", tabId);
|
|
@@ -9536,19 +9583,88 @@ server.tool(
|
|
|
9536
9583
|
}
|
|
9537
9584
|
);
|
|
9538
9585
|
server.tool(
|
|
9539
|
-
"
|
|
9586
|
+
"clickOnViewableElement",
|
|
9587
|
+
[
|
|
9588
|
+
combinationDescription,
|
|
9589
|
+
"\u26A0\uFE0F PREFERRED TOOL - Use this FIRST for ANY element visible in the screenshot from captureActiveTab!",
|
|
9590
|
+
"* Works on the ACTIVE tab for any element you can SEE in the viewport screenshot.",
|
|
9591
|
+
"* FIRST CHOICE: Always prefer this tool over clickOnReadableElement when the target is visible.",
|
|
9592
|
+
"* Simulates a mouse click at the exact (x,y) coordinates specified.",
|
|
9593
|
+
"* Use captureActiveTab \u2192 identify target element \u2192 determine its CENTER coordinates \u2192 use this tool.",
|
|
9594
|
+
"* Calculate the center by finding the midpoint of the element's width and height.",
|
|
9595
|
+
"* For buttons and links, always aim for the center to ensure proper click registration.",
|
|
9596
|
+
"* If this tool fails or element is outside viewport, THEN try clickOnReadableElement as a fallback.",
|
|
9597
|
+
"* After clicking, capture another screenshot to verify the action succeeded."
|
|
9598
|
+
].join("\n"),
|
|
9599
|
+
{
|
|
9600
|
+
tabId: z.string().describe("Tab ID of the active tab"),
|
|
9601
|
+
x: z.number().describe("X coordinate (pixels) of the element to click"),
|
|
9602
|
+
y: z.number().describe("Y coordinate (pixels) of the element to click")
|
|
9603
|
+
},
|
|
9604
|
+
async ({ tabId, x, y }) => {
|
|
9605
|
+
await rpcClient.defer("clickOnViewableElement", tabId, x, y);
|
|
9606
|
+
return {
|
|
9607
|
+
content: [
|
|
9608
|
+
{
|
|
9609
|
+
type: "text",
|
|
9610
|
+
text: "Done"
|
|
9611
|
+
}
|
|
9612
|
+
]
|
|
9613
|
+
};
|
|
9614
|
+
}
|
|
9615
|
+
);
|
|
9616
|
+
server.tool(
|
|
9617
|
+
"fillTextToViewableElement",
|
|
9618
|
+
[
|
|
9619
|
+
combinationDescription,
|
|
9620
|
+
"\u26A0\uFE0F PREFERRED TOOL - Use this FIRST for ANY input field visible in the screenshot from captureActiveTab!",
|
|
9621
|
+
"* Works on the ACTIVE tab for any input field you can SEE in the viewport screenshot.",
|
|
9622
|
+
"* FIRST CHOICE: Always prefer this tool over fillTextToReadableElement when the input field is visible.",
|
|
9623
|
+
"* Sets text value for an input element at the specified (x,y) coordinates.",
|
|
9624
|
+
"* Use captureActiveTab \u2192 identify input field \u2192 determine its CENTER coordinates \u2192 use this tool.",
|
|
9625
|
+
"* Calculate the center by finding the midpoint of the input field's width and height.",
|
|
9626
|
+
"* Clicking on the center ensures the field is properly selected before text entry.",
|
|
9627
|
+
"* If this tool fails or input field is outside viewport, THEN try fillTextToReadableElement as a fallback.",
|
|
9628
|
+
"* For multi-step forms, fill all inputs before submitting the form."
|
|
9629
|
+
].join("\n"),
|
|
9630
|
+
{
|
|
9631
|
+
tabId: z.string().describe("Tab ID of the active tab"),
|
|
9632
|
+
x: z.number().describe("X coordinate (pixels) of the input element"),
|
|
9633
|
+
y: z.number().describe("Y coordinate (pixels) of the input element"),
|
|
9634
|
+
value: z.string().describe("Text to enter into the input field")
|
|
9635
|
+
},
|
|
9636
|
+
async ({ tabId, x, y, value }) => {
|
|
9637
|
+
await rpcClient.defer("fillTextToViewableElement", tabId, x, y, value);
|
|
9638
|
+
return {
|
|
9639
|
+
content: [
|
|
9640
|
+
{
|
|
9641
|
+
type: "text",
|
|
9642
|
+
text: "Done"
|
|
9643
|
+
}
|
|
9644
|
+
]
|
|
9645
|
+
};
|
|
9646
|
+
}
|
|
9647
|
+
);
|
|
9648
|
+
server.tool(
|
|
9649
|
+
"fillTextToReadableElement",
|
|
9540
9650
|
[
|
|
9541
9651
|
combinationDescription,
|
|
9542
|
-
"
|
|
9543
|
-
"
|
|
9652
|
+
"\u26A0\uFE0F FALLBACK TOOL - Only use when fillTextToViewableElement cannot help!",
|
|
9653
|
+
"* Use this tool ONLY if fillTextToViewableElement failed or the input field is not visible.",
|
|
9654
|
+
"* Acts as a direct fallback when coordinate-based interaction with visible elements doesn't work.",
|
|
9655
|
+
"* Sets text value for an input element identified by its index from getReadableElements.",
|
|
9656
|
+
"* Works on any tab, not just the active one.",
|
|
9657
|
+
"* Run getReadableElements first to obtain the correct element index.",
|
|
9658
|
+
"* Use when form fields are not visible without scrolling or are in iframes/embedded content.",
|
|
9659
|
+
"* Also effective for cases where coordinate-based interaction failed or is unreliable."
|
|
9544
9660
|
].join("\n"),
|
|
9545
9661
|
{
|
|
9546
|
-
tabId: z.string().describe("Tab ID to
|
|
9547
|
-
index: z.number().describe("
|
|
9548
|
-
value: z.string().describe("
|
|
9662
|
+
tabId: z.string().describe("Tab ID to target"),
|
|
9663
|
+
index: z.number().describe("Element index from getReadableElements"),
|
|
9664
|
+
value: z.string().describe("Text to enter into the input field")
|
|
9549
9665
|
},
|
|
9550
9666
|
async ({ tabId, index, value }) => {
|
|
9551
|
-
await rpcClient.defer("
|
|
9667
|
+
await rpcClient.defer("fillTextToReadableElement", tabId, index, value);
|
|
9552
9668
|
return {
|
|
9553
9669
|
content: [
|
|
9554
9670
|
{
|
|
@@ -9560,20 +9676,24 @@ server.tool(
|
|
|
9560
9676
|
}
|
|
9561
9677
|
);
|
|
9562
9678
|
server.tool(
|
|
9563
|
-
"
|
|
9679
|
+
"clickOnReadableElement",
|
|
9564
9680
|
[
|
|
9565
9681
|
combinationDescription,
|
|
9566
|
-
"
|
|
9567
|
-
"
|
|
9568
|
-
"
|
|
9569
|
-
"
|
|
9682
|
+
"\u26A0\uFE0F FALLBACK TOOL - Only use when clickOnViewableElement cannot help!",
|
|
9683
|
+
"* Use this tool ONLY if clickOnViewableElement failed or the target element is not visible.",
|
|
9684
|
+
"* Acts as a direct fallback when coordinate-based clicking on visible elements doesn't work.",
|
|
9685
|
+
"* Clicks on an element identified by its index from getReadableElements.",
|
|
9686
|
+
"* Works on any tab, not just the active one.",
|
|
9687
|
+
"* Run getReadableElements first to obtain the correct element index.",
|
|
9688
|
+
"* Use when buttons/links are not visible without scrolling or are in iframes/embedded content.",
|
|
9689
|
+
"* Also effective for cases where coordinate-based clicking failed or is unreliable."
|
|
9570
9690
|
].join("\n"),
|
|
9571
9691
|
{
|
|
9572
|
-
tabId: z.string().describe("Tab ID to
|
|
9573
|
-
index: z.number().describe("
|
|
9692
|
+
tabId: z.string().describe("Tab ID to target"),
|
|
9693
|
+
index: z.number().describe("Element index from getReadableElements")
|
|
9574
9694
|
},
|
|
9575
9695
|
async ({ tabId, index }) => {
|
|
9576
|
-
await rpcClient.defer("
|
|
9696
|
+
await rpcClient.defer("clickOnReadableElement", tabId, index);
|
|
9577
9697
|
return {
|
|
9578
9698
|
content: [
|
|
9579
9699
|
{
|
|
@@ -9588,14 +9708,16 @@ server.tool(
|
|
|
9588
9708
|
"invokeJsFn",
|
|
9589
9709
|
[
|
|
9590
9710
|
combinationDescription,
|
|
9591
|
-
"
|
|
9592
|
-
"
|
|
9593
|
-
"
|
|
9594
|
-
"
|
|
9711
|
+
"\u26A0\uFE0F USE THIS TOOL AS A LAST RESORT ONLY.",
|
|
9712
|
+
"* Executes custom JavaScript code directly in the page context.",
|
|
9713
|
+
"* Only use when standard tools (clicking, text input) cannot accomplish the task.",
|
|
9714
|
+
"* The JavaScript function body must be self-contained and return a serializable value.",
|
|
9715
|
+
"* Useful for complex interactions, custom data extraction, or handling dynamic elements.",
|
|
9716
|
+
"* Example: scrolling, accessing hidden elements, or interacting with complex widgets."
|
|
9595
9717
|
].join("\n"),
|
|
9596
9718
|
{
|
|
9597
|
-
tabId: z.string().describe("Tab ID to
|
|
9598
|
-
fnBodyCode: z.string().describe("
|
|
9719
|
+
tabId: z.string().describe("Tab ID to run JavaScript in"),
|
|
9720
|
+
fnBodyCode: z.string().describe("JavaScript function body to execute in page context")
|
|
9599
9721
|
},
|
|
9600
9722
|
async ({ tabId, fnBodyCode }) => {
|
|
9601
9723
|
const result = await rpcClient.defer("invokeJsFn", tabId, fnBodyCode);
|