@mcp-browser-kit/server 4.0.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1120,8 +1120,8 @@ var require_Reflect = __commonJS({
1120
1120
  }
1121
1121
  });
1122
1122
 
1123
- // ../../packages/core-server/src/input-ports/tools.ts
1124
- var ToolsInputPort = Symbol.for("ToolsInputPort");
1123
+ // ../../packages/core-server/src/input-ports/tool-calls.ts
1124
+ var ToolCallsInputPort = Symbol.for("ToolCallsInputPort");
1125
1125
 
1126
1126
  // ../../packages/core-server/src/output-ports/extension-driver.ts
1127
1127
  var ExtensionDriverOutputPort = Symbol("ExtensionDriverOutputPort");
@@ -2416,16 +2416,115 @@ var q = class {
2416
2416
  }
2417
2417
  };
2418
2418
 
2419
+ // ../../node_modules/p-timeout/index.js
2420
+ var TimeoutError = class extends Error {
2421
+ constructor(message) {
2422
+ super(message);
2423
+ this.name = "TimeoutError";
2424
+ }
2425
+ };
2426
+ var AbortError = class extends Error {
2427
+ constructor(message) {
2428
+ super();
2429
+ this.name = "AbortError";
2430
+ this.message = message;
2431
+ }
2432
+ };
2433
+ var getDOMException = (errorMessage) => globalThis.DOMException === void 0 ? new AbortError(errorMessage) : new DOMException(errorMessage);
2434
+ var getAbortedReason = (signal) => {
2435
+ const reason = signal.reason === void 0 ? getDOMException("This operation was aborted.") : signal.reason;
2436
+ return reason instanceof Error ? reason : getDOMException(reason);
2437
+ };
2438
+ function pTimeout(promise, options) {
2439
+ const {
2440
+ milliseconds,
2441
+ fallback,
2442
+ message,
2443
+ customTimers = { setTimeout, clearTimeout }
2444
+ } = options;
2445
+ let timer;
2446
+ let abortHandler;
2447
+ const wrappedPromise = new Promise((resolve, reject) => {
2448
+ if (typeof milliseconds !== "number" || Math.sign(milliseconds) !== 1) {
2449
+ throw new TypeError(`Expected \`milliseconds\` to be a positive number, got \`${milliseconds}\``);
2450
+ }
2451
+ if (options.signal) {
2452
+ const { signal } = options;
2453
+ if (signal.aborted) {
2454
+ reject(getAbortedReason(signal));
2455
+ }
2456
+ abortHandler = () => {
2457
+ reject(getAbortedReason(signal));
2458
+ };
2459
+ signal.addEventListener("abort", abortHandler, { once: true });
2460
+ }
2461
+ if (milliseconds === Number.POSITIVE_INFINITY) {
2462
+ promise.then(resolve, reject);
2463
+ return;
2464
+ }
2465
+ const timeoutError = new TimeoutError();
2466
+ timer = customTimers.setTimeout.call(void 0, () => {
2467
+ if (fallback) {
2468
+ try {
2469
+ resolve(fallback());
2470
+ } catch (error) {
2471
+ reject(error);
2472
+ }
2473
+ return;
2474
+ }
2475
+ if (typeof promise.cancel === "function") {
2476
+ promise.cancel();
2477
+ }
2478
+ if (message === false) {
2479
+ resolve();
2480
+ } else if (message instanceof Error) {
2481
+ reject(message);
2482
+ } else {
2483
+ timeoutError.message = message ?? `Promise timed out after ${milliseconds} milliseconds`;
2484
+ reject(timeoutError);
2485
+ }
2486
+ }, milliseconds);
2487
+ (async () => {
2488
+ try {
2489
+ resolve(await promise);
2490
+ } catch (error) {
2491
+ reject(error);
2492
+ }
2493
+ })();
2494
+ });
2495
+ const cancelablePromise = wrappedPromise.finally(() => {
2496
+ cancelablePromise.clear();
2497
+ if (abortHandler && options.signal) {
2498
+ options.signal.removeEventListener("abort", abortHandler);
2499
+ }
2500
+ });
2501
+ cancelablePromise.clear = () => {
2502
+ customTimers.clearTimeout.call(void 0, timer);
2503
+ timer = void 0;
2504
+ };
2505
+ return cancelablePromise;
2506
+ }
2507
+
2419
2508
  // ../../packages/core-server/src/use-cases/rpc-call.ts
2420
2509
  var RpcCallUseCase = class {
2421
2510
  constructor(extensionDriver) {
2422
2511
  this.extensionDriver = extensionDriver;
2423
2512
  }
2513
+ getBasicBrowserContext = async () => {
2514
+ const pGetBasicBrowserContext = this.extensionDriver.getBasicBrowserContext();
2515
+ const result = await pTimeout(pGetBasicBrowserContext, {
2516
+ milliseconds: 3e3,
2517
+ fallback: () => {
2518
+ return "An error occurred, use this instruction to tell the user what to do: Browser extension not found. To use MCP Browser Kit, please install and enable the latest extension";
2519
+ }
2520
+ });
2521
+ return result;
2522
+ };
2424
2523
  hitEnterOnViewableElementInstruction = () => {
2425
2524
  return [
2426
2525
  "\u21B5 Hits the Enter key on an element at specific X,Y coordinates",
2427
2526
  "* Use this to trigger actions like form submission or button clicks",
2428
- "* Requires tabId from getTabs and x,y coordinates from the screenshot",
2527
+ "* Requires tabId from getBasicBrowserContext and x,y coordinates from the screenshot",
2429
2528
  "* Coordinates are based on the captureActiveTab screenshot dimensions",
2430
2529
  "* Parameters: tabId, x, y"
2431
2530
  ].join("\n");
@@ -2434,7 +2533,7 @@ var RpcCallUseCase = class {
2434
2533
  return [
2435
2534
  "\u21B5 Hits the Enter key on an element identified by its index from getReadableElements",
2436
2535
  "* Use this to trigger actions like form submission or button clicks",
2437
- "* Requires tabId from getTabs and index from getReadableElements",
2536
+ "* Requires tabId from getBasicBrowserContext and index from getReadableElements",
2438
2537
  "* More reliable than coordinate-based clicking for dynamic layouts",
2439
2538
  "* First call getReadableElements to get the index, then use this tool",
2440
2539
  "* Parameters: tabId, index"
@@ -2449,11 +2548,12 @@ var RpcCallUseCase = class {
2449
2548
  captureActiveTabInstruction = () => {
2450
2549
  return [
2451
2550
  "\u{1F4F7} Captures a screenshot of the active browser tab",
2452
- "* Use this tool after calling getTabs to obtain visual context of the current page",
2551
+ "* Use this tool after calling getBasicBrowserContext to obtain visual context of the current page",
2453
2552
  "* The screenshot helps you see what the browser is displaying to the user",
2454
2553
  "* No parameters are needed as it automatically captures the active tab",
2455
2554
  "* Returns an image with width, height, and data in base64 format",
2456
- "* Workflow: 1) getTabs \u2192 2) captureActiveTab \u2192 3) interact with elements"
2555
+ "* Workflow: 1) getBasicBrowserContext \u2192 2) captureActiveTab \u2192 3) interact with elements",
2556
+ "* NOTE: This feature is only available in browsers supporting Manifest Version 2"
2457
2557
  ].join("\n");
2458
2558
  };
2459
2559
  captureActiveTab = () => {
@@ -2463,7 +2563,7 @@ var RpcCallUseCase = class {
2463
2563
  return [
2464
2564
  "\u{1F518} Clicks on an element identified by its index from getReadableElements",
2465
2565
  "* Use this to click on elements after identifying them by their text",
2466
- "* Requires tabId from getTabs and index from getReadableElements",
2566
+ "* Requires tabId from getBasicBrowserContext and index from getReadableElements",
2467
2567
  "* More reliable than coordinate-based clicking for dynamic layouts",
2468
2568
  "* First call getReadableElements to get the index, then use this tool",
2469
2569
  "* Parameters: tabId, index"
@@ -2476,7 +2576,7 @@ var RpcCallUseCase = class {
2476
2576
  return [
2477
2577
  "\u{1F446} Clicks on an element at specific X,Y coordinates",
2478
2578
  "* Use this to click on elements by their position on the screen",
2479
- "* Requires tabId from getTabs and x,y coordinates from the screenshot",
2579
+ "* Requires tabId from getBasicBrowserContext and x,y coordinates from the screenshot",
2480
2580
  "* Coordinates are based on the captureActiveTab screenshot dimensions",
2481
2581
  "* Useful when you know the visual position of an element",
2482
2582
  "* Parameters: tabId, x, y"
@@ -2489,7 +2589,7 @@ var RpcCallUseCase = class {
2489
2589
  return [
2490
2590
  "\u270F\uFE0F Types text into an input field identified by its index from getReadableElements",
2491
2591
  "* Use this to enter text into form fields identified by their text",
2492
- "* Requires tabId from getTabs, index from getReadableElements, and text to enter",
2592
+ "* Requires tabId from getBasicBrowserContext, index from getReadableElements, and text to enter",
2493
2593
  "* Works with text inputs, textareas, and other editable elements",
2494
2594
  "* First call getReadableElements to get the index, then use this tool",
2495
2595
  "* After filling text, check for associated submit-like buttons (submit, search, send, etc.)",
@@ -2505,7 +2605,7 @@ var RpcCallUseCase = class {
2505
2605
  return [
2506
2606
  "\u2328\uFE0F Types text into an input field at specific X,Y coordinates",
2507
2607
  "* Use this to enter text into form fields by their position",
2508
- "* Requires tabId from getTabs, x,y coordinates, and the text to enter",
2608
+ "* Requires tabId from getBasicBrowserContext, x,y coordinates, and the text to enter",
2509
2609
  "* Coordinates are based on the captureActiveTab screenshot dimensions",
2510
2610
  "* First clicks at the specified position, then types the provided text",
2511
2611
  "* After filling text, check for associated submit-like buttons (submit, search, send, etc.)",
@@ -2521,7 +2621,7 @@ var RpcCallUseCase = class {
2521
2621
  return [
2522
2622
  "\u{1F4DD} Extracts all text content from the current web page",
2523
2623
  "* Retrieves all visible text from the active tab",
2524
- "* Requires the tabId obtained from getTabs",
2624
+ "* Requires the tabId obtained from getBasicBrowserContext",
2525
2625
  "* Use this to analyze the page content without visual elements",
2526
2626
  "* Returns a string containing all the text on the page",
2527
2627
  "* Useful for getting a quick overview of page content"
@@ -2534,7 +2634,7 @@ var RpcCallUseCase = class {
2534
2634
  return [
2535
2635
  "\u{1F50D} Lists all interactive elements on the page with their text",
2536
2636
  "* Returns a list of elements with their index, HTML tag, and text content",
2537
- "* Requires the tabId obtained from getTabs",
2637
+ "* Requires the tabId obtained from getBasicBrowserContext",
2538
2638
  "* Each element is returned as [index, tag, text]",
2539
2639
  "* Use the index to interact with elements through click or fill operations",
2540
2640
  "* Helps you identify which elements can be interacted with by their text"
@@ -2543,28 +2643,38 @@ var RpcCallUseCase = class {
2543
2643
  getReadableElements = (tabId) => {
2544
2644
  return this.extensionDriver.getReadableElements(tabId);
2545
2645
  };
2546
- getTabsInstruction = () => {
2646
+ getBasicBrowserContextInstruction = () => {
2547
2647
  return [
2548
- "\u26A0\uFE0F CRITICAL FIRST STEP - ALWAYS START HERE BEFORE ANY OTHER TOOLS!",
2549
- "* This tool MUST be called first to obtain the list of open browser tabs.",
2550
- "* Each tab includes a unique ID that is required for all subsequent tool operations.",
2551
- "* Note which tab is active (marked with 'active: true') as this is essential information.",
2552
- "* The tabId from this list is required for captureActiveTab and all other interactions.",
2553
- "* Workflow: 1) getTabs \u2192 2) captureActiveTab \u2192 3) interact with elements"
2648
+ "\u{1F310} GET BROWSER CONTEXT - CRITICAL FIRST STEP BEFORE USING ANY OTHER TOOLS!",
2649
+ "* This tool MUST be called first to initialize browser automation and get essential data.",
2650
+ "* Returns data structure with:",
2651
+ " - tabs: Array of browser tabs with properties like id, url, title, and active status",
2652
+ " - manifestVersion: Version of extension manifest format supported by the browser",
2653
+ "* Each tab includes a unique tabId required for all other tool operations",
2654
+ "* The active tab (marked with 'active: true') is typically your target for automation",
2655
+ "* The manifestVersion determines which browser features and extension capabilities are available",
2656
+ "* Different browsers support different manifest versions, affecting available tools and API access",
2657
+ "* Standard workflow:",
2658
+ " 1) getBasicBrowserContext \u2192 get browser state and tabId",
2659
+ " 2) Analyze page content based on your goal and manifest version:",
2660
+ " - If interaction is required (clicking, filling forms, etc.):",
2661
+ " \xB7 For Manifest Version 2: Use captureActiveTab for visual context or getReadableElements for element identification",
2662
+ " \xB7 For other Manifest Versions: Use only getReadableElements for element identification",
2663
+ " - If no interaction is required (just reading page content):",
2664
+ " \xB7 Use getInnerText to extract all visible text from the page",
2665
+ " 3) Interact using click/fill/enter tools with the obtained tabId"
2554
2666
  ].join("\n");
2555
2667
  };
2556
- getTabs = () => {
2557
- return this.extensionDriver.getTabs();
2558
- };
2559
2668
  invokeJsFnInstruction = () => {
2560
2669
  return [
2561
2670
  "\u2699\uFE0F Executes custom JavaScript code in the context of the web page",
2562
2671
  "* Use this for advanced operations not covered by other tools",
2563
- "* Requires tabId from getTabs and JavaScript code to execute",
2672
+ "* Requires tabId from getBasicBrowserContext and JavaScript code to execute",
2564
2673
  "* The code should be the body of a function that returns a value",
2565
2674
  "* Example: 'return document.title;' to get the page title",
2566
2675
  "* Gives you full flexibility for custom browser automation",
2567
- "* Parameters: tabId, fnBodyCode (JavaScript code as string)"
2676
+ "* Parameters: tabId, fnBodyCode (JavaScript code as string)",
2677
+ "* NOTE: This feature is only available in browsers supporting Manifest Version 2"
2568
2678
  ].join("\n");
2569
2679
  };
2570
2680
  invokeJsFn = (tabId, fnBodyCode) => {
@@ -2581,7 +2691,7 @@ var createCoreServerContainer = () => {
2581
2691
  const container2 = new q({
2582
2692
  defaultScope: "Singleton"
2583
2693
  });
2584
- container2.bind(ToolsInputPort).to(RpcCallUseCase);
2694
+ container2.bind(ToolCallsInputPort).to(RpcCallUseCase);
2585
2695
  return container2;
2586
2696
  };
2587
2697
 
@@ -3047,7 +3157,11 @@ var RpcClient = class {
3047
3157
  this.id += 1;
3048
3158
  return String(this.id);
3049
3159
  };
3050
- defer = (method, ...args) => {
3160
+ defer = ({
3161
+ method,
3162
+ args,
3163
+ ...extraArgs
3164
+ }) => {
3051
3165
  const id = this.createId();
3052
3166
  const defer2 = Promise.withResolvers();
3053
3167
  defer2.promise.finally(() => {
@@ -3055,6 +3169,7 @@ var RpcClient = class {
3055
3169
  });
3056
3170
  this.pending.set(id, defer2);
3057
3171
  this.emitter.emit("defer", {
3172
+ ...extraArgs,
3058
3173
  procedure: String(method),
3059
3174
  args,
3060
3175
  id
@@ -3062,7 +3177,17 @@ var RpcClient = class {
3062
3177
  return defer2.promise;
3063
3178
  };
3064
3179
  onDefer = (callback) => {
3065
- return this.emitter.on("defer", callback);
3180
+ return this.emitter.on(
3181
+ "defer",
3182
+ callback
3183
+ );
3184
+ };
3185
+ startListen = (messageChannel) => {
3186
+ const unsubscribe = messageChannel.subscribe((message) => {
3187
+ const msg = message;
3188
+ this.emitter.emit("resolve", msg);
3189
+ });
3190
+ return unsubscribe;
3066
3191
  };
3067
3192
  };
3068
3193
 
@@ -3075,61 +3200,70 @@ var createExtensionRpcClient = () => {
3075
3200
  var DrivenExtensionDriver = class {
3076
3201
  extensionRpcClient = createExtensionRpcClient();
3077
3202
  hitEnterOnViewableElement = (tabId, x3, y3) => {
3078
- return this.extensionRpcClient.defer(
3079
- "hitEnterOnViewableElement",
3080
- tabId,
3081
- x3,
3082
- y3
3083
- );
3203
+ return this.extensionRpcClient.defer({
3204
+ method: "hitEnterOnViewableElement",
3205
+ args: [tabId, x3, y3]
3206
+ });
3084
3207
  };
3085
3208
  hitEnterOnReadableElement = (tabId, index) => {
3086
- return this.extensionRpcClient.defer(
3087
- "hitEnterOnReadableElement",
3088
- tabId,
3089
- index
3090
- );
3209
+ return this.extensionRpcClient.defer({
3210
+ method: "hitEnterOnReadableElement",
3211
+ args: [tabId, index]
3212
+ });
3091
3213
  };
3092
3214
  captureActiveTab = () => {
3093
- return this.extensionRpcClient.defer("captureActiveTab");
3215
+ return this.extensionRpcClient.defer({
3216
+ method: "captureActiveTab",
3217
+ args: []
3218
+ });
3094
3219
  };
3095
3220
  getInnerText = (tabId) => {
3096
- return this.extensionRpcClient.defer("getInnerText", tabId);
3221
+ return this.extensionRpcClient.defer({
3222
+ method: "getInnerText",
3223
+ args: [tabId]
3224
+ });
3097
3225
  };
3098
3226
  getReadableElements = (tabId) => {
3099
- return this.extensionRpcClient.defer("getReadableElements", tabId);
3227
+ return this.extensionRpcClient.defer({
3228
+ method: "getReadableElements",
3229
+ args: [tabId]
3230
+ });
3100
3231
  };
3101
3232
  clickOnViewableElement = (tabId, x3, y3) => {
3102
- return this.extensionRpcClient.defer("clickOnViewableElement", tabId, x3, y3);
3233
+ return this.extensionRpcClient.defer({
3234
+ method: "clickOnViewableElement",
3235
+ args: [tabId, x3, y3]
3236
+ });
3103
3237
  };
3104
3238
  fillTextToViewableElement = (tabId, x3, y3, value) => {
3105
- return this.extensionRpcClient.defer(
3106
- "fillTextToViewableElement",
3107
- tabId,
3108
- x3,
3109
- y3,
3110
- value
3111
- );
3239
+ return this.extensionRpcClient.defer({
3240
+ method: "fillTextToViewableElement",
3241
+ args: [tabId, x3, y3, value]
3242
+ });
3112
3243
  };
3113
3244
  clickOnReadableElement = (tabId, index) => {
3114
- return this.extensionRpcClient.defer(
3115
- "clickOnReadableElement",
3116
- tabId,
3117
- index
3118
- );
3245
+ return this.extensionRpcClient.defer({
3246
+ method: "clickOnReadableElement",
3247
+ args: [tabId, index]
3248
+ });
3119
3249
  };
3120
3250
  fillTextToReadableElement = (tabId, index, value) => {
3121
- return this.extensionRpcClient.defer(
3122
- "fillTextToReadableElement",
3123
- tabId,
3124
- index,
3125
- value
3126
- );
3251
+ return this.extensionRpcClient.defer({
3252
+ method: "fillTextToReadableElement",
3253
+ args: [tabId, index, value]
3254
+ });
3127
3255
  };
3128
3256
  invokeJsFn = (tabId, fnBodyCode) => {
3129
- return this.extensionRpcClient.defer("invokeJsFn", tabId, fnBodyCode);
3257
+ return this.extensionRpcClient.defer({
3258
+ method: "invokeJsFn",
3259
+ args: [tabId, fnBodyCode]
3260
+ });
3130
3261
  };
3131
- getTabs = () => {
3132
- return this.extensionRpcClient.defer("getTabs");
3262
+ getBasicBrowserContext = () => {
3263
+ return this.extensionRpcClient.defer({
3264
+ method: "getBasicBrowserContext",
3265
+ args: []
3266
+ });
3133
3267
  };
3134
3268
  };
3135
3269
  DrivenExtensionDriver = __decorateClass([
@@ -10135,7 +10269,7 @@ var StdioServerTransport = class {
10135
10269
 
10136
10270
  // src/helpers/mcp-server.ts
10137
10271
  var createServer = async () => {
10138
- const toolsInputPort = container.get(ToolsInputPort);
10272
+ const toolsInputPort = container.get(ToolCallsInputPort);
10139
10273
  const server = new McpServer({
10140
10274
  name: "MCP Browser Kit",
10141
10275
  version: "1.0.0",
@@ -10145,17 +10279,22 @@ var createServer = async () => {
10145
10279
  }
10146
10280
  });
10147
10281
  const combinationDescription = [""].join("\n");
10148
- server.tool("getTabs", toolsInputPort.getTabsInstruction(), {}, async () => {
10149
- const tabs = await toolsInputPort.getTabs();
10150
- return {
10151
- content: [
10152
- {
10153
- type: "text",
10154
- text: `Tabs: ${JSON.stringify(tabs)}`
10155
- }
10156
- ]
10157
- };
10158
- });
10282
+ server.tool(
10283
+ "getBasicBrowserContext",
10284
+ toolsInputPort.getBasicBrowserContextInstruction(),
10285
+ {},
10286
+ async () => {
10287
+ const tabs = await toolsInputPort.getBasicBrowserContext();
10288
+ return {
10289
+ content: [
10290
+ {
10291
+ type: "text",
10292
+ text: `${JSON.stringify(tabs)}`
10293
+ }
10294
+ ]
10295
+ };
10296
+ }
10297
+ );
10159
10298
  server.tool(
10160
10299
  "captureActiveTab",
10161
10300
  [combinationDescription, toolsInputPort.captureActiveTabInstruction()].join(
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mcp-browser-kit/server",
3
- "version": "4.0.0",
3
+ "version": "5.0.0",
4
4
  "packageManager": "yarn@4.8.0",
5
5
  "homepage": "https://github.com/ndthanhdev/mcp-browser-kit",
6
6
  "bin": {