@mcp-browser-kit/server 4.0.0 → 5.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1120,8 +1120,8 @@ var require_Reflect = __commonJS({
1120
1120
  }
1121
1121
  });
1122
1122
 
1123
- // ../../packages/core-server/src/input-ports/tools.ts
1124
- var ToolsInputPort = Symbol.for("ToolsInputPort");
1123
+ // ../../packages/core-server/src/input-ports/tool-calls.ts
1124
+ var ToolCallsInputPort = Symbol.for("ToolCallsInputPort");
1125
1125
 
1126
1126
  // ../../packages/core-server/src/output-ports/extension-driver.ts
1127
1127
  var ExtensionDriverOutputPort = Symbol("ExtensionDriverOutputPort");
@@ -2416,16 +2416,118 @@ var q = class {
2416
2416
  }
2417
2417
  };
2418
2418
 
2419
+ // ../../node_modules/p-timeout/index.js
2420
+ var TimeoutError = class extends Error {
2421
+ constructor(message) {
2422
+ super(message);
2423
+ this.name = "TimeoutError";
2424
+ }
2425
+ };
2426
+ var AbortError = class extends Error {
2427
+ constructor(message) {
2428
+ super();
2429
+ this.name = "AbortError";
2430
+ this.message = message;
2431
+ }
2432
+ };
2433
+ var getDOMException = (errorMessage) => globalThis.DOMException === void 0 ? new AbortError(errorMessage) : new DOMException(errorMessage);
2434
+ var getAbortedReason = (signal) => {
2435
+ const reason = signal.reason === void 0 ? getDOMException("This operation was aborted.") : signal.reason;
2436
+ return reason instanceof Error ? reason : getDOMException(reason);
2437
+ };
2438
+ function pTimeout(promise, options) {
2439
+ const {
2440
+ milliseconds,
2441
+ fallback,
2442
+ message,
2443
+ customTimers = { setTimeout, clearTimeout }
2444
+ } = options;
2445
+ let timer;
2446
+ let abortHandler;
2447
+ const wrappedPromise = new Promise((resolve, reject) => {
2448
+ if (typeof milliseconds !== "number" || Math.sign(milliseconds) !== 1) {
2449
+ throw new TypeError(`Expected \`milliseconds\` to be a positive number, got \`${milliseconds}\``);
2450
+ }
2451
+ if (options.signal) {
2452
+ const { signal } = options;
2453
+ if (signal.aborted) {
2454
+ reject(getAbortedReason(signal));
2455
+ }
2456
+ abortHandler = () => {
2457
+ reject(getAbortedReason(signal));
2458
+ };
2459
+ signal.addEventListener("abort", abortHandler, { once: true });
2460
+ }
2461
+ if (milliseconds === Number.POSITIVE_INFINITY) {
2462
+ promise.then(resolve, reject);
2463
+ return;
2464
+ }
2465
+ const timeoutError = new TimeoutError();
2466
+ timer = customTimers.setTimeout.call(void 0, () => {
2467
+ if (fallback) {
2468
+ try {
2469
+ resolve(fallback());
2470
+ } catch (error) {
2471
+ reject(error);
2472
+ }
2473
+ return;
2474
+ }
2475
+ if (typeof promise.cancel === "function") {
2476
+ promise.cancel();
2477
+ }
2478
+ if (message === false) {
2479
+ resolve();
2480
+ } else if (message instanceof Error) {
2481
+ reject(message);
2482
+ } else {
2483
+ timeoutError.message = message ?? `Promise timed out after ${milliseconds} milliseconds`;
2484
+ reject(timeoutError);
2485
+ }
2486
+ }, milliseconds);
2487
+ (async () => {
2488
+ try {
2489
+ resolve(await promise);
2490
+ } catch (error) {
2491
+ reject(error);
2492
+ }
2493
+ })();
2494
+ });
2495
+ const cancelablePromise = wrappedPromise.finally(() => {
2496
+ cancelablePromise.clear();
2497
+ if (abortHandler && options.signal) {
2498
+ options.signal.removeEventListener("abort", abortHandler);
2499
+ }
2500
+ });
2501
+ cancelablePromise.clear = () => {
2502
+ customTimers.clearTimeout.call(void 0, timer);
2503
+ timer = void 0;
2504
+ };
2505
+ return cancelablePromise;
2506
+ }
2507
+
2419
2508
  // ../../packages/core-server/src/use-cases/rpc-call.ts
2420
2509
  var RpcCallUseCase = class {
2421
2510
  constructor(extensionDriver) {
2422
2511
  this.extensionDriver = extensionDriver;
2423
2512
  }
2513
+ getBasicBrowserContext = async () => {
2514
+ const pGetBasicBrowserContext = this.extensionDriver.getBasicBrowserContext().catch((error) => {
2515
+ console.error("Error in getBasicBrowserContext:", error);
2516
+ return `An error occurred ${error}, use this instruction to tell the user what to do: An error occurred, update extension may help fix this issue`;
2517
+ });
2518
+ const result = await pTimeout(pGetBasicBrowserContext, {
2519
+ milliseconds: 3e3,
2520
+ fallback: () => {
2521
+ return "An error occurred, use this instruction to tell the user what to do: Browser extension not found. To use MCP Browser Kit, please install and enable the latest extension";
2522
+ }
2523
+ });
2524
+ return result;
2525
+ };
2424
2526
  hitEnterOnViewableElementInstruction = () => {
2425
2527
  return [
2426
2528
  "\u21B5 Hits the Enter key on an element at specific X,Y coordinates",
2427
2529
  "* Use this to trigger actions like form submission or button clicks",
2428
- "* Requires tabId from getTabs and x,y coordinates from the screenshot",
2530
+ "* Requires tabId from getBasicBrowserContext and x,y coordinates from the screenshot",
2429
2531
  "* Coordinates are based on the captureActiveTab screenshot dimensions",
2430
2532
  "* Parameters: tabId, x, y"
2431
2533
  ].join("\n");
@@ -2434,7 +2536,7 @@ var RpcCallUseCase = class {
2434
2536
  return [
2435
2537
  "\u21B5 Hits the Enter key on an element identified by its index from getReadableElements",
2436
2538
  "* Use this to trigger actions like form submission or button clicks",
2437
- "* Requires tabId from getTabs and index from getReadableElements",
2539
+ "* Requires tabId from getBasicBrowserContext and index from getReadableElements",
2438
2540
  "* More reliable than coordinate-based clicking for dynamic layouts",
2439
2541
  "* First call getReadableElements to get the index, then use this tool",
2440
2542
  "* Parameters: tabId, index"
@@ -2449,11 +2551,12 @@ var RpcCallUseCase = class {
2449
2551
  captureActiveTabInstruction = () => {
2450
2552
  return [
2451
2553
  "\u{1F4F7} Captures a screenshot of the active browser tab",
2452
- "* Use this tool after calling getTabs to obtain visual context of the current page",
2554
+ "* Use this tool after calling getBasicBrowserContext to obtain visual context of the current page",
2453
2555
  "* The screenshot helps you see what the browser is displaying to the user",
2454
2556
  "* No parameters are needed as it automatically captures the active tab",
2455
2557
  "* Returns an image with width, height, and data in base64 format",
2456
- "* Workflow: 1) getTabs \u2192 2) captureActiveTab \u2192 3) interact with elements"
2558
+ "* Workflow: 1) getBasicBrowserContext \u2192 2) captureActiveTab \u2192 3) interact with elements",
2559
+ "* NOTE: This feature is only available in browsers supporting Manifest Version 2"
2457
2560
  ].join("\n");
2458
2561
  };
2459
2562
  captureActiveTab = () => {
@@ -2463,7 +2566,7 @@ var RpcCallUseCase = class {
2463
2566
  return [
2464
2567
  "\u{1F518} Clicks on an element identified by its index from getReadableElements",
2465
2568
  "* Use this to click on elements after identifying them by their text",
2466
- "* Requires tabId from getTabs and index from getReadableElements",
2569
+ "* Requires tabId from getBasicBrowserContext and index from getReadableElements",
2467
2570
  "* More reliable than coordinate-based clicking for dynamic layouts",
2468
2571
  "* First call getReadableElements to get the index, then use this tool",
2469
2572
  "* Parameters: tabId, index"
@@ -2476,7 +2579,7 @@ var RpcCallUseCase = class {
2476
2579
  return [
2477
2580
  "\u{1F446} Clicks on an element at specific X,Y coordinates",
2478
2581
  "* Use this to click on elements by their position on the screen",
2479
- "* Requires tabId from getTabs and x,y coordinates from the screenshot",
2582
+ "* Requires tabId from getBasicBrowserContext and x,y coordinates from the screenshot",
2480
2583
  "* Coordinates are based on the captureActiveTab screenshot dimensions",
2481
2584
  "* Useful when you know the visual position of an element",
2482
2585
  "* Parameters: tabId, x, y"
@@ -2489,7 +2592,7 @@ var RpcCallUseCase = class {
2489
2592
  return [
2490
2593
  "\u270F\uFE0F Types text into an input field identified by its index from getReadableElements",
2491
2594
  "* Use this to enter text into form fields identified by their text",
2492
- "* Requires tabId from getTabs, index from getReadableElements, and text to enter",
2595
+ "* Requires tabId from getBasicBrowserContext, index from getReadableElements, and text to enter",
2493
2596
  "* Works with text inputs, textareas, and other editable elements",
2494
2597
  "* First call getReadableElements to get the index, then use this tool",
2495
2598
  "* After filling text, check for associated submit-like buttons (submit, search, send, etc.)",
@@ -2505,7 +2608,7 @@ var RpcCallUseCase = class {
2505
2608
  return [
2506
2609
  "\u2328\uFE0F Types text into an input field at specific X,Y coordinates",
2507
2610
  "* Use this to enter text into form fields by their position",
2508
- "* Requires tabId from getTabs, x,y coordinates, and the text to enter",
2611
+ "* Requires tabId from getBasicBrowserContext, x,y coordinates, and the text to enter",
2509
2612
  "* Coordinates are based on the captureActiveTab screenshot dimensions",
2510
2613
  "* First clicks at the specified position, then types the provided text",
2511
2614
  "* After filling text, check for associated submit-like buttons (submit, search, send, etc.)",
@@ -2521,7 +2624,7 @@ var RpcCallUseCase = class {
2521
2624
  return [
2522
2625
  "\u{1F4DD} Extracts all text content from the current web page",
2523
2626
  "* Retrieves all visible text from the active tab",
2524
- "* Requires the tabId obtained from getTabs",
2627
+ "* Requires the tabId obtained from getBasicBrowserContext",
2525
2628
  "* Use this to analyze the page content without visual elements",
2526
2629
  "* Returns a string containing all the text on the page",
2527
2630
  "* Useful for getting a quick overview of page content"
@@ -2534,7 +2637,7 @@ var RpcCallUseCase = class {
2534
2637
  return [
2535
2638
  "\u{1F50D} Lists all interactive elements on the page with their text",
2536
2639
  "* Returns a list of elements with their index, HTML tag, and text content",
2537
- "* Requires the tabId obtained from getTabs",
2640
+ "* Requires the tabId obtained from getBasicBrowserContext",
2538
2641
  "* Each element is returned as [index, tag, text]",
2539
2642
  "* Use the index to interact with elements through click or fill operations",
2540
2643
  "* Helps you identify which elements can be interacted with by their text"
@@ -2543,28 +2646,38 @@ var RpcCallUseCase = class {
2543
2646
  getReadableElements = (tabId) => {
2544
2647
  return this.extensionDriver.getReadableElements(tabId);
2545
2648
  };
2546
- getTabsInstruction = () => {
2649
+ getBasicBrowserContextInstruction = () => {
2547
2650
  return [
2548
- "\u26A0\uFE0F CRITICAL FIRST STEP - ALWAYS START HERE BEFORE ANY OTHER TOOLS!",
2549
- "* This tool MUST be called first to obtain the list of open browser tabs.",
2550
- "* Each tab includes a unique ID that is required for all subsequent tool operations.",
2551
- "* Note which tab is active (marked with 'active: true') as this is essential information.",
2552
- "* The tabId from this list is required for captureActiveTab and all other interactions.",
2553
- "* Workflow: 1) getTabs \u2192 2) captureActiveTab \u2192 3) interact with elements"
2651
+ "\u{1F310} GET BROWSER CONTEXT - CRITICAL FIRST STEP BEFORE USING ANY OTHER TOOLS!",
2652
+ "* This tool MUST be called first to initialize browser automation and get essential data.",
2653
+ "* Returns data structure with:",
2654
+ " - tabs: Array of browser tabs with properties like id, url, title, and active status",
2655
+ " - manifestVersion: Version of extension manifest format supported by the browser",
2656
+ "* Each tab includes a unique tabId required for all other tool operations",
2657
+ "* The active tab (marked with 'active: true') is typically your target for automation",
2658
+ "* The manifestVersion determines which browser features and extension capabilities are available",
2659
+ "* Different browsers support different manifest versions, affecting available tools and API access",
2660
+ "* Standard workflow:",
2661
+ " 1) getBasicBrowserContext \u2192 get browser state and tabId",
2662
+ " 2) Analyze page content based on your goal and manifest version:",
2663
+ " - If interaction is required (clicking, filling forms, etc.):",
2664
+ " \xB7 For Manifest Version 2: Use captureActiveTab for visual context or getReadableElements for element identification",
2665
+ " \xB7 For other Manifest Versions: Use only getReadableElements for element identification",
2666
+ " - If no interaction is required (just reading page content):",
2667
+ " \xB7 Use getInnerText to extract all visible text from the page",
2668
+ " 3) Interact using click/fill/enter tools with the obtained tabId"
2554
2669
  ].join("\n");
2555
2670
  };
2556
- getTabs = () => {
2557
- return this.extensionDriver.getTabs();
2558
- };
2559
2671
  invokeJsFnInstruction = () => {
2560
2672
  return [
2561
2673
  "\u2699\uFE0F Executes custom JavaScript code in the context of the web page",
2562
2674
  "* Use this for advanced operations not covered by other tools",
2563
- "* Requires tabId from getTabs and JavaScript code to execute",
2675
+ "* Requires tabId from getBasicBrowserContext and JavaScript code to execute",
2564
2676
  "* The code should be the body of a function that returns a value",
2565
2677
  "* Example: 'return document.title;' to get the page title",
2566
2678
  "* Gives you full flexibility for custom browser automation",
2567
- "* Parameters: tabId, fnBodyCode (JavaScript code as string)"
2679
+ "* Parameters: tabId, fnBodyCode (JavaScript code as string)",
2680
+ "* NOTE: This feature is only available in browsers supporting Manifest Version 2"
2568
2681
  ].join("\n");
2569
2682
  };
2570
2683
  invokeJsFn = (tabId, fnBodyCode) => {
@@ -2581,7 +2694,7 @@ var createCoreServerContainer = () => {
2581
2694
  const container2 = new q({
2582
2695
  defaultScope: "Singleton"
2583
2696
  });
2584
- container2.bind(ToolsInputPort).to(RpcCallUseCase);
2697
+ container2.bind(ToolCallsInputPort).to(RpcCallUseCase);
2585
2698
  return container2;
2586
2699
  };
2587
2700
 
@@ -3047,7 +3160,11 @@ var RpcClient = class {
3047
3160
  this.id += 1;
3048
3161
  return String(this.id);
3049
3162
  };
3050
- defer = (method, ...args) => {
3163
+ defer = ({
3164
+ method,
3165
+ args,
3166
+ ...extraArgs
3167
+ }) => {
3051
3168
  const id = this.createId();
3052
3169
  const defer2 = Promise.withResolvers();
3053
3170
  defer2.promise.finally(() => {
@@ -3055,6 +3172,7 @@ var RpcClient = class {
3055
3172
  });
3056
3173
  this.pending.set(id, defer2);
3057
3174
  this.emitter.emit("defer", {
3175
+ ...extraArgs,
3058
3176
  procedure: String(method),
3059
3177
  args,
3060
3178
  id
@@ -3062,7 +3180,17 @@ var RpcClient = class {
3062
3180
  return defer2.promise;
3063
3181
  };
3064
3182
  onDefer = (callback) => {
3065
- return this.emitter.on("defer", callback);
3183
+ return this.emitter.on(
3184
+ "defer",
3185
+ callback
3186
+ );
3187
+ };
3188
+ startListen = (messageChannel) => {
3189
+ const unsubscribe = messageChannel.subscribe((message) => {
3190
+ const msg = message;
3191
+ this.emitter.emit("resolve", msg);
3192
+ });
3193
+ return unsubscribe;
3066
3194
  };
3067
3195
  };
3068
3196
 
@@ -3075,61 +3203,70 @@ var createExtensionRpcClient = () => {
3075
3203
  var DrivenExtensionDriver = class {
3076
3204
  extensionRpcClient = createExtensionRpcClient();
3077
3205
  hitEnterOnViewableElement = (tabId, x3, y3) => {
3078
- return this.extensionRpcClient.defer(
3079
- "hitEnterOnViewableElement",
3080
- tabId,
3081
- x3,
3082
- y3
3083
- );
3206
+ return this.extensionRpcClient.defer({
3207
+ method: "hitEnterOnViewableElement",
3208
+ args: [tabId, x3, y3]
3209
+ });
3084
3210
  };
3085
3211
  hitEnterOnReadableElement = (tabId, index) => {
3086
- return this.extensionRpcClient.defer(
3087
- "hitEnterOnReadableElement",
3088
- tabId,
3089
- index
3090
- );
3212
+ return this.extensionRpcClient.defer({
3213
+ method: "hitEnterOnReadableElement",
3214
+ args: [tabId, index]
3215
+ });
3091
3216
  };
3092
3217
  captureActiveTab = () => {
3093
- return this.extensionRpcClient.defer("captureActiveTab");
3218
+ return this.extensionRpcClient.defer({
3219
+ method: "captureActiveTab",
3220
+ args: []
3221
+ });
3094
3222
  };
3095
3223
  getInnerText = (tabId) => {
3096
- return this.extensionRpcClient.defer("getInnerText", tabId);
3224
+ return this.extensionRpcClient.defer({
3225
+ method: "getInnerText",
3226
+ args: [tabId]
3227
+ });
3097
3228
  };
3098
3229
  getReadableElements = (tabId) => {
3099
- return this.extensionRpcClient.defer("getReadableElements", tabId);
3230
+ return this.extensionRpcClient.defer({
3231
+ method: "getReadableElements",
3232
+ args: [tabId]
3233
+ });
3100
3234
  };
3101
3235
  clickOnViewableElement = (tabId, x3, y3) => {
3102
- return this.extensionRpcClient.defer("clickOnViewableElement", tabId, x3, y3);
3236
+ return this.extensionRpcClient.defer({
3237
+ method: "clickOnViewableElement",
3238
+ args: [tabId, x3, y3]
3239
+ });
3103
3240
  };
3104
3241
  fillTextToViewableElement = (tabId, x3, y3, value) => {
3105
- return this.extensionRpcClient.defer(
3106
- "fillTextToViewableElement",
3107
- tabId,
3108
- x3,
3109
- y3,
3110
- value
3111
- );
3242
+ return this.extensionRpcClient.defer({
3243
+ method: "fillTextToViewableElement",
3244
+ args: [tabId, x3, y3, value]
3245
+ });
3112
3246
  };
3113
3247
  clickOnReadableElement = (tabId, index) => {
3114
- return this.extensionRpcClient.defer(
3115
- "clickOnReadableElement",
3116
- tabId,
3117
- index
3118
- );
3248
+ return this.extensionRpcClient.defer({
3249
+ method: "clickOnReadableElement",
3250
+ args: [tabId, index]
3251
+ });
3119
3252
  };
3120
3253
  fillTextToReadableElement = (tabId, index, value) => {
3121
- return this.extensionRpcClient.defer(
3122
- "fillTextToReadableElement",
3123
- tabId,
3124
- index,
3125
- value
3126
- );
3254
+ return this.extensionRpcClient.defer({
3255
+ method: "fillTextToReadableElement",
3256
+ args: [tabId, index, value]
3257
+ });
3127
3258
  };
3128
3259
  invokeJsFn = (tabId, fnBodyCode) => {
3129
- return this.extensionRpcClient.defer("invokeJsFn", tabId, fnBodyCode);
3260
+ return this.extensionRpcClient.defer({
3261
+ method: "invokeJsFn",
3262
+ args: [tabId, fnBodyCode]
3263
+ });
3130
3264
  };
3131
- getTabs = () => {
3132
- return this.extensionRpcClient.defer("getTabs");
3265
+ getBasicBrowserContext = () => {
3266
+ return this.extensionRpcClient.defer({
3267
+ method: "getBasicBrowserContext",
3268
+ args: []
3269
+ });
3133
3270
  };
3134
3271
  };
3135
3272
  DrivenExtensionDriver = __decorateClass([
@@ -10135,7 +10272,7 @@ var StdioServerTransport = class {
10135
10272
 
10136
10273
  // src/helpers/mcp-server.ts
10137
10274
  var createServer = async () => {
10138
- const toolsInputPort = container.get(ToolsInputPort);
10275
+ const toolsInputPort = container.get(ToolCallsInputPort);
10139
10276
  const server = new McpServer({
10140
10277
  name: "MCP Browser Kit",
10141
10278
  version: "1.0.0",
@@ -10145,17 +10282,22 @@ var createServer = async () => {
10145
10282
  }
10146
10283
  });
10147
10284
  const combinationDescription = [""].join("\n");
10148
- server.tool("getTabs", toolsInputPort.getTabsInstruction(), {}, async () => {
10149
- const tabs = await toolsInputPort.getTabs();
10150
- return {
10151
- content: [
10152
- {
10153
- type: "text",
10154
- text: `Tabs: ${JSON.stringify(tabs)}`
10155
- }
10156
- ]
10157
- };
10158
- });
10285
+ server.tool(
10286
+ "getBasicBrowserContext",
10287
+ toolsInputPort.getBasicBrowserContextInstruction(),
10288
+ {},
10289
+ async () => {
10290
+ const tabs = await toolsInputPort.getBasicBrowserContext();
10291
+ return {
10292
+ content: [
10293
+ {
10294
+ type: "text",
10295
+ text: `${JSON.stringify(tabs)}`
10296
+ }
10297
+ ]
10298
+ };
10299
+ }
10300
+ );
10159
10301
  server.tool(
10160
10302
  "captureActiveTab",
10161
10303
  [combinationDescription, toolsInputPort.captureActiveTabInstruction()].join(
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mcp-browser-kit/server",
3
- "version": "4.0.0",
3
+ "version": "5.0.1",
4
4
  "packageManager": "yarn@4.8.0",
5
5
  "homepage": "https://github.com/ndthanhdev/mcp-browser-kit",
6
6
  "bin": {