ornold-mcp 1.2.2 → 1.3.2

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
Files changed (2)
  1. package/dist/cli.js +97 -74
  2. package/package.json +1 -2
package/dist/cli.js CHANGED
@@ -2056,6 +2056,7 @@ function getOptionalString(argNames, envNames) {
2056
2056
  }
2057
2057
  var TOKEN = getArg("token") || process.env.ORNOLD_TOKEN || "";
2058
2058
  var SERVER_URL = getArg("server") || process.env.ORNOLD_SERVER || "wss://mcp.ornold.com/bridge";
2059
+ var BROWSER_MODE = getArg("mode") || process.env.ORNOLD_MODE || "dom";
2059
2060
  var LINKEN_PORT = getOptionalPort("linken-port", "LINKEN_PORT");
2060
2061
  var WADEX_PORT = getOptionalPort("wadex-port", "WADEX_PORT");
2061
2062
  var DOLPHIN_PORT = getOptionalPort("dolphin-port", "DOLPHIN_PORT");
@@ -2432,8 +2433,19 @@ ${result.snapshot || ""}`;
2432
2433
  }
2433
2434
  case "cdp_click_normalized_box": {
2434
2435
  const exec = await getExecutor();
2435
- const r = await exec.parallelClickNormalizedBox(params.box, params.browserIds);
2436
- return { content: formatResult(r), isError: false };
2436
+ const box = params.box;
2437
+ if (!box || box.length !== 4) return { content: "Invalid box coordinates", isError: true };
2438
+ const browsers = params.browserIds ? void 0 : await exec.listBrowsers();
2439
+ const targetId = params.browserIds?.[0] || browsers?.[0]?.id;
2440
+ if (!targetId) return { content: "No browser connected", isError: true };
2441
+ const page = exec.getPage(targetId);
2442
+ if (!page) return { content: "Browser page not found", isError: true };
2443
+ const vpRaw = await page.evaluate("JSON.stringify({ w: window.innerWidth, h: window.innerHeight })");
2444
+ const vp = JSON.parse(vpRaw);
2445
+ const centerX = (box[0] + box[2]) / 2 * vp.w;
2446
+ const centerY = (box[1] + box[3]) / 2 * vp.h;
2447
+ await page.mouse.click(centerX, centerY);
2448
+ return { content: JSON.stringify({ clicked: true, x: Math.round(centerX), y: Math.round(centerY), viewport: vp }), isError: false };
2437
2449
  }
2438
2450
  case "cdp_setup_downloads": {
2439
2451
  const exec = await getExecutor();
@@ -2865,6 +2877,16 @@ var browserTargetArgs = {
2865
2877
  _scope: browserScopeArg
2866
2878
  };
2867
2879
  function createServer() {
2880
+ const modeInstructions = BROWSER_MODE === "vision" ? `## Interaction mode: VISION
2881
+ - Use browser_parallel_vision_analyze_grouped to analyze page content via AI vision.
2882
+ - Use browser_parallel_click_normalized_box to click elements by coordinates.
2883
+ - Use browser_parallel_screenshot to see the page.
2884
+ - Do NOT use browser_parallel_snapshot (DOM mode is disabled).` : BROWSER_MODE === "both" ? `## Interaction modes: DOM + VISION
2885
+ - Prefer DOM mode (browser_parallel_snapshot \u2192 ref-based clicks) \u2014 it's faster and free.
2886
+ - Use Vision mode (browser_parallel_vision_analyze_grouped) only when DOM doesn't work (canvas, complex iframes, dynamic content).` : `## Interaction mode: DOM
2887
+ - Use browser_parallel_snapshot to get page content with [ref=N] markers.
2888
+ - Use ref parameter for clicking/filling elements from snapshot.
2889
+ - Always call snapshot BEFORE clicking/filling \u2014 refs go stale after navigation.`;
2868
2890
  const server = new McpServer({
2869
2891
  name: "ornold-browser",
2870
2892
  version: CLIENT_VERSION,
@@ -2876,9 +2898,9 @@ ${LINKEN_PORT !== void 0 ? "- Linken Sphere (linken_* tools)" : ""}${WADEX_PORT
2876
2898
  ## Core workflow
2877
2899
  1. Start a browser profile: linken_start_instances / dolphin_start_profile
2878
2900
  2. Browser auto-connects via CDP. Use browser_list to see connected browsers.
2879
- 3. Use browser_parallel_* tools to interact (navigate, click, fill, snapshot, etc.)
2880
- 4. Always call browser_parallel_snapshot BEFORE clicking/filling \u2014 it returns [ref=N] markers.
2881
- 5. Use ref parameter (not selector) for clicking/filling elements from snapshot.
2901
+ 3. Use browser_parallel_* tools to interact with pages.
2902
+
2903
+ ${modeInstructions}
2882
2904
 
2883
2905
  ## Anti-detection rules
2884
2906
  - Navigate via Google search, not direct URLs (except same-domain links).
@@ -2886,12 +2908,9 @@ ${LINKEN_PORT !== void 0 ? "- Linken Sphere (linken_* tools)" : ""}${WADEX_PORT
2886
2908
  - Use browser_parallel_type (not fill) for short human-like inputs.
2887
2909
  - Never modify fingerprints via JavaScript or navigate to chrome:// URLs.
2888
2910
 
2889
- ## Flow execution (AI-powered automation)
2890
- Flows delegate multi-step tasks to a cheaper model on the server.
2891
2911
  ## Captcha solving
2892
2912
  - browser_detect_captcha \u2192 browser_solve_captcha (reCAPTCHA/hCaptcha via 2captcha)
2893
- - browser_detect_press_hold \u2192 browser_solve_press_hold (PerimeterX)
2894
- - browser_captcha_balance to check remaining balance`
2913
+ - browser_detect_press_hold \u2192 browser_solve_press_hold (PerimeterX)`
2895
2914
  });
2896
2915
  server.tool("browser_list", "List connected browsers", {}, () => toolHandler("browser_list", {}));
2897
2916
  server.tool("browser_status", "Check browser sync and responsiveness", {
@@ -2905,11 +2924,29 @@ Flows delegate multi-step tasks to a cheaper model on the server.
2905
2924
  browserIds: browserIdsArg,
2906
2925
  _scope: browserScopeArg
2907
2926
  }, (args2) => toolHandler("browser_parallel_tabs", args2));
2908
- server.tool("browser_parallel_snapshot", "Get page snapshot with [ref=N] markers", {
2909
- compact: z.boolean().optional(),
2910
- browserIds: browserIdsArg,
2911
- _scope: browserScopeArg
2912
- }, (args2) => toolHandler("browser_parallel_snapshot", args2));
2927
+ if (BROWSER_MODE === "dom" || BROWSER_MODE === "both") {
2928
+ server.tool("browser_parallel_snapshot", "Get page snapshot with [ref=N] markers", {
2929
+ compact: z.boolean().optional(),
2930
+ browserIds: browserIdsArg,
2931
+ _scope: browserScopeArg
2932
+ }, (args2) => toolHandler("browser_parallel_snapshot", args2));
2933
+ }
2934
+ if (BROWSER_MODE === "vision" || BROWSER_MODE === "both") {
2935
+ server.tool("browser_parallel_screenshot", "Take a screenshot of the page", {
2936
+ browserIds: browserIdsArg,
2937
+ _scope: browserScopeArg
2938
+ }, (args2) => toolHandler("browser_parallel_screenshot", args2));
2939
+ server.tool("browser_parallel_vision_analyze_grouped", "Analyze grouped browser screenshots with OmniParser via Ornold server", {
2940
+ similarityThreshold: z.number().optional(),
2941
+ browserIds: browserIdsArg,
2942
+ _scope: browserScopeArg
2943
+ }, (args2) => toolHandler("browser_parallel_vision_analyze_grouped", args2));
2944
+ server.tool("browser_parallel_click_normalized_box", "Click normalized viewport box center", {
2945
+ box: z.tuple([z.number(), z.number(), z.number(), z.number()]),
2946
+ browserIds: browserIdsArg,
2947
+ _scope: browserScopeArg
2948
+ }, (args2) => toolHandler("browser_parallel_click_normalized_box", args2));
2949
+ }
2913
2950
  server.tool("browser_parallel_navigate", "Navigate to URL", {
2914
2951
  url: z.string(),
2915
2952
  browserIds: browserIdsArg,
@@ -2922,52 +2959,57 @@ Flows delegate multi-step tasks to a cheaper model on the server.
2922
2959
  })),
2923
2960
  _scope: browserScopeArg
2924
2961
  }, (args2) => toolHandler("browser_parallel_navigate_multi", args2));
2925
- server.tool("browser_parallel_click", "Click element", {
2926
- ...browserTargetArgs
2927
- }, (args2) => toolHandler("browser_parallel_click", args2));
2928
- server.tool("browser_parallel_type", "Type text into element", {
2929
- ...browserTargetArgs,
2930
- text: z.string()
2931
- }, (args2) => toolHandler("browser_parallel_type", args2));
2932
- server.tool("browser_parallel_fill", "Fill input (clear + type)", {
2933
- ...browserTargetArgs,
2934
- text: z.string().optional(),
2935
- texts: z.record(z.string()).optional()
2936
- }, (args2) => toolHandler("browser_parallel_fill", args2));
2937
- server.tool("browser_parallel_fill_multi", "Fill input with per-browser values", {
2938
- ...browserTargetArgs,
2939
- texts: z.record(z.string())
2940
- }, (args2) => toolHandler("browser_parallel_fill_multi", args2));
2941
- server.tool("browser_parallel_fill_form", "Fill multiple form fields sequentially", {
2942
- fields: z.array(z.object({
2943
- element: z.string().optional(),
2944
- ref: browserRefArg,
2945
- selector: z.string().optional(),
2946
- value: z.string(),
2947
- type: z.enum(["textbox", "checkbox", "radio", "combobox"]).optional()
2948
- })),
2949
- browserIds: browserIdsArg,
2950
- _scope: browserScopeArg
2951
- }, (args2) => toolHandler("browser_parallel_fill_form", args2));
2952
- server.tool("browser_parallel_drag", "Drag from one element to another", {
2953
- startElement: z.string().optional(),
2954
- startRef: browserRefArg,
2955
- startSelector: z.string().optional(),
2956
- endElement: z.string().optional(),
2957
- endRef: browserRefArg,
2958
- endSelector: z.string().optional(),
2959
- browserIds: browserIdsArg,
2960
- _scope: browserScopeArg
2961
- }, (args2) => toolHandler("browser_parallel_drag", args2));
2962
+ if (BROWSER_MODE === "dom" || BROWSER_MODE === "both") {
2963
+ server.tool("browser_parallel_click", "Click element", {
2964
+ ...browserTargetArgs
2965
+ }, (args2) => toolHandler("browser_parallel_click", args2));
2966
+ server.tool("browser_parallel_type", "Type text into element", {
2967
+ ...browserTargetArgs,
2968
+ text: z.string()
2969
+ }, (args2) => toolHandler("browser_parallel_type", args2));
2970
+ server.tool("browser_parallel_fill", "Fill input (clear + type)", {
2971
+ ...browserTargetArgs,
2972
+ text: z.string().optional(),
2973
+ texts: z.record(z.string()).optional()
2974
+ }, (args2) => toolHandler("browser_parallel_fill", args2));
2975
+ server.tool("browser_parallel_fill_multi", "Fill input with per-browser values", {
2976
+ ...browserTargetArgs,
2977
+ texts: z.record(z.string())
2978
+ }, (args2) => toolHandler("browser_parallel_fill_multi", args2));
2979
+ server.tool("browser_parallel_fill_form", "Fill multiple form fields sequentially", {
2980
+ fields: z.array(z.object({
2981
+ element: z.string().optional(),
2982
+ ref: browserRefArg,
2983
+ selector: z.string().optional(),
2984
+ value: z.string(),
2985
+ type: z.enum(["textbox", "checkbox", "radio", "combobox"]).optional()
2986
+ })),
2987
+ browserIds: browserIdsArg,
2988
+ _scope: browserScopeArg
2989
+ }, (args2) => toolHandler("browser_parallel_fill_form", args2));
2990
+ server.tool("browser_parallel_drag", "Drag from one element to another", {
2991
+ startElement: z.string().optional(),
2992
+ startRef: browserRefArg,
2993
+ startSelector: z.string().optional(),
2994
+ endElement: z.string().optional(),
2995
+ endRef: browserRefArg,
2996
+ endSelector: z.string().optional(),
2997
+ browserIds: browserIdsArg,
2998
+ _scope: browserScopeArg
2999
+ }, (args2) => toolHandler("browser_parallel_drag", args2));
3000
+ server.tool("browser_parallel_select_option", "Select dropdown option", {
3001
+ ...browserTargetArgs,
3002
+ values: z.array(z.string())
3003
+ }, (args2) => toolHandler("browser_parallel_select_option", args2));
3004
+ server.tool("browser_parallel_hover", "Hover over element", {
3005
+ ...browserTargetArgs
3006
+ }, (args2) => toolHandler("browser_parallel_hover", args2));
3007
+ }
2962
3008
  server.tool("browser_parallel_press_key", "Press keyboard key", {
2963
3009
  key: z.string(),
2964
3010
  browserIds: browserIdsArg,
2965
3011
  _scope: browserScopeArg
2966
3012
  }, (args2) => toolHandler("browser_parallel_press_key", args2));
2967
- server.tool("browser_parallel_select_option", "Select dropdown option", {
2968
- ...browserTargetArgs,
2969
- values: z.array(z.string())
2970
- }, (args2) => toolHandler("browser_parallel_select_option", args2));
2971
3013
  server.tool("browser_parallel_wait_for", "Wait for condition", {
2972
3014
  time: z.number().optional(),
2973
3015
  text: z.string().optional(),
@@ -2977,11 +3019,6 @@ Flows delegate multi-step tasks to a cheaper model on the server.
2977
3019
  browserIds: browserIdsArg,
2978
3020
  _scope: browserScopeArg
2979
3021
  }, (args2) => toolHandler("browser_parallel_wait_for", args2));
2980
- server.tool("browser_parallel_screenshot", "Take screenshot", {
2981
- fullPage: z.boolean().optional(),
2982
- browserIds: browserIdsArg,
2983
- _scope: browserScopeArg
2984
- }, (args2) => toolHandler("browser_parallel_screenshot", args2));
2985
3022
  server.tool("browser_parallel_evaluate", "Run JavaScript in page", {
2986
3023
  script: z.string(),
2987
3024
  browserIds: browserIdsArg,
@@ -2997,9 +3034,6 @@ Flows delegate multi-step tasks to a cheaper model on the server.
2997
3034
  variables: z.record(z.record(z.string())),
2998
3035
  _scope: browserScopeArg
2999
3036
  }, (args2) => toolHandler("browser_parallel_run_code_with_vars", args2));
3000
- server.tool("browser_parallel_hover", "Hover over element", {
3001
- ...browserTargetArgs
3002
- }, (args2) => toolHandler("browser_parallel_hover", args2));
3003
3037
  server.tool("browser_parallel_go_back", "Go back", {
3004
3038
  browserIds: browserIdsArg,
3005
3039
  _scope: browserScopeArg
@@ -3027,16 +3061,6 @@ Flows delegate multi-step tasks to a cheaper model on the server.
3027
3061
  browserIds: browserIdsArg,
3028
3062
  _scope: browserScopeArg
3029
3063
  }, (args2) => toolHandler("browser_parallel_network_requests", args2));
3030
- server.tool("browser_parallel_vision_analyze_grouped", "Analyze grouped browser screenshots with OmniParser via Ornold server", {
3031
- similarityThreshold: z.number().optional(),
3032
- browserIds: browserIdsArg,
3033
- _scope: browserScopeArg
3034
- }, (args2) => toolHandler("browser_parallel_vision_analyze_grouped", args2));
3035
- server.tool("browser_parallel_click_normalized_box", "Click normalized viewport box center", {
3036
- box: z.tuple([z.number(), z.number(), z.number(), z.number()]),
3037
- browserIds: browserIdsArg,
3038
- _scope: browserScopeArg
3039
- }, (args2) => toolHandler("browser_parallel_click_normalized_box", args2));
3040
3064
  server.tool("browser_setup_downloads", "Enable browser downloads to project files directory", {
3041
3065
  browserIds: browserIdsArg,
3042
3066
  _scope: browserScopeArg
@@ -3083,7 +3107,6 @@ Flows delegate multi-step tasks to a cheaper model on the server.
3083
3107
  autoSubmit: z.boolean().optional(),
3084
3108
  _scope: browserScopeArg
3085
3109
  }, (args2) => toolHandler("browser_solve_captcha", args2));
3086
- server.tool("browser_captcha_balance", "Check 2captcha balance", {}, () => toolHandler("browser_captcha_balance", {}));
3087
3110
  server.tool("captcha_detect", "Detect captcha on page", {
3088
3111
  browserIds: browserIdsArg
3089
3112
  }, (args2) => toolHandler("captcha_detect", args2));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ornold-mcp",
3
- "version": "1.2.2",
3
+ "version": "1.3.2",
4
4
  "type": "module",
5
5
  "main": "./dist/cli.js",
6
6
  "files": [
@@ -19,7 +19,6 @@
19
19
  "client": "tsx client/index.ts"
20
20
  },
21
21
  "dependencies": {
22
- "@anthropic-ai/sdk": "^0.88.0",
23
22
  "@modelcontextprotocol/sdk": "^1.12.0",
24
23
  "@supabase/supabase-js": "^2.103.0",
25
24
  "express": "^4.21.2",