@github/computer-use-mcp 0.1.9 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -3,8 +3,8 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
3
3
  /**
4
4
  * Creates a computer-use MCP server.
5
5
  *
6
- * The server exposes a single "computer" tool with mouse, keyboard, and
7
- * screenshot actions mirroring the Anthropic computer-use API.
6
+ * The server exposes individual tools for mouse, keyboard, and screenshot
7
+ * actions (screenshot, left_click, type, key, scroll, etc.).
8
8
  *
9
9
  * Uses prebuilt native addons (macOS / Windows / Linux). Throws when the
10
10
  * current platform or desktop session is unsupported.
package/dist/index.js CHANGED
@@ -1,20 +1,26 @@
1
- import{McpServer as N}from"@modelcontextprotocol/sdk/server/mcp.js";import{createRequire as k}from"module";import{dirname as w,join as P}from"path";import{fileURLToPath as $}from"url";var C=k(import.meta.url),R=w($(import.meta.url));function q(){let c=P(R,"prebuilds",`${process.platform}-${process.arch}`,"computer.node");try{return C(c)}catch{return null}}var b=class c{constructor(e){this.native=e}native;static create(){let e=q();if(!e)throw new Error(`Native computer bindings not available for ${process.platform}-${process.arch}`);return e.display(),new c(e)}async click(e,o,s,t){this.native.click(e,o,s,t)}async move(e,o){this.native.move(e,o)}async drag(e,o,s,t){this.native.drag(e,o,s,t)}async mouseDown(e,o){this.native.mouseDown(e,o)}async mouseUp(e,o){this.native.mouseUp(e,o)}async type(e){this.native.type(e)}async key(e){this.native.key(e)}async scroll(e,o,s,t){this.native.scroll(e,o,s,t)}async cursorPosition(){return this.native.cursorPosition()}async display(){let{width:e,height:o}=this.native.display();return{width:e,height:o}}async screenshot(){return this.native.screenshot()}};import{z as a}from"zod";function i(c,e){let o=[{type:"text",text:c}];return e?{content:o,isError:!0}:{content:o}}function T(c,e){return{content:[{type:"text",text:c},{type:"image",data:e,mimeType:"image/png"}]}}var S=a.enum(["screenshot","left_click","right_click","middle_click","double_click","triple_click","left_click_drag","mouse_move","type","key","scroll","left_mouse_down","left_mouse_up","hold_key","wait","cursor_position"]),x=a.object({x:a.number(),y:a.number()});function h(c,e){c.tool("computer",`Use a mouse and keyboard to interact with a computer, and take screenshots.
2
-
3
- Take a screenshot before clicking to determine coordinates. After each action, take a screenshot to verify the result.`,{action:S.describe(`The action to perform:
4
- - screenshot: Capture the current screen. No other parameters.
5
- - left_click: Click left button. Optional 'coordinate' (clicks at current position if omitted).
6
- - right_click: Click right button. Optional 'coordinate'.
7
- - middle_click: Click middle button. Optional 'coordinate'.
8
- - double_click: Double-click left button. Optional 'coordinate'.
9
- - triple_click: Triple-click left button. Optional 'coordinate'.
10
- - left_click_drag: Drag from 'start_coordinate' to 'coordinate'. Both required.
11
- - mouse_move: Move cursor. Requires 'coordinate'.
12
- - type: Type text. Requires 'text'. Rejects 'coordinate'.
13
- - key: Press key combo. Requires 'text' (e.g. 'Return', 'ctrl+s', 'alt+Tab'). Rejects 'coordinate'.
14
- - scroll: Scroll. Requires 'scroll_direction'. Optional 'coordinate', 'scroll_amount' (default 3).
15
- - left_mouse_down: Press left button at current position. No 'coordinate'.
16
- - left_mouse_up: Release left button at current position. No 'coordinate'.
17
- - hold_key: Hold a key. Requires 'text' (key name) and 'duration' (seconds, max 100).
18
- - wait: Pause. Requires 'duration' (seconds, max 100).
19
- - cursor_position: Get current cursor {x, y}. No other parameters.`),coordinate:x.optional().describe("{x, y} pixel coordinate. Required for mouse_move and left_click_drag (end position). Optional for clicks and scroll."),start_coordinate:x.optional().describe("{x, y} drag start position. Required for left_click_drag only."),text:a.string().optional().describe("For 'type': text to type. For 'key': key combo (e.g. 'ctrl+s'). For 'hold_key': key name."),scroll_direction:a.enum(["up","down","left","right"]).optional().describe("Required for 'scroll' action."),scroll_amount:a.number().int().nonnegative().optional().describe("Scroll clicks (default 3). Only for 'scroll'."),duration:a.number().nonnegative().max(100).optional().describe("Seconds. Required for 'hold_key' and 'wait'.")},async o=>{let{action:s,coordinate:t,start_coordinate:d,text:u,scroll_direction:m,scroll_amount:v,duration:p}=o,n=await e();switch(s){case"screenshot":{let r=await n.screenshot();if(!r)return i("Screenshot failed",!0);let l=await n.display();return T(`Display: ${l.width}x${l.height}`,r.toString("base64"))}case"cursor_position":{let r=await n.cursorPosition();return i(`${r.x},${r.y}`)}case"left_click":case"right_click":case"middle_click":case"double_click":case"triple_click":{let r=s.includes("right")?"right":s.includes("middle")?"middle":"left",l=s.includes("double")?2:s.includes("triple")?3:1;t&&await n.move(t.x,t.y);let y=t??await n.cursorPosition();return await n.click(y.x,y.y,r,l),i(`${s}${t?` at (${t.x},${t.y})`:""}`)}case"left_click_drag":return!d||!t?i("left_click_drag requires start_coordinate and coordinate",!0):(await n.drag(d.x,d.y,t.x,t.y),i(`Dragged (${d.x},${d.y}) -> (${t.x},${t.y})`));case"mouse_move":return t?(await n.move(t.x,t.y),i(`Moved to (${t.x},${t.y})`)):i("mouse_move requires coordinate",!0);case"type":return u?(await n.type(u),i(`Typed ${u.length} chars`)):i("type requires text",!0);case"key":return u?(await n.key(u),i(`Pressed ${u}`)):i("key requires text",!0);case"scroll":{if(!m)return i("scroll requires scroll_direction",!0);let r=v??3,l=await n.cursorPosition(),y=t?.x??l.x,f=t?.y??l.y,g=m==="left"?-r:m==="right"?r:0,_=m==="down"?r:m==="up"?-r:0;return await n.scroll(y,f,g,_),i(`Scrolled ${m} ${r} at (${y},${f})`)}case"left_mouse_down":{let r=await n.cursorPosition();return await n.mouseDown(r.x,r.y),i(`Mouse down at (${r.x},${r.y})`)}case"left_mouse_up":{let r=await n.cursorPosition();return await n.mouseUp(r.x,r.y),i(`Mouse up at (${r.x},${r.y})`)}case"hold_key":return!u||p==null?i("hold_key requires text and duration",!0):(await n.key(u),i(`Held ${u} for ${p}s`));case"wait":return p==null?i("wait requires duration",!0):(await new Promise(r=>setTimeout(r,p*1e3)),i(`Waited ${p}s`));default:return i(`Unknown action: ${String(s)}`,!0)}})}function E(){let c=new N({name:"computer-use",version:"1.0.0"}),e;return h(c,async()=>(e||(e=b.create()),e)),c}export{E as createServer};
1
+ import{McpServer as A}from"@modelcontextprotocol/sdk/server/mcp.js";import{existsSync as P,mkdirSync as k,readFileSync as D,writeFileSync as y}from"fs";import{homedir as T}from"os";import{join as g}from"path";var v="<!-- LOG -->",h=class{constructor(e,t=g(T(),".cache","github","computer-use-mcp","logs")){this.inner=e;this.logDir=t,this.path=g(this.logDir,`${this.timestamp()}.html`),P(this.logDir)||k(this.logDir,{recursive:!0}),y(this.path,`<!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <meta charset="utf-8">
5
+ <style>
6
+ body { font-family: system-ui, sans-serif; margin: 20px; background: #fafafa; }
7
+ table { border-collapse: collapse; width: 100%; }
8
+ th, td { border: 1px solid #ddd; padding: 6px 10px; text-align: left; vertical-align: top; }
9
+ th { background: #f0f0f0; }
10
+ img { max-width: 800px; cursor: pointer; }
11
+ .time { color: #888; white-space: nowrap; }
12
+ .action { font-weight: 600; }
13
+ .error { color: #c00; }
14
+ </style>
15
+ </head>
16
+ <body>
17
+ <table>
18
+ <tr><th>Time</th><th>Action</th><th>Arguments</th></tr>
19
+ ${v}
20
+ </table>
21
+ </body>
22
+ </html>
23
+ `)}inner;logDir;path;startTime=performance.now();timestamp(){let e=new Date,t=(o,i=2)=>String(o).padStart(i,"0");return`${e.getFullYear()}${t(e.getMonth()+1)}${t(e.getDate())}-${t(e.getHours())}${t(e.getMinutes())}${t(e.getSeconds())}.${t(Math.floor(e.getMilliseconds()/10))}`}elapsed(){return`${((performance.now()-this.startTime)/1e3).toFixed(2)}s`}insert(e){let t=D(this.path,"utf-8");y(this.path,t.replace(v,e+v))}row(e,t){this.insert(`<tr><td class="time">${this.elapsed()}</td><td class="action">${e}</td><td>${t}</td></tr>
24
+ `)}async click(e,t,o,i){await this.inner.click(e,t,o,i),this.row("click",`(${e}, ${t}) button=${o} count=${i}`)}async move(e,t){await this.inner.move(e,t),this.row("move",`(${e}, ${t})`)}async drag(e,t,o,i){await this.inner.drag(e,t,o,i),this.row("drag",`(${e}, ${t}) \u2192 (${o}, ${i})`)}async mouseDown(e,t){await this.inner.mouseDown(e,t),this.row("mouseDown",`(${e}, ${t})`)}async mouseUp(e,t){await this.inner.mouseUp(e,t),this.row("mouseUp",`(${e}, ${t})`)}async type(e){await this.inner.type(e);let t=e.replace(/&/g,"&amp;").replace(/</g,"&lt;").replace(/>/g,"&gt;").replace(/"/g,"&quot;");this.row("type",`"${t}"`)}async key(e){await this.inner.key(e),this.row("key",e)}async scroll(e,t,o,i){await this.inner.scroll(e,t,o,i),this.row("scroll",`(${e}, ${t}) dx=${o} dy=${i}`)}async cursorPosition(){let e=await this.inner.cursorPosition();return this.row("cursorPosition",`(${e.x}, ${e.y})`),e}async display(){let e=await this.inner.display();return this.row("display",`${e.width}\xD7${e.height}`),e}async screenshot(e,t){let o=await this.inner.screenshot(e,t);if(o){let i=`${this.timestamp()}.png`;y(g(this.logDir,i),o);let r=e&&t?` (${e}\xD7${t})`:"";this.insert(`<tr><td class="time">${this.elapsed()}</td><td class="action">screenshot</td><td>${r}<br><img src="${i}"></td></tr>
25
+ `)}else this.row("screenshot",'<span class="error">failed</span>');return o}};import{createRequire as N}from"module";import{dirname as S,join as C}from"path";import{fileURLToPath as W}from"url";var M=N(import.meta.url),I=S(W(import.meta.url)),b=class n{constructor(e){this.native=e}native;scaleX=1;scaleY=1;static create(){let e=C(I,"prebuilds",`${process.platform}-${process.arch}`,"computer.node"),t;try{t=M(e)}catch{throw new Error(`Native computer bindings not available for ${process.platform}-${process.arch}`)}return t.display(),new n(t)}toNative(e,t){return[e/this.scaleX,t/this.scaleY]}fromNative(e,t){return[Math.round(e*this.scaleX),Math.round(t*this.scaleY)]}async click(e,t,o,i){let[r,s]=this.toNative(e,t);this.native.click(r,s,o,i)}async move(e,t){let[o,i]=this.toNative(e,t);this.native.move(o,i)}async drag(e,t,o,i){let[r,s]=this.toNative(e,t),[u,a]=this.toNative(o,i);this.native.drag(r,s,u,a)}async mouseDown(e,t){let[o,i]=this.toNative(e,t);this.native.mouseDown(o,i)}async mouseUp(e,t){let[o,i]=this.toNative(e,t);this.native.mouseUp(o,i)}async type(e){this.native.type(e)}async key(e){this.native.key(e)}async scroll(e,t,o,i){let[r,s]=this.toNative(e,t);this.native.scroll(r,s,o,i)}async cursorPosition(){let e=this.native.cursorPosition(),[t,o]=this.fromNative(e.x,e.y);return{x:t,y:o}}async display(){let{width:e,height:t}=this.native.display();return{width:e,height:t}}async screenshot(e,t){let{width:o,height:i}=this.native.display(),r,s;if(e&&e>0&&t&&t>0)r=e,s=t;else{let l=Math.min(1,1568/Math.max(o,i),Math.sqrt(115e4/(o*i)));r=Math.floor(o*l),s=Math.floor(i*l)}return this.scaleX=r/o,this.scaleY=s/i,r===o&&s===i?this.native.screenshot(0,0):this.native.screenshot(r,s)}async capabilities(){return this.native.capabilities()}async listDisplays(){return this.native.listDisplays()}async captureDisplay(e,t=[]){return this.native.captureDisplay(e,t)}async listWindows(){return this.native.listWindows()}async activateWindow(e){return this.native.activateWindow(e)}async minimizeWindow(e){return this.native.minimizeWindow(e)}async restoreWindow(e){return this.native.restoreWindow(e)}};import{z as m}from"zod";function c(n,e){let t=[{type:"text",text:n}];return e?{content:t,isError:!0}:{content:t}}var d=m.object({x:m.number().describe("Horizontal pixel coordinate."),y:m.number().describe("Vertical pixel coordinate.")});function f(n,e){n.registerTool("screenshot",{description:"Capture the current screen. Take a screenshot before clicking to determine coordinates and after each action to verify the result.",inputSchema:{width:m.number().int().positive().optional().describe("Optional target width in pixels for resizing the screenshot."),height:m.number().int().positive().optional().describe("Optional target height in pixels for resizing the screenshot.")}},async({width:i,height:r})=>{let u=await(await e()).screenshot(i,r);return u?{content:[{type:"image",data:u.toString("base64"),mimeType:"image/png"}]}:c("Screenshot failed",!0)}),n.registerTool("cursor_position",{description:"Get the current cursor position in pixel coordinates. Returns {x, y}."},async()=>{let r=await(await e()).cursorPosition();return c(`${r.x},${r.y}`)});let t={coordinate:d.optional().describe("{x, y} pixel coordinate to click at. Clicks at current cursor position if omitted.")},o=(i,r,s,u)=>{n.registerTool(i,{description:r,inputSchema:t},async({coordinate:a})=>{let l=await e();a&&await l.move(a.x,a.y);let p=a??await l.cursorPosition();return await l.click(p.x,p.y,s,u),c(`${i}${a?` at (${a.x},${a.y})`:""}`)})};o("left_click","Click the left mouse button. Optionally move to a coordinate first.","left",1),o("right_click","Click the right mouse button. Optionally move to a coordinate first.","right",1),o("middle_click","Click the middle mouse button. Optionally move to a coordinate first.","middle",1),o("double_click","Double-click the left mouse button. Optionally move to a coordinate first.","left",2),o("triple_click","Triple-click the left mouse button. Optionally move to a coordinate first.","left",3),n.registerTool("mouse_move",{description:"Move the mouse cursor to the specified pixel coordinate.",inputSchema:{coordinate:d.describe("{x, y} pixel coordinate to move the cursor to.")}},async({coordinate:i})=>(await(await e()).move(i.x,i.y),c(`Moved to (${i.x},${i.y})`))),n.registerTool("left_click_drag",{description:"Click and drag from a start coordinate to an end coordinate.",inputSchema:{start_coordinate:d.describe("{x, y} pixel coordinate to start the drag from."),coordinate:d.describe("{x, y} pixel coordinate to drag to.")}},async({start_coordinate:i,coordinate:r})=>(await(await e()).drag(i.x,i.y,r.x,r.y),c(`Dragged (${i.x},${i.y}) -> (${r.x},${r.y})`))),n.registerTool("left_mouse_down",{description:"Press and hold the left mouse button at the current cursor position."},async()=>{let i=await e(),r=await i.cursorPosition();return await i.mouseDown(r.x,r.y),c(`Mouse down at (${r.x},${r.y})`)}),n.registerTool("left_mouse_up",{description:"Release the left mouse button at the current cursor position."},async()=>{let i=await e(),r=await i.cursorPosition();return await i.mouseUp(r.x,r.y),c(`Mouse up at (${r.x},${r.y})`)}),n.registerTool("type",{description:"Type a string of text using the keyboard.",inputSchema:{text:m.string().describe("The text to type.")}},async({text:i})=>(await(await e()).type(i),c(`Typed ${i.length} chars`))),n.registerTool("key",{description:"Press a key or key combination. Examples: 'Return', 'ctrl+s', 'alt+Tab', 'BackSpace'.",inputSchema:{text:m.string().describe("Key combo to press (e.g. 'Return', 'ctrl+s', 'alt+Tab').")}},async({text:i})=>(await(await e()).key(i),c(`Pressed ${i}`))),n.registerTool("hold_key",{description:"Hold a key down for a specified duration in seconds.",inputSchema:{text:m.string().describe("Key name to hold (e.g. 'shift', 'ctrl')."),duration:m.number().nonnegative().max(100).describe("Seconds to hold the key (max 100).")}},async({text:i,duration:r})=>(await(await e()).key(i),c(`Held ${i} for ${r}s`))),n.registerTool("scroll",{description:"Scroll the screen in a given direction at an optional coordinate.",inputSchema:{scroll_direction:m.enum(["up","down","left","right"]).describe("Direction to scroll."),coordinate:d.optional().describe("{x, y} pixel coordinate to scroll at. Scrolls at current cursor position if omitted."),scroll_amount:m.number().int().nonnegative().optional().describe("Number of scroll clicks (default 3).")}},async({scroll_direction:i,coordinate:r,scroll_amount:s})=>{let u=await e(),a=s??3,l=await u.cursorPosition(),p=r?.x??l.x,w=r?.y??l.y,x=i==="left"?-a:i==="right"?a:0,$=i==="down"?a:i==="up"?-a:0;return await u.scroll(p,w,x,$),c(`Scrolled ${i} ${a} at (${p},${w})`)}),n.registerTool("wait",{description:"Pause for a specified duration in seconds.",inputSchema:{duration:m.number().nonnegative().max(100).describe("Seconds to wait (max 100).")}},async({duration:i})=>(await new Promise(r=>setTimeout(r,i*1e3)),c(`Waited ${i}s`)))}function G(){let n=new A({name:"computer-use",version:"1.0.0"}),e=!1,t;return f(n,async()=>(t||(t=b.create(),e&&(t=new h(t))),t)),n}export{G as createServer};
20
26
  //# sourceMappingURL=index.js.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@github/computer-use-mcp",
3
- "version": "0.1.9",
3
+ "version": "0.1.12",
4
4
  "description": "Computer Use MCP Server",
5
5
  "author": "GitHub",
6
6
  "repository": {