@github/computer-use-mcp 0.1.21 → 0.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import{McpServer as
|
|
1
|
+
import{McpServer as tt}from"@modelcontextprotocol/sdk/server/mcp.js";import{z as p}from"zod";
/**
 * T: in-memory state of the current access session.
 *
 * Fields: knownApplications (Map of application id -> application record),
 * allowedAppIds (Set of ids), accessActive / allowAll flags, copilotWindowId.
 *
 * - rememberApplications(apps): clears the known map and re-registers every
 *   entry by its `id`; when allowAll is already set, every listed id is also
 *   added to the allowed set.
 * - getKnownApplications(ids): maps ids to their known records and throws an
 *   Error for any id that has not been remembered.
 * - allowApplications(ids, allowAll = false, copilotWindowId): validates the
 *   ids (throws on unknown ids), marks the session active and allows the ids;
 *   with allowAll it also allows every known application. A truthy
 *   copilotWindowId is stored. Returns getState().
 * - areAllowedForAccess(ids): validates the ids, then returns true when
 *   allowAll is set or every id is in the allowed set.
 * - getState(): plain snapshot of the flags, with allowedAppIds copied into
 *   a new array.
 */
var T=class{knownApplications=new Map;allowedAppIds=new Set;accessActive=!1;allowAll=!1;copilotWindowId=null;rememberApplications(t){this.knownApplications.clear();for(let e of t)this.knownApplications.set(e.id,e);if(this.allowAll)for(let e of t)this.allowedAppIds.add(e.id)}getKnownApplications(t){return t.map(e=>{let i=this.knownApplications.get(e);if(!i)throw new Error(`Unknown application id '${e}'. Call list_applications first.`);return i})}allowApplications(t,e=!1,i){this.getKnownApplications(t),this.accessActive=!0;for(let s of t)this.allowedAppIds.add(s);if(e){this.allowAll=!0;for(let s of this.knownApplications.keys())this.allowedAppIds.add(s)}return i&&(this.copilotWindowId=i),this.getState()}areAllowedForAccess(t){return this.getKnownApplications(t),this.allowAll||t.every(e=>this.allowedAppIds.has(e))}getState(){return{accessActive:this.accessActive,allowedAppIds:[...this.allowedAppIds],allowAll:this.allowAll,copilotWindowId:this.copilotWindowId}}};
/**
 * $: groups a flat list of window records into one entry per applicationId.
 *
 * The input is copied and sorted by applicationName, title and windowId
 * (joined with NUL separators and compared with localeCompare) so the result
 * does not depend on the input order. Each entry is
 * { id, displayName, windows: [{ windowId, title, displayId, isMinimized }] };
 * displayName comes from the first window seen for that application and is the
 * first truthy value of applicationName, title, applicationId.
 * Returns the entries sorted by displayName, then id.
 */
function $(c){let t=new Map,e=[...c].sort((i,s)=>`${i.applicationName}\0${i.title}\0${i.windowId}`.localeCompare(`${s.applicationName}\0${s.title}\0${s.windowId}`));for(let i of e){let s=i.applicationName||i.title||i.applicationId,o=t.get(i.applicationId);o||(o={id:i.applicationId,displayName:s,windows:[]},t.set(i.applicationId,o)),o.windows.push({windowId:i.windowId,title:i.title,displayId:i.displayId,isMinimized:i.isMinimized})}return[...t.values()].sort((i,s)=>`${i.displayName}\0${i.id}`.localeCompare(`${s.displayName}\0${s.id}`))}
/** Y: the set of canonical (lower-case) key names that j() accepts. */
var Y=new 
Set(["ctrl","control","shift","alt","option","super","meta","cmd","command","a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z","0","1","2","3","4","5","6","7","8","9","return","enter","tab","space","backspace","delete","forwarddelete","escape","esc","up","down","left","right","home","end","pageup","pagedown","insert","capslock","f1","f2","f3","f4","f5","f6","f7","f8","f9","f10","f11","f12","-","minus","=","equal","plus","[","]","\\",";","'",",",".","/","`"]),
/** X: alias -> canonical key name, applied by j() before the lookup in Y. */
X={arrowup:"up",arrowdown:"down",arrowleft:"left",arrowright:"right",page_up:"pageup",page_down:"pagedown",backslash:"\\",semicolon:";",slash:"/",grave:"`",bracketleft:"[",bracketright:"]",super_l:"super",gui:"super",caps_lock:"capslock",caps:"capslock",del:"delete"};
/**
 * j: normalises a key-combo string such as "ctrl+s".
 *
 * "+" is the separator, so a literal plus key is rewritten first: a bare "+"
 * becomes "plus" and a combo longer than two characters that ends in "++" has
 * its final "+" replaced by "plus". The combo is then split on "+", each part
 * is trimmed and lower-cased, empty parts are dropped, aliases from X are
 * resolved and every resulting name must be in Y.
 *
 * Returns the canonical names joined with "+".
 * Throws Error when no key remains after splitting or when a key is unknown.
 */
function j(c){c==="+"?c="plus":c.length>2&&c.endsWith("++")&&(c=c.slice(0,-1)+"plus");let t=c.split("+").map(i=>i.trim().toLowerCase()).filter(Boolean);if(t.length===0)throw new Error("Key combo must contain at least one key.");return t.map(i=>{let s=X[i]??i;if(!Y.has(s))throw new Error(`Unknown key "${i}" in combo "${c}".`);return s}).join("+")}
import*as b from"fs/promises";import*as H from"os";import*as W from"path";
/**
 * M: marker comment in the HTML log.
 * D: wrapper around a computer object that records activity in an HTML log at
 * <home>/.copilot/logs/computer-use/<timestamp>.html.
 * - timestamp(): local time formatted as YYYYMMDD-HHMMSS.cc (cc = centiseconds).
 * - elapsed(): seconds since construction with two decimals, e.g. "1.23s".
 * - insert(markup): chains write(markup) onto a promise queue; a failed write
 *   is retried once and a second failure is ignored.
 * - write(markup): checks whether the log file exists and, when it does not,
 *   creates the directory and the initial HTML document (continues below).
 */
var M="<!-- LOG -->",D=class{computer;logDir;logPath;startTime=performance.now();queue=Promise.resolve();constructor(t){this.computer=t,this.logDir=W.join(H.homedir(),".copilot","logs","computer-use"),this.logPath=W.join(this.logDir,`${this.timestamp()}.html`)}timestamp(){let t=new Date,e=(i,s=2)=>String(i).padStart(s,"0");return`${t.getFullYear()}${e(t.getMonth()+1)}${e(t.getDate())}-${e(t.getHours())}${e(t.getMinutes())}${e(t.getSeconds())}.${e(Math.floor(t.getMilliseconds()/10))}`}elapsed(){return`${((performance.now()-this.startTime)/1e3).toFixed(2)}s`}insert(t){return this.queue=this.queue.then(()=>this.write(t)).catch(()=>this.write(t)).catch(()=>{}),this.queue}async write(t){let e=!0;try{await b.access(this.logPath)}catch{e=!1}e||(await 
b.mkdir(this.logDir,{recursive:!0}),await b.writeFile(this.logPath,`<!DOCTYPE html>
|
|
2
2
|
<html>
|
|
3
3
|
<head>
|
|
4
4
|
<meta charset="utf-8">
|
|
@@ -20,11 +20,11 @@ ${M}
|
|
|
20
20
|
</table>
|
|
21
21
|
</body>
|
|
22
22
|
</html>
|
|
23
|
-
`)
|
|
24
|
-
`)}async checkPermissions(
|
|
25
|
-
`)}else this.row("screenshot",'<span class="error">failed</span>');return l}};import{createRequire as
|
|
26
|
-
`)}var E=class{constructor(
|
|
27
|
-
`),requestedSchema:{type:"object",properties:{action:{type:"string",title:`Grant ${
|
|
28
|
-
`),requestedSchema:{type:"object",properties:{action:{type:"string",title:"Confirm permissions",oneOf:[{const:"done",title:"1. I've granted the permissions"},{const:"skip",title:"2. Skip (things may not work)"}]}},required:["action"]}});if((r.action==="accept"?r.content?.action:void 0)!=="done")break}else if(l!=="done")break;if(i=await e.checkPermissions("accessibility"),s=await e.checkPermissions("screen"),!i||!s){let r=[];throw i||r.push("Accessibility"),s||r.push("Screen Recording"),new Error(`${r.join(" and ")} permission${r.length>1?"s are":" is"} still not detected. This usually means you need to restart your terminal after granting the permission. Please quit and reopen your terminal, then try again.`)}}break}}}async activeDisplayId(e){return this.selectedDisplay!==""?this.selectedDisplay:(await e.listDisplays()).find(i=>i.isPrimary)?.displayId??""}async requireAccess(){let e=this.accessStore.getState();if(!e.accessActive)throw new Error("No access session is active. Call request_access first to start a session.");let t=await this.computer();if(e.allowAll&&(await this.capabilities()).canListWindows){let s=x(await t.listWindows());this.accessStore.rememberApplications(s),e=this.accessStore.getState()}return{computer:t,allowedAppIds:e.allowedAppIds,copilotWindowId:e.copilotWindowId}}async copilotWindowId(e){if(!(await this.capabilities()).canListWindows)return null;let i=await e.getActiveWindow();return i&&(await e.listWindows()).some(o=>o.windowId===i.windowId)?i.windowId:null}async prepareForInput(e,t){if(!(await this.capabilities()).canListWindows||t.length===0)return;let s=await this.activeDisplayId(e);if(!await e.prepareForInput(s,t))throw new Error("No allowed visible windows are available on the selected display.")}async list_displays(){let t=await(await this.computer()).listDisplays();return this.toJson({displays:t,selectedDisplay:this.selectedDisplay})}async select_display({display_id:e}){let t=e??"",s=await(await 
this.computer()).listDisplays();if(t!==""&&!s.some(n=>n.displayId===t))throw new Error(`Unknown display id '${t}'. Call list_displays first.`);this.selectedDisplay=t;let o=t===""?s.find(n=>n.isPrimary)??null:s.find(n=>n.displayId===t)??null;return this.toJson({selectedDisplay:this.selectedDisplay,status:"selected",message:t===""?"Using the default display for screenshots, zoom, cursor position, and coordinate-based actions.":`Selected display '${o?.label??t}' for screenshots, zoom, cursor position, and coordinate-based actions.`})}async list_applications(){let e=await this.computer(),t=x(await e.listWindows());return this.accessStore.rememberApplications(t),this.toJson({applications:t})}async request_access({apps:e,reason:t}){await this.requestPermission();let i={const:"allow",title:"Allow"},s={const:"allow_all",title:"Allow all apps (don't ask again)"},o={const:"deny",title:"Deny (Esc)"},n=(d,b,k)=>this.toJson({allowed:d,allowAll:b.allowAll,...b.allowAll?{}:{allowedAppIds:b.allowedAppIds},message:k});if(this.options.yolo){let d=await this.computer(),b=await this.copilotWindowId(d),k=this.accessStore.allowApplications([],!0,b);return n(!0,k,"YOLO mode is enabled. Auto-allowing full desktop access for this session.")}let a=await this.capabilities();if(!this.server.server.getClientCapabilities()?.elicitation?.form){let d=this.accessStore.allowApplications([],!0);return n(!0,d,"Elicitation is not supported by this client. 
Auto-allowing full desktop access for this session.")}let r,m,h,y;if(a.canListWindows){let d=await this.computer(),b=await d.listWindows(),k=x(b);this.accessStore.rememberApplications(k);let L=await this.copilotWindowId(d),H=e&&e.length>0?"":await this.activeDisplayId(d),A=e&&e.length>0?this.accessStore.getKnownApplications(e.map(g=>g.id)):x(b.filter(g=>!g.isMinimized&&(H===""||g.displayId===H))),_=A.map(g=>g.id),q=this.accessStore.getState(),I=A.length;m=["allow","allow_all"],h=_.length===0?q.allowAll||q.accessActive:this.accessStore.areAllowedForAccess(_),y=_.length===0?q.allowAll?"Access is already configured to auto-allow future requests for this session, so you do not need to call request_access again.":"Desktop access is already active for this session.":I===1?`'${A[0].displayName}' is already shared for this session.`:"The requested apps are already shared for this session.",r={choices:[i,s,o],message:j(A.map(g=>g.displayName),t),denyMessage:I===0?"The user declined desktop access.":I===1?`The user declined to share '${A[0].displayName}'.`:"The user declined to share the requested apps.",allow:g=>this.accessStore.allowApplications(_,g==="allow_all",L),allowedMessage:g=>g==="allow_all"?"All current and future apps are allowed for the rest of this session, so you do not need to call request_access again.":I===0?"Desktop access is active for this session.":I===1?`Access session started for '${A[0].displayName}'.`:"Access session started for the requested apps."}}else{let d=this.accessStore.getState();m=["allow_all"],h=d.allowAll||d.accessActive,y=d.allowAll?"Access is already configured to auto-allow future requests for this session, so you do not need to call request_access again.":"Desktop access is already active for this session.",r={choices:[s,o],message:j([],t),denyMessage:"The user declined desktop access.",allow:()=>this.accessStore.allowApplications([],!0),allowedMessage:()=>"Access session started for the desktop. 
All current and future apps are allowed for the rest of this session, so you do not need to call request_access again."}}if(h){let d=this.accessStore.getState();return n(!0,d,y)}let f=await this.server.server.elicitInput({mode:"form",message:r.message,requestedSchema:{type:"object",properties:{decision:{type:"string",title:"Allow access for this session?",oneOf:r.choices}},required:["decision"]}}),C=f.action==="accept"?f.content?.decision:void 0;if(f.action!=="accept"||!C||!m.includes(C)){let d=this.accessStore.getState();return n(!1,d,r.denyMessage)}let B=r.allow(C);return n(!0,B,r.allowedMessage(C))}async screenshot(){let{computer:e,allowedAppIds:t,copilotWindowId:i}=await this.requireAccess();this.lock();let s=await e.screenshot(this.selectedDisplay,t,i,0,0,void 0,D);return this.toImage(s,"Screenshot failed",D)}async cursor_position(){let t=await(await this.computer()).cursorPosition(this.selectedDisplay);return this.toText(`${t.x},${t.y}`)}async _click(e,t,i,s){let{computer:o,allowedAppIds:n}=await this.requireAccess();this.lock(),await this.prepareForInput(o,n),s&&await o.move(s.x,s.y,this.selectedDisplay);let a=s??await o.cursorPosition(this.selectedDisplay);return await o.click(a.x,a.y,t,i,this.selectedDisplay),this.toText(`${e}${s?` at (${s.x},${s.y})`:""}`)}async left_click({coordinate:e}){return this._click("left_click","left",1,e)}async right_click({coordinate:e}){return this._click("right_click","right",1,e)}async middle_click({coordinate:e}){return this._click("middle_click","middle",1,e)}async double_click({coordinate:e}){return this._click("double_click","left",2,e)}async triple_click({coordinate:e}){return this._click("triple_click","left",3,e)}async mouse_move({coordinate:e}){let{computer:t,allowedAppIds:i}=await this.requireAccess();return this.lock(),await this.prepareForInput(t,i),await t.move(e.x,e.y,this.selectedDisplay),this.toText(`Moved to (${e.x},${e.y})`)}async 
left_click_drag({start_coordinate:e,coordinate:t}){let{computer:i,allowedAppIds:s}=await this.requireAccess();return this.lock(),await this.prepareForInput(i,s),await i.drag(e.x,e.y,t.x,t.y,this.selectedDisplay),this.toText(`Dragged (${e.x},${e.y}) -> (${t.x},${t.y})`)}async left_mouse_down(){let{computer:e,allowedAppIds:t}=await this.requireAccess();this.lock(),await this.prepareForInput(e,t);let i=await e.cursorPosition(this.selectedDisplay);return await e.mouseDown(i.x,i.y,this.selectedDisplay),this.toText(`Mouse down at (${i.x},${i.y})`)}async left_mouse_up(){let{computer:e,allowedAppIds:t}=await this.requireAccess();this.lock(),await this.prepareForInput(e,t);let i=await e.cursorPosition(this.selectedDisplay);return await e.mouseUp(i.x,i.y,this.selectedDisplay),this.toText(`Mouse up at (${i.x},${i.y})`)}async type({text:e}){let{computer:t,allowedAppIds:i}=await this.requireAccess();return this.lock(),await this.prepareForInput(t,i),await t.type(e),this.toText(`Typed ${e.length} chars`)}async key({text:e}){let{computer:t,allowedAppIds:i}=await this.requireAccess();this.lock(),await this.prepareForInput(t,i);let s=F(e);return await t.key(s),this.toText(`Pressed ${s}`)}async scroll({scroll_direction:e,coordinate:t,scroll_amount:i}){let{computer:s,allowedAppIds:o}=await this.requireAccess();this.lock(),await this.prepareForInput(s,o);let n=i??3,a=await s.cursorPosition(this.selectedDisplay),l=t?.x??a.x,r=t?.y??a.y,m=e==="left"?-n:e==="right"?n:0,h=e==="down"?n:e==="up"?-n:0;return await s.scroll(l,r,m,h,this.selectedDisplay),this.toText(`Scrolled ${e} ${n} at (${l},${r})`)}async wait({duration:e}){return await new Promise(t=>setTimeout(t,e*1e3)),this.toText(`Waited ${e}s`)}async get_clipboard(){let{computer:e}=await this.requireAccess();this.lock();let t=await e.getClipboard();return this.toText(t)}async set_clipboard({text:e}){let{computer:t}=await this.requireAccess();return this.lock(),await t.setClipboard(e),this.toText(`Clipboard set (${e.length} 
chars)`)}async zoom({region:e}){let{computer:t,allowedAppIds:i,copilotWindowId:s}=await this.requireAccess();this.lock();let o=await t.screenshot(this.selectedDisplay,i,s,0,0,[e.x1,e.y1,e.x2,e.y2],D);return this.toImage(o,"Zoom screenshot failed",D)}};function ke(c={}){let e=new se({name:"computer-use",version:"1.0.0"},{instructions:["This MCP server provides desktop automation tools (mouse, keyboard, screenshots, clipboard).",c.yolo?"YOLO mode is enabled. Call `request_access` once to auto-allow all current and future apps for this session.":"Before using access-gated tools (screenshot, click, type, clipboard, etc.), you MUST call `request_access` to start an access session.","If available, call `list_applications` first to discover apps, then pass the desired app IDs to `request_access`.","Calling `request_access` with an empty apps array allows all visible apps on the selected display. If none are visible, it falls back to desktop access and empty desktop screenshots until apps are allowed.","Screenshots and zoom captures are composited to show ONLY the windows of allowed applications (plus system UI like the Dock). Disallowed app windows are not visible.","If `request_access` returns `allowAll=true`, all current and future apps are allowed for the rest of this session and you do not need to call `request_access` again.","Otherwise, if you launch a new app during the session, call `list_applications` and `request_access` again to add it."].join(`
|
|
29
|
-
`)});return new E(c).connect(
|
|
23
|
+
`));let i=await b.readFile(this.logPath,"utf-8");await b.writeFile(this.logPath,i.replace(M,t+M))}async row(t,e){await this.insert(`<tr><td class="time">${this.elapsed()}</td><td class="action">${t}</td><td>${e}</td></tr>
|
|
24
|
+
`)}async checkPermissions(t){return this.computer.checkPermissions(t)}async requestPermissions(t){return this.computer.requestPermissions(t)}async capabilities(){let t=await this.computer.capabilities();return await this.row("capabilities",JSON.stringify(t)),t}async click(t,e,i,s,o=""){await this.computer.click(t,e,i,s,o),await this.row("click",`(${t}, ${e}) button=${i} count=${s}${o?` display=${o}`:""}`)}async move(t,e,i=""){await this.computer.move(t,e,i),await this.row("move",`(${t}, ${e})${i?` display=${i}`:""}`)}async drag(t,e,i,s,o=""){await this.computer.drag(t,e,i,s,o),await this.row("drag",`(${t}, ${e}) \u2192 (${i}, ${s})${o?` display=${o}`:""}`)}async mouseDown(t,e,i=""){await this.computer.mouseDown(t,e,i),await this.row("mouseDown",`(${t}, ${e})${i?` display=${i}`:""}`)}async mouseUp(t,e,i=""){await this.computer.mouseUp(t,e,i),await this.row("mouseUp",`(${t}, ${e})${i?` display=${i}`:""}`)}async type(t){await this.computer.type(t);let e=t.replace(/&/g,"&").replace(/</g,"<").replace(/>/g,">").replace(/"/g,""");await this.row("type",`"${e}"`)}async key(t){await this.computer.key(t),await this.row("key",t)}async scroll(t,e,i,s,o=""){await this.computer.scroll(t,e,i,s,o),await this.row("scroll",`(${t}, ${e}) dx=${i} dy=${s}${o?` display=${o}`:""}`)}async cursorPosition(t=""){let e=await this.computer.cursorPosition(t);return await this.row("cursorPosition",`(${e.x}, ${e.y})${t?` display=${t}`:""}`),e}async display(t=""){let e=await this.computer.display(t);return await this.row("display",`${e.width}x${e.height}${t?` display=${t}`:""}`),e}async listDisplays(){let t=await this.computer.listDisplays();return await this.row("listDisplays",`${t.length} displays`),t}async listWindows(){let t=await this.computer.listWindows();return await this.row("listWindows",`${t.length} windows`),t}async getActiveWindow(){let t=await this.computer.getActiveWindow();return await this.row("getActiveWindow",t?`${t.windowId} (${t.applicationId}) 
title="${t.title}"`:"null"),t}async activateApplication(t){let e=await this.computer.activateApplication(t);return await this.row("activateApplication",`${t} -> ${e}`),e}async concealApplication(t){let e=await this.computer.concealApplication(t);return await this.row("concealApplication",`${t} -> ${e}`),e}async restoreApplication(t){let e=await this.computer.restoreApplication(t);return await this.row("restoreApplication",`${t} -> ${e}`),e}async activateWindow(t){let e=await this.computer.activateWindow(t);return await this.row("activateWindow",`${t} -> ${e}`),e}async concealWindow(t){let e=await this.computer.concealWindow(t);return await this.row("concealWindow",`${t} -> ${e}`),e}async restoreWindow(t){let e=await this.computer.restoreWindow(t);return await this.row("restoreWindow",`${t} -> ${e}`),e}async getClipboard(){let t=await this.computer.getClipboard(),e=t.replace(/&/g,"&").replace(/</g,"<").replace(/>/g,">").replace(/"/g,""");return await this.row("getClipboard",`"${e.slice(0,200)}${e.length>200?"\u2026":""}"`),t}async setClipboard(t){await this.computer.setClipboard(t);let e=t.replace(/&/g,"&").replace(/</g,"<").replace(/>/g,">").replace(/"/g,""");await this.row("setClipboard",`"${e.slice(0,200)}${e.length>200?"\u2026":""}" (${t.length} chars)`)}async prepareForInput(t,e){let i=await this.computer.prepareForInput(t,e);return await this.row("prepareForInput",`${t} -> ${i} (${e.length} apps)`),i}async screenshot(t,e,i,s,o,n,r){let l=await this.computer.screenshot(t,e,i,s,o,n,r);if(l){let a=r===void 0?"png":"jpg",m=`${this.timestamp()}.${a}`;await b.mkdir(this.logDir,{recursive:!0}),await b.writeFile(W.join(this.logDir,m),l);let h=s&&o?` (${s}x${o})`:"",f=n?` crop=[${n.join(",")}]`:"",v=i?` exclude=${i}`:"";await this.insert(`<tr><td class="time">${this.elapsed()}</td><td class="action">screenshot</td><td>${h}${f}${v}<br><img src="${m}"></td></tr>
|
|
25
|
+
`)}else await this.row("screenshot",'<span class="error">failed</span>');return l}};
import{createRequire as J}from"module";import{dirname as K,join as G}from"path";import{fileURLToPath as Q}from"url";
/* V: require() bound to this module's URL; Z: directory containing this module. */
var V=J(import.meta.url),Z=K(Q(import.meta.url)),
/**
 * q: async adapter over the native "computer.node" addon.
 *
 * Pointer methods (click, move, drag, mouseDown, mouseUp, scroll) take
 * coordinates in the scaled "target" space of a display: they resolve the
 * coordinate space for the given display id ("" selects the default display),
 * reject out-of-range points and convert to native coordinates before calling
 * the addon. The remaining methods forward to the addon directly.
 */
q=class c{constructor(t){this.native=t}native;
/**
 * create(): loads <module dir>/prebuilds/<platform>-<arch>/computer.node and
 * wraps it. Throws an Error (with the load failure as `cause`) naming the
 * platform, architecture and path when the addon cannot be loaded.
 */
static create(){let t=G(Z,"prebuilds",`${process.platform}-${process.arch}`,"computer.node"),e;try{e=V(t)}catch(i){let s=i instanceof Error?i.message:String(i);throw new Error(`Native computer bindings not available for ${process.platform}-${process.arch}: ${s} (path: ${t})`,{cause:i})}return new c(e)}async checkPermissions(t){return this.native.checkPermissions(t)}async requestPermissions(t){this.native.requestPermissions(t)}async click(t,e,i,s,o=""){let n=this.resolveCoordinateSpace(o);this.assertPointInBounds(n,t,e);let[r,l]=this.toNative(n,t,e);this.native.click(r,l,i,s)}async move(t,e,i=""){let s=this.resolveCoordinateSpace(i);this.assertPointInBounds(s,t,e);let[o,n]=this.toNative(s,t,e);this.native.move(o,n)}async drag(t,e,i,s,o=""){let n=this.resolveCoordinateSpace(o);this.assertPointInBounds(n,t,e),this.assertPointInBounds(n,i,s);let[r,l]=this.toNative(n,t,e),[a,m]=this.toNative(n,i,s);this.native.drag(r,l,a,m)}async mouseDown(t,e,i=""){let s=this.resolveCoordinateSpace(i);this.assertPointInBounds(s,t,e);let[o,n]=this.toNative(s,t,e);this.native.mouseDown(o,n)}async mouseUp(t,e,i=""){let s=this.resolveCoordinateSpace(i);this.assertPointInBounds(s,t,e);let[o,n]=this.toNative(s,t,e);this.native.mouseUp(o,n)}async type(t){this.native.type(t)}async key(t){this.native.key(t)}async scroll(t,e,i,s,o=""){let n=this.resolveCoordinateSpace(o);this.assertPointInBounds(n,t,e);let[r,l]=this.toNative(n,t,e);this.native.scroll(r,l,i,s)}
/** cursorPosition(displayId): native cursor position converted into the display's target space. */
async cursorPosition(t=""){let e=this.native.cursorPosition(),i=this.resolveCoordinateSpace(t),[s,o]=this.fromNative(i,e.x,e.y);return{x:s,y:o}}
/** display(displayId): the target width and height used for that display. */
async display(t=""){let 
e=this.resolveCoordinateSpace(t);return{width:e.targetWidth,height:e.targetHeight}}
/**
 * screenshot(displayId, allowedAppIds, excludedWindowId, width, height, crop, quality):
 * parameter names are inferred from the callers visible in this file. With a
 * crop [x1, y1, x2, y2] both corners are validated and converted to native
 * pixels, and the addon is called with a 0x0 output size; otherwise the addon
 * is called with the target size and an all-zero crop. A missing excluded
 * window id is passed as "" and a missing last argument as -1.
 */
async screenshot(t,e,i,s,o,n,r){let l=this.resolveCoordinateSpace(t,s,o),a=r??-1;if(n){this.assertRegionEdgeInBounds(l,n[0],n[1]),this.assertRegionEdgeInBounds(l,n[2],n[3]);let[m,h,f,v]=this.toNativeCrop(l,n[0],n[1],n[2],n[3]);return this.native.screenshot(t,e,i??"",0,0,m,h,f,v,a)}return this.native.screenshot(t,e,i??"",l.targetWidth,l.targetHeight,0,0,0,0,a)}async capabilities(){return this.native.capabilities()}async listDisplays(){return this.native.listDisplays()}async listWindows(){return this.native.listWindows()}
/** getActiveWindow(): returns null when the addon does not provide getActiveWindow. */
async getActiveWindow(){return this.native.getActiveWindow?this.native.getActiveWindow():null}async activateApplication(t){return this.native.activateApplication(t)}async concealApplication(t){return this.native.concealApplication(t)}async restoreApplication(t){return this.native.restoreApplication(t)}async activateWindow(t){return this.native.activateWindow(t)}async concealWindow(t){return this.native.concealWindow(t)}async restoreWindow(t){return this.native.restoreWindow(t)}async getClipboard(){return this.native.getClipboard()}async setClipboard(t){this.native.setClipboard(t)}async prepareForInput(t,e){return this.native.prepareForInput(t,e)}
/** resolveDisplay(id): the display with that id, or for "" the primary display (else the first listed); may be undefined. */
resolveDisplay(t){let e=this.native.listDisplays();return t!==""?e.find(i=>i.displayId===t):e.find(i=>i.isPrimary)??e[0]}
/**
 * targetSizeForDisplay(width, height, forcedWidth = 0, forcedHeight = 0):
 * returns the forced size when both forced values are positive. Otherwise the
 * display size is scaled by min(1, 1568 / longest edge, sqrt(1150000 / area)),
 * i.e. never enlarged, and both dimensions are floored.
 */
targetSizeForDisplay(t,e,i=0,s=0){if(i>0&&s>0)return{targetWidth:i,targetHeight:s};let r=Math.min(1,1568/Math.max(t,e),Math.sqrt(115e4/(t*e)));return{targetWidth:Math.floor(t*r),targetHeight:Math.floor(e*r)}}
/**
 * resolveCoordinateSpace(displayId, forcedWidth = 0, forcedHeight = 0):
 * origin and size come from the resolved display's bounds; when no display is
 * found the origin is (0, 0) and the size falls back to native.display(), then
 * to 1. scaleX / scaleY are target size divided by display size.
 */
resolveCoordinateSpace(t,e=0,i=0){let s=this.resolveDisplay(t),o=s?.bounds.x??0,n=s?.bounds.y??0,r=s?void 
0:this.native.display(),l=s?.bounds.width??r?.width??1,a=s?.bounds.height??r?.height??1,{targetWidth:m,targetHeight:h}=this.targetSizeForDisplay(l,a,e,i);return{originX:o,originY:n,displayWidth:l,displayHeight:a,scaleX:m/l,scaleY:h/a,targetWidth:m,targetHeight:h}}
/** toNative(space, x, y): target-space point -> native point (display origin + coordinate / scale). */
toNative(t,e,i){return[t.originX+e/t.scaleX,t.originY+i/t.scaleY]}
/** toNativeCrop(space, x1, y1, x2, y2): unscales a region, rounding the first corner down and the second up; the display origin is not added. */
toNativeCrop(t,e,i,s,o){return[Math.floor(e/t.scaleX),Math.floor(i/t.scaleY),Math.ceil(s/t.scaleX),Math.ceil(o/t.scaleY)]}
/** assertPointInBounds(space, x, y): throws RangeError unless x and y are finite, 0 <= x < targetWidth and 0 <= y < targetHeight. */
assertPointInBounds(t,e,i){if(!Number.isFinite(e)||!Number.isFinite(i))throw new RangeError(`Coordinates must be finite numbers: x=${e}, y=${i}`);if(e<0||i<0||e>=t.targetWidth||i>=t.targetHeight)throw new RangeError(`Coordinates out of bounds for target display: x=${e}, y=${i}, width=${t.targetWidth}, height=${t.targetHeight}`)}
/** assertRegionEdgeInBounds(space, x, y): like assertPointInBounds, but x === targetWidth and y === targetHeight are allowed. */
assertRegionEdgeInBounds(t,e,i){if(!Number.isFinite(e)||!Number.isFinite(i))throw new RangeError(`Coordinates must be finite numbers: x=${e}, y=${i}`);if(e<0||i<0||e>t.targetWidth||i>t.targetHeight)throw new RangeError(`Crop coordinates out of bounds for target display: x=${e}, y=${i}, width=${t.targetWidth}, height=${t.targetHeight}`)}
/** fromNative(space, x, y): native point -> target-space point, rounded to integers. */
fromNative(t,e,i){return[Math.round((e-t.originX)*t.scaleX),Math.round((i-t.originY)*t.scaleY)]}};
/*
 * Shared tool metadata and input schemas.
 * N: numeric constant 0.8 (NOTE(review): its use is outside this view; confirm).
 * A / w / O: annotation presets passed as `annotations` when tools are registered.
 * u / U: sentences appended to tool descriptions.
 * k: number schema that coerces its input and rounds it with Math.round.
 * C: {x, y} point schema; et: {x1, y1, x2, y2} region schema requiring
 * x2 > x1 and y2 > y1; it: {id} application reference; st: optional display_id.
 */
var N=.8,A={readOnlyHint:!0,destructiveHint:!1,openWorldHint:!1},w={readOnlyHint:!1,destructiveHint:!0,openWorldHint:!0},O={readOnlyHint:!1,destructiveHint:!1,openWorldHint:!1},u=" Requires an active access session via `request_access`.",U=" If you do not see an expected application window, you MUST call `list_applications` then `request_access` to add it before retrying.",k=p.coerce.number().transform(Math.round),C=p.object({x:k.describe("Horizontal pixel coordinate."),y:k.describe("Vertical pixel coordinate.")}),et=p.object({x1:k.describe("Left edge of the region in screenshot pixel coordinates."),y1:k.describe("Top edge of the region in screenshot pixel coordinates."),x2:k.describe("Right edge of the region in screenshot pixel coordinates."),y2:k.describe("Bottom edge 
of the region in screenshot pixel coordinates.")}).refine(({x1:c,x2:t})=>t>c,{message:"x2 must be greater than x1.",path:["x2"]}).refine(({y1:c,y2:t})=>t>c,{message:"y2 must be greater than y1.",path:["y2"]}),it=p.object({id:p.string().min(1).describe("Stable application id returned by list_applications.")}),st=p.object({display_id:p.string().optional().describe("Display id to select. Omit or pass an empty string to use the default display.")});
/**
 * B(appNames, reason): builds the text of the access dialog. An empty
 * appNames list produces the whole-desktop wording; otherwise the apps are
 * listed one per line. A truthy reason is appended under a "Reason:" line.
 * The lines are joined with a line break.
 */
function B(c,t){let e=c.length===0?["","Computer Use runs on your actual desktop and can send mouse and keyboard input.","","Computer Use wants to control your desktop for this session."]:["","Computer Use runs on your actual desktop and can send mouse and keyboard input to the apps you share.","","Computer Use wants to control these apps:","",...c.map(i=>`- ${i}`)];return t&&e.push("","Reason:",t),e.join(`
|
|
26
|
+
`)}var E=class{constructor(t={}){this.options=t}options;_computer;_caps;_locked=null;server;accessStore=new T;selectedDisplay="";toText(t,e){let i=[{type:"text",text:t}];return e?{content:i,isError:!0}:{content:i}}toJson(t){return this.toText(JSON.stringify(t,null,2))}toImage(t,e,i){return t?{content:[{type:"image",data:t.toString("base64"),mimeType:i!==void 0?"image/jpeg":"image/png",_meta:{screenshot:!0}}]}:this.toText(e,!0)}connect(t){this.server=t;let e=p.object({method:p.literal("notifications/copilot"),params:p.object({type:p.string()}).passthrough()});t.server.setNotificationHandler(e,s=>{switch(s.params.type){case"assistant.turn_start":this.unlock();break;case"assistant.turn_end":this.unlock();break}}),t.registerTool("list_displays",{description:"List available displays and the currently selected display used for screenshots, zoom, cursor position, and coordinate-based actions.",annotations:A},()=>this.list_displays()),t.registerTool("select_display",{description:"Select the active display used for screenshots, zoom, cursor position, and coordinate-based actions. Omit display_id or pass an empty string to use the default display.",inputSchema:st.shape,annotations:O},s=>this.select_display(s)),t.registerTool("list_applications",{description:"List running desktop applications with stable ids that can be allowed via request_access.",annotations:A},()=>this.list_applications()),t.registerTool("request_access",{description:"Request access to previously listed applications using client-side form elicitation. Pass an empty apps array to request access to all apps on the selected display. IMPORTANT: Call `list_applications` first to discover apps when you want to allow specific apps, then call this tool before using access-gated tools such as screenshot, click, type, or clipboard. Returns: `{ allowed: boolean, allowAll: boolean, allowedAppIds?: string[], message: string }`. 
If `allowAll` is true, all current and future apps are allowed for this session and you do not need to call `request_access` again. Otherwise, if you launch a new app (via Start, Dock, script, etc.), call `list_applications` and `request_access` again to add it to the allowed set.",inputSchema:{apps:p.array(it).describe("Applications to allow. Pass an empty array to allow all apps on the selected display."),reason:p.string().min(1).optional().describe("Optional reason text shown in the access dialog.")},annotations:O},s=>this.request_access(s)),t.registerTool("screenshot",{description:"Capture the current screen. Take a screenshot before clicking to determine coordinates and after each action to verify the result. Screenshots are filtered to show only allowed application windows."+U+u,annotations:A},()=>this.screenshot()),t.registerTool("cursor_position",{description:"Get the current cursor position in pixel coordinates. Returns {x, y}."+u,annotations:A},()=>this.cursor_position());let i={coordinate:C.optional().describe("{x, y} pixel coordinate to click at. Clicks at current cursor position if omitted.")};t.registerTool("left_click",{description:"Click the left mouse button. Optionally move to a coordinate first."+u,inputSchema:i,annotations:w},s=>this.left_click(s)),t.registerTool("right_click",{description:"Click the right mouse button. Optionally move to a coordinate first."+u,inputSchema:i,annotations:w},s=>this.right_click(s)),t.registerTool("middle_click",{description:"Click the middle mouse button. Optionally move to a coordinate first."+u,inputSchema:i,annotations:w},s=>this.middle_click(s)),t.registerTool("double_click",{description:"Double-click the left mouse button. Optionally move to a coordinate first."+u,inputSchema:i,annotations:w},s=>this.double_click(s)),t.registerTool("triple_click",{description:"Triple-click the left mouse button. 
Optionally move to a coordinate first."+u,inputSchema:i,annotations:w},s=>this.triple_click(s)),t.registerTool("mouse_move",{description:"Move the mouse cursor to the specified pixel coordinate."+u,inputSchema:{coordinate:C.describe("{x, y} pixel coordinate to move the cursor to.")},annotations:w},s=>this.mouse_move(s)),t.registerTool("left_click_drag",{description:"Click and drag from a start coordinate to an end coordinate."+u,inputSchema:{start_coordinate:C.describe("{x, y} pixel coordinate to start the drag from."),coordinate:C.describe("{x, y} pixel coordinate to drag to.")},annotations:w},s=>this.left_click_drag(s)),t.registerTool("left_mouse_down",{description:"Press and hold the left mouse button at the current cursor position."+u,annotations:w},()=>this.left_mouse_down()),t.registerTool("left_mouse_up",{description:"Release the left mouse button at the current cursor position."+u,annotations:w},()=>this.left_mouse_up()),t.registerTool("type",{description:"Type a string of text using the keyboard."+u,inputSchema:{text:p.string().describe("The text to type.")},annotations:w},s=>this.type(s)),t.registerTool("key",{description:"Press a key or key combination. Examples: 'Return', 'ctrl+s', 'alt+Tab', 'BackSpace'."+u,inputSchema:{text:p.string().describe("Key combo to press (e.g. 'Return', 'ctrl+s', 'alt+Tab').")},annotations:w},s=>this.key(s)),t.registerTool("scroll",{description:"Scroll the screen in a given direction at an optional coordinate."+u,inputSchema:{scroll_direction:p.enum(["up","down","left","right"]).describe("Direction to scroll."),coordinate:C.optional().describe("{x, y} pixel coordinate to scroll at. 
Scrolls at current cursor position if omitted."),scroll_amount:p.number().int().nonnegative().max(100).optional().describe("Number of scroll clicks (default 3).")},annotations:w},s=>this.scroll(s)),t.registerTool("wait",{description:"Pause for a specified duration in seconds.",inputSchema:{duration:p.number().nonnegative().max(100).describe("Seconds to wait (max 100).")},annotations:{...A,idempotentHint:!0}},s=>this.wait(s)),t.registerTool("get_clipboard",{description:"Get the current text contents of the system clipboard."+u,annotations:A},()=>this.get_clipboard()),t.registerTool("set_clipboard",{description:"Set the system clipboard to the specified text."+u,inputSchema:{text:p.string().describe("The text to copy to the clipboard.")},annotations:O},s=>this.set_clipboard(s)),t.registerTool("zoom",{description:"Capture a specific region of the screen at full resolution for detailed inspection. The region is defined by top-left (x1, y1) and bottom-right (x2, y2) coordinates in the screenshot coordinate space. 
Like screenshots, only allowed application windows are shown."+U+u,inputSchema:{region:et.describe("{x1, y1, x2, y2} coordinates defining top-left and bottom-right corners of the region to capture.")},annotations:A},s=>this.zoom(s))}lock(){this._locked!==null&&(this._locked=!0)}unlock(){this._locked=!1}async computer(){return this._computer||(this._computer=q.create(),process.env.DEBUG&&(this._computer=new D(this._computer))),this._computer}async capabilities(){if(!this._caps){let t=await this.computer();this._caps=await t.capabilities()}return this._caps}async requestPermission(){switch(process.platform){case"linux":{await(await this.computer()).display();break}case"darwin":{let t=await this.computer(),e=[],i=await t.checkPermissions("accessibility");i||e.push("Accessibility");let s=await t.checkPermissions("screen");if(s||e.push("Screen Recording"),e.length>0){if(!this.server.server.getClientCapabilities()?.elicitation?.form)break;let n=["",`Computer Use needs the following macOS permission${e.length>1?"s":""} to control your desktop:`,"",...e.map(a=>`\u2022 ${a}`),"",'Clicking "Open System Settings" will open the relevant settings page(s).',`Enable the permission${e.length>1?"s":""} for your terminal app, then click "I've granted the permissions".`,"","Note: You may need to restart your terminal after granting permissions for them to take effect."],r=await this.server.server.elicitInput({mode:"form",message:n.join(`
|
|
27
|
+
`),requestedSchema:{type:"object",properties:{action:{type:"string",title:`Grant ${e.join(" and ")} permission${e.length>1?"s":""}`,oneOf:[{const:"open",title:"1. Open System Settings"},{const:"done",title:"2. I've granted the permissions"},{const:"skip",title:"3. Skip (things may not work)"}]}},required:["action"]}}),l=r.action==="accept"?r.content?.action:void 0;if(l==="open"){i||await t.requestPermissions("accessibility"),s||await t.requestPermissions("screen");let a=await this.server.server.elicitInput({mode:"form",message:["","System Settings has been opened.","",`Enable ${e.join(" and ")} for your terminal app.`,e.length>1?"Both settings pages have been opened \u2014 check each one.":"","",`After granting permissions, click "I've granted the permissions" below.`,"If the toggle was already on, try removing and re-adding your terminal app."].filter(Boolean).join(`
|
|
28
|
+
`),requestedSchema:{type:"object",properties:{action:{type:"string",title:"Confirm permissions",oneOf:[{const:"done",title:"1. I've granted the permissions"},{const:"skip",title:"2. Skip (things may not work)"}]}},required:["action"]}});if((a.action==="accept"?a.content?.action:void 0)!=="done")break}else if(l!=="done")break;if(i=await t.checkPermissions("accessibility"),s=await t.checkPermissions("screen"),!i||!s){let a=[];throw i||a.push("Accessibility"),s||a.push("Screen Recording"),new Error(`${a.join(" and ")} permission${a.length>1?"s are":" is"} still not detected. This usually means you need to restart your terminal after granting the permission. Please quit and reopen your terminal, then try again.`)}}break}}}async activeDisplayId(t){return this.selectedDisplay!==""?this.selectedDisplay:(await t.listDisplays()).find(i=>i.isPrimary)?.displayId??""}async requireAccess(){let t=this.accessStore.getState();if(!t.accessActive)throw new Error("No access session is active. Call request_access first to start a session.");let e=await this.computer();if(t.allowAll&&(await this.capabilities()).canListWindows){let s=$(await e.listWindows());this.accessStore.rememberApplications(s),t=this.accessStore.getState()}return{computer:e,allowedAppIds:t.allowedAppIds,copilotWindowId:t.copilotWindowId}}async copilotWindowId(t){if(!(await this.capabilities()).canListWindows)return null;let i=await t.getActiveWindow();return i&&(await t.listWindows()).some(o=>o.windowId===i.windowId)?i.windowId:null}async prepareForInput(t,e){if(!(await this.capabilities()).canListWindows||e.length===0)return;let s=await this.activeDisplayId(t);if(!await t.prepareForInput(s,e))throw new Error("No allowed visible windows are available on the selected display.")}async list_displays(){let e=await(await this.computer()).listDisplays();return this.toJson({displays:e,selectedDisplay:this.selectedDisplay})}async select_display({display_id:t}){let e=t??"",s=await(await 
this.computer()).listDisplays();if(e!==""&&!s.some(n=>n.displayId===e))throw new Error(`Unknown display id '${e}'. Call list_displays first.`);this.selectedDisplay=e;let o=e===""?s.find(n=>n.isPrimary)??null:s.find(n=>n.displayId===e)??null;return this.toJson({selectedDisplay:this.selectedDisplay,status:"selected",message:e===""?"Using the default display for screenshots, zoom, cursor position, and coordinate-based actions.":`Selected display '${o?.label??e}' for screenshots, zoom, cursor position, and coordinate-based actions.`})}async list_applications(){let t=await this.computer(),e=$(await t.listWindows());return this.accessStore.rememberApplications(e),this.toJson({applications:e})}async request_access({apps:t,reason:e}){await this.requestPermission();let i={const:"allow",title:"Allow"},s={const:"allow_all",title:"Allow all apps (don't ask again)"},o={const:"deny",title:"Deny (Esc)"},n=(d,y,x)=>this.toJson({allowed:d,allowAll:y.allowAll,...y.allowAll?{}:{allowedAppIds:y.allowedAppIds},message:x});if(this.options.yolo){let d=await this.computer(),y=await this.copilotWindowId(d),x=this.accessStore.allowApplications([],!0,y);return n(!0,x,"YOLO mode is enabled. Auto-allowing full desktop access for this session.")}let r=await this.capabilities();if(!this.server.server.getClientCapabilities()?.elicitation?.form){let d=this.accessStore.allowApplications([],!0);return n(!0,d,"Elicitation is not supported by this client. 
Auto-allowing full desktop access for this session.")}let a,m,h,f;if(r.canListWindows){let d=await this.computer(),y=await d.listWindows(),x=$(y);this.accessStore.rememberApplications(x);let z=await this.copilotWindowId(d),F=t&&t.length>0?"":await this.activeDisplayId(d),P=t&&t.length>0?this.accessStore.getKnownApplications(t.map(g=>g.id)):$(y.filter(g=>!g.isMinimized&&(F===""||g.displayId===F))),S=P.map(g=>g.id),R=this.accessStore.getState(),I=P.length;m=["allow","allow_all"],h=S.length===0?R.allowAll||R.accessActive:this.accessStore.areAllowedForAccess(S),f=S.length===0?R.allowAll?"Access is already configured to auto-allow future requests for this session, so you do not need to call request_access again.":"Desktop access is already active for this session.":I===1?`'${P[0].displayName}' is already shared for this session.`:"The requested apps are already shared for this session.",a={choices:[i,s,o],message:B(P.map(g=>g.displayName),e),denyMessage:I===0?"The user declined desktop access.":I===1?`The user declined to share '${P[0].displayName}'.`:"The user declined to share the requested apps.",allow:g=>this.accessStore.allowApplications(S,g==="allow_all",z),allowedMessage:g=>g==="allow_all"?"All current and future apps are allowed for the rest of this session, so you do not need to call request_access again.":I===0?"Desktop access is active for this session.":I===1?`Access session started for '${P[0].displayName}'.`:"Access session started for the requested apps."}}else{let d=this.accessStore.getState();m=["allow_all"],h=d.allowAll||d.accessActive,f=d.allowAll?"Access is already configured to auto-allow future requests for this session, so you do not need to call request_access again.":"Desktop access is already active for this session.",a={choices:[s,o],message:B([],e),denyMessage:"The user declined desktop access.",allow:()=>this.accessStore.allowApplications([],!0),allowedMessage:()=>"Access session started for the desktop. 
All current and future apps are allowed for the rest of this session, so you do not need to call request_access again."}}if(h){let d=this.accessStore.getState();return n(!0,d,f)}let v=await this.server.server.elicitInput({mode:"form",message:a.message,requestedSchema:{type:"object",properties:{decision:{type:"string",title:"Allow access for this session?",oneOf:a.choices}},required:["decision"]}}),_=v.action==="accept"?v.content?.decision:void 0;if(v.action!=="accept"||!_||!m.includes(_)){let d=this.accessStore.getState();return n(!1,d,a.denyMessage)}let L=a.allow(_);return n(!0,L,a.allowedMessage(_))}async screenshot(){let{computer:t,allowedAppIds:e,copilotWindowId:i}=await this.requireAccess();this.lock();let s=await t.screenshot(this.selectedDisplay,e,i,0,0,void 0,N);return this.toImage(s,"Screenshot failed",N)}async cursor_position(){let e=await(await this.computer()).cursorPosition(this.selectedDisplay);return this.toText(`${e.x},${e.y}`)}async _click(t,e,i,s){let{computer:o,allowedAppIds:n}=await this.requireAccess();this.lock(),await this.prepareForInput(o,n),s&&await o.move(s.x,s.y,this.selectedDisplay);let r=s??await o.cursorPosition(this.selectedDisplay);return await o.click(r.x,r.y,e,i,this.selectedDisplay),this.toText(`${t}${s?` at (${s.x},${s.y})`:""}`)}async left_click({coordinate:t}){return this._click("left_click","left",1,t)}async right_click({coordinate:t}){return this._click("right_click","right",1,t)}async middle_click({coordinate:t}){return this._click("middle_click","middle",1,t)}async double_click({coordinate:t}){return this._click("double_click","left",2,t)}async triple_click({coordinate:t}){return this._click("triple_click","left",3,t)}async mouse_move({coordinate:t}){let{computer:e,allowedAppIds:i}=await this.requireAccess();return this.lock(),await this.prepareForInput(e,i),await e.move(t.x,t.y,this.selectedDisplay),this.toText(`Moved to (${t.x},${t.y})`)}async 
left_click_drag({start_coordinate:t,coordinate:e}){let{computer:i,allowedAppIds:s}=await this.requireAccess();return this.lock(),await this.prepareForInput(i,s),await i.drag(t.x,t.y,e.x,e.y,this.selectedDisplay),this.toText(`Dragged (${t.x},${t.y}) -> (${e.x},${e.y})`)}async left_mouse_down(){let{computer:t,allowedAppIds:e}=await this.requireAccess();this.lock(),await this.prepareForInput(t,e);let i=await t.cursorPosition(this.selectedDisplay);return await t.mouseDown(i.x,i.y,this.selectedDisplay),this.toText(`Mouse down at (${i.x},${i.y})`)}async left_mouse_up(){let{computer:t,allowedAppIds:e}=await this.requireAccess();this.lock(),await this.prepareForInput(t,e);let i=await t.cursorPosition(this.selectedDisplay);return await t.mouseUp(i.x,i.y,this.selectedDisplay),this.toText(`Mouse up at (${i.x},${i.y})`)}async type({text:t}){let{computer:e,allowedAppIds:i}=await this.requireAccess();return this.lock(),await this.prepareForInput(e,i),await e.type(t),this.toText(`Typed ${t.length} chars`)}async key({text:t}){let{computer:e,allowedAppIds:i}=await this.requireAccess();this.lock(),await this.prepareForInput(e,i);let s=j(t);return await e.key(s),this.toText(`Pressed ${s}`)}async scroll({scroll_direction:t,coordinate:e,scroll_amount:i}){let{computer:s,allowedAppIds:o}=await this.requireAccess();this.lock(),await this.prepareForInput(s,o);let n=i??3,r=await s.cursorPosition(this.selectedDisplay),l=e?.x??r.x,a=e?.y??r.y,m=t==="left"?-n:t==="right"?n:0,h=t==="down"?n:t==="up"?-n:0;return await s.scroll(l,a,m,h,this.selectedDisplay),this.toText(`Scrolled ${t} ${n} at (${l},${a})`)}async wait({duration:t}){return await new Promise(e=>setTimeout(e,t*1e3)),this.toText(`Waited ${t}s`)}async get_clipboard(){let{computer:t}=await this.requireAccess();this.lock();let e=await t.getClipboard();return this.toText(e)}async set_clipboard({text:t}){let{computer:e}=await this.requireAccess();return this.lock(),await e.setClipboard(t),this.toText(`Clipboard set (${t.length} 
chars)`)}async zoom({region:t}){let{computer:e,allowedAppIds:i,copilotWindowId:s}=await this.requireAccess();this.lock();let o=await e.screenshot(this.selectedDisplay,i,s,0,0,[t.x1,t.y1,t.x2,t.y2],N);return this.toImage(o,"Zoom screenshot failed",N)}};function bt(c={}){let t=new tt({name:"computer-use",version:"1.0.0"},{instructions:["This MCP server provides desktop automation tools (mouse, keyboard, screenshots, clipboard).",c.yolo?"YOLO mode is enabled. Call `request_access` once to auto-allow all current and future apps for this session.":"Before using access-gated tools (screenshot, click, type, clipboard, etc.), you MUST call `request_access` to start an access session.","If available, call `list_applications` first to discover apps, then pass the desired app IDs to `request_access`.","Calling `request_access` with an empty apps array allows all visible apps on the selected display. If none are visible, it falls back to desktop access and empty desktop screenshots until apps are allowed.","Screenshots and zoom captures are composited to show ONLY the windows of allowed applications (plus system UI like the Dock). Disallowed app windows are not visible.","If `request_access` returns `allowAll=true`, all current and future apps are allowed for the rest of this session and you do not need to call `request_access` again.","Otherwise, if you launch a new app during the session, call `list_applications` and `request_access` again to add it."].join(`
|
|
29
|
+
`)});return new E(c).connect(t),t}export{bt as createServer};
|
|
30
30
|
//# sourceMappingURL=index.js.map
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|