@github/computer-use-mcp 0.1.28 → 0.1.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +2 -3
- package/dist/index.js +20 -12
- package/dist/main.js +20 -12
- package/dist/prebuilds/darwin-arm64/computer.node +0 -0
- package/dist/prebuilds/darwin-x64/computer.node +0 -0
- package/dist/prebuilds/linux-arm64/computer.node +0 -0
- package/dist/prebuilds/linux-x64/computer.node +0 -0
- package/dist/prebuilds/win32-arm64/computer.node +0 -0
- package/dist/prebuilds/win32-x64/computer.node +0 -0
- package/package.json +8 -6
package/dist/index.d.ts
CHANGED
package/dist/index.js
CHANGED
|
@@ -1,4 +1,12 @@
|
|
|
1
|
-
import{McpServer as
|
|
1
|
+
import{McpServer as Qt}from"@modelcontextprotocol/sdk/server/mcp.js";import{ElicitResultSchema as te}from"@modelcontextprotocol/sdk/types.js";import{z as d}from"zod";var K=class{knownApplications=new Map;allowedAppIds=new Set;accessActive=!1;allowAll=!1;hostWindowId=null;rememberApplications(t){this.knownApplications.clear();for(let e of t)this.knownApplications.set(e.id,e);if(this.allowAll)for(let e of t)this.allowedAppIds.add(e.id)}getKnownApplications(t){return t.map(e=>{let i=this.knownApplications.get(e);if(!i)throw new Error(`Unknown application id '${e}'. Call list_applications first.`);return i})}setHostWindowId(t){this.hostWindowId=t}tryGetKnownApplication(t){return this.knownApplications.get(t)}allKnownApplications(){return[...this.knownApplications.values()]}allowApplications(t,e=!1,i){this.accessActive=!0;for(let n of t)this.allowedAppIds.add(n);if(e){this.allowAll=!0;for(let n of this.knownApplications.keys())this.allowedAppIds.add(n)}return i&&(this.hostWindowId=i),this.getState()}areAllowedForAccess(t){return this.allowAll||t.every(e=>this.allowedAppIds.has(e))}getState(){return{accessActive:this.accessActive,allowedAppIds:[...this.allowedAppIds],allowAll:this.allowAll,hostWindowId:this.hostWindowId}}};function T(l){let t=new Map,e=[...l].sort((i,n)=>`${i.applicationName}\0${i.title}\0${i.windowId}`.localeCompare(`${n.applicationName}\0${n.title}\0${n.windowId}`));for(let i of e){let n=i.applicationName||i.title||i.applicationId,s=t.get(i.applicationId);s||(s={id:i.applicationId,displayName:n,applicationNames:i.applicationNames,windows:[]},t.set(i.applicationId,s)),s.windows.push({windowId:i.windowId,title:i.title,displayId:i.displayId,isMinimized:i.isMinimized})}return[...t.values()].sort((i,n)=>`${i.displayName}\0${i.id}`.localeCompare(`${n.displayName}\0${n.id}`))}function mt(l){return l.toLowerCase().split(/[^a-z0-9]+/).filter(t=>t.length>0)}function ht(l){let t=new Set;for(let e of l)for(let i of mt(e))t.add(i);return t}function gt(l){return 
l.trim().toLowerCase()}function wt(l){let t=new Set;for(let e of l){let i=gt(e);i&&t.add(i)}return t}var Y=class{appRecords=new Map;updateApplications(t){for(let e of t){let i=[e.displayName,...e.applicationNames??[]],n=this.appRecords.get(e.id);if(n){for(let s of ht(i))n.tokenBag.add(s);for(let s of wt(i))n.exactNames.add(s);n.displayName=e.displayName}else this.appRecords.set(e.id,{appId:e.id,displayName:e.displayName,tokenBag:ht(i),exactNames:wt(i)})}}resolve(t){let e=this.appRecords.get(t);if(e)return[e];let i=gt(t);if(i){let o=[];for(let r of this.appRecords.values())r.exactNames.has(i)&&o.push(r);if(o.length>0)return o}let n=mt(t);if(n.length===0)return[];let s=[];for(let o of this.appRecords.values())n.every(r=>o.tokenBag.has(r))&&s.push(o);return s}allRecords(){return[...this.appRecords.values()]}};import{randomUUID as Mt}from"node:crypto";import{mkdirSync as Ot,writeFileSync as qt}from"node:fs";import{homedir as jt,tmpdir as Et}from"node:os";import{dirname as Lt,isAbsolute as Ft,join as ft}from"node:path";var zt={actionCoordinates:"window-local screenshot coordinates with origin at the target window top-left",screenFrames:"screen@ frames and window bounds are global screen coordinates; do not pass them directly to click/drag",localCenter:"local_center/localCenter values are window-local x,y hints suitable for click/drag coordinates"},B=class{constructor(t){this.result=t}result;toToolResult(t){let e=[],i=this.sanitizeJsonForOutput(),n;t.imageMode==="path"&&this.result.image&&(n=this.writeImage(t.screenshotOutFile),this.setScreenshotPath(i,n));let s=JSON.stringify(i,null,2),o=this.result.text?this.augmentProjection(this.result.text,i):this.buildProjection(i);n&&t.format!=="json"&&(o=`${o}
|
|
2
|
+
screenshot: ${n}`);let r=t.format==="json"?s:t.format==="both"?`${o}
|
|
3
|
+
|
|
4
|
+
--- window_state_json ---
|
|
5
|
+
${s}`:o;return e.push({type:"text",text:r}),t.imageMode==="image"&&this.result.image&&e.push({type:"image",data:this.result.image.data,mimeType:this.result.image.mimeType,_meta:{screenshot:!0,windowState:!0,stateToken:this.result.stateToken}}),{content:e}}sanitizeJsonForOutput(){return this.addCoordinateMetadata(this.sanitizeJson(this.result.json??{app:this.result.app,windowId:this.result.windowId,stateToken:this.result.stateToken}))}buildProjection(t=this.sanitizeJsonForOutput()){let e=this.stringAtPath(t,["window","applicationName"])??this.result.app??this.stringAtPath(t,["app"])??"unknown",i=this.stringAtPath(t,["window","applicationId"]),n=this.stringAtPath(t,["window","title"]),s=this.stringAtPath(t,["window","windowID"])??this.result.windowId,o=this.stringAtPath(t,["stateToken"])??this.result.stateToken,r=this.recordAtPath(t,["window","bounds"]),c=this.recordAtPath(t,["tree"]),a=this.arrayAtPath(c,["nodes"]).filter(m=>this.isRecord(m)),u=this.valueAtPath(c,["truncated"])===!0,p=["window_state compact_v1",`app: ${e}${i?` (${i})`:""}`,`window: ${n?JSON.stringify(n):"(untitled)"}${s?` id=${s}`:""}${r?` ${this.compactWindowRect(r,"screen@")}`:""}`,o?`state_token: ${o}`:void 0,`tree: ${a.length} node${a.length===1?"":"s"}${u?" (truncated)":""}`,"coordinate_contract: action x,y are window-local screenshot coordinates; screen@ frames are global; use local_center for click/drag","targeting: pass a line number or nodeId as element_index; include state_token when available",""].filter(m=>m!==void 0),h=this.renderCompactWindowNodes(a);return p.push(...h.length?h:["(no accessibility nodes returned)"]),p.join(`
|
|
6
|
+
`)}writeImage(t){if(!this.result.image)throw new Error("get_window_state image_mode=path requested a screenshot, but no screenshot was returned.");let e=t?this.expandHomePath(t):this.defaultImagePath();if(!Ft(e))throw new Error("get_window_state screenshot_out_file must be an absolute path.");return Ot(Lt(e),{recursive:!0}),qt(e,Buffer.from(this.result.image.data,"base64")),e}defaultImagePath(){let t=this.result.image?.mimeType==="image/jpeg"?"jpg":"png",e=(this.result.app??"app").replace(/[^A-Za-z0-9._-]+/g,"-").replace(/^-+|-+$/g,"")||"app";return ft(Et(),"computer-use-mcp","window-state",`${e}-${Date.now()}-${Mt()}.${t}`)}setScreenshotPath(t,e){if(!this.isRecord(t))return;let i=this.isRecord(t.screenshot)?t.screenshot:{},n=this.isRecord(i.image)?i.image:{};n.imagePath=e,i.image=n,t.screenshot=i}sanitizeJson(t,e){if(e==="imageBase64")return;if(typeof t=="string")return this.truncateString(t);if(Array.isArray(t))return t.map(n=>this.sanitizeJson(n));if(!this.isRecord(t))return t;let i={};for(let[n,s]of Object.entries(t))i[n]=this.sanitizeJson(s,n);return i}addCoordinateMetadata(t){if(!this.isRecord(t))return t;t.coordinateContract??=zt;let e=this.recordAtPath(t,["window","bounds"]),i=this.recordAtPath(t,["tree"]),n=this.arrayAtPath(i,["nodes"]);for(let s of n){if(!this.isRecord(s)||this.isRecord(s.localCenter))continue;let o=this.localCenterForNode(s,e);o&&(s.localCenter=o)}return t}augmentProjection(t,e){if(t=this.labelScreenFrames(t),t.includes("coordinate_contract:"))return t;let i=t.split(`
|
|
7
|
+
`),n=Math.max(i.findIndex(o=>o.trim()===""),0),s=["coordinate_contract: action x,y are window-local screenshot coordinates; screen@ frames are global; use local_center for click/drag",this.compactLocalCenters(e)].filter(o=>!!o);return n===0&&i[0]?.trim()!==""?[...s,...i].join(`
|
|
8
|
+
`):(i.splice(n,0,...s),i.join(`
|
|
9
|
+
`))}labelScreenFrames(t){return t.replace(/(^|[\s(])@(-?\d+,-?\d+\s+\d+x\d+)/g,"$1screen@$2")}truncateString(t){return t.length<=1e3?t:`${t.slice(0,1e3)}...[truncated ${t.length-1e3} chars]`}renderCompactWindowNodes(t){let e=[];for(let i of t){let n=this.compactWindowNodeLine(i);n&&e.push(n)}return e}compactWindowNodeLine(t){let e=this.stringValue(t.index)??this.numberValue(t.index)?.toString(),i=this.compactWindowRole(this.stringValue(t.role),this.stringValue(t.subrole)),n=[this.stringValue(t.title),this.stringValue(t.description),this.stringValue(t.value),this.stringValue(t.valueDescription)].map(p=>p?.replace(/\s+/g," ").trim()).filter(p=>!!p),s=[...new Set(n)].slice(0,2).map(p=>JSON.stringify(p.length>160?`${p.slice(0,157)}...`:p)),o=Array.isArray(t.actions)?t.actions.filter(p=>typeof p=="string").slice(0,4):[],r=[t.focused===!0?"focused":void 0,t.enabled===!1?"disabled":void 0].filter(p=>!!p),c=this.isRecord(t.frame)?this.compactWindowRect(t.frame,"screen@"):void 0,a=this.isRecord(t.localCenter)?this.compactPoint(t.localCenter):void 0;if(e==="1"||s.length>0||o.length>0||r.length>0||["window","button","checkbox","radio_button","text_field","text_area","link","menu_item","row","cell","table","list","scroll_area","web_area"].includes(i))return[`${e?`${e}. 
`:""}${i}`,...s,r.length?`[${r.join(",")}]`:void 0,o.length?`{${o.map(p=>p.replace(/^AX/,"").toLowerCase()).join(",")}}`:void 0,c,a?`local_center=${a}`:void 0].filter(p=>!!p).join(" ")}compactWindowRole(t,e){return(e&&e!==t?e:t??"element").replace(/^AX/,"").replace(/([a-z])([A-Z])/g,"$1_$2").replace(/[^A-Za-z0-9]+/g,"_").replace(/^_+|_+$/g,"").toLowerCase()||"element"}compactWindowRect(t,e="@"){let i=this.numberValue(t.x),n=this.numberValue(t.y),s=this.numberValue(t.width),o=this.numberValue(t.height);if(!(i===void 0||n===void 0||s===void 0||o===void 0))return`${e}${Math.round(i)},${Math.round(n)} ${Math.round(s)}x${Math.round(o)}`}compactPoint(t){let e=this.numberValue(t.x),i=this.numberValue(t.y);if(!(e===void 0||i===void 0))return`${Math.round(e)},${Math.round(i)}`}localCenterForNode(t,e){let i=this.recordAtPath(t,["frame"]);if(!i||!e)return;let n=this.numberValue(e.x),s=this.numberValue(e.y),o=this.numberValue(i.x),r=this.numberValue(i.y),c=this.numberValue(i.width),a=this.numberValue(i.height);if(n===void 0||s===void 0||o===void 0||r===void 0||c===void 0||a===void 0)return;let u=o-n+c/2,p=r-s+a/2;if(!(u<0||p<0))return{x:Math.round(u),y:Math.round(p)}}compactLocalCenters(t){let e=this.recordAtPath(t,["tree"]),n=this.arrayAtPath(e,["nodes"]).filter(s=>this.isRecord(s)).map(s=>{let o=this.isRecord(s.localCenter)?this.compactPoint(s.localCenter):void 0;if(!o)return;let r=this.stringValue(s.index)??this.numberValue(s.index)?.toString(),c=this.stringValue(s.nodeId),a=r??c;if(!(!a||a==="1"||!this.isActionableNode(s)))return`${a}=${o}`}).filter(s=>!!s).slice(0,12);return n.length?`local_centers: ${n.join(" ")} (window-local)`:void 0}isActionableNode(t){let e=this.compactWindowRole(this.stringValue(t.role),this.stringValue(t.subrole));return(Array.isArray(t.actions)?t.actions.some(n=>typeof n=="string"):!1)||["button","checkbox","radio_button","text_field","text_area","link","menu_item","row","cell"].includes(e)}expandHomePath(t){return 
t==="~"||t.startsWith("~/")?ft(jt(),t.slice(2)):t}isRecord(t){return typeof t=="object"&&t!==null&&!Array.isArray(t)}stringValue(t){return typeof t=="string"?t:void 0}numberValue(t){return typeof t=="number"&&Number.isFinite(t)?t:void 0}valueAtPath(t,e){let i=t;for(let n of e){if(!this.isRecord(i))return;i=i[n]}return i}stringAtPath(t,e){return this.stringValue(this.valueAtPath(t,e))}recordAtPath(t,e){let i=this.valueAtPath(t,e);return this.isRecord(i)?i:void 0}arrayAtPath(t,e){let i=this.valueAtPath(t,e);return Array.isArray(i)?i:[]}};var Bt=new Set(["ctrl","control","shift","alt","option","super","meta","cmd","command","a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z","0","1","2","3","4","5","6","7","8","9","return","enter","tab","space","backspace","delete","forwarddelete","escape","esc","up","down","left","right","home","end","pageup","pagedown","insert","capslock","f1","f2","f3","f4","f5","f6","f7","f8","f9","f10","f11","f12","-","minus","=","equal","plus","[","]","\\",";","'",",",".","/","`"]),Ut={arrowup:"up",arrowdown:"down",arrowleft:"left",arrowright:"right",page_up:"pageup",page_down:"pagedown",backslash:"\\",semicolon:";",slash:"/",grave:"`",bracketleft:"[",bracketright:"]",super_l:"super",gui:"super",win:"super",windows:"super",caps_lock:"capslock",caps:"capslock",del:"delete"};function rt(l){l==="+"?l="plus":l.length>2&&l.endsWith("++")&&(l=l.slice(0,-1)+"plus");let t=l.split("+").map(i=>i.trim().toLowerCase()).filter(Boolean);if(t.length===0)throw new Error("Key combo must contain at least one key.");return t.map(i=>{let n=Ut[i]??i;if(!Bt.has(n))throw new Error(`Unknown key "${i}" in combo "${l}".`);return n}).join("+")}import*as I from"fs/promises";import*as yt from"os";import*as X from"path";var at="<!-- LOG 
-->",G=class{logDir;logPath;startTime=performance.now();queue=Promise.resolve();constructor(){this.logDir=X.join(yt.homedir(),".copilot","logs","computer-use"),this.logPath=X.join(this.logDir,`${this.timestamp()}.html`)}timestamp(){let t=new Date,e=(i,n=2)=>String(i).padStart(n,"0");return`${t.getFullYear()}${e(t.getMonth()+1)}${e(t.getDate())}-${e(t.getHours())}${e(t.getMinutes())}${e(t.getSeconds())}.${e(Math.floor(t.getMilliseconds()/10))}`}elapsed(){return`${((performance.now()-this.startTime)/1e3).toFixed(2)}s`}insert(t){return this.queue=this.queue.then(()=>this.write(t)).catch(()=>this.write(t)).catch(()=>{}),this.queue}async write(t){let e=!0;try{await I.access(this.logPath)}catch{e=!1}e||(await I.mkdir(this.logDir,{recursive:!0}),await I.writeFile(this.logPath,`<!DOCTYPE html>
|
|
2
10
|
<html>
|
|
3
11
|
<head>
|
|
4
12
|
<meta charset="utf-8">
|
|
@@ -22,19 +30,19 @@ import{McpServer as Yt}from"@modelcontextprotocol/sdk/server/mcp.js";import{Elic
|
|
|
22
30
|
<body>
|
|
23
31
|
<table>
|
|
24
32
|
<tr><th>Time</th><th>Level</th><th>Source</th><th>Action</th><th>Details</th></tr>
|
|
25
|
-
${
|
|
33
|
+
${at}
|
|
26
34
|
</table>
|
|
27
35
|
</body>
|
|
28
36
|
</html>
|
|
29
|
-
`));let i=await
|
|
37
|
+
`));let i=await I.readFile(this.logPath,"utf-8");await I.writeFile(this.logPath,i.replace(at,t+at))}rowHtml(t,e,i,n){return`<tr><td class="time">${this.elapsed()}</td><td class="level level-${i}">${i}</td><td class="source">${n}</td><td class="action">${this.escapeHtml(t)}</td><td>${e}</td></tr>
|
|
30
38
|
`}escapeHtml(t){return t.replace(/&/g,"&amp;").replace(/</g,"&lt;").replace(/>/g,"&gt;").replace(/"/g,"&quot;")}renderMarkdown(t){let e=t.split(`
|
|
31
|
-
`),i=[],s=[],o=()=>{s.length>0&&(i.push(`<ul style="margin:0;padding-left:1.2em">${s.map(n=>`<li>${this.escapeHtml(n)}</li>`).join("")}</ul>`),s=[])};for(let n of e)n.startsWith("- ")?s.push(n.slice(2)):(o(),n.length>0&&(i.length>0&&i.push("<br>"),i.push(this.escapeHtml(n))));return o(),i.join("")}log(t,e,i,s){this.insert(this.rowHtml(i,this.renderMarkdown(s),t,e))}logScreenshot(t,e,i,s){if(i){let o=s===void 0?"png":"jpg",n=`${this.timestamp()}.${o}`;A.mkdir(this.logDir,{recursive:!0}).then(()=>A.writeFile(H.join(this.logDir,n),i)),this.insert(this.rowHtml(t,`${this.escapeHtml(e)}<br><img src="${n}">`,"info","Computer"))}else this.insert(this.rowHtml(t,this.escapeHtml(e),"error","Computer"))}};var U=class{computer;logger;constructor(t,e){this.computer=t,this.logger=e}log(t,e){this.logger.log("info","Computer",t,e)}async checkPermissions(t){return this.computer.checkPermissions(t)}async requestPermissions(t){return this.computer.requestPermissions(t)}async capabilities(){let t=await this.computer.capabilities();return this.log("capabilities",JSON.stringify(t)),t}async click(t,e,i,s,o=""){await this.computer.click(t,e,i,s,o),this.log("click",`(${t}, ${e}) button=${i} count=${s}${o?` display=${o}`:""}`)}async move(t,e,i=""){await this.computer.move(t,e,i),this.log("move",`(${t}, ${e})${i?` display=${i}`:""}`)}async drag(t,e,i,s,o=""){await this.computer.drag(t,e,i,s,o),this.log("drag",`(${t}, ${e}) \u2192 (${i}, ${s})${o?` display=${o}`:""}`)}async mouseDown(t,e,i=""){await this.computer.mouseDown(t,e,i),this.log("mouseDown",`(${t}, ${e})${i?` display=${i}`:""}`)}async mouseUp(t,e,i=""){await this.computer.mouseUp(t,e,i),this.log("mouseUp",`(${t}, ${e})${i?` display=${i}`:""}`)}async type(t){await this.computer.type(t),this.log("type",`"${t}"`)}async key(t){await this.computer.key(t),this.log("key",t)}async scroll(t,e,i,s,o=""){await this.computer.scroll(t,e,i,s,o),this.log("scroll",`(${t}, ${e}) dx=${i} dy=${s}${o?` display=${o}`:""}`)}async cursorPosition(t=""){let 
e=await this.computer.cursorPosition(t);return this.log("cursorPosition",`(${e.x}, ${e.y})${t?` display=${t}`:""}`),e}async display(t=""){let e=await this.computer.display(t);return this.log("display",`${e.width}x${e.height}${t?` display=${t}`:""}`),e}async listDisplays(){let t=await this.computer.listDisplays(),e=t.map(i=>`${i.isPrimary?"*":""}${i.displayId} "${i.label}" ${i.width}x${i.height}`).join("; ");return this.log("listDisplays",`${t.length} displays${e?`: ${e}`:""}`),t}async listWindows(){let t=await this.computer.listWindows(),e=new Map;for(let s of t){let o=s.displayId||"(unknown)",n=e.get(o)??{visible:0,minimized:0};s.isMinimized?n.minimized+=1:n.visible+=1,e.set(o,n)}let i=[...e.entries()].sort(([s],[o])=>s.localeCompare(o)).map(([s,o])=>`${s}: ${o.visible} visible, ${o.minimized} minimized`).join("; ");return this.log("listWindows",`${t.length} windows${i?`: ${i}`:""}`),t}async windowAtPoint(t,e,i){let s=await this.computer.windowAtPoint(t,e,i);return this.log("windowAtPoint",`display=${t||"(primary)"} (${e}, ${i}) -> ${s?`${s.windowId} title="${s.title}"`:"null"}`),s}async getActiveWindow(){let t=await this.computer.getActiveWindow();return this.log("getActiveWindow",t?`${t.windowId} title="${t.title}"`:"null"),t}async activateApplication(t){let e=await this.computer.activateApplication(t);return this.log("activateApplication",`${t} -> ${e}`),e}async concealApplication(t){let e=await this.computer.concealApplication(t);return this.log("concealApplication",`${t} -> ${e}`),e}async restoreApplication(t){let e=await this.computer.restoreApplication(t);return this.log("restoreApplication",`${t} -> ${e}`),e}async activateWindow(t){let e=await this.computer.activateWindow(t);return this.log("activateWindow",`${t} -> ${e}`),e}async concealWindow(t){let e=await this.computer.concealWindow(t);return this.log("concealWindow",`${t} -> ${e}`),e}async restoreWindow(t){let e=await this.computer.restoreWindow(t);return this.log("restoreWindow",`${t} -> 
${e}`),e}async getClipboard(){let t=await this.computer.getClipboard(),e=t.slice(0,200)+(t.length>200?"\u2026":"");return this.log("getClipboard",`"${e}"`),t}async setClipboard(t){await this.computer.setClipboard(t);let e=t.slice(0,200)+(t.length>200?"\u2026":"");this.log("setClipboard",`"${e}" (${t.length} chars)`)}lock(t){return this.computer.lock(t)}unlock(){this.computer.unlock()}async prepareForInput(t,e,i){let s=await this.computer.prepareForInput(t,e,i),o=i?` @ (${i.x},${i.y})`:"";return this.log("prepareForInput",`${t} -> ${s} (${e.length} apps)${o}`),s}async screenshot(t,e,i,s,o,n,r){let l=await this.computer.screenshot(t,e,i,s,o,n,r),a=s&&o?` (${s}x${o})`:"",c=n?` crop=[${n.join(",")}]`:"",d=`display=${t||"(primary)"}`,b=r===void 0?"":` quality=${r}`,f=`${d}${a}${c}${b}`;return this.logger.logScreenshot("screenshot",f,l,r),l}};import{createRequire as $t}from"module";import{dirname as xt,join as Pt}from"path";import{fileURLToPath as Wt}from"url";var z=class u{constructor(t,e){this.native=t;t.setLogger?.((i,s,o)=>{e?.log(this.nativeLogLevel(i),"Driver",s,o)})}native;static create(t){let e=Pt(xt(Wt(import.meta.url)),"prebuilds",`${process.platform}-${process.arch}`,"computer.node"),i;try{i=$t(import.meta.url)(e)}catch(s){let o=s instanceof Error?s.message:String(s);throw new Error(`Native computer bindings not available for ${process.platform}-${process.arch}: ${o} (path: ${e})`,{cause:s})}return new u(i,t)}async checkPermissions(t){return this.native.checkPermissions(t)}async requestPermissions(t){this.native.requestPermissions(t)}async click(t,e,i,s,o=""){let n=this.resolveCoordinateSpace(o);this.assertPointInBounds(n,t,e);let[r,l]=this.toNative(n,t,e);this.native.click(r,l,i,s)}nativeLogLevel(t){switch(t){case 0:return"trace";case 1:return"debug";case 2:return"info";case 3:return"warn";case 4:return"error";default:return"info"}}async move(t,e,i=""){let 
s=this.resolveCoordinateSpace(i);this.assertPointInBounds(s,t,e);let[o,n]=this.toNative(s,t,e);this.native.move(o,n)}async drag(t,e,i,s,o=""){let n=this.resolveCoordinateSpace(o);this.assertPointInBounds(n,t,e),this.assertPointInBounds(n,i,s);let[r,l]=this.toNative(n,t,e),[a,c]=this.toNative(n,i,s);this.native.drag(r,l,a,c)}async mouseDown(t,e,i=""){let s=this.resolveCoordinateSpace(i);this.assertPointInBounds(s,t,e);let[o,n]=this.toNative(s,t,e);this.native.mouseDown(o,n)}async mouseUp(t,e,i=""){let s=this.resolveCoordinateSpace(i);this.assertPointInBounds(s,t,e);let[o,n]=this.toNative(s,t,e);this.native.mouseUp(o,n)}async type(t){this.native.type(t)}async key(t){this.native.key(t)}async scroll(t,e,i,s,o=""){let n=this.resolveCoordinateSpace(o);this.assertPointInBounds(n,t,e);let[r,l]=this.toNative(n,t,e);this.native.scroll(r,l,i,s)}async cursorPosition(t=""){let e=this.native.cursorPosition(),i=this.resolveCoordinateSpace(t),[s,o]=this.fromNative(i,e.x,e.y);return{x:s,y:o}}async display(t=""){let e=this.resolveCoordinateSpace(t);return{width:e.targetWidth,height:e.targetHeight}}async screenshot(t,e,i,s,o,n,r){let l=this.resolveCoordinateSpace(t,s,o),a=r??-1;if(n){this.assertRegionEdgeInBounds(l,n[0],n[1]),this.assertRegionEdgeInBounds(l,n[2],n[3]);let[c,d,b,f]=this.toNativeCrop(l,n[0],n[1],n[2],n[3]);return this.native.screenshot(t,e,i,0,0,c,d,b,f,a)}return this.native.screenshot(t,e,i,l.targetWidth,l.targetHeight,0,0,0,0,a)}async capabilities(){let t=this.native.capabilities();return{...t,canGetActiveWindow:typeof this.native.getActiveWindow=="function",canLocateWindowAtPoint:typeof this.native.windowAtPoint=="function"&&!!t.canLocateWindowAtPoint}}async listDisplays(){return this.native.listDisplays().map(e=>{let{targetWidth:i,targetHeight:s}=this.targetSizeForDisplay(e.bounds.width,e.bounds.height);return{displayId:e.displayId,label:e.label,width:i,height:s,isPrimary:e.isPrimary}})}async listWindows(){return this.native.listWindows()}async 
windowAtPoint(t,e,i){if(!this.native.windowAtPoint)return null;let s=this.resolveCoordinateSpace(t);this.assertPointInBounds(s,e,i);let[o,n]=this.toNative(s,e,i);return this.native.windowAtPoint(t,o,n)}async getActiveWindow(){return this.native.getActiveWindow?this.native.getActiveWindow():null}async activateApplication(t){return this.native.activateApplication(t)}async concealApplication(t){return this.native.concealApplication(t)}async restoreApplication(t){return this.native.restoreApplication(t)}async activateWindow(t){return this.native.activateWindow(t)}async concealWindow(t){return this.native.concealWindow(t)}async restoreWindow(t){return this.native.restoreWindow(t)}async getClipboard(){return this.native.getClipboard()}async setClipboard(t){this.native.setClipboard(t)}lock(t){return this.native.lock?.(t)??!1}unlock(){this.native.unlock?.()}async prepareForInput(t,e,i){let s=Number.NaN,o=Number.NaN;if(i){let n=this.resolveCoordinateSpace(t);this.assertPointInBounds(n,i.x,i.y),[s,o]=this.toNative(n,i.x,i.y)}return this.native.prepareForInput(t,e,i?.blockedWindowIds??[],s,o)}resolveDisplay(t){let e=this.native.listDisplays();return t!==""?e.find(i=>i.displayId===t):e.find(i=>i.isPrimary)??e[0]}targetSizeForDisplay(t,e,i=0,s=0){if(i>0&&s>0)return{targetWidth:i,targetHeight:s};let r=Math.min(1,1568/Math.max(t,e),Math.sqrt(115e4/(t*e)));return{targetWidth:Math.floor(t*r),targetHeight:Math.floor(e*r)}}resolveCoordinateSpace(t,e=0,i=0){let s=this.resolveDisplay(t),o=s?.bounds.x??0,n=s?.bounds.y??0,r=s?void 
0:this.native.display(),l=s?.bounds.width??r?.width??1,a=s?.bounds.height??r?.height??1,{targetWidth:c,targetHeight:d}=this.targetSizeForDisplay(l,a,e,i);return{originX:o,originY:n,displayWidth:l,displayHeight:a,scaleX:c/l,scaleY:d/a,targetWidth:c,targetHeight:d}}toNative(t,e,i){return[t.originX+e/t.scaleX,t.originY+i/t.scaleY]}toNativeCrop(t,e,i,s,o){return[Math.floor(e/t.scaleX),Math.floor(i/t.scaleY),Math.ceil(s/t.scaleX),Math.ceil(o/t.scaleY)]}assertPointInBounds(t,e,i){if(!Number.isFinite(e)||!Number.isFinite(i))throw new RangeError(`Coordinates must be finite numbers: x=${e}, y=${i}`);if(e<0||i<0||e>=t.targetWidth||i>=t.targetHeight)throw new RangeError(`Coordinates out of bounds for target display: x=${e}, y=${i}, width=${t.targetWidth}, height=${t.targetHeight}`)}assertRegionEdgeInBounds(t,e,i){if(!Number.isFinite(e)||!Number.isFinite(i))throw new RangeError(`Coordinates must be finite numbers: x=${e}, y=${i}`);if(e<0||i<0||e>t.targetWidth||i>t.targetHeight)throw new RangeError(`Crop coordinates out of bounds for target display: x=${e}, y=${i}, width=${t.targetWidth}, height=${t.targetHeight}`)}fromNative(t,e,i){return[Math.round((e-t.originX)*t.scaleX),Math.round((i-t.originY)*t.scaleY)]}};var _=.8,Ct=250,W={readOnlyHint:!0,destructiveHint:!1,openWorldHint:!1},I={readOnlyHint:!1,destructiveHint:!0,openWorldHint:!0},Z={readOnlyHint:!1,destructiveHint:!1,openWorldHint:!1},w=" Requires `request_access`.",rt=" If the target app is missing, call `list_applications` then `request_access`.",T=p.coerce.number().transform(Math.round),q=p.object({x:T.describe("Horizontal pixel coordinate."),y:T.describe("Vertical pixel coordinate.")}),_t=p.object({x1:T.describe("Left edge of the region in screenshot pixel coordinates."),y1:T.describe("Top edge of the region in screenshot pixel coordinates."),x2:T.describe("Right edge of the region in screenshot pixel coordinates."),y2:T.describe("Bottom edge of the region in screenshot pixel 
coordinates.")}).refine(({x1:u,x2:t})=>t>u,{message:"x2 must be greater than x1.",path:["x2"]}).refine(({y1:u,y2:t})=>t>u,{message:"y2 must be greater than y1.",path:["y2"]}),Tt=p.string().min(1).describe("Application name or stable app id from `list_applications`. Examples: 'Google Chrome', 'Microsoft Outlook', 'app.windows.abc123'."),Nt=p.object({display_id:p.string().optional().describe("Display id to select. Omit or pass an empty string to use the default display.")}),Dt=p.object({display_id:p.string().optional().describe("Optional display id to filter by. When provided, only apps with at least one non-minimized window on that display are returned.")}),lt=q.optional().describe("{x, y} pixel coordinate. Clicks at current cursor position if omitted."),ct=p.enum(["left","right","middle"]).optional().describe("Mouse button to click (default left)."),pt=p.coerce.number().int().min(1).max(3).optional().describe("Number of clicks: 1 single, 2 double, 3 triple (default 1)."),dt=q.describe("{x, y} pixel coordinate to move the cursor to."),ut=q.describe("{x, y} pixel coordinate to start the drag from."),ht=q.describe("{x, y} pixel coordinate to drag to."),mt=p.string().describe("The text to type."),gt=p.string().describe("Key combo to press (e.g. 'Return', 'ctrl+s', 'alt+Tab')."),wt=p.enum(["up","down","left","right"]).describe("Direction to scroll."),yt=q.optional().describe("{x, y} pixel coordinate to scroll at. Scrolls at current cursor position if omitted."),bt=p.number().int().nonnegative().max(100).optional().describe("Number of scroll clicks (default 3)."),Mt=p.number().nonnegative().max(100).describe("Seconds to wait (max 100). 
Use after a screenshot shows the UI is not in the expected state yet, but an operation may still finish."),ft=p.string().describe("The text to copy to the clipboard."),Rt=p.object({action:p.literal("click"),coordinate:lt,button:ct,count:pt}),qt=p.object({action:p.literal("mouse_move"),coordinate:dt}),Lt=p.object({action:p.literal("left_click_drag"),start_coordinate:ut,coordinate:ht}),Et=p.object({action:p.literal("left_mouse_down")}),jt=p.object({action:p.literal("left_mouse_up")}),Bt=p.object({action:p.literal("type"),text:mt}),Ht=p.object({action:p.literal("key"),text:gt}),Ft=p.object({action:p.literal("scroll"),scroll_direction:wt,coordinate:yt,scroll_amount:bt}),Ut=p.object({action:p.literal("set_clipboard"),text:ft}),zt=p.discriminatedUnion("action",[Rt,qt,Lt,Et,jt,Bt,Ht,Ft,Ut]),Ot=p.object({actions:p.array(zt).min(1).describe("Ordered actions. Do not include waits; call the separate `wait` tool only after observing a screenshot that is not ready yet.")}),O=class u{constructor(t={}){this.options=t;process.env.DEBUG&&(this.logger=new F)}options;_computer;_caps;_locked=null;_safetyTimer;static SAFETY_TIMEOUT_MS=180*1e3;server;logger;accessStore=new j;appNameResolver=new B;lastUnsharedWindowsNote=null;selectedDisplay="";toText(t,e){let i=[{type:"text",text:t}];return e?{content:i,isError:!0}:{content:i}}toJson(t){return this.toText(JSON.stringify(t,null,2))}toImage(t,e,i){return t?{content:[{type:"image",data:t.toString("base64"),mimeType:this.imageMimeType(i),_meta:{screenshot:!0}}]}:this.toText(e,!0)}buildAccessMessage(t,e){let i=t.length===0?["","Computer Use runs on your actual desktop and can send mouse and keyboard input.","","Computer Use wants to control your desktop for this session.","","Apps that are not allowed may be hidden."]:["","Computer Use runs on your actual desktop and can send mouse and keyboard input to the apps you share.","","Computer Use wants to control these apps:","",...t.map(s=>`- ${s}`),"","Apps that are not allowed may be 
hidden."];return e&&i.push("","Reason:",e),i.join(`
|
|
32
|
-
`)}imageMimeType(t){return t!==void 0?"image/jpeg":"image/png"}collectDisplayIds(t,e){let i=new Set;for(let
|
|
33
|
-
`),requestedSchema:{type:"object",properties:{action:{type:"string",title:`Grant ${e.join(" and ")} permission${e.length>1?"s":""}`,oneOf:[{const:"open",title:"1. Open System Settings"},{const:"done",title:"2. I've granted the permissions"},{const:"skip",title:"3. Skip (things may not work)"}]}},required:["action"]}}),l=r.action==="accept"?r.content?.action:void 0;if(l==="open"){i||await t.requestPermissions("accessibility"),s||await t.requestPermissions("screen"),await this.showHostWindow(t);let a=await this.server.server.elicitInput({mode:"form",message:["","System Settings has been opened.","",`Enable ${e.join(" and ")} for your terminal app.`,e.length>1?"Both settings pages have been opened \u2014 check each one.":"","",`After granting permissions, click "I've granted the permissions" below.`,"If the toggle was already on, try removing and re-adding your terminal app."].filter(Boolean).join(`
|
|
34
|
-
`),requestedSchema:{type:"object",properties:{action:{type:"string",title:"Confirm permissions",oneOf:[{const:"done",title:"1. I've granted the permissions"},{const:"skip",title:"2. Skip (things may not work)"}]}},required:["action"]}});if((a.action==="accept"?a.content?.action:void 0)!=="done")break}else if(l!=="done")break;if(i=await t.checkPermissions("accessibility"),s=await t.checkPermissions("screen"),!i||!s){let a=[];throw i||a.push("Accessibility"),s||a.push("Screen Recording"),new Error(`${a.join(" and ")} permission${a.length>1?"s are":" is"} still not detected. This usually means you need to restart your terminal after granting the permission. Please quit and reopen your terminal, then try again.`)}}break}}}async activeDisplayId(t){return this.selectedDisplay!==""?this.selectedDisplay:(await t.listDisplays()).find(i=>i.isPrimary)?.displayId??""}async requireAccess(){let t=this.accessStore.getState();if(!t.accessActive)throw new Error("No access session is active. Call request_access first to start a session.");let e=await this.computer();if(t.allowAll&&(await this.capabilities()).canListWindows){let{windows:s}=await this.filterHostWindow(e),o=C(s);this.accessStore.rememberApplications(o),this.appNameResolver.updateApplications(o),t=this.accessStore.getState()}return{computer:e,allowedAppIds:t.allowedAppIds,hostWindowId:t.hostWindowId}}async hostWindowId(t){let e=this.accessStore.getState().hostWindowId;if(e)return e;if(!(await this.capabilities()).canListWindows)return null;let s=await t.getActiveWindow();return s&&(await t.listWindows()).some(n=>n.windowId===s.windowId)?s.windowId:null}async showHostWindow(t){let e=await this.hostWindowId(t);if(e)try{await t.activateWindow(e)}catch{}}async filterHostWindow(t,e){let i=e??await t.listWindows(),s=await this.hostWindowId(t);return{windows:s?i.filter(o=>o.windowId!==s):i,hostWindowId:s}}async prepareForInput(t,e,i){let s=await this.capabilities();if(!s.canListWindows)return;let o=await 
this.activeDisplayId(t),n=i&&s.canHitTest;if(e.length===0&&!n)return;if(!await t.prepareForInput(o,e,n?i:void 0)){if(n)throw new Error("Input blocked: a disallowed app at the target could not be hidden. Call `list_applications` to see what's there, then `request_access` to allow it.");if(s.canGetActiveWindow){let l=await t.getActiveWindow();if(l&&!e.includes(l.applicationId))throw new Error(this.keyboardInputBlockedMessage(l))}throw new Error("Keyboard input blocked: an allowed application could not be focused. Click an allowed application first, or call `request_access` to allow it.")}}keyboardInputBlockedMessage(t){return`Keyboard input blocked: the focused application ("${t.applicationName}") is not allowed. Click an allowed application first, or call \`request_access\` to allow it.`}async validateActiveWindow(t,e){if(!(await this.capabilities()).canGetActiveWindow)return;let s=await t.getActiveWindow();if(s&&!e.includes(s.applicationId))throw new Error(this.keyboardInputBlockedMessage(s))}pointerInputBlockedMessage(t){return`Pointer input blocked: the focused application ("${t.applicationName}") is not allowed. Focus an allowed application first, or call \`request_access\` to allow it.`}async validatePointerInput(t,e,i){if(e.length===0)return;let s=await this.capabilities();if(s.canGetActiveWindow){let l=await t.getActiveWindow();if(l&&!e.includes(l.applicationId))throw new Error(this.pointerInputBlockedMessage(l))}if(!s.canLocateWindowAtPoint)return;let o=await this.activeDisplayId(t),n=await t.windowAtPoint(o,i.x,i.y);if(!n||e.includes(n.applicationId))return;let r=n.applicationName||n.title||n.applicationId;throw new Error(`Pointer input blocked: the target location is covered by disallowed application ("${r}"). 
Call \`list_applications\` to see what's there, then \`request_access\` to allow it.`)}async list_displays(){let t=await this.computer(),e=await t.listDisplays(),i=await this.activeDisplayId(t);return this.toJson({displays:e,selectedDisplay:i})}async select_display({display_id:t}){let e=t??"",i=await this.computer(),s=await i.listDisplays();if(e!==""&&!s.some(r=>r.displayId===e))throw new Error(`Unknown display id '${e}'. Call list_displays first.`);this.selectedDisplay=e;let o=await this.activeDisplayId(i),n=s.find(r=>r.displayId===o)??null;return this.toJson({selectedDisplay:o,status:"selected",message:e===""?"Using the default display for screenshots, zoom, cursor position, and coordinate-based actions.":`Selected display '${n?.label??e}' for screenshots, zoom, cursor position, and coordinate-based actions.`})}async list_applications({display_id:t}={}){let e=await this.computer(),{windows:i}=await this.filterHostWindow(e),s=C(i);this.accessStore.rememberApplications(s),this.appNameResolver.updateApplications(s);let o=t??"";if(o!==""&&!(await e.listDisplays()).some(c=>c.displayId===o))throw new Error(`Unknown display id '${o}'. 
Call list_displays first.`);let n=o===""?i:i.filter(a=>!a.isMinimized&&a.displayId===o),r=C(n),l=await this.activeDisplayId(e);return this.toJson({selectedDisplay:l,applications:r})}async request_access({apps:t,reason:e},i){this.logEvent("request_access",`Start: apps=[${t.join(", ")}]${e?` reason="${e}"`:""}`),await this.requestPermission();let s={const:"allow",title:"Allow"},o={const:"allow_all",title:"Allow all apps (don't ask again)"},n={const:"deny",title:"Deny (Esc)"},r,l=async(h,g,N)=>{this.logEvent("request_access",`End: allowed=${h} allowAll=${g.allowAll} selectedDisplay=${this.selectedDisplay||"(default)"} hostWindowId=${g.hostWindowId??"null"} message="${N}"`);let $=r??(g.allowAll?void 0:g.allowedAppIds.map(y=>{let x=this.accessStore.tryGetKnownApplication(y);return{appId:y,name:x?.displayName,displayIds:this.collectDisplayIds(x?.windows??[])}})),D=this.toJson({allowed:h,allowAll:g.allowAll,...$?{allowedApps:$}:{},selectedDisplay:this.selectedDisplay||void 0,message:N});if(!h)return D;let S=await this.computer();this.lock();let K=g.hostWindowId?[g.hostWindowId]:[];return this.addScreenshot(D,S,g.allowedAppIds,K)};if(this.options.yolo){let h=this.accessStore.allowApplications([],!0);return await l(!0,h,"YOLO mode is enabled. Auto-allowing full desktop access for this session.")}let a=await this.capabilities();if(!this.server.server.getClientCapabilities()?.elicitation?.form){let h=this.accessStore.allowApplications([],!0);return await l(!0,h,"Elicitation is not supported by this client. 
Auto-allowing full desktop access for this session.")}let d,b,f,Y,G;if(a.canListWindows){let h=await this.computer(),g=this.accessStore.getState().hostWindowId,N=await h.listWindows(),$=g?N.filter(m=>m.windowId!==g):N;G=$;let D=C($);this.accessStore.rememberApplications(D),this.appNameResolver.updateApplications(D);let S=t&&t.length>0?"":await this.activeDisplayId(h);S&&(this.selectedDisplay=S);let K=new Set(g?[g]:[]),y;if(t&&t.length>0){let m=new Map;for(let v of t){let R=this.appNameResolver.resolve(v);if(R.length===0)throw new Error(`No matching application found for '${v}'. Call list_applications to all running apps and their IDs.`);for(let P of R)if(!m.has(P.appId)){let E=this.accessStore.tryGetKnownApplication(P.appId);m.set(P.appId,E??{id:P.appId,displayName:P.displayName,windows:[]})}}y=[...m.values()];let At=new Set(m.keys()),L=new Map;for(let v of $)At.has(v.applicationId)&&!v.isMinimized&&v.displayId&&L.set(v.displayId,(L.get(v.displayId)??0)+1);if(L.size>0){let v="",R=0;for(let[P,E]of L)E>R&&(v=P,R=E);this.selectedDisplay=v}r=this.buildAllowedAppsInfo(y,$)}else y=C($.filter(m=>!m.isMinimized&&(S===""||m.displayId===S)&&!K.has(m.windowId)));let x=y.map(m=>m.id),J=this.accessStore.getState(),M=y.length;b=["allow","allow_all"],f=x.length===0?J.allowAll||J.accessActive:this.accessStore.areAllowedForAccess(x),Y=x.length===0?J.allowAll?"Access is already configured to auto-allow future requests for this session, so you do not need to call request_access again.":"Desktop access is already active for this session.":M===1?`'${y[0].displayName}' is already shared for this session.`:"The requested apps are already shared for this session.",d={choices:[s,o,n],message:this.buildAccessMessage(y.map(m=>m.displayName),e),denyMessage:M===0?"The user declined desktop access.":M===1?`The user declined to share '${y[0].displayName}'.`:"The user declined to share the requested 
apps.",allow:m=>this.accessStore.allowApplications(x,m==="allow_all"),allowedMessage:m=>m==="allow_all"?"All current and future apps are allowed for the rest of this session, so you do not need to call request_access again.":M===0?"Desktop access is active for this session.":M===1?`Access session started for '${y[0].displayName}'.`:"Access session started for the requested apps."}}else{let h=this.accessStore.getState();b=["allow_all"],f=h.allowAll||h.accessActive,Y=h.allowAll?"Access is already configured to auto-allow future requests for this session, so you do not need to call request_access again.":"Desktop access is already active for this session.",d={choices:[o,n],message:this.buildAccessMessage([],e),denyMessage:"The user declined desktop access.",allow:()=>this.accessStore.allowApplications([],!0),allowedMessage:()=>"Access session started for the desktop. All current and future apps are allowed for the rest of this session, so you do not need to call request_access again."}}if(f){let h=this.accessStore.getState();return await l(!0,h,Y)}this.accessStore.getState().hostWindowId&&await this.showHostWindow(await this.computer());let X=await this.server.server.request({method:"elicitation/create",params:{mode:"form",message:d.message,requestedSchema:{type:"object",properties:{decision:{type:"string",title:"Allow access for this session?",oneOf:d.choices}},required:["decision"]}}},St,{signal:i,timeout:300*1e3}),vt=await this.computer(),tt=await this.hostWindowId(vt);tt&&this.accessStore.setHostWindowId(tt);let k=X.action==="accept"?X.content?.decision:void 0;if(X.action!=="accept"||!k||typeof k!="string"||!b.includes(k)){let h=this.accessStore.getState(),g=typeof k=="string"&&k!==n.const?`The user did not approve this access request. 
User response: ${k}`:d.denyMessage;return await l(!1,h,g)}let et=d.allow(k);return et.allowAll&&G&&(r=this.buildAllowedAppsInfo(this.accessStore.allKnownApplications(),G)),await l(!0,et,d.allowedMessage(k))}async screenshot(){let{computer:t,allowedAppIds:e,hostWindowId:i}=await this.requireAccess();this.lock();let s=i?[i]:[];return this.captureScreenshot(t,e,s)}logScreenshotFilter(t,e){let i=r=>{let l=this.accessStore.tryGetKnownApplication(r);return l?`${l.displayName} (${r})`:r},s=r=>{let l=r.includes("|")?r.split("|")[1]:r;return i(l)},o=(r,l)=>l.length===1?`${r}: ${l[0]}`:`${r}:
|
|
35
|
-
${l.map(a=>`- ${a}`).join(`
|
|
36
|
-
|
|
37
|
-
`)
|
|
39
|
+
`),i=[],n=[],s=()=>{n.length>0&&(i.push(`<ul style="margin:0;padding-left:1.2em">${n.map(o=>`<li>${this.escapeHtml(o)}</li>`).join("")}</ul>`),n=[])};for(let o of e)o.startsWith("- ")?n.push(o.slice(2)):(s(),o.length>0&&(i.length>0&&i.push("<br>"),i.push(this.escapeHtml(o))));return s(),i.join("")}log(t,e,i,n){this.insert(this.rowHtml(i,this.renderMarkdown(n),t,e))}logScreenshot(t,e,i,n){if(i){let s=n===void 0?"png":"jpg",o=`${this.timestamp()}.${s}`;I.mkdir(this.logDir,{recursive:!0}).then(()=>I.writeFile(X.join(this.logDir,o),i)),this.insert(this.rowHtml(t,`${this.escapeHtml(e)}<br><img src="${o}">`,"info","Computer"))}else this.insert(this.rowHtml(t,this.escapeHtml(e),"error","Computer"))}};var Z=class{computer;logger;constructor(t,e){this.computer=t,this.logger=e}log(t,e){this.logger.log("info","Computer",t,e)}async checkPermissions(t){return this.computer.checkPermissions(t)}async requestPermissions(t){return this.computer.requestPermissions(t)}async capabilities(){let t=await this.computer.capabilities();return this.log("capabilities",JSON.stringify(t)),t}async click(t,e,i,n,s=""){await this.computer.click(t,e,i,n,s),this.log("click",`(${t}, ${e}) button=${i} count=${n}${s?` display=${s}`:""}`)}async move(t,e,i=""){await this.computer.move(t,e,i),this.log("move",`(${t}, ${e})${i?` display=${i}`:""}`)}async drag(t,e,i,n,s=""){await this.computer.drag(t,e,i,n,s),this.log("drag",`(${t}, ${e}) \u2192 (${i}, ${n})${s?` display=${s}`:""}`)}async mouseDown(t,e,i=""){await this.computer.mouseDown(t,e,i),this.log("mouseDown",`(${t}, ${e})${i?` display=${i}`:""}`)}async mouseUp(t,e,i=""){await this.computer.mouseUp(t,e,i),this.log("mouseUp",`(${t}, ${e})${i?` display=${i}`:""}`)}async type(t){await this.computer.type(t),this.log("type",`"${t}"`)}async key(t){await this.computer.key(t),this.log("key",t)}async scroll(t,e,i,n,s=""){await this.computer.scroll(t,e,i,n,s),this.log("scroll",`(${t}, ${e}) dx=${i} dy=${n}${s?` display=${s}`:""}`)}async cursorPosition(t=""){let 
e=await this.computer.cursorPosition(t);return this.log("cursorPosition",`(${e.x}, ${e.y})${t?` display=${t}`:""}`),e}async display(t=""){let e=await this.computer.display(t);return this.log("display",`${e.width}x${e.height}${t?` display=${t}`:""}`),e}async listDisplays(){let t=await this.computer.listDisplays(),e=t.map(i=>`${i.isPrimary?"*":""}${i.displayId} "${i.label}" ${i.width}x${i.height}`).join("; ");return this.log("listDisplays",`${t.length} displays${e?`: ${e}`:""}`),t}async listWindows(){let t=await this.computer.listWindows(),e=new Map;for(let n of t){let s=n.displayId||"(unknown)",o=e.get(s)??{visible:0,minimized:0};n.isMinimized?o.minimized+=1:o.visible+=1,e.set(s,o)}let i=[...e.entries()].sort(([n],[s])=>n.localeCompare(s)).map(([n,s])=>`${n}: ${s.visible} visible, ${s.minimized} minimized`).join("; ");return this.log("listWindows",`${t.length} windows${i?`: ${i}`:""}`),t}async windowAtPoint(t,e,i){let n=await this.computer.windowAtPoint(t,e,i);return this.log("windowAtPoint",`display=${t||"(primary)"} (${e}, ${i}) -> ${n?`${n.windowId} title="${n.title}"`:"null"}`),n}async getActiveWindow(){let t=await this.computer.getActiveWindow();return this.log("getActiveWindow",t?`${t.windowId} title="${t.title}"`:"null"),t}async activateApplication(t){let e=await this.computer.activateApplication(t);return this.log("activateApplication",`${t} -> ${e}`),e}async concealApplication(t){let e=await this.computer.concealApplication(t);return this.log("concealApplication",`${t} -> ${e}`),e}async restoreApplication(t){let e=await this.computer.restoreApplication(t);return this.log("restoreApplication",`${t} -> ${e}`),e}async activateWindow(t){let e=await this.computer.activateWindow(t);return this.log("activateWindow",`${t} -> ${e}`),e}async concealWindow(t){let e=await this.computer.concealWindow(t);return this.log("concealWindow",`${t} -> ${e}`),e}async restoreWindow(t){let e=await this.computer.restoreWindow(t);return this.log("restoreWindow",`${t} -> 
${e}`),e}async getClipboard(){let t=await this.computer.getClipboard(),e=t.slice(0,200)+(t.length>200?"\u2026":"");return this.log("getClipboard",`"${e}"`),t}async setClipboard(t){await this.computer.setClipboard(t);let e=t.slice(0,200)+(t.length>200?"\u2026":"");this.log("setClipboard",`"${e}" (${t.length} chars)`)}async getWindowState(t,e,i,n,s,o,r){let c=await this.computer.getWindowState(t,e,i,n,s,o,r);return this.log("getWindowState",`app=${t} windowId=${c.windowId??"(default)"} stateToken=${c.stateToken??"(none)"} diffSince=${r??"(none)"}`),c}async windowClick(t,e,i,n,s,o,r,c,a){let u=await this.computer.windowClick(t,e,i,n,s,o,r,c,a);return this.log("windowClick",`app=${t} ok=${u.ok}`),u}async invokeAction(t,e,i,n,s,o){let r=await this.computer.invokeAction(t,e,i,n,s,o);return this.log("invokeAction",`app=${t} element=${e} action=${i} ok=${r.ok}`),r}async windowScroll(t,e,i,n,s,o,r){let c=await this.computer.windowScroll(t,e,i,n,s,o,r);return this.log("windowScroll",`app=${t} element=${e} direction=${i} ok=${c.ok}`),c}async windowDrag(t,e,i,n,s,o,r,c){let a=await this.computer.windowDrag(t,e,i,n,s,o,r,c);return this.log("windowDrag",`app=${t} (${e},${i}) -> (${n},${s}) ok=${a.ok}`),a}async typeText(t,e,i,n,s,o){let r=await this.computer.typeText(t,e,i,n,s,o);return this.log("typeText",`app=${t} chars=${e.length} ok=${r.ok}`),r}async pressKey(t,e,i,n,s){let o=await this.computer.pressKey(t,e,i,n,s);return this.log("pressKey",`app=${t} key=${e} ok=${o.ok}`),o}async setValue(t,e,i,n,s,o){let r=await this.computer.setValue(t,e,i,n,s,o);return this.log("setValue",`app=${t} element=${e} chars=${i.length} ok=${r.ok}`),r}lock(t){return this.computer.lock(t)}unlock(){this.computer.unlock()}hideAppCursorOverlay(){this.computer.hideAppCursorOverlay()}async prepareForInput(t,e,i){let n=await this.computer.prepareForInput(t,e,i),s=i?` @ (${i.x},${i.y})`:"",o=e===null?"all":e.length.toString();return this.log("prepareForInput",`${t} -> ${n} (${o} apps)${s}`),n}async 
screenshot(t,e,i,n,s,o,r){let c=await this.computer.screenshot(t,e,i,n,s,o,r),a=n&&s?` (${n}x${s})`:"",u=o?` crop=[${o.join(",")}]`:"",p=`display=${t||"(primary)"}`,h=r===void 0?"":` quality=${r}`,m=`${p}${a}${u}${h}`;return this.logger.logScreenshot("screenshot",m,c,r),c}};import{createRequire as Ht}from"module";import{dirname as Vt,join as Jt}from"path";import{fileURLToPath as Kt}from"url";function S(l,t){let e=JSON.parse(l);if(!q(e))throw new Error(`Native ${t} returned an invalid JSON payload.`);return e}function Yt(l,t){let e=Xt(t);return{app:l,windowId:Q(t,["window","windowID"]),stateToken:Q(t,["stateToken"]),text:N(t,"text"),json:lt(t),image:e,diagnostics:t.diagnostics}}function R(l){return{ok:Zt(l,"ok")??!1,stateToken:N(l,"preStateToken")??N(l,"stateToken"),postStateToken:N(l,"postStateToken"),classification:N(l,"classification"),summary:N(l,"summary")??N(l,"classification"),warnings:Gt(l,["warnings"]).filter(t=>typeof t=="string"),diagnostics:l}}function Xt(l){let t=Q(l,["screenshot","image","imageBase64"]);if(!t)return;let e=Q(l,["screenshot","image","mimeType"]);return{data:t,mimeType:e==="image/jpeg"?"image/jpeg":"image/png"}}var ct=1e3;function lt(l){if(typeof l=="string"&&l.length>ct)return`${l.slice(0,ct)}...[truncated ${l.length-ct} chars]`;if(Array.isArray(l))return l.map(e=>lt(e));if(!q(l))return l;let t={};for(let[e,i]of Object.entries(l))t[e]=e==="imageBase64"?void 0:lt(i);return t}function Q(l,t){let e=l;for(let i of t){if(!q(e))return;e=e[i]}return typeof e=="string"?e:void 0}function Gt(l,t){let e=l;for(let i of t){if(!q(e))return[];e=e[i]}return Array.isArray(e)?e:[]}function N(l,t){return q(l)&&typeof l[t]=="string"?l[t]:void 0}function Zt(l,t){return q(l)&&typeof l[t]=="boolean"?l[t]:void 0}function q(l){return typeof l=="object"&&l!==null&&!Array.isArray(l)}var tt=class l{constructor(t,e){this.native=t;t.setLogger?.((i,n,s)=>{e?.log(this.nativeLogLevel(i),"Driver",n,s)})}native;static create(t){let 
e=Jt(Vt(Kt(import.meta.url)),"prebuilds",`${process.platform}-${process.arch}`,"computer.node"),i;try{i=Ht(import.meta.url)(e)}catch(n){let s=n instanceof Error?n.message:String(n);throw new Error(`Native computer bindings not available for ${process.platform}-${process.arch}: ${s} (path: ${e})`,{cause:n})}return new l(i,t)}async checkPermissions(t){return this.native.checkPermissions(t)}async requestPermissions(t){this.native.requestPermissions(t)}async click(t,e,i,n,s=""){let o=this.resolveCoordinateSpace(s);this.assertPointInBounds(o,t,e);let[r,c]=this.toNative(o,t,e);this.native.click(r,c,i,n)}nativeLogLevel(t){switch(t){case 0:return"trace";case 1:return"debug";case 2:return"info";case 3:return"warn";case 4:return"error";default:return"info"}}async move(t,e,i=""){let n=this.resolveCoordinateSpace(i);this.assertPointInBounds(n,t,e);let[s,o]=this.toNative(n,t,e);this.native.move(s,o)}async drag(t,e,i,n,s=""){let o=this.resolveCoordinateSpace(s);this.assertPointInBounds(o,t,e),this.assertPointInBounds(o,i,n);let[r,c]=this.toNative(o,t,e),[a,u]=this.toNative(o,i,n);this.native.drag(r,c,a,u)}async mouseDown(t,e,i=""){let n=this.resolveCoordinateSpace(i);this.assertPointInBounds(n,t,e);let[s,o]=this.toNative(n,t,e);this.native.mouseDown(s,o)}async mouseUp(t,e,i=""){let n=this.resolveCoordinateSpace(i);this.assertPointInBounds(n,t,e);let[s,o]=this.toNative(n,t,e);this.native.mouseUp(s,o)}async type(t){this.native.type(t)}async key(t){this.native.key(t)}async scroll(t,e,i,n,s=""){let o=this.resolveCoordinateSpace(s);this.assertPointInBounds(o,t,e);let[r,c]=this.toNative(o,t,e);this.native.scroll(r,c,i,n)}async cursorPosition(t=""){let e=this.native.cursorPosition(),i=this.resolveCoordinateSpace(t),[n,s]=this.fromNative(i,e.x,e.y);return{x:n,y:s}}async display(t=""){let e=this.resolveCoordinateSpace(t);return{width:e.targetWidth,height:e.targetHeight}}async screenshot(t,e,i,n,s,o,r){let 
c=this.resolveCoordinateSpace(t,n,s),a=r??-1;if(o){this.assertRegionEdgeInBounds(c,o[0],o[1]),this.assertRegionEdgeInBounds(c,o[2],o[3]);let[u,p,h,m]=this.toNativeCrop(c,o[0],o[1],o[2],o[3]);return this.native.screenshot(t,e,i,0,0,u,p,h,m,a)}return this.native.screenshot(t,e,i,c.targetWidth,c.targetHeight,0,0,0,0,a)}async capabilities(){let t=this.native.capabilities();return{...t,discovery:!!t.discovery&&typeof this.native.getActiveWindow=="function"&&typeof this.native.windowAtPoint=="function",state:!!t.state}}async listDisplays(){return this.native.listDisplays().map(e=>{let{targetWidth:i,targetHeight:n}=this.targetSizeForDisplay(e.bounds.width,e.bounds.height);return{displayId:e.displayId,label:e.label,width:i,height:n,isPrimary:e.isPrimary}})}async listWindows(){return this.native.listWindows()}async windowAtPoint(t,e,i){if(!this.native.windowAtPoint)return null;let n=this.resolveCoordinateSpace(t);this.assertPointInBounds(n,e,i);let[s,o]=this.toNative(n,e,i);return this.native.windowAtPoint(t,s,o)}async getActiveWindow(){return this.native.getActiveWindow?this.native.getActiveWindow():null}async activateApplication(t){return this.native.activateApplication(t)}async concealApplication(t){return this.native.concealApplication(t)}async restoreApplication(t){return this.native.restoreApplication(t)}async activateWindow(t){return this.native.activateWindow(t)}async concealWindow(t){return this.native.concealWindow(t)}async restoreWindow(t){return this.native.restoreWindow(t)}async getClipboard(){return this.native.getClipboard()}async setClipboard(t){this.native.setClipboard(t)}lock(t){return this.native.lock?.(t)??!1}unlock(){this.native.unlock?.()}hideAppCursorOverlay(){this.native.hideAppCursorOverlay?.()}async getWindowState(t,e,i,n,s,o,r){return Yt(t,S(this.native.getWindowState(t,e,i,n,s,o,r),"getWindowState"))}async windowClick(t,e,i,n,s,o,r,c,a){return R(S(this.native.windowClick(t,e,i,n,s,o,r,c,a),"windowClick"))}async invokeAction(t,e,i,n,s,o){return 
R(S(this.native.invokeAction(t,e,i,n,s,o),"invokeAction"))}async windowScroll(t,e,i,n,s,o,r){return R(S(this.native.windowScroll(t,e,i,n,s,o,r),"windowScroll"))}async windowDrag(t,e,i,n,s,o,r,c){return R(S(this.native.windowDrag(t,e,i,n,s,o,r,c),"windowDrag"))}async typeText(t,e,i,n,s,o){return R(S(this.native.typeText(t,e,i,n,s,o),"typeText"))}async pressKey(t,e,i,n,s){return R(S(this.native.pressKey(t,e,i,n,s),"pressKey"))}async setValue(t,e,i,n,s,o){return R(S(this.native.setValue(t,e,i,n,s,o),"setValue"))}async prepareForInput(t,e,i){let n=Number.NaN,s=Number.NaN;if(i){let o=this.resolveCoordinateSpace(t);this.assertPointInBounds(o,i.x,i.y),[n,s]=this.toNative(o,i.x,i.y)}return this.native.prepareForInput(t,e,i?.blockedWindowIds??[],n,s)}resolveDisplay(t){let e=this.native.listDisplays();return t!==""?e.find(i=>i.displayId===t):e.find(i=>i.isPrimary)??e[0]}targetSizeForDisplay(t,e,i=0,n=0){if(i>0&&n>0)return{targetWidth:i,targetHeight:n};let r=Math.min(1,1568/Math.max(t,e),Math.sqrt(115e4/(t*e)));return{targetWidth:Math.floor(t*r),targetHeight:Math.floor(e*r)}}resolveCoordinateSpace(t,e=0,i=0){let n=this.resolveDisplay(t),s=n?.bounds.x??0,o=n?.bounds.y??0,r=n?void 0:this.native.display(),c=n?.bounds.width??r?.width??1,a=n?.bounds.height??r?.height??1,{targetWidth:u,targetHeight:p}=this.targetSizeForDisplay(c,a,e,i);return{originX:s,originY:o,displayWidth:c,displayHeight:a,scaleX:u/c,scaleY:p/a,targetWidth:u,targetHeight:p}}toNative(t,e,i){return[t.originX+e/t.scaleX,t.originY+i/t.scaleY]}toNativeCrop(t,e,i,n,s){return[Math.floor(e/t.scaleX),Math.floor(i/t.scaleY),Math.ceil(n/t.scaleX),Math.ceil(s/t.scaleY)]}assertPointInBounds(t,e,i){if(!Number.isFinite(e)||!Number.isFinite(i))throw new RangeError(`Coordinates must be finite numbers: x=${e}, y=${i}`);if(e<0||i<0||e>=t.targetWidth||i>=t.targetHeight)throw new RangeError(`Coordinates out of bounds for target display: x=${e}, y=${i}, width=${t.targetWidth}, 
height=${t.targetHeight}`)}assertRegionEdgeInBounds(t,e,i){if(!Number.isFinite(e)||!Number.isFinite(i))throw new RangeError(`Coordinates must be finite numbers: x=${e}, y=${i}`);if(e<0||i<0||e>t.targetWidth||i>t.targetHeight)throw new RangeError(`Crop coordinates out of bounds for target display: x=${e}, y=${i}, width=${t.targetWidth}, height=${t.targetHeight}`)}fromNative(t,e,i){return[Math.round((e-t.originX)*t.scaleX),Math.round((i-t.originY)*t.scaleY)]}};var j=.8,ee=250,P={readOnlyHint:!0,destructiveHint:!1,openWorldHint:!1},b={readOnlyHint:!1,destructiveHint:!0,openWorldHint:!0},dt={readOnlyHint:!1,destructiveHint:!1,openWorldHint:!1},_=" Requires `request_access`.",bt=" If the target app is missing, call `list_applications` then `request_access`.",A=d.coerce.number().transform(Math.round),U=d.object({x:A.describe("Horizontal pixel coordinate."),y:A.describe("Vertical pixel coordinate.")}),ie=d.object({x1:A.describe("Left edge of the region in screenshot pixel coordinates."),y1:A.describe("Top edge of the region in screenshot pixel coordinates."),x2:A.describe("Right edge of the region in screenshot pixel coordinates."),y2:A.describe("Bottom edge of the region in screenshot pixel coordinates.")}).refine(({x1:l,x2:t})=>t>l,{message:"x2 must be greater than x1.",path:["x2"]}).refine(({y1:l,y2:t})=>t>l,{message:"y2 must be greater than y1.",path:["y2"]}),ne=d.string().min(1).describe("Application name or stable app id from `list_applications`. Examples: 'Google Chrome', 'Microsoft Outlook', 'app.windows.abc123'."),se=d.object({display_id:d.string().optional().describe("Display id to select. Omit or pass an empty string to use the default display.")}),oe=d.object({display_id:d.string().optional().describe("Optional display id to filter by. When provided, only apps with at least one non-minimized window on that display are returned.")}),vt=U.optional().describe("{x, y} pixel coordinate. 
Clicks at current cursor position if omitted."),kt=d.enum(["left","right","middle"]).optional().describe("Mouse button to click (default left)."),At=d.coerce.number().int().min(1).max(3).optional().describe("Number of clicks: 1 single, 2 double, 3 triple (default 1)."),It=U.describe("{x, y} pixel coordinate to move the cursor to."),xt=U.describe("{x, y} pixel coordinate to start the drag from."),$t=U.describe("{x, y} pixel coordinate to drag to."),St=d.string().describe("The text to type."),Pt=d.string().describe("Key combo to press (e.g. 'Return', 'ctrl+s', 'alt+Tab')."),Ct=d.enum(["up","down","left","right"]).describe("Direction to scroll."),Wt=U.optional().describe("{x, y} pixel coordinate to scroll at. Scrolls at current cursor position if omitted."),Tt=d.number().int().nonnegative().max(100).optional().describe("Number of scroll clicks (default 3)."),re=d.number().nonnegative().max(100).describe("Seconds to wait (max 100). Use after a screenshot shows the UI is not in the expected state yet, but an operation may still finish."),Rt=d.string().describe("The text to copy to the clipboard."),w={app:d.string().min(1).describe("App name or bundle identifier."),window_title_contains:d.string().min(1).optional().describe("Optional window title substring to choose a specific window."),window_id:d.string().min(1).optional().describe("Optional helper/native window id returned by a prior window-state response."),state_token:d.string().min(1).optional().describe("Optional state token returned by get_window_state. Pass it to detect stale UI state.")},H=d.string().min(1).describe("Element index or stable nodeId from get_window_state. Prefer nodeId when available."),ae=d.enum(["image","path","omit"]).describe("How to return screenshots. `image` returns MCP image content, `path` writes a local file and returns its path, `omit` skips capture. Defaults to omit; pass image or path when a visual snapshot is needed."),ce=d.enum(["text","json","both"]).describe("Response format. 
`text` returns compact semantic state, `json` returns sanitized debug metadata, and `both` returns compact state plus sanitized JSON. Defaults to text."),le=d.enum(["none","text","image","path"]).describe("Post-action state to return. Defaults to none for low-latency action responses; pass text/image/path when a fresh snapshot is needed."),D={return_state:le.optional(),screenshot_out_file:d.string().min(1).optional().describe("Optional absolute path for return_state=path. Parent directories are created if needed."),max_nodes:d.number().int().positive().optional().describe("Maximum UI tree nodes for returned post-action state.")},de=new Set(["modal_sheet_opened","window_closed","window_state_changed"]);function pe(l){if(l.classification)return l.classification;let t=l.diagnostics;if(typeof t!="object"||t===null||Array.isArray(t))return;let e=t.classification;return typeof e=="string"?e:void 0}function _t(l,t){if(t!=="window_closed")return!1;let e=l instanceof Error?l.message:String(l);return e.includes("No app window found")||e.includes("exposed no accessibility windows")||e.includes("Stale window_id")}var ue=d.object({app:w.app,window_title_contains:w.window_title_contains,window_id:w.window_id,image_mode:ae.optional(),format:ce.optional(),screenshot_out_file:d.string().min(1).optional().describe("Optional absolute path for image_mode=path. Parent directories are created if needed."),include_screenshot:d.boolean().optional().describe("Deprecated compatibility flag. Use image_mode instead. false maps to image_mode=omit."),include_tree:d.boolean().optional().describe("Whether to include the accessibility/UI tree. Defaults to true."),max_nodes:d.number().int().positive().optional().describe("Maximum UI tree nodes to return."),diff_since:d.string().min(1).optional().describe("Optional prior state_token. 
Returns compact semantic diff when native can compare against that snapshot."),diff_from_state_token:d.string().min(1).optional().describe("Alias for diff_since.")}),he=d.object({app:w.app,window_title_contains:w.window_title_contains,window_id:w.window_id,state_token:w.state_token,element_index:H.optional(),x:A.optional().describe("Window-local x coordinate in the screenshot returned by get_window_state. Origin is the target window's top-left, not the desktop."),y:A.optional().describe("Window-local y coordinate in the screenshot returned by get_window_state. Origin is the target window's top-left, not the desktop."),click_count:d.number().int().positive().max(2).optional().describe("Number of clicks. Defaults to 1."),mouse_button:d.enum(["left","right","middle"]).optional().describe("Mouse button. Defaults to left."),...D}),we=d.object({app:w.app,window_title_contains:w.window_title_contains,window_id:w.window_id,state_token:w.state_token,element_index:H,action:d.string().min(1).describe("Accessibility action label from get_window_state."),...D}),me=d.object({app:w.app,window_title_contains:w.window_title_contains,window_id:w.window_id,state_token:w.state_token,element_index:H.describe("Scrollable element index or stable nodeId from get_window_state."),direction:d.enum(["up","down","left","right"]).describe("Scroll direction."),pages:d.number().int().positive().optional().describe("Number of pages to scroll. Defaults to 1."),...D}),ge=d.object({app:w.app,window_title_contains:w.window_title_contains,window_id:w.window_id,state_token:w.state_token,from_x:A.describe("Start window-local x coordinate in the screenshot returned by get_window_state. Origin is the target window's top-left, not the desktop."),from_y:A.describe("Start window-local y coordinate in the screenshot returned by get_window_state. Origin is the target window's top-left, not the desktop."),to_x:A.describe("End window-local x coordinate in the screenshot returned by get_window_state. 
Origin is the target window's top-left, not the desktop."),to_y:A.describe("End window-local y coordinate in the screenshot returned by get_window_state. Origin is the target window's top-left, not the desktop."),...D}),fe=d.object({app:w.app,window_title_contains:w.window_title_contains,window_id:w.window_id,state_token:w.state_token,element_index:H.optional().describe("Optional text element index or stable nodeId from get_window_state."),text:d.string().describe("Literal text to type."),...D}),ye=d.object({app:w.app,window_title_contains:w.window_title_contains,window_id:w.window_id,state_token:w.state_token,key:d.string().min(1).optional().describe("Key or key-combination, e.g. command+a, Return, Escape, PageDown."),keys:d.string().min(1).optional().describe("Alias for `key`. Models sometimes emit `keys` instead; either name works."),...D}),be=d.object({app:w.app,window_title_contains:w.window_title_contains,window_id:w.window_id,state_token:w.state_token,element_index:H.describe("Settable element index or stable nodeId from get_window_state."),value:d.string().describe("Value to set."),...D}),_e=d.object({action:d.literal("click"),coordinate:vt,button:kt,count:At}),ve=d.object({action:d.literal("mouse_move"),coordinate:It}),ke=d.object({action:d.literal("left_click_drag"),start_coordinate:xt,coordinate:$t}),Ae=d.object({action:d.literal("left_mouse_down")}),Ie=d.object({action:d.literal("left_mouse_up")}),xe=d.object({action:d.literal("type"),text:St}),$e=d.object({action:d.literal("key"),text:Pt}),Se=d.object({action:d.literal("scroll"),scroll_direction:Ct,coordinate:Wt,scroll_amount:Tt}),Pe=d.object({action:d.literal("set_clipboard"),text:Rt}),Ce=d.discriminatedUnion("action",[_e,ve,ke,Ae,Ie,xe,$e,Se,Pe]),We=d.object({actions:d.array(Ce).min(1).describe("Ordered actions. 
Do not include waits; call the separate `wait` tool only after observing a screenshot that is not ready yet.")}),et=class l{_computer;_caps;_locked=null;_safetyTimer;static SAFETY_TIMEOUT_MS=180*1e3;server;logger;accessStore=new K;appNameResolver=new Y;lastUnsharedWindowsNote=null;selectedDisplay="";options;constructor(t={}){this.options={...t,window_state:t.window_state},process.env.DEBUG&&(this.logger=new G)}toText(t,e){let i=[{type:"text",text:t}];return e?{content:i,isError:!0}:{content:i}}toJson(t){return this.toText(JSON.stringify(t,null,2))}toImage(t,e,i){return t?{content:[{type:"image",data:t.toString("base64"),mimeType:this.imageMimeType(i),_meta:{screenshot:!0}}]}:this.toText(e,!0)}buildAccessMessage(t,e){let i=t.length===0?["","Computer Use runs on your actual desktop and can send mouse and keyboard input.","","Computer Use wants to control your desktop for this session.","","Apps that are not allowed may be hidden."]:["","Computer Use runs on your actual desktop and can send mouse and keyboard input to the apps you share.","","Computer Use wants to control these apps:","",...t.map(n=>`- ${n}`),"","Apps that are not allowed may be hidden."];return e&&i.push("","Reason:",e),i.join(`
|
|
40
|
+
`)}imageMimeType(t){return t!==void 0?"image/jpeg":"image/png"}collectDisplayIds(t,e){let i=new Set;for(let n of t)!n.isMinimized&&n.displayId&&(!e||n.applicationId===e)&&i.add(n.displayId);return[...i]}buildAllowedAppsInfo(t,e){return t.map(i=>({appId:i.id,name:i.displayName,displayIds:this.collectDisplayIds(e,i.id)}))}create(){let t=["This MCP server provides desktop automation tools (mouse, keyboard, screenshots, clipboard).",this.options.yolo?"YOLO mode is enabled. Call `request_access` once to auto-allow all current and future apps for this session.":"Before using access-gated tools (screenshot, click, type, clipboard, etc.), you MUST call `request_access` to start an access session.",this.options.window_state?"For a named target app, call `request_access` directly; it can find the app, select its display, and start the access session without capturing a screenshot. Use `list_applications` only for discovery or ambiguity.":"For a named target app, call `request_access` directly; it can find the app, select its display, and return the first screenshot. Use `list_applications` only for discovery or ambiguity.","Calling `request_access` with an empty apps array allows all visible apps on the selected display. If none are visible, it falls back to desktop access and empty desktop screenshots until apps are allowed.","Screenshots and zoom captures are composited to show ONLY the windows of allowed applications (plus system UI like the Dock). Disallowed app windows are not visible.",this.options.window_state?"For speed, use `get_window_state` after `request_access`; do not call `screenshot` unless visual pixels are needed.":"For speed, use returned screenshots instead of post-action `screenshot`, and batch predictable actions. 
Returned screenshots already include a small settle delay; do not add waits just for screenshot timing.","Prefer keyboard shortcuts, `type`, and `key` over visual menu navigation when reliable.","If `request_access` returns `allowAll=true`, all current and future apps are allowed for the rest of this session and you do not need to call `request_access` again.","Otherwise, if you launch a new app during the session, call `request_access` again to add it. Use `list_applications` first if you need to discover the app or its display."];this.options.window_state&&t.push("Call `get_window_state` once per turn before `click`, `scroll`, `drag`, `type_text`, `press_key`, `set_value`, or `invoke_action`. Pass the returned `state_token`, and prefer nodeId/element_index targets from `get_window_state` over screenshot coordinates.","`get_window_state` returns compact semantic text and is much faster than a desktop screenshot. Default `image_mode=omit` skips screenshot capture; pass `image` only when visual coordinates are needed. Its output labels action coordinates as window-local screenshot x/y with origin at the target window top-left; AX frames are screen/global and should not be passed directly to click/drag. Pass `diff_since` with a prior state_token for compact changes when useful, and use `format=json` or `format=both` only for sanitized debug metadata.","For background windows, close documents by pressing the close button element from `get_window_state`, then handle any sheet/dialog buttons from a refreshed app-level `get_window_state`. 
Do not use `press_key` for shortcuts like Cmd+W unless the app is foreground, and do not use `invoke_action` with an action that is not listed on that exact element.","Window-state actions show an animated non-interactive overlay cursor on macOS so the user can see what the window-scoped action is doing, even when the action uses Accessibility instead of a physical mouse click.","Raw coordinate/HID input tools (`left_click_drag`, `mouse_move`, `left_mouse_down`, `left_mouse_up`, `batch`) and the cross-platform `key`/`type` tools are intentionally NOT available on this platform. The window-scoped tools (`click`, `drag`, `scroll`, `type_text`, `press_key`, `set_value`, `invoke_action`) target windows directly, support stable element ids, and don't move the user's real cursor. For perception use `get_window_state`, `screenshot`, or `zoom`.");let e={name:"computer-use",version:"1.0.0"};this.server=new Qt(e,{instructions:t.join(`
|
|
41
|
+
`)});let i=d.object({method:d.literal("notifications/copilot"),params:d.object({type:d.string()}).passthrough()});return this.server.server.setNotificationHandler(i,n=>{switch(n.params.type){case"assistant.turn_start":this._locked===null&&(this._locked=!1),this.unlock();break;case"assistant.turn_end":this.unlock();break;case"user.abort":case"assistant.abort":this.unlock();break}}),this.server.registerTool("list_applications",{description:"List running apps and `selectedDisplay`. Use for discovery only; `request_access` can resolve named apps directly.",inputSchema:oe.shape,annotations:P},n=>this.list_applications(n)),this.server.registerTool("request_access",{description:"Request app access by friendly name or appId; pass [] for all visible apps. For a named app, call this directly; it finds the app and selects its display. "+(this.options.window_state?"Returns JSON (`allowed`, `allowAll`, `message`, `allowedApps?`, `selectedDisplay?`); call `get_window_state` next for perception. ":"Returns JSON (`allowed`, `allowAll`, `message`, `allowedApps?`, `selectedDisplay?`) and, when allowed, a screenshot. ")+"`allowAll=true` covers future apps for this session.",inputSchema:{apps:d.array(ne).describe('Applications to allow. Each entry can be a friendly name (e.g. "Outlook") or a stable appId, if already known. Pass an empty array to allow all apps on the selected display.'),reason:d.string().min(1).optional().describe("Optional reason text shown in the access dialog.")},annotations:dt},(n,s)=>this.request_access(n,s.signal)),this.server.registerTool("get_clipboard",{description:"Get the current text contents of the system clipboard."+_,annotations:P},()=>this.get_clipboard()),this.server.registerTool("set_clipboard",{description:"Set the system clipboard to the specified text."+_,inputSchema:{text:Rt},annotations:dt},n=>this.set_clipboard(n)),this.server.registerTool("wait",{description:"Pause, then return an updated screenshot if access is active. 
Use after a screenshot shows the UI is not ready yet.",inputSchema:{duration:re.optional(),seconds:d.number().nonnegative().max(100).optional().describe("Alias for `duration`. Either name works.")},annotations:{...P,idempotentHint:!0}},n=>this.wait({duration:n.duration??n.seconds??0})),this.options.window_state?this.registerWindowStateTools():this.registerCoordinateTools(),this.server}registerCoordinateTools(){this.server.registerTool("list_displays",{description:"List available displays and the currently selected display used for screenshots, zoom, cursor position, and coordinate-based actions.",annotations:P},()=>this.list_displays()),this.server.registerTool("select_display",{description:"Select the active display used for screenshots, zoom, cursor position, and coordinate-based actions. Omit display_id or pass an empty string to use the default display.",inputSchema:se.shape,annotations:dt},e=>this.select_display(e)),this.server.registerTool("screenshot",{description:"Capture the current filtered screen. "+bt+_,annotations:P},()=>this.screenshot()),this.server.registerTool("zoom",{description:"Capture a filtered screen region at full resolution."+bt+_,inputSchema:{region:ie.describe("{x1, y1, x2, y2} coordinates defining top-left and bottom-right corners of the region to capture.")},annotations:P},e=>this.zoom(e)),this.server.registerTool("cursor_position",{description:"Get the current cursor position in pixel coordinates. Returns {x, y}."+_,annotations:P},()=>this.cursor_position());let t={coordinate:vt,button:kt,count:At};this.server.registerTool("click",{description:"Click a mouse button. Defaults to a single left click; use `button` for right/middle click and `count` 2 or 3 for double/triple-click. 
Optionally move to a coordinate first."+_,inputSchema:t,annotations:b},e=>this.click(e)),this.server.registerTool("mouse_move",{description:"Move the mouse cursor to the specified pixel coordinate."+_,inputSchema:{coordinate:It},annotations:b},e=>this.mouse_move(e)),this.server.registerTool("left_click_drag",{description:"Click and drag from a start coordinate to an end coordinate."+_,inputSchema:{start_coordinate:xt,coordinate:$t},annotations:b},e=>this.left_click_drag(e)),this.server.registerTool("left_mouse_down",{description:"Press and hold the left mouse button at the current cursor position."+_,annotations:b},()=>this.left_mouse_down()),this.server.registerTool("left_mouse_up",{description:"Release the left mouse button at the current cursor position."+_,annotations:b},()=>this.left_mouse_up()),this.server.registerTool("type",{description:"Type a string for text input fields. Use `key` for physical-key controls."+_,inputSchema:{text:St},annotations:b},e=>this.type(e)),this.server.registerTool("key",{description:"Press a key or combo, e.g. 'Return', 'ctrl+s', 'alt+Tab', '1'."+_,inputSchema:{text:Pt},annotations:b},e=>this.key(e)),this.server.registerTool("scroll",{description:"Scroll the screen in a given direction at an optional coordinate."+_,inputSchema:{scroll_direction:Ct,coordinate:Wt,scroll_amount:Tt},annotations:b},e=>this.scroll(e)),this.server.registerTool("batch",{description:"Run ordered actions and return one final screenshot after a built-in settle delay. Use for predictable sequences; wait is intentionally not supported. Supported actions: click, mouse_move, left_click_drag, left_mouse_down, left_mouse_up, type, key, scroll, set_clipboard.",inputSchema:We.shape,annotations:b},e=>this.batch(e))}registerWindowStateTools(){let t=" Call `get_window_state` once per turn before using window-state actions. 
Pass `state_token`, and prefer a nodeId/element_index target over screenshot coordinates; if coordinates are needed, use local_center/window-local hints, not screen@ AX frames. Actions show an animated overlay cursor on macOS.";this.server.registerTool("get_window_state",{description:"Get compact semantic state for a target app window. Defaults to text; use format=json/both only for sanitized debug metadata. Screenshots are returned according to `image_mode`: MCP image content, local file path, or omitted; base64 is never embedded in text. Output coordinate contract: click/drag x,y are window-local screenshot coordinates with origin at the target window top-left; screen@/AX frames are global screen coordinates and should not be used directly as action coordinates. Pass `diff_since` with a prior state_token for compact semantic changes when useful.",inputSchema:ue.shape,annotations:P},e=>this.get_window_state(e)),this.server.registerTool("click",{description:"Click an element by numeric index or stable nodeId in `element_index`, or a window-local screenshot coordinate/local_center from `get_window_state` as fallback."+t,inputSchema:he.shape,annotations:b},e=>this.window_click(e)),this.server.registerTool("invoke_action",{description:"Invoke an accessibility action exposed by an element in `get_window_state`; the `action` string must come from that element's listed actions such as {press} or {confirm}, not an inferred command like `close` unless it is explicitly listed."+t,inputSchema:we.shape,annotations:b},e=>this.invoke_action(e)),this.server.registerTool("scroll",{description:"Scroll an element from `get_window_state` in an app window."+t,inputSchema:me.shape,annotations:b},e=>this.scroll_window(e)),this.server.registerTool("drag",{description:"Drag between window-local screenshot coordinates from `get_window_state` (origin is the target window's top-left; do not use screen@ AX frame coordinates 
directly).",inputSchema:ge.shape,annotations:b},e=>this.drag_window(e)),this.server.registerTool("type_text",{description:"Type literal text into an app window, optionally targeting an element from `get_window_state`."+t,inputSchema:fe.shape,annotations:b},e=>this.type_text(e)),this.server.registerTool("press_key",{description:"Press a key or key-combination against an app window. Background apps cannot receive global shortcuts without activation; for background document close use the close button element from `get_window_state` instead of Cmd+W."+t,inputSchema:ye.shape,annotations:b},e=>this.press_key(e)),this.server.registerTool("set_value",{description:"Set the value of a settable accessibility element from `get_window_state`."+t,inputSchema:be.shape,annotations:b},e=>this.set_value(e))}lock(){this._locked===null||!this._computer||(this._locked||(this._locked=this._computer.lock(()=>{this.unlock(),this.server.server.notification({method:"notifications/copilot",params:{type:"user.abort"}}).catch(()=>{});let t=this.accessStore.getState().hostWindowId;t&&this._computer&&this._computer.activateWindow(t).catch(()=>{})})),this._safetyTimer&&clearTimeout(this._safetyTimer),this._safetyTimer=setTimeout(()=>this.unlock(),l.SAFETY_TIMEOUT_MS))}unlock(){this._computer&&this._computer.hideAppCursorOverlay(),!(this._locked===null||!this._computer)&&(this._safetyTimer&&(clearTimeout(this._safetyTimer),this._safetyTimer=void 0),this._locked&&this._computer.unlock(),this._locked=!1)}async computer(){return this._computer||(this._computer=tt.create(this.logger),this.logger&&(this._computer=new Z(this._computer,this.logger))),this._computer}logEvent(t,e){this.logger?.log("info","Server",t,e)}async capabilities(){if(!this._caps){let t=await this.computer();this._caps=await t.capabilities()}return this._caps}async requestPermission(){switch(process.platform){case"linux":{await(await this.computer()).display();break}case"darwin":{let t=await this.computer(),e=[],i=await 
t.checkPermissions("accessibility");i||e.push("Accessibility");let n=await t.checkPermissions("screen");if(n||e.push("Screen Recording"),e.length>0){if(!this.server.server.getClientCapabilities()?.elicitation?.form)break;let o=["",`Computer Use needs the following macOS permission${e.length>1?"s":""} to control your desktop:`,"",...e.map(a=>`\u2022 ${a}`),"",'Clicking "Open System Settings" will open the relevant settings page(s).',`Enable the permission${e.length>1?"s":""} for your terminal app, then click "I've granted the permissions".`,"","Note: You may need to restart your terminal after granting permissions for them to take effect."];await this.showHostWindow(t);let r=await this.server.server.elicitInput({mode:"form",message:o.join(`
|
|
42
|
+
`),requestedSchema:{type:"object",properties:{action:{type:"string",title:`Grant ${e.join(" and ")} permission${e.length>1?"s":""}`,oneOf:[{const:"open",title:"1. Open System Settings"},{const:"done",title:"2. I've granted the permissions"},{const:"skip",title:"3. Skip (things may not work)"}]}},required:["action"]}}),c=r.action==="accept"?r.content?.action:void 0;if(c==="open"){i||await t.requestPermissions("accessibility"),n||await t.requestPermissions("screen"),await this.showHostWindow(t);let a=await this.server.server.elicitInput({mode:"form",message:["","System Settings has been opened.","",`Enable ${e.join(" and ")} for your terminal app.`,e.length>1?"Both settings pages have been opened \u2014 check each one.":"","",`After granting permissions, click "I've granted the permissions" below.`,"If the toggle was already on, try removing and re-adding your terminal app."].filter(Boolean).join(`
|
|
43
|
+
`),requestedSchema:{type:"object",properties:{action:{type:"string",title:"Confirm permissions",oneOf:[{const:"done",title:"1. I've granted the permissions"},{const:"skip",title:"2. Skip (things may not work)"}]}},required:["action"]}});if((a.action==="accept"?a.content?.action:void 0)!=="done")break}else if(c!=="done")break;if(i=await t.checkPermissions("accessibility"),n=await t.checkPermissions("screen"),!i||!n){let a=[];throw i||a.push("Accessibility"),n||a.push("Screen Recording"),new Error(`${a.join(" and ")} permission${a.length>1?"s are":" is"} still not detected. This usually means you need to restart your terminal after granting the permission. Please quit and reopen your terminal, then try again.`)}}break}}}async activeDisplayId(t){return this.selectedDisplay!==""?this.selectedDisplay:(await t.listDisplays()).find(i=>i.isPrimary)?.displayId??""}async requireAccess(){let t=this.accessStore.getState();if(!t.accessActive)throw new Error("No access session is active. Call request_access first to start a session.");let e=await this.computer();if(t.allowAll&&(await this.capabilities()).discovery){let{windows:n}=await this.filterHostWindow(e),s=T(n);this.accessStore.rememberApplications(s),this.appNameResolver.updateApplications(s),t=this.accessStore.getState()}return{computer:e,allowedAppIds:t.allowAll?null:t.allowedAppIds,hostWindowId:t.hostWindowId}}async hostWindowId(t){let e=this.accessStore.getState().hostWindowId;if(e)return e;if(!(await this.capabilities()).discovery)return null;let n=await t.getActiveWindow();return n&&(await t.listWindows()).some(o=>o.windowId===n.windowId)?n.windowId:null}async showHostWindow(t){let e=await this.hostWindowId(t);if(e)try{await t.activateWindow(e)}catch{}}async filterHostWindow(t,e){let i=e??await t.listWindows(),n=await this.hostWindowId(t);return{windows:n?i.filter(s=>s.windowId!==n):i,hostWindowId:n}}async prepareForInput(t,e,i){if(!(await this.capabilities()).discovery)return;let s=await 
this.activeDisplayId(t),o=i!==void 0;if((e===null||e.length===0)&&!o)return;if(!await t.prepareForInput(s,e,o?i:void 0)){if(o)throw new Error("Input blocked: a disallowed app at the target could not be hidden. Call `list_applications` to see what's there, then `request_access` to allow it.");let c=await t.getActiveWindow();throw c&&e!==null&&!e.includes(c.applicationId)?new Error(this.keyboardInputBlockedMessage(c)):new Error("Keyboard input blocked: an allowed application could not be focused. Click an allowed application first, or call `request_access` to allow it.")}}keyboardInputBlockedMessage(t){return`Keyboard input blocked: the focused application ("${t.applicationName}") is not allowed. Click an allowed application first, or call \`request_access\` to allow it.`}async validateActiveWindow(t,e){if(e===null||e.length===0||!(await this.capabilities()).discovery)return;let n=await t.getActiveWindow();if(n&&!e.includes(n.applicationId))throw new Error(this.keyboardInputBlockedMessage(n))}pointerInputBlockedMessage(t){return`Pointer input blocked: the focused application ("${t.applicationName}") is not allowed. Focus an allowed application first, or call \`request_access\` to allow it.`}async validatePointerInput(t,e,i){if(e===null||e.length===0||!(await this.capabilities()).discovery)return;let s=await t.getActiveWindow();if(s&&!e.includes(s.applicationId))throw new Error(this.pointerInputBlockedMessage(s));let o=await this.activeDisplayId(t),r=await t.windowAtPoint(o,i.x,i.y);if(!r||e.includes(r.applicationId))return;let c=r.applicationName||r.title||r.applicationId;throw new Error(`Pointer input blocked: the target location is covered by disallowed application ("${c}"). 
Call \`list_applications\` to see what's there, then \`request_access\` to allow it.`)}async list_displays(){let t=await this.computer(),e=await t.listDisplays(),i=await this.activeDisplayId(t);return this.toJson({displays:e,selectedDisplay:i})}async select_display({display_id:t}){let e=t??"",i=await this.computer(),n=await i.listDisplays();if(e!==""&&!n.some(r=>r.displayId===e))throw new Error(`Unknown display id '${e}'. Call list_displays first.`);this.selectedDisplay=e;let s=await this.activeDisplayId(i),o=n.find(r=>r.displayId===s)??null;return this.toJson({selectedDisplay:s,status:"selected",message:e===""?"Using the default display for screenshots, zoom, cursor position, and coordinate-based actions.":`Selected display '${o?.label??e}' for screenshots, zoom, cursor position, and coordinate-based actions.`})}async list_applications({display_id:t}={}){let e=await this.computer(),{windows:i}=await this.filterHostWindow(e),n=T(i);this.accessStore.rememberApplications(n),this.appNameResolver.updateApplications(n);let s=t??"";if(s!==""&&!(await e.listDisplays()).some(u=>u.displayId===s))throw new Error(`Unknown display id '${s}'. 
Call list_displays first.`);let o=s===""?i:i.filter(a=>!a.isMinimized&&a.displayId===s),r=T(o),c=await this.activeDisplayId(e);return this.toJson({selectedDisplay:c,applications:r})}async request_access({apps:t,reason:e},i){this.logEvent("request_access",`Start: apps=[${t.join(", ")}]${e?` reason="${e}"`:""}`),await this.requestPermission();let n={const:"allow",title:"Allow"},s={const:"allow_all",title:"Allow all apps (don't ask again)"},o={const:"deny",title:"Deny (Esc)"},r,c=async(g,y,L)=>{this.logEvent("request_access",`End: allowed=${g} allowAll=${y.allowAll} selectedDisplay=${this.selectedDisplay||"(default)"} hostWindowId=${y.hostWindowId??"null"} message="${L}"`);let $=r??(y.allowAll?void 0:y.allowedAppIds.map(v=>{let C=this.accessStore.tryGetKnownApplication(v);return{appId:v,name:C?.displayName,displayIds:this.collectDisplayIds(C?.windows??[])}})),M=this.toJson({allowed:g,allowAll:y.allowAll,...$?{allowedApps:$}:{},selectedDisplay:this.selectedDisplay||void 0,message:L});if(!g||this.options.window_state)return M;let O=await this.computer();this.lock();let st=y.hostWindowId?[y.hostWindowId]:[];return this.addScreenshot(M,O,y.allowAll?null:y.allowedAppIds,st)};if(this.options.yolo){let g=this.accessStore.allowApplications([],!0);return await c(!0,g,"YOLO mode is enabled. Auto-allowing full desktop access for this session.")}let a=await this.capabilities();if(!this.server.server.getClientCapabilities()?.elicitation?.form){let g=this.accessStore.allowApplications([],!0);return await c(!0,g,"Elicitation is not supported by this client. 
Auto-allowing full desktop access for this session.")}let p,h,m,E,it;if(a.discovery){let g=await this.computer(),y=this.accessStore.getState().hostWindowId,L=await g.listWindows(),$=y?L.filter(f=>f.windowId!==y):L;it=$;let M=T($);this.accessStore.rememberApplications(M),this.appNameResolver.updateApplications(M);let O=t&&t.length>0?"":await this.activeDisplayId(g);O&&(this.selectedDisplay=O);let st=new Set(y?[y]:[]),v;if(t&&t.length>0){let f=new Map;for(let k of t){let z=this.appNameResolver.resolve(k);if(z.length===0)throw new Error(`No matching application found for '${k}'. The app may not be running. On macOS, launch it first (e.g. via 'open -a "${k}"' through a shell tool), then call request_access again. Or call list_applications to see what is currently running.`);for(let W of z)if(!f.has(W.appId)){let J=this.accessStore.tryGetKnownApplication(W.appId);f.set(W.appId,J??{id:W.appId,displayName:W.displayName,windows:[]})}}v=[...f.values()];let Dt=new Set(f.keys()),V=new Map;for(let k of $)Dt.has(k.applicationId)&&!k.isMinimized&&k.displayId&&V.set(k.displayId,(V.get(k.displayId)??0)+1);if(V.size>0){let k="",z=0;for(let[W,J]of V)J>z&&(k=W,z=J);this.selectedDisplay=k}r=this.buildAllowedAppsInfo(v,$)}else v=T($.filter(f=>!f.isMinimized&&(O===""||f.displayId===O)&&!st.has(f.windowId)));let C=v.map(f=>f.id),ot=this.accessStore.getState(),F=v.length;h=["allow","allow_all"],m=C.length===0?ot.allowAll||ot.accessActive:this.accessStore.areAllowedForAccess(C),E=C.length===0?ot.allowAll?"Access is already configured to auto-allow future requests for this session, so you do not need to call request_access again.":"Desktop access is already active for this session.":F===1?`'${v[0].displayName}' is already shared for this session.`:"The requested apps are already shared for this session.",p={choices:[n,s,o],message:this.buildAccessMessage(v.map(f=>f.displayName),e),denyMessage:F===0?"The user declined desktop access.":F===1?`The user declined to share 
'${v[0].displayName}'.`:"The user declined to share the requested apps.",allow:f=>this.accessStore.allowApplications(C,f==="allow_all"),allowedMessage:f=>f==="allow_all"?"All current and future apps are allowed for the rest of this session, so you do not need to call request_access again.":F===0?"Desktop access is active for this session.":F===1?`Access session started for '${v[0].displayName}'.`:"Access session started for the requested apps."}}else{let g=this.accessStore.getState();h=["allow_all"],m=g.allowAll||g.accessActive,E=g.allowAll?"Access is already configured to auto-allow future requests for this session, so you do not need to call request_access again.":"Desktop access is already active for this session.",p={choices:[s,o],message:this.buildAccessMessage([],e),denyMessage:"The user declined desktop access.",allow:()=>this.accessStore.allowApplications([],!0),allowedMessage:()=>"Access session started for the desktop. All current and future apps are allowed for the rest of this session, so you do not need to call request_access again."}}if(m){let g=this.accessStore.getState();return await c(!0,g,E)}this.accessStore.getState().hostWindowId&&await this.showHostWindow(await this.computer());let nt=await this.server.server.request({method:"elicitation/create",params:{mode:"form",message:p.message,requestedSchema:{type:"object",properties:{decision:{type:"string",title:"Allow access for this session?",oneOf:p.choices}},required:["decision"]}}},te,{signal:i,timeout:300*1e3}),Nt=await this.computer(),pt=await this.hostWindowId(Nt);pt&&this.accessStore.setHostWindowId(pt);let x=nt.action==="accept"?nt.content?.decision:void 0;if(nt.action!=="accept"||!x||typeof x!="string"||!h.includes(x)){let g=this.accessStore.getState(),y=typeof x=="string"&&x!==o.const?`The user did not approve this access request. 
User response: ${x}`:p.denyMessage;return await c(!1,g,y)}let ut=p.allow(x);return ut.allowAll&&it&&(r=this.buildAllowedAppsInfo(this.accessStore.allKnownApplications(),it)),await c(!0,ut,p.allowedMessage(x))}windowStateAuthCache=new Map;windowStateAuthCacheKey(t,e,i){return`${e}\0${t.toLowerCase()}\0${i?.toLowerCase()??""}`}async requireWindowStateAccess(t,e,i){let{computer:n,allowedAppIds:s}=await this.requireAccess();if(e){let h=this.windowStateAuthCacheKey(t,e,i),m=this.windowStateAuthCache.get(h);if(m&&(s===null||s.includes(m.appId)))return n}let o=await n.listWindows();this.appNameResolver.updateApplications(T(o));let r=new Set(this.appNameResolver.resolve(t).map(h=>h.appId)),c=h=>r.has(h.applicationId),a=(e?o.filter(h=>h.windowId===e):o.filter(c)).filter(h=>!i||h.title.toLowerCase().includes(i.toLowerCase()));if(a.length===0)throw new Error(`Window-state target not found for "${t}". Call list_applications, then pass the intended app/window_id.`);if(e&&!a.some(c)){let h=a[0];throw new Error(`Window-state target blocked: window_id "${e}" belongs to "${h.applicationName}", not "${t}".`)}let u=[...new Set(a.map(h=>h.applicationId))];if(u.length!==1)throw new Error(`Window-state target is ambiguous for "${t}". Candidate apps: ${a.map(h=>`${h.applicationName} (${h.applicationId})`).join(", ")}.`);let p=u[0];if(s!==null&&!s.includes(p)){let h=a[0]?.applicationName??t;throw new Error(`Window-state input blocked: "${h}" is not shared. 
Call request_access to allow it.`)}return e&&this.windowStateAuthCache.set(this.windowStateAuthCacheKey(t,e,i),{appId:p}),n}async getWindowStateAction(t,e,i){let n=await e,s=n.ok?this.toJson(n):{...this.toJson(n),isError:!0},o=i.return_state??"none";if(!n.ok||o==="none")return s;await new Promise(p=>setTimeout(p,150));let r=o==="image"||o==="path"?o:"omit",c=pe(n),a=c!==void 0&&de.has(c),u=async(p,h)=>{let m=await t.getWindowState(i.app,p,h,r!=="omit",!0,i.max_nodes),E=new B(m).toToolResult({imageMode:r,format:"text",screenshotOutFile:i.screenshot_out_file});return{content:[...s.content,...E.content],isError:s.isError}};try{return await u(a?void 0:i.window_title_contains,a?void 0:i.window_id)}catch(p){if(_t(p,c))return s;if(!a)try{return await u()}catch(m){if(_t(m,c))return s}let h=`post_state_error: ${p instanceof Error?p.message:String(p)}`;return this.toJson({...n,warnings:[...n.warnings??[],h]})}}async get_window_state(t){let e=await this.requireWindowStateAccess(t.app,t.window_id,t.window_title_contains),i=t.image_mode??(t.include_screenshot===!0?"image":"omit");if(t.include_screenshot===!1&&i!=="omit")throw new Error("get_window_state include_screenshot=false conflicts with image_mode. 
Use image_mode=omit.");if(t.include_screenshot===!0&&t.image_mode==="omit")throw new Error("get_window_state include_screenshot=true conflicts with image_mode=omit.");let n=i!=="omit";return new B(await e.getWindowState(t.app,t.window_title_contains,t.window_id,n,t.include_tree,t.max_nodes,t.diff_since??t.diff_from_state_token)).toToolResult({imageMode:i,format:t.format??"text",screenshotOutFile:t.screenshot_out_file})}async window_click(t){let e=await this.requireWindowStateAccess(t.app,t.window_id,t.window_title_contains);if(!t.element_index&&(t.x===void 0||t.y===void 0))throw new Error("click requires either element_index or both x and y from get_window_state.");if(t.element_index&&(t.x!==void 0||t.y!==void 0))throw new Error("click accepts either element_index or coordinates, not both.");return this.getWindowStateAction(e,e.windowClick(t.app,t.element_index,t.x,t.y,t.click_count,t.mouse_button,t.window_title_contains,t.window_id,t.state_token),t)}async invoke_action(t){let e=await this.requireWindowStateAccess(t.app,t.window_id,t.window_title_contains);return this.getWindowStateAction(e,e.invokeAction(t.app,t.element_index,t.action,t.window_title_contains,t.window_id,t.state_token),t)}async scroll_window(t){let e=await this.requireWindowStateAccess(t.app,t.window_id,t.window_title_contains);return this.getWindowStateAction(e,e.windowScroll(t.app,t.element_index,t.direction,t.pages,t.window_title_contains,t.window_id,t.state_token),t)}async drag_window(t){let e=await this.requireWindowStateAccess(t.app,t.window_id,t.window_title_contains);return this.getWindowStateAction(e,e.windowDrag(t.app,t.from_x,t.from_y,t.to_x,t.to_y,t.window_title_contains,t.window_id,t.state_token),t)}async type_text(t){let e=await this.requireWindowStateAccess(t.app,t.window_id,t.window_title_contains);return this.getWindowStateAction(e,e.typeText(t.app,t.text,t.element_index,t.window_title_contains,t.window_id,t.state_token),t)}async press_key(t){let e=await 
this.requireWindowStateAccess(t.app,t.window_id,t.window_title_contains),i=t.key??t.keys;if(!i)throw new Error("press_key requires `key` (or its alias `keys`).");return this.getWindowStateAction(e,e.pressKey(t.app,i,t.window_title_contains,t.window_id,t.state_token),t)}async set_value(t){let e=await this.requireWindowStateAccess(t.app,t.window_id,t.window_title_contains);return this.getWindowStateAction(e,e.setValue(t.app,t.element_index,t.value,t.window_title_contains,t.window_id,t.state_token),t)}async screenshot(){let{computer:t,allowedAppIds:e,hostWindowId:i}=await this.requireAccess();this.lock();let n=i?[i]:[];return this.captureScreenshot(t,e,n)}logScreenshotFilter(t,e){let i=r=>{let c=this.accessStore.tryGetKnownApplication(r);return c?`${c.displayName} (${r})`:r},n=r=>{let c=r.includes("|")?r.split("|")[1]:r;return i(c)},s=(r,c)=>c.length===1?`${r}: ${c[0]}`:`${r}:
|
|
44
|
+
${c.map(a=>`- ${a}`).join(`
|
|
45
|
+
`)}`,o=[t===null?"allowed: (all apps)":t.length?s("allowed",t.map(i)):"allowed: (no app windows)"];e.length&&o.push(s("blocked",e.map(n))),this.logEvent("screenshot filter",o.join(`
|
|
46
|
+
`))}async captureScreenshot(t,e,i){this.logScreenshotFilter(e,i);let n=await t.screenshot(this.selectedDisplay,e,i,0,0,void 0,j),s=this.toImage(n,"Screenshot failed",j);if(n){let o=await this.unsharedWindowsNote(t,e,i);this.appendUnsharedWindowsNote(s,o,!0)}return s}async addScreenshot(t,e,i,n){await new Promise(r=>setTimeout(r,ee)),this.logScreenshotFilter(i,n);let s=null;try{s=await e.screenshot(this.selectedDisplay,i,n,0,0,void 0,j)}catch{}if(!s)return t.content.push({type:"text",text:"Screenshot capture failed"}),t;t.content.push({type:"image",data:s.toString("base64"),mimeType:this.imageMimeType(j),_meta:{screenshot:!0}});let o=await this.unsharedWindowsNote(e,i,n);return this.appendUnsharedWindowsNote(t,o,!1),t}appendUnsharedWindowsNote(t,e,i){if(!e){this.lastUnsharedWindowsNote=null;return}(i||e!==this.lastUnsharedWindowsNote)&&t.content.push({type:"text",text:e}),this.lastUnsharedWindowsNote=e}async unsharedWindowsNote(t,e,i){if(e===null||!(await this.capabilities()).discovery)return null;let s=await this.activeDisplayId(t),o=new Set(e),r=new Set(i),{windows:c}=await this.filterHostWindow(t),a=c.filter(p=>!p.isMinimized&&(s===""||p.displayId===s)&&!o.has(p.applicationId)&&!r.has(p.windowId));if(a.length===0)return null;let u=a.length===1?"window":"windows";return`${a.length} other ${u} on this display ${a.length===1?"is":"are"} from an unshared app. The screenshot may not show everything that's running. Use \`list_applications\` to see what else is there if you need to.`}async cursor_position(){let e=await(await this.computer()).cursorPosition(this.selectedDisplay);return this.toText(`${e.x},${e.y}`)}async _click(t,e,i,n,s,o,r){let c;switch(o){case 1:c="";break;case 2:c="double ";break;case 3:c="triple ";break;default:throw new Error(`Invalid click count '${o}'. 
Expected 1, 2, or 3.`)}let a=r??await t.cursorPosition(n);await this.prepareForInput(t,e,{x:a.x,y:a.y,blockedWindowIds:i}),await this.validatePointerInput(t,e,a),r&&await t.move(a.x,a.y,n),await t.click(a.x,a.y,s,o,n);let u=r?` at (${r.x},${r.y})`:"",p=s==="left"?"":`${s} `;return`${c}${p}click${u}`}async click({coordinate:t,button:e,count:i}){let{computer:n,allowedAppIds:s,hostWindowId:o}=await this.requireAccess();this.lock();let r=o?[o]:[],c=await this._click(n,s,r,this.selectedDisplay,e??"left",i??1,t);return this.addScreenshot(this.toText(c),n,s,r)}async mouse_move({coordinate:t}){let{computer:e,allowedAppIds:i,hostWindowId:n}=await this.requireAccess();this.lock();let s=n?[n]:[];return await this.prepareForInput(e,i,{x:t.x,y:t.y,blockedWindowIds:s}),await this.validatePointerInput(e,i,t),await e.move(t.x,t.y,this.selectedDisplay),this.addScreenshot(this.toText(`Moved to (${t.x},${t.y})`),e,i,s)}async left_click_drag({start_coordinate:t,coordinate:e}){let{computer:i,allowedAppIds:n,hostWindowId:s}=await this.requireAccess();this.lock();let o=s?[s]:[];return await this.prepareForInput(i,n,{x:t.x,y:t.y,blockedWindowIds:o}),await this.prepareForInput(i,n,{x:e.x,y:e.y,blockedWindowIds:o}),await this.validatePointerInput(i,n,t),await this.validatePointerInput(i,n,e),await i.drag(t.x,t.y,e.x,e.y,this.selectedDisplay),this.addScreenshot(this.toText(`Dragged (${t.x},${t.y}) -> (${e.x},${e.y})`),i,n,o)}async left_mouse_down(){let{computer:t,allowedAppIds:e,hostWindowId:i}=await this.requireAccess();this.lock();let n=i?[i]:[],s=await t.cursorPosition(this.selectedDisplay);return await this.prepareForInput(t,e,{x:s.x,y:s.y,blockedWindowIds:n}),await this.validatePointerInput(t,e,s),await t.mouseDown(s.x,s.y,this.selectedDisplay),this.addScreenshot(this.toText(`Mouse down at (${s.x},${s.y})`),t,e,n)}async left_mouse_up(){let{computer:t,allowedAppIds:e,hostWindowId:i}=await this.requireAccess();this.lock();let n=i?[i]:[],s=await 
t.cursorPosition(this.selectedDisplay);return await this.prepareForInput(t,e,{x:s.x,y:s.y,blockedWindowIds:n}),await this.validatePointerInput(t,e,s),await t.mouseUp(s.x,s.y,this.selectedDisplay),this.addScreenshot(this.toText(`Mouse up at (${s.x},${s.y})`),t,e,n)}async type({text:t}){let{computer:e,allowedAppIds:i,hostWindowId:n}=await this.requireAccess();this.lock();let s=n?[n]:[];return await this.prepareForInput(e,i),await this.validateActiveWindow(e,i),await e.type(t),this.addScreenshot(this.toText(`Typed ${t.length} chars`),e,i,s)}async key({text:t}){let{computer:e,allowedAppIds:i,hostWindowId:n}=await this.requireAccess();this.lock();let s=n?[n]:[];await this.prepareForInput(e,i),await this.validateActiveWindow(e,i);let o=rt(t);return await e.key(o),this.addScreenshot(this.toText(`Pressed ${o}`),e,i,s)}async scroll({scroll_direction:t,coordinate:e,scroll_amount:i}){let{computer:n,allowedAppIds:s,hostWindowId:o}=await this.requireAccess();this.lock();let r=o?[o]:[],c=i??3,a=await n.cursorPosition(this.selectedDisplay),u=e?.x??a.x,p=e?.y??a.y;await this.prepareForInput(n,s,{x:u,y:p,blockedWindowIds:r}),await this.validatePointerInput(n,s,{x:u,y:p});let h=t==="left"?-c:t==="right"?c:0,m=t==="down"?c:t==="up"?-c:0;return await n.scroll(u,p,h,m,this.selectedDisplay),this.addScreenshot(this.toText(`Scrolled ${t} ${c} at (${u},${p})`),n,s,r)}async wait({duration:t}){this.logEvent("wait",`duration=${String(t)}s`),await new Promise(r=>setTimeout(r,t*1e3));let e=this.toText(`Waited ${t}s`);if(!this.accessStore.getState().accessActive)return e;let{computer:i,allowedAppIds:n,hostWindowId:s}=await this.requireAccess(),o=s?[s]:[];return this.addScreenshot(e,i,n,o)}async get_clipboard(){let{computer:t}=await this.requireAccess();this.lock();let e=await t.getClipboard();return this.toText(e)}async set_clipboard({text:t}){let{computer:e,allowedAppIds:i,hostWindowId:n}=await this.requireAccess();this.lock();let s=n?[n]:[];return await 
e.setClipboard(t),this.addScreenshot(this.toText(`Clipboard set (${t.length} chars)`),e,i,s)}async zoom({region:t}){let{computer:e,allowedAppIds:i,hostWindowId:n}=await this.requireAccess();this.lock();let s=n?[n]:[],o=await e.screenshot(this.selectedDisplay,i,s,0,0,[t.x1,t.y1,t.x2,t.y2],j);return this.toImage(o,"Zoom screenshot failed",j)}async batch({actions:t}){let{computer:e,allowedAppIds:i,hostWindowId:n}=await this.requireAccess();this.lock(),this.logEvent("batch",`${t.length} actions: ${t.map(c=>c.action).join(", ")}`);let s=this.selectedDisplay,o=n?[n]:[],r=[];for(let c=0;c<t.length;c++){let a=t[c];try{switch(a.action){case"click":{let u=await this._click(e,i,o,s,a.button??"left",a.count??1,a.coordinate);r.push(`[${c}] ${u}`);break}case"mouse_move":await this.prepareForInput(e,i,{x:a.coordinate.x,y:a.coordinate.y,blockedWindowIds:o}),await this.validatePointerInput(e,i,a.coordinate),await e.move(a.coordinate.x,a.coordinate.y,s),r.push(`[${c}] moved to (${a.coordinate.x},${a.coordinate.y})`);break;case"left_click_drag":await this.prepareForInput(e,i,{x:a.start_coordinate.x,y:a.start_coordinate.y,blockedWindowIds:o}),await this.prepareForInput(e,i,{x:a.coordinate.x,y:a.coordinate.y,blockedWindowIds:o}),await this.validatePointerInput(e,i,a.start_coordinate),await this.validatePointerInput(e,i,a.coordinate),await e.drag(a.start_coordinate.x,a.start_coordinate.y,a.coordinate.x,a.coordinate.y,s),r.push(`[${c}] dragged (${a.start_coordinate.x},${a.start_coordinate.y}) -> (${a.coordinate.x},${a.coordinate.y})`);break;case"left_mouse_down":{let u=await e.cursorPosition(s);await this.prepareForInput(e,i,{x:u.x,y:u.y,blockedWindowIds:o}),await this.validatePointerInput(e,i,u),await e.mouseDown(u.x,u.y,s),r.push(`[${c}] mouse down at (${u.x},${u.y})`);break}case"left_mouse_up":{let u=await e.cursorPosition(s);await this.prepareForInput(e,i,{x:u.x,y:u.y,blockedWindowIds:o}),await this.validatePointerInput(e,i,u),await e.mouseUp(u.x,u.y,s),r.push(`[${c}] mouse up at 
(${u.x},${u.y})`);break}case"type":await this.prepareForInput(e,i),await this.validateActiveWindow(e,i),await e.type(a.text),r.push(`[${c}] typed ${a.text.length} chars`);break;case"key":{await this.prepareForInput(e,i),await this.validateActiveWindow(e,i);let u=rt(a.text);await e.key(u),r.push(`[${c}] key ${a.text}`);break}case"scroll":{let u=a.scroll_amount??3,p=a.coordinate??await e.cursorPosition(s);await this.prepareForInput(e,i,{x:p.x,y:p.y,blockedWindowIds:o}),await this.validatePointerInput(e,i,p);let h=a.scroll_direction==="left"?-u:a.scroll_direction==="right"?u:0,m=a.scroll_direction==="down"?u:a.scroll_direction==="up"?-u:0;await e.scroll(p.x,p.y,h,m,s),r.push(`[${c}] scroll ${a.scroll_direction}${a.coordinate?` at (${p.x},${p.y})`:""}`);break}case"set_clipboard":await e.setClipboard(a.text),r.push(`[${c}] clipboard set`);break}}catch(u){return r.push(`[${c}] ${a.action}: FAILED - ${u instanceof Error?u.message:String(u)}`),this.toText(r.join(`
|
|
38
47
|
`),!0)}}return this.addScreenshot(this.toText(r.join(`
|
|
39
|
-
`)),e,i,
|
|
40
|
-
`)});return new O(u).connect(i),i}export{he as createServer};
|
|
48
|
+
`)),e,i,o)}};function ni(l={}){return process.env.COPILOT_COMPUTER_USE_WINDOW_STATE&&(l.window_state=!0),process.env.COPILOT_COMPUTER_USE_YOLO&&(l.yolo=!0),new et(l).create()}export{ni as createServer};
|
package/dist/main.js
CHANGED
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import{StdioServerTransport as
|
|
2
|
+
import{StdioServerTransport as Re}from"@modelcontextprotocol/sdk/server/stdio.js";import{McpServer as te}from"@modelcontextprotocol/sdk/server/mcp.js";import{ElicitResultSchema as ee}from"@modelcontextprotocol/sdk/types.js";import{z as d}from"zod";var K=class{knownApplications=new Map;allowedAppIds=new Set;accessActive=!1;allowAll=!1;hostWindowId=null;rememberApplications(t){this.knownApplications.clear();for(let e of t)this.knownApplications.set(e.id,e);if(this.allowAll)for(let e of t)this.allowedAppIds.add(e.id)}getKnownApplications(t){return t.map(e=>{let i=this.knownApplications.get(e);if(!i)throw new Error(`Unknown application id '${e}'. Call list_applications first.`);return i})}setHostWindowId(t){this.hostWindowId=t}tryGetKnownApplication(t){return this.knownApplications.get(t)}allKnownApplications(){return[...this.knownApplications.values()]}allowApplications(t,e=!1,i){this.accessActive=!0;for(let n of t)this.allowedAppIds.add(n);if(e){this.allowAll=!0;for(let n of this.knownApplications.keys())this.allowedAppIds.add(n)}return i&&(this.hostWindowId=i),this.getState()}areAllowedForAccess(t){return this.allowAll||t.every(e=>this.allowedAppIds.has(e))}getState(){return{accessActive:this.accessActive,allowedAppIds:[...this.allowedAppIds],allowAll:this.allowAll,hostWindowId:this.hostWindowId}}};function T(l){let t=new Map,e=[...l].sort((i,n)=>`${i.applicationName}\0${i.title}\0${i.windowId}`.localeCompare(`${n.applicationName}\0${n.title}\0${n.windowId}`));for(let i of e){let n=i.applicationName||i.title||i.applicationId,s=t.get(i.applicationId);s||(s={id:i.applicationId,displayName:n,applicationNames:i.applicationNames,windows:[]},t.set(i.applicationId,s)),s.windows.push({windowId:i.windowId,title:i.title,displayId:i.displayId,isMinimized:i.isMinimized})}return[...t.values()].sort((i,n)=>`${i.displayName}\0${i.id}`.localeCompare(`${n.displayName}\0${n.id}`))}function mt(l){return l.toLowerCase().split(/[^a-z0-9]+/).filter(t=>t.length>0)}function ht(l){let t=new 
Set;for(let e of l)for(let i of mt(e))t.add(i);return t}function gt(l){return l.trim().toLowerCase()}function wt(l){let t=new Set;for(let e of l){let i=gt(e);i&&t.add(i)}return t}var Y=class{appRecords=new Map;updateApplications(t){for(let e of t){let i=[e.displayName,...e.applicationNames??[]],n=this.appRecords.get(e.id);if(n){for(let s of ht(i))n.tokenBag.add(s);for(let s of wt(i))n.exactNames.add(s);n.displayName=e.displayName}else this.appRecords.set(e.id,{appId:e.id,displayName:e.displayName,tokenBag:ht(i),exactNames:wt(i)})}}resolve(t){let e=this.appRecords.get(t);if(e)return[e];let i=gt(t);if(i){let o=[];for(let r of this.appRecords.values())r.exactNames.has(i)&&o.push(r);if(o.length>0)return o}let n=mt(t);if(n.length===0)return[];let s=[];for(let o of this.appRecords.values())n.every(r=>o.tokenBag.has(r))&&s.push(o);return s}allRecords(){return[...this.appRecords.values()]}};import{randomUUID as Ot}from"node:crypto";import{mkdirSync as qt,writeFileSync as jt}from"node:fs";import{homedir as Et,tmpdir as Lt}from"node:os";import{dirname as Ft,isAbsolute as zt,join as ft}from"node:path";var Bt={actionCoordinates:"window-local screenshot coordinates with origin at the target window top-left",screenFrames:"screen@ frames and window bounds are global screen coordinates; do not pass them directly to click/drag",localCenter:"local_center/localCenter values are window-local x,y hints suitable for click/drag coordinates"},B=class{constructor(t){this.result=t}result;toToolResult(t){let e=[],i=this.sanitizeJsonForOutput(),n;t.imageMode==="path"&&this.result.image&&(n=this.writeImage(t.screenshotOutFile),this.setScreenshotPath(i,n));let s=JSON.stringify(i,null,2),o=this.result.text?this.augmentProjection(this.result.text,i):this.buildProjection(i);n&&t.format!=="json"&&(o=`${o}
|
|
3
|
+
screenshot: ${n}`);let r=t.format==="json"?s:t.format==="both"?`${o}
|
|
4
|
+
|
|
5
|
+
--- window_state_json ---
|
|
6
|
+
${s}`:o;return e.push({type:"text",text:r}),t.imageMode==="image"&&this.result.image&&e.push({type:"image",data:this.result.image.data,mimeType:this.result.image.mimeType,_meta:{screenshot:!0,windowState:!0,stateToken:this.result.stateToken}}),{content:e}}sanitizeJsonForOutput(){return this.addCoordinateMetadata(this.sanitizeJson(this.result.json??{app:this.result.app,windowId:this.result.windowId,stateToken:this.result.stateToken}))}buildProjection(t=this.sanitizeJsonForOutput()){let e=this.stringAtPath(t,["window","applicationName"])??this.result.app??this.stringAtPath(t,["app"])??"unknown",i=this.stringAtPath(t,["window","applicationId"]),n=this.stringAtPath(t,["window","title"]),s=this.stringAtPath(t,["window","windowID"])??this.result.windowId,o=this.stringAtPath(t,["stateToken"])??this.result.stateToken,r=this.recordAtPath(t,["window","bounds"]),c=this.recordAtPath(t,["tree"]),a=this.arrayAtPath(c,["nodes"]).filter(m=>this.isRecord(m)),u=this.valueAtPath(c,["truncated"])===!0,p=["window_state compact_v1",`app: ${e}${i?` (${i})`:""}`,`window: ${n?JSON.stringify(n):"(untitled)"}${s?` id=${s}`:""}${r?` ${this.compactWindowRect(r,"screen@")}`:""}`,o?`state_token: ${o}`:void 0,`tree: ${a.length} node${a.length===1?"":"s"}${u?" (truncated)":""}`,"coordinate_contract: action x,y are window-local screenshot coordinates; screen@ frames are global; use local_center for click/drag","targeting: pass a line number or nodeId as element_index; include state_token when available",""].filter(m=>m!==void 0),h=this.renderCompactWindowNodes(a);return p.push(...h.length?h:["(no accessibility nodes returned)"]),p.join(`
|
|
7
|
+
`)}writeImage(t){if(!this.result.image)throw new Error("get_window_state image_mode=path requested a screenshot, but no screenshot was returned.");let e=t?this.expandHomePath(t):this.defaultImagePath();if(!zt(e))throw new Error("get_window_state screenshot_out_file must be an absolute path.");return qt(Ft(e),{recursive:!0}),jt(e,Buffer.from(this.result.image.data,"base64")),e}defaultImagePath(){let t=this.result.image?.mimeType==="image/jpeg"?"jpg":"png",e=(this.result.app??"app").replace(/[^A-Za-z0-9._-]+/g,"-").replace(/^-+|-+$/g,"")||"app";return ft(Lt(),"computer-use-mcp","window-state",`${e}-${Date.now()}-${Ot()}.${t}`)}setScreenshotPath(t,e){if(!this.isRecord(t))return;let i=this.isRecord(t.screenshot)?t.screenshot:{},n=this.isRecord(i.image)?i.image:{};n.imagePath=e,i.image=n,t.screenshot=i}sanitizeJson(t,e){if(e==="imageBase64")return;if(typeof t=="string")return this.truncateString(t);if(Array.isArray(t))return t.map(n=>this.sanitizeJson(n));if(!this.isRecord(t))return t;let i={};for(let[n,s]of Object.entries(t))i[n]=this.sanitizeJson(s,n);return i}addCoordinateMetadata(t){if(!this.isRecord(t))return t;t.coordinateContract??=Bt;let e=this.recordAtPath(t,["window","bounds"]),i=this.recordAtPath(t,["tree"]),n=this.arrayAtPath(i,["nodes"]);for(let s of n){if(!this.isRecord(s)||this.isRecord(s.localCenter))continue;let o=this.localCenterForNode(s,e);o&&(s.localCenter=o)}return t}augmentProjection(t,e){if(t=this.labelScreenFrames(t),t.includes("coordinate_contract:"))return t;let i=t.split(`
|
|
8
|
+
`),n=Math.max(i.findIndex(o=>o.trim()===""),0),s=["coordinate_contract: action x,y are window-local screenshot coordinates; screen@ frames are global; use local_center for click/drag",this.compactLocalCenters(e)].filter(o=>!!o);return n===0&&i[0]?.trim()!==""?[...s,...i].join(`
|
|
9
|
+
`):(i.splice(n,0,...s),i.join(`
|
|
10
|
+
`))}labelScreenFrames(t){return t.replace(/(^|[\s(])@(-?\d+,-?\d+\s+\d+x\d+)/g,"$1screen@$2")}truncateString(t){return t.length<=1e3?t:`${t.slice(0,1e3)}...[truncated ${t.length-1e3} chars]`}renderCompactWindowNodes(t){let e=[];for(let i of t){let n=this.compactWindowNodeLine(i);n&&e.push(n)}return e}compactWindowNodeLine(t){let e=this.stringValue(t.index)??this.numberValue(t.index)?.toString(),i=this.compactWindowRole(this.stringValue(t.role),this.stringValue(t.subrole)),n=[this.stringValue(t.title),this.stringValue(t.description),this.stringValue(t.value),this.stringValue(t.valueDescription)].map(p=>p?.replace(/\s+/g," ").trim()).filter(p=>!!p),s=[...new Set(n)].slice(0,2).map(p=>JSON.stringify(p.length>160?`${p.slice(0,157)}...`:p)),o=Array.isArray(t.actions)?t.actions.filter(p=>typeof p=="string").slice(0,4):[],r=[t.focused===!0?"focused":void 0,t.enabled===!1?"disabled":void 0].filter(p=>!!p),c=this.isRecord(t.frame)?this.compactWindowRect(t.frame,"screen@"):void 0,a=this.isRecord(t.localCenter)?this.compactPoint(t.localCenter):void 0;if(e==="1"||s.length>0||o.length>0||r.length>0||["window","button","checkbox","radio_button","text_field","text_area","link","menu_item","row","cell","table","list","scroll_area","web_area"].includes(i))return[`${e?`${e}. 
`:""}${i}`,...s,r.length?`[${r.join(",")}]`:void 0,o.length?`{${o.map(p=>p.replace(/^AX/,"").toLowerCase()).join(",")}}`:void 0,c,a?`local_center=${a}`:void 0].filter(p=>!!p).join(" ")}compactWindowRole(t,e){return(e&&e!==t?e:t??"element").replace(/^AX/,"").replace(/([a-z])([A-Z])/g,"$1_$2").replace(/[^A-Za-z0-9]+/g,"_").replace(/^_+|_+$/g,"").toLowerCase()||"element"}compactWindowRect(t,e="@"){let i=this.numberValue(t.x),n=this.numberValue(t.y),s=this.numberValue(t.width),o=this.numberValue(t.height);if(!(i===void 0||n===void 0||s===void 0||o===void 0))return`${e}${Math.round(i)},${Math.round(n)} ${Math.round(s)}x${Math.round(o)}`}compactPoint(t){let e=this.numberValue(t.x),i=this.numberValue(t.y);if(!(e===void 0||i===void 0))return`${Math.round(e)},${Math.round(i)}`}localCenterForNode(t,e){let i=this.recordAtPath(t,["frame"]);if(!i||!e)return;let n=this.numberValue(e.x),s=this.numberValue(e.y),o=this.numberValue(i.x),r=this.numberValue(i.y),c=this.numberValue(i.width),a=this.numberValue(i.height);if(n===void 0||s===void 0||o===void 0||r===void 0||c===void 0||a===void 0)return;let u=o-n+c/2,p=r-s+a/2;if(!(u<0||p<0))return{x:Math.round(u),y:Math.round(p)}}compactLocalCenters(t){let e=this.recordAtPath(t,["tree"]),n=this.arrayAtPath(e,["nodes"]).filter(s=>this.isRecord(s)).map(s=>{let o=this.isRecord(s.localCenter)?this.compactPoint(s.localCenter):void 0;if(!o)return;let r=this.stringValue(s.index)??this.numberValue(s.index)?.toString(),c=this.stringValue(s.nodeId),a=r??c;if(!(!a||a==="1"||!this.isActionableNode(s)))return`${a}=${o}`}).filter(s=>!!s).slice(0,12);return n.length?`local_centers: ${n.join(" ")} (window-local)`:void 0}isActionableNode(t){let e=this.compactWindowRole(this.stringValue(t.role),this.stringValue(t.subrole));return(Array.isArray(t.actions)?t.actions.some(n=>typeof n=="string"):!1)||["button","checkbox","radio_button","text_field","text_area","link","menu_item","row","cell"].includes(e)}expandHomePath(t){return 
t==="~"||t.startsWith("~/")?ft(Et(),t.slice(2)):t}isRecord(t){return typeof t=="object"&&t!==null&&!Array.isArray(t)}stringValue(t){return typeof t=="string"?t:void 0}numberValue(t){return typeof t=="number"&&Number.isFinite(t)?t:void 0}valueAtPath(t,e){let i=t;for(let n of e){if(!this.isRecord(i))return;i=i[n]}return i}stringAtPath(t,e){return this.stringValue(this.valueAtPath(t,e))}recordAtPath(t,e){let i=this.valueAtPath(t,e);return this.isRecord(i)?i:void 0}arrayAtPath(t,e){let i=this.valueAtPath(t,e);return Array.isArray(i)?i:[]}};var Ut=new Set(["ctrl","control","shift","alt","option","super","meta","cmd","command","a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z","0","1","2","3","4","5","6","7","8","9","return","enter","tab","space","backspace","delete","forwarddelete","escape","esc","up","down","left","right","home","end","pageup","pagedown","insert","capslock","f1","f2","f3","f4","f5","f6","f7","f8","f9","f10","f11","f12","-","minus","=","equal","plus","[","]","\\",";","'",",",".","/","`"]),Ht={arrowup:"up",arrowdown:"down",arrowleft:"left",arrowright:"right",page_up:"pageup",page_down:"pagedown",backslash:"\\",semicolon:";",slash:"/",grave:"`",bracketleft:"[",bracketright:"]",super_l:"super",gui:"super",win:"super",windows:"super",caps_lock:"capslock",caps:"capslock",del:"delete"};function rt(l){l==="+"?l="plus":l.length>2&&l.endsWith("++")&&(l=l.slice(0,-1)+"plus");let t=l.split("+").map(i=>i.trim().toLowerCase()).filter(Boolean);if(t.length===0)throw new Error("Key combo must contain at least one key.");return t.map(i=>{let n=Ht[i]??i;if(!Ut.has(n))throw new Error(`Unknown key "${i}" in combo "${l}".`);return n}).join("+")}import*as I from"fs/promises";import*as yt from"os";import*as X from"path";var at="<!-- LOG 
-->",G=class{logDir;logPath;startTime=performance.now();queue=Promise.resolve();constructor(){this.logDir=X.join(yt.homedir(),".copilot","logs","computer-use"),this.logPath=X.join(this.logDir,`${this.timestamp()}.html`)}timestamp(){let t=new Date,e=(i,n=2)=>String(i).padStart(n,"0");return`${t.getFullYear()}${e(t.getMonth()+1)}${e(t.getDate())}-${e(t.getHours())}${e(t.getMinutes())}${e(t.getSeconds())}.${e(Math.floor(t.getMilliseconds()/10))}`}elapsed(){return`${((performance.now()-this.startTime)/1e3).toFixed(2)}s`}insert(t){return this.queue=this.queue.then(()=>this.write(t)).catch(()=>this.write(t)).catch(()=>{}),this.queue}async write(t){let e=!0;try{await I.access(this.logPath)}catch{e=!1}e||(await I.mkdir(this.logDir,{recursive:!0}),await I.writeFile(this.logPath,`<!DOCTYPE html>
|
|
3
11
|
<html>
|
|
4
12
|
<head>
|
|
5
13
|
<meta charset="utf-8">
|
|
@@ -23,20 +31,20 @@ import{StdioServerTransport as Xt}from"@modelcontextprotocol/sdk/server/stdio.js
|
|
|
23
31
|
<body>
|
|
24
32
|
<table>
|
|
25
33
|
<tr><th>Time</th><th>Level</th><th>Source</th><th>Action</th><th>Details</th></tr>
|
|
26
|
-
${
|
|
34
|
+
${at}
|
|
27
35
|
</table>
|
|
28
36
|
</body>
|
|
29
37
|
</html>
|
|
30
|
-
`));let i=await
|
|
38
|
+
`));let i=await I.readFile(this.logPath,"utf-8");await I.writeFile(this.logPath,i.replace(at,t+at))}rowHtml(t,e,i,n){return`<tr><td class="time">${this.elapsed()}</td><td class="level level-${i}">${i}</td><td class="source">${n}</td><td class="action">${this.escapeHtml(t)}</td><td>${e}</td></tr>
|
|
31
39
|
`}escapeHtml(t){return t.replace(/&/g,"&").replace(/</g,"<").replace(/>/g,">").replace(/"/g,""")}renderMarkdown(t){let e=t.split(`
|
|
32
|
-
`),i=[],s=[],o=()=>{s.length>0&&(i.push(`<ul style="margin:0;padding-left:1.2em">${s.map(n=>`<li>${this.escapeHtml(n)}</li>`).join("")}</ul>`),s=[])};for(let n of e)n.startsWith("- ")?s.push(n.slice(2)):(o(),n.length>0&&(i.length>0&&i.push("<br>"),i.push(this.escapeHtml(n))));return o(),i.join("")}log(t,e,i,s){this.insert(this.rowHtml(i,this.renderMarkdown(s),t,e))}logScreenshot(t,e,i,s){if(i){let o=s===void 0?"png":"jpg",n=`${this.timestamp()}.${o}`;A.mkdir(this.logDir,{recursive:!0}).then(()=>A.writeFile(H.join(this.logDir,n),i)),this.insert(this.rowHtml(t,`${this.escapeHtml(e)}<br><img src="${n}">`,"info","Computer"))}else this.insert(this.rowHtml(t,this.escapeHtml(e),"error","Computer"))}};var F=class{computer;logger;constructor(t,e){this.computer=t,this.logger=e}log(t,e){this.logger.log("info","Computer",t,e)}async checkPermissions(t){return this.computer.checkPermissions(t)}async requestPermissions(t){return this.computer.requestPermissions(t)}async capabilities(){let t=await this.computer.capabilities();return this.log("capabilities",JSON.stringify(t)),t}async click(t,e,i,s,o=""){await this.computer.click(t,e,i,s,o),this.log("click",`(${t}, ${e}) button=${i} count=${s}${o?` display=${o}`:""}`)}async move(t,e,i=""){await this.computer.move(t,e,i),this.log("move",`(${t}, ${e})${i?` display=${i}`:""}`)}async drag(t,e,i,s,o=""){await this.computer.drag(t,e,i,s,o),this.log("drag",`(${t}, ${e}) \u2192 (${i}, ${s})${o?` display=${o}`:""}`)}async mouseDown(t,e,i=""){await this.computer.mouseDown(t,e,i),this.log("mouseDown",`(${t}, ${e})${i?` display=${i}`:""}`)}async mouseUp(t,e,i=""){await this.computer.mouseUp(t,e,i),this.log("mouseUp",`(${t}, ${e})${i?` display=${i}`:""}`)}async type(t){await this.computer.type(t),this.log("type",`"${t}"`)}async key(t){await this.computer.key(t),this.log("key",t)}async scroll(t,e,i,s,o=""){await this.computer.scroll(t,e,i,s,o),this.log("scroll",`(${t}, ${e}) dx=${i} dy=${s}${o?` display=${o}`:""}`)}async cursorPosition(t=""){let 
e=await this.computer.cursorPosition(t);return this.log("cursorPosition",`(${e.x}, ${e.y})${t?` display=${t}`:""}`),e}async display(t=""){let e=await this.computer.display(t);return this.log("display",`${e.width}x${e.height}${t?` display=${t}`:""}`),e}async listDisplays(){let t=await this.computer.listDisplays(),e=t.map(i=>`${i.isPrimary?"*":""}${i.displayId} "${i.label}" ${i.width}x${i.height}`).join("; ");return this.log("listDisplays",`${t.length} displays${e?`: ${e}`:""}`),t}async listWindows(){let t=await this.computer.listWindows(),e=new Map;for(let s of t){let o=s.displayId||"(unknown)",n=e.get(o)??{visible:0,minimized:0};s.isMinimized?n.minimized+=1:n.visible+=1,e.set(o,n)}let i=[...e.entries()].sort(([s],[o])=>s.localeCompare(o)).map(([s,o])=>`${s}: ${o.visible} visible, ${o.minimized} minimized`).join("; ");return this.log("listWindows",`${t.length} windows${i?`: ${i}`:""}`),t}async windowAtPoint(t,e,i){let s=await this.computer.windowAtPoint(t,e,i);return this.log("windowAtPoint",`display=${t||"(primary)"} (${e}, ${i}) -> ${s?`${s.windowId} title="${s.title}"`:"null"}`),s}async getActiveWindow(){let t=await this.computer.getActiveWindow();return this.log("getActiveWindow",t?`${t.windowId} title="${t.title}"`:"null"),t}async activateApplication(t){let e=await this.computer.activateApplication(t);return this.log("activateApplication",`${t} -> ${e}`),e}async concealApplication(t){let e=await this.computer.concealApplication(t);return this.log("concealApplication",`${t} -> ${e}`),e}async restoreApplication(t){let e=await this.computer.restoreApplication(t);return this.log("restoreApplication",`${t} -> ${e}`),e}async activateWindow(t){let e=await this.computer.activateWindow(t);return this.log("activateWindow",`${t} -> ${e}`),e}async concealWindow(t){let e=await this.computer.concealWindow(t);return this.log("concealWindow",`${t} -> ${e}`),e}async restoreWindow(t){let e=await this.computer.restoreWindow(t);return this.log("restoreWindow",`${t} -> 
${e}`),e}async getClipboard(){let t=await this.computer.getClipboard(),e=t.slice(0,200)+(t.length>200?"\u2026":"");return this.log("getClipboard",`"${e}"`),t}async setClipboard(t){await this.computer.setClipboard(t);let e=t.slice(0,200)+(t.length>200?"\u2026":"");this.log("setClipboard",`"${e}" (${t.length} chars)`)}lock(t){return this.computer.lock(t)}unlock(){this.computer.unlock()}async prepareForInput(t,e,i){let s=await this.computer.prepareForInput(t,e,i),o=i?` @ (${i.x},${i.y})`:"";return this.log("prepareForInput",`${t} -> ${s} (${e.length} apps)${o}`),s}async screenshot(t,e,i,s,o,n,r){let c=await this.computer.screenshot(t,e,i,s,o,n,r),a=s&&o?` (${s}x${o})`:"",l=n?` crop=[${n.join(",")}]`:"",u=`display=${t||"(primary)"}`,f=r===void 0?"":` quality=${r}`,b=`${u}${a}${l}${f}`;return this.logger.logScreenshot("screenshot",b,c,r),c}};import{createRequire as xt}from"module";import{dirname as Pt,join as Wt}from"path";import{fileURLToPath as St}from"url";var z=class d{constructor(t,e){this.native=t;t.setLogger?.((i,s,o)=>{e?.log(this.nativeLogLevel(i),"Driver",s,o)})}native;static create(t){let e=Wt(Pt(St(import.meta.url)),"prebuilds",`${process.platform}-${process.arch}`,"computer.node"),i;try{i=xt(import.meta.url)(e)}catch(s){let o=s instanceof Error?s.message:String(s);throw new Error(`Native computer bindings not available for ${process.platform}-${process.arch}: ${o} (path: ${e})`,{cause:s})}return new d(i,t)}async checkPermissions(t){return this.native.checkPermissions(t)}async requestPermissions(t){this.native.requestPermissions(t)}async click(t,e,i,s,o=""){let n=this.resolveCoordinateSpace(o);this.assertPointInBounds(n,t,e);let[r,c]=this.toNative(n,t,e);this.native.click(r,c,i,s)}nativeLogLevel(t){switch(t){case 0:return"trace";case 1:return"debug";case 2:return"info";case 3:return"warn";case 4:return"error";default:return"info"}}async move(t,e,i=""){let 
s=this.resolveCoordinateSpace(i);this.assertPointInBounds(s,t,e);let[o,n]=this.toNative(s,t,e);this.native.move(o,n)}async drag(t,e,i,s,o=""){let n=this.resolveCoordinateSpace(o);this.assertPointInBounds(n,t,e),this.assertPointInBounds(n,i,s);let[r,c]=this.toNative(n,t,e),[a,l]=this.toNative(n,i,s);this.native.drag(r,c,a,l)}async mouseDown(t,e,i=""){let s=this.resolveCoordinateSpace(i);this.assertPointInBounds(s,t,e);let[o,n]=this.toNative(s,t,e);this.native.mouseDown(o,n)}async mouseUp(t,e,i=""){let s=this.resolveCoordinateSpace(i);this.assertPointInBounds(s,t,e);let[o,n]=this.toNative(s,t,e);this.native.mouseUp(o,n)}async type(t){this.native.type(t)}async key(t){this.native.key(t)}async scroll(t,e,i,s,o=""){let n=this.resolveCoordinateSpace(o);this.assertPointInBounds(n,t,e);let[r,c]=this.toNative(n,t,e);this.native.scroll(r,c,i,s)}async cursorPosition(t=""){let e=this.native.cursorPosition(),i=this.resolveCoordinateSpace(t),[s,o]=this.fromNative(i,e.x,e.y);return{x:s,y:o}}async display(t=""){let e=this.resolveCoordinateSpace(t);return{width:e.targetWidth,height:e.targetHeight}}async screenshot(t,e,i,s,o,n,r){let c=this.resolveCoordinateSpace(t,s,o),a=r??-1;if(n){this.assertRegionEdgeInBounds(c,n[0],n[1]),this.assertRegionEdgeInBounds(c,n[2],n[3]);let[l,u,f,b]=this.toNativeCrop(c,n[0],n[1],n[2],n[3]);return this.native.screenshot(t,e,i,0,0,l,u,f,b,a)}return this.native.screenshot(t,e,i,c.targetWidth,c.targetHeight,0,0,0,0,a)}async capabilities(){let t=this.native.capabilities();return{...t,canGetActiveWindow:typeof this.native.getActiveWindow=="function",canLocateWindowAtPoint:typeof this.native.windowAtPoint=="function"&&!!t.canLocateWindowAtPoint}}async listDisplays(){return this.native.listDisplays().map(e=>{let{targetWidth:i,targetHeight:s}=this.targetSizeForDisplay(e.bounds.width,e.bounds.height);return{displayId:e.displayId,label:e.label,width:i,height:s,isPrimary:e.isPrimary}})}async listWindows(){return this.native.listWindows()}async 
windowAtPoint(t,e,i){if(!this.native.windowAtPoint)return null;let s=this.resolveCoordinateSpace(t);this.assertPointInBounds(s,e,i);let[o,n]=this.toNative(s,e,i);return this.native.windowAtPoint(t,o,n)}async getActiveWindow(){return this.native.getActiveWindow?this.native.getActiveWindow():null}async activateApplication(t){return this.native.activateApplication(t)}async concealApplication(t){return this.native.concealApplication(t)}async restoreApplication(t){return this.native.restoreApplication(t)}async activateWindow(t){return this.native.activateWindow(t)}async concealWindow(t){return this.native.concealWindow(t)}async restoreWindow(t){return this.native.restoreWindow(t)}async getClipboard(){return this.native.getClipboard()}async setClipboard(t){this.native.setClipboard(t)}lock(t){return this.native.lock?.(t)??!1}unlock(){this.native.unlock?.()}async prepareForInput(t,e,i){let s=Number.NaN,o=Number.NaN;if(i){let n=this.resolveCoordinateSpace(t);this.assertPointInBounds(n,i.x,i.y),[s,o]=this.toNative(n,i.x,i.y)}return this.native.prepareForInput(t,e,i?.blockedWindowIds??[],s,o)}resolveDisplay(t){let e=this.native.listDisplays();return t!==""?e.find(i=>i.displayId===t):e.find(i=>i.isPrimary)??e[0]}targetSizeForDisplay(t,e,i=0,s=0){if(i>0&&s>0)return{targetWidth:i,targetHeight:s};let r=Math.min(1,1568/Math.max(t,e),Math.sqrt(115e4/(t*e)));return{targetWidth:Math.floor(t*r),targetHeight:Math.floor(e*r)}}resolveCoordinateSpace(t,e=0,i=0){let s=this.resolveDisplay(t),o=s?.bounds.x??0,n=s?.bounds.y??0,r=s?void 
0:this.native.display(),c=s?.bounds.width??r?.width??1,a=s?.bounds.height??r?.height??1,{targetWidth:l,targetHeight:u}=this.targetSizeForDisplay(c,a,e,i);return{originX:o,originY:n,displayWidth:c,displayHeight:a,scaleX:l/c,scaleY:u/a,targetWidth:l,targetHeight:u}}toNative(t,e,i){return[t.originX+e/t.scaleX,t.originY+i/t.scaleY]}toNativeCrop(t,e,i,s,o){return[Math.floor(e/t.scaleX),Math.floor(i/t.scaleY),Math.ceil(s/t.scaleX),Math.ceil(o/t.scaleY)]}assertPointInBounds(t,e,i){if(!Number.isFinite(e)||!Number.isFinite(i))throw new RangeError(`Coordinates must be finite numbers: x=${e}, y=${i}`);if(e<0||i<0||e>=t.targetWidth||i>=t.targetHeight)throw new RangeError(`Coordinates out of bounds for target display: x=${e}, y=${i}, width=${t.targetWidth}, height=${t.targetHeight}`)}assertRegionEdgeInBounds(t,e,i){if(!Number.isFinite(e)||!Number.isFinite(i))throw new RangeError(`Coordinates must be finite numbers: x=${e}, y=${i}`);if(e<0||i<0||e>t.targetWidth||i>t.targetHeight)throw new RangeError(`Crop coordinates out of bounds for target display: x=${e}, y=${i}, width=${t.targetWidth}, height=${t.targetHeight}`)}fromNative(t,e,i){return[Math.round((e-t.originX)*t.scaleX),Math.round((i-t.originY)*t.scaleY)]}};var _=.8,_t=250,W={readOnlyHint:!0,destructiveHint:!1,openWorldHint:!1},I={readOnlyHint:!1,destructiveHint:!0,openWorldHint:!0},Z={readOnlyHint:!1,destructiveHint:!1,openWorldHint:!1},w=" Requires `request_access`.",rt=" If the target app is missing, call `list_applications` then `request_access`.",T=p.coerce.number().transform(Math.round),q=p.object({x:T.describe("Horizontal pixel coordinate."),y:T.describe("Vertical pixel coordinate.")}),Tt=p.object({x1:T.describe("Left edge of the region in screenshot pixel coordinates."),y1:T.describe("Top edge of the region in screenshot pixel coordinates."),x2:T.describe("Right edge of the region in screenshot pixel coordinates."),y2:T.describe("Bottom edge of the region in screenshot pixel 
coordinates.")}).refine(({x1:d,x2:t})=>t>d,{message:"x2 must be greater than x1.",path:["x2"]}).refine(({y1:d,y2:t})=>t>d,{message:"y2 must be greater than y1.",path:["y2"]}),Nt=p.string().min(1).describe("Application name or stable app id from `list_applications`. Examples: 'Google Chrome', 'Microsoft Outlook', 'app.windows.abc123'."),Dt=p.object({display_id:p.string().optional().describe("Display id to select. Omit or pass an empty string to use the default display.")}),Mt=p.object({display_id:p.string().optional().describe("Optional display id to filter by. When provided, only apps with at least one non-minimized window on that display are returned.")}),ct=q.optional().describe("{x, y} pixel coordinate. Clicks at current cursor position if omitted."),lt=p.enum(["left","right","middle"]).optional().describe("Mouse button to click (default left)."),pt=p.coerce.number().int().min(1).max(3).optional().describe("Number of clicks: 1 single, 2 double, 3 triple (default 1)."),dt=q.describe("{x, y} pixel coordinate to move the cursor to."),ut=q.describe("{x, y} pixel coordinate to start the drag from."),ht=q.describe("{x, y} pixel coordinate to drag to."),mt=p.string().describe("The text to type."),gt=p.string().describe("Key combo to press (e.g. 'Return', 'ctrl+s', 'alt+Tab')."),wt=p.enum(["up","down","left","right"]).describe("Direction to scroll."),yt=q.optional().describe("{x, y} pixel coordinate to scroll at. Scrolls at current cursor position if omitted."),ft=p.number().int().nonnegative().max(100).optional().describe("Number of scroll clicks (default 3)."),Rt=p.number().nonnegative().max(100).describe("Seconds to wait (max 100). 
Use after a screenshot shows the UI is not in the expected state yet, but an operation may still finish."),bt=p.string().describe("The text to copy to the clipboard."),qt=p.object({action:p.literal("click"),coordinate:ct,button:lt,count:pt}),Lt=p.object({action:p.literal("mouse_move"),coordinate:dt}),Et=p.object({action:p.literal("left_click_drag"),start_coordinate:ut,coordinate:ht}),jt=p.object({action:p.literal("left_mouse_down")}),Bt=p.object({action:p.literal("left_mouse_up")}),Ht=p.object({action:p.literal("type"),text:mt}),Ut=p.object({action:p.literal("key"),text:gt}),Ft=p.object({action:p.literal("scroll"),scroll_direction:wt,coordinate:yt,scroll_amount:ft}),zt=p.object({action:p.literal("set_clipboard"),text:bt}),Ot=p.discriminatedUnion("action",[qt,Lt,Et,jt,Bt,Ht,Ut,Ft,zt]),Yt=p.object({actions:p.array(Ot).min(1).describe("Ordered actions. Do not include waits; call the separate `wait` tool only after observing a screenshot that is not ready yet.")}),O=class d{constructor(t={}){this.options=t;process.env.DEBUG&&(this.logger=new U)}options;_computer;_caps;_locked=null;_safetyTimer;static SAFETY_TIMEOUT_MS=180*1e3;server;logger;accessStore=new j;appNameResolver=new B;lastUnsharedWindowsNote=null;selectedDisplay="";toText(t,e){let i=[{type:"text",text:t}];return e?{content:i,isError:!0}:{content:i}}toJson(t){return this.toText(JSON.stringify(t,null,2))}toImage(t,e,i){return t?{content:[{type:"image",data:t.toString("base64"),mimeType:this.imageMimeType(i),_meta:{screenshot:!0}}]}:this.toText(e,!0)}buildAccessMessage(t,e){let i=t.length===0?["","Computer Use runs on your actual desktop and can send mouse and keyboard input.","","Computer Use wants to control your desktop for this session.","","Apps that are not allowed may be hidden."]:["","Computer Use runs on your actual desktop and can send mouse and keyboard input to the apps you share.","","Computer Use wants to control these apps:","",...t.map(s=>`- ${s}`),"","Apps that are not allowed may be 
hidden."];return e&&i.push("","Reason:",e),i.join(`
|
|
33
|
-
`)}imageMimeType(t){return t!==void 0?"image/jpeg":"image/png"}collectDisplayIds(t,e){let i=new Set;for(let
|
|
34
|
-
`),requestedSchema:{type:"object",properties:{action:{type:"string",title:`Grant ${e.join(" and ")} permission${e.length>1?"s":""}`,oneOf:[{const:"open",title:"1. Open System Settings"},{const:"done",title:"2. I've granted the permissions"},{const:"skip",title:"3. Skip (things may not work)"}]}},required:["action"]}}),c=r.action==="accept"?r.content?.action:void 0;if(c==="open"){i||await t.requestPermissions("accessibility"),s||await t.requestPermissions("screen"),await this.showHostWindow(t);let a=await this.server.server.elicitInput({mode:"form",message:["","System Settings has been opened.","",`Enable ${e.join(" and ")} for your terminal app.`,e.length>1?"Both settings pages have been opened \u2014 check each one.":"","",`After granting permissions, click "I've granted the permissions" below.`,"If the toggle was already on, try removing and re-adding your terminal app."].filter(Boolean).join(`
|
|
35
|
-
`),requestedSchema:{type:"object",properties:{action:{type:"string",title:"Confirm permissions",oneOf:[{const:"done",title:"1. I've granted the permissions"},{const:"skip",title:"2. Skip (things may not work)"}]}},required:["action"]}});if((a.action==="accept"?a.content?.action:void 0)!=="done")break}else if(c!=="done")break;if(i=await t.checkPermissions("accessibility"),s=await t.checkPermissions("screen"),!i||!s){let a=[];throw i||a.push("Accessibility"),s||a.push("Screen Recording"),new Error(`${a.join(" and ")} permission${a.length>1?"s are":" is"} still not detected. This usually means you need to restart your terminal after granting the permission. Please quit and reopen your terminal, then try again.`)}}break}}}async activeDisplayId(t){return this.selectedDisplay!==""?this.selectedDisplay:(await t.listDisplays()).find(i=>i.isPrimary)?.displayId??""}async requireAccess(){let t=this.accessStore.getState();if(!t.accessActive)throw new Error("No access session is active. Call request_access first to start a session.");let e=await this.computer();if(t.allowAll&&(await this.capabilities()).canListWindows){let{windows:s}=await this.filterHostWindow(e),o=C(s);this.accessStore.rememberApplications(o),this.appNameResolver.updateApplications(o),t=this.accessStore.getState()}return{computer:e,allowedAppIds:t.allowedAppIds,hostWindowId:t.hostWindowId}}async hostWindowId(t){let e=this.accessStore.getState().hostWindowId;if(e)return e;if(!(await this.capabilities()).canListWindows)return null;let s=await t.getActiveWindow();return s&&(await t.listWindows()).some(n=>n.windowId===s.windowId)?s.windowId:null}async showHostWindow(t){let e=await this.hostWindowId(t);if(e)try{await t.activateWindow(e)}catch{}}async filterHostWindow(t,e){let i=e??await t.listWindows(),s=await this.hostWindowId(t);return{windows:s?i.filter(o=>o.windowId!==s):i,hostWindowId:s}}async prepareForInput(t,e,i){let s=await this.capabilities();if(!s.canListWindows)return;let o=await 
this.activeDisplayId(t),n=i&&s.canHitTest;if(e.length===0&&!n)return;if(!await t.prepareForInput(o,e,n?i:void 0)){if(n)throw new Error("Input blocked: a disallowed app at the target could not be hidden. Call `list_applications` to see what's there, then `request_access` to allow it.");if(s.canGetActiveWindow){let c=await t.getActiveWindow();if(c&&!e.includes(c.applicationId))throw new Error(this.keyboardInputBlockedMessage(c))}throw new Error("Keyboard input blocked: an allowed application could not be focused. Click an allowed application first, or call `request_access` to allow it.")}}keyboardInputBlockedMessage(t){return`Keyboard input blocked: the focused application ("${t.applicationName}") is not allowed. Click an allowed application first, or call \`request_access\` to allow it.`}async validateActiveWindow(t,e){if(!(await this.capabilities()).canGetActiveWindow)return;let s=await t.getActiveWindow();if(s&&!e.includes(s.applicationId))throw new Error(this.keyboardInputBlockedMessage(s))}pointerInputBlockedMessage(t){return`Pointer input blocked: the focused application ("${t.applicationName}") is not allowed. Focus an allowed application first, or call \`request_access\` to allow it.`}async validatePointerInput(t,e,i){if(e.length===0)return;let s=await this.capabilities();if(s.canGetActiveWindow){let c=await t.getActiveWindow();if(c&&!e.includes(c.applicationId))throw new Error(this.pointerInputBlockedMessage(c))}if(!s.canLocateWindowAtPoint)return;let o=await this.activeDisplayId(t),n=await t.windowAtPoint(o,i.x,i.y);if(!n||e.includes(n.applicationId))return;let r=n.applicationName||n.title||n.applicationId;throw new Error(`Pointer input blocked: the target location is covered by disallowed application ("${r}"). 
Call \`list_applications\` to see what's there, then \`request_access\` to allow it.`)}async list_displays(){let t=await this.computer(),e=await t.listDisplays(),i=await this.activeDisplayId(t);return this.toJson({displays:e,selectedDisplay:i})}async select_display({display_id:t}){let e=t??"",i=await this.computer(),s=await i.listDisplays();if(e!==""&&!s.some(r=>r.displayId===e))throw new Error(`Unknown display id '${e}'. Call list_displays first.`);this.selectedDisplay=e;let o=await this.activeDisplayId(i),n=s.find(r=>r.displayId===o)??null;return this.toJson({selectedDisplay:o,status:"selected",message:e===""?"Using the default display for screenshots, zoom, cursor position, and coordinate-based actions.":`Selected display '${n?.label??e}' for screenshots, zoom, cursor position, and coordinate-based actions.`})}async list_applications({display_id:t}={}){let e=await this.computer(),{windows:i}=await this.filterHostWindow(e),s=C(i);this.accessStore.rememberApplications(s),this.appNameResolver.updateApplications(s);let o=t??"";if(o!==""&&!(await e.listDisplays()).some(l=>l.displayId===o))throw new Error(`Unknown display id '${o}'. 
Call list_displays first.`);let n=o===""?i:i.filter(a=>!a.isMinimized&&a.displayId===o),r=C(n),c=await this.activeDisplayId(e);return this.toJson({selectedDisplay:c,applications:r})}async request_access({apps:t,reason:e},i){this.logEvent("request_access",`Start: apps=[${t.join(", ")}]${e?` reason="${e}"`:""}`),await this.requestPermission();let s={const:"allow",title:"Allow"},o={const:"allow_all",title:"Allow all apps (don't ask again)"},n={const:"deny",title:"Deny (Esc)"},r,c=async(h,g,N)=>{this.logEvent("request_access",`End: allowed=${h} allowAll=${g.allowAll} selectedDisplay=${this.selectedDisplay||"(default)"} hostWindowId=${g.hostWindowId??"null"} message="${N}"`);let $=r??(g.allowAll?void 0:g.allowedAppIds.map(y=>{let x=this.accessStore.tryGetKnownApplication(y);return{appId:y,name:x?.displayName,displayIds:this.collectDisplayIds(x?.windows??[])}})),D=this.toJson({allowed:h,allowAll:g.allowAll,...$?{allowedApps:$}:{},selectedDisplay:this.selectedDisplay||void 0,message:N});if(!h)return D;let S=await this.computer();this.lock();let K=g.hostWindowId?[g.hostWindowId]:[];return this.addScreenshot(D,S,g.allowedAppIds,K)};if(this.options.yolo){let h=this.accessStore.allowApplications([],!0);return await c(!0,h,"YOLO mode is enabled. Auto-allowing full desktop access for this session.")}let a=await this.capabilities();if(!this.server.server.getClientCapabilities()?.elicitation?.form){let h=this.accessStore.allowApplications([],!0);return await c(!0,h,"Elicitation is not supported by this client. 
Auto-allowing full desktop access for this session.")}let u,f,b,Y,G;if(a.canListWindows){let h=await this.computer(),g=this.accessStore.getState().hostWindowId,N=await h.listWindows(),$=g?N.filter(m=>m.windowId!==g):N;G=$;let D=C($);this.accessStore.rememberApplications(D),this.appNameResolver.updateApplications(D);let S=t&&t.length>0?"":await this.activeDisplayId(h);S&&(this.selectedDisplay=S);let K=new Set(g?[g]:[]),y;if(t&&t.length>0){let m=new Map;for(let v of t){let R=this.appNameResolver.resolve(v);if(R.length===0)throw new Error(`No matching application found for '${v}'. Call list_applications to all running apps and their IDs.`);for(let P of R)if(!m.has(P.appId)){let E=this.accessStore.tryGetKnownApplication(P.appId);m.set(P.appId,E??{id:P.appId,displayName:P.displayName,windows:[]})}}y=[...m.values()];let It=new Set(m.keys()),L=new Map;for(let v of $)It.has(v.applicationId)&&!v.isMinimized&&v.displayId&&L.set(v.displayId,(L.get(v.displayId)??0)+1);if(L.size>0){let v="",R=0;for(let[P,E]of L)E>R&&(v=P,R=E);this.selectedDisplay=v}r=this.buildAllowedAppsInfo(y,$)}else y=C($.filter(m=>!m.isMinimized&&(S===""||m.displayId===S)&&!K.has(m.windowId)));let x=y.map(m=>m.id),J=this.accessStore.getState(),M=y.length;f=["allow","allow_all"],b=x.length===0?J.allowAll||J.accessActive:this.accessStore.areAllowedForAccess(x),Y=x.length===0?J.allowAll?"Access is already configured to auto-allow future requests for this session, so you do not need to call request_access again.":"Desktop access is already active for this session.":M===1?`'${y[0].displayName}' is already shared for this session.`:"The requested apps are already shared for this session.",u={choices:[s,o,n],message:this.buildAccessMessage(y.map(m=>m.displayName),e),denyMessage:M===0?"The user declined desktop access.":M===1?`The user declined to share '${y[0].displayName}'.`:"The user declined to share the requested 
apps.",allow:m=>this.accessStore.allowApplications(x,m==="allow_all"),allowedMessage:m=>m==="allow_all"?"All current and future apps are allowed for the rest of this session, so you do not need to call request_access again.":M===0?"Desktop access is active for this session.":M===1?`Access session started for '${y[0].displayName}'.`:"Access session started for the requested apps."}}else{let h=this.accessStore.getState();f=["allow_all"],b=h.allowAll||h.accessActive,Y=h.allowAll?"Access is already configured to auto-allow future requests for this session, so you do not need to call request_access again.":"Desktop access is already active for this session.",u={choices:[o,n],message:this.buildAccessMessage([],e),denyMessage:"The user declined desktop access.",allow:()=>this.accessStore.allowApplications([],!0),allowedMessage:()=>"Access session started for the desktop. All current and future apps are allowed for the rest of this session, so you do not need to call request_access again."}}if(b){let h=this.accessStore.getState();return await c(!0,h,Y)}this.accessStore.getState().hostWindowId&&await this.showHostWindow(await this.computer());let X=await this.server.server.request({method:"elicitation/create",params:{mode:"form",message:u.message,requestedSchema:{type:"object",properties:{decision:{type:"string",title:"Allow access for this session?",oneOf:u.choices}},required:["decision"]}}},Ct,{signal:i,timeout:300*1e3}),At=await this.computer(),tt=await this.hostWindowId(At);tt&&this.accessStore.setHostWindowId(tt);let k=X.action==="accept"?X.content?.decision:void 0;if(X.action!=="accept"||!k||typeof k!="string"||!f.includes(k)){let h=this.accessStore.getState(),g=typeof k=="string"&&k!==n.const?`The user did not approve this access request. 
User response: ${k}`:u.denyMessage;return await c(!1,h,g)}let et=u.allow(k);return et.allowAll&&G&&(r=this.buildAllowedAppsInfo(this.accessStore.allKnownApplications(),G)),await c(!0,et,u.allowedMessage(k))}async screenshot(){let{computer:t,allowedAppIds:e,hostWindowId:i}=await this.requireAccess();this.lock();let s=i?[i]:[];return this.captureScreenshot(t,e,s)}logScreenshotFilter(t,e){let i=r=>{let c=this.accessStore.tryGetKnownApplication(r);return c?`${c.displayName} (${r})`:r},s=r=>{let c=r.includes("|")?r.split("|")[1]:r;return i(c)},o=(r,c)=>c.length===1?`${r}: ${c[0]}`:`${r}:
|
|
40
|
+
`),i=[],n=[],s=()=>{n.length>0&&(i.push(`<ul style="margin:0;padding-left:1.2em">${n.map(o=>`<li>${this.escapeHtml(o)}</li>`).join("")}</ul>`),n=[])};for(let o of e)o.startsWith("- ")?n.push(o.slice(2)):(s(),o.length>0&&(i.length>0&&i.push("<br>"),i.push(this.escapeHtml(o))));return s(),i.join("")}log(t,e,i,n){this.insert(this.rowHtml(i,this.renderMarkdown(n),t,e))}logScreenshot(t,e,i,n){if(i){let s=n===void 0?"png":"jpg",o=`${this.timestamp()}.${s}`;I.mkdir(this.logDir,{recursive:!0}).then(()=>I.writeFile(X.join(this.logDir,o),i)),this.insert(this.rowHtml(t,`${this.escapeHtml(e)}<br><img src="${o}">`,"info","Computer"))}else this.insert(this.rowHtml(t,this.escapeHtml(e),"error","Computer"))}};var Z=class{computer;logger;constructor(t,e){this.computer=t,this.logger=e}log(t,e){this.logger.log("info","Computer",t,e)}async checkPermissions(t){return this.computer.checkPermissions(t)}async requestPermissions(t){return this.computer.requestPermissions(t)}async capabilities(){let t=await this.computer.capabilities();return this.log("capabilities",JSON.stringify(t)),t}async click(t,e,i,n,s=""){await this.computer.click(t,e,i,n,s),this.log("click",`(${t}, ${e}) button=${i} count=${n}${s?` display=${s}`:""}`)}async move(t,e,i=""){await this.computer.move(t,e,i),this.log("move",`(${t}, ${e})${i?` display=${i}`:""}`)}async drag(t,e,i,n,s=""){await this.computer.drag(t,e,i,n,s),this.log("drag",`(${t}, ${e}) \u2192 (${i}, ${n})${s?` display=${s}`:""}`)}async mouseDown(t,e,i=""){await this.computer.mouseDown(t,e,i),this.log("mouseDown",`(${t}, ${e})${i?` display=${i}`:""}`)}async mouseUp(t,e,i=""){await this.computer.mouseUp(t,e,i),this.log("mouseUp",`(${t}, ${e})${i?` display=${i}`:""}`)}async type(t){await this.computer.type(t),this.log("type",`"${t}"`)}async key(t){await this.computer.key(t),this.log("key",t)}async scroll(t,e,i,n,s=""){await this.computer.scroll(t,e,i,n,s),this.log("scroll",`(${t}, ${e}) dx=${i} dy=${n}${s?` display=${s}`:""}`)}async cursorPosition(t=""){let 
/**
 * Decorator around a computer implementation that forwards every call unchanged and
 * records one "info"/"Computer" log entry per call (after the call has completed).
 * Screenshots are logged through `logger.logScreenshot` instead of `logger.log`.
 */
var Z = class {
  computer;
  logger;

  /**
   * @param computer  Wrapped computer implementation.
   * @param logger    Object exposing `log(level, source, action, details)` and
   *                  `logScreenshot(action, details, image, quality)`.
   */
  constructor(computer, logger) {
    this.computer = computer;
    this.logger = logger;
  }

  /** Writes one info-level entry attributed to "Computer". */
  log(action, details) {
    this.logger.log("info", "Computer", action, details);
  }

  async checkPermissions(kind) {
    return this.computer.checkPermissions(kind);
  }

  async requestPermissions(kind) {
    return this.computer.requestPermissions(kind);
  }

  async capabilities() {
    const caps = await this.computer.capabilities();
    this.log("capabilities", JSON.stringify(caps));
    return caps;
  }

  async click(x, y, button, count, displayId = "") {
    await this.computer.click(x, y, button, count, displayId);
    const suffix = displayId ? ` display=${displayId}` : "";
    this.log("click", `(${x}, ${y}) button=${button} count=${count}${suffix}`);
  }

  async move(x, y, displayId = "") {
    await this.computer.move(x, y, displayId);
    const suffix = displayId ? ` display=${displayId}` : "";
    this.log("move", `(${x}, ${y})${suffix}`);
  }

  async drag(fromX, fromY, toX, toY, displayId = "") {
    await this.computer.drag(fromX, fromY, toX, toY, displayId);
    const suffix = displayId ? ` display=${displayId}` : "";
    this.log("drag", `(${fromX}, ${fromY}) \u2192 (${toX}, ${toY})${suffix}`);
  }

  async mouseDown(x, y, displayId = "") {
    await this.computer.mouseDown(x, y, displayId);
    const suffix = displayId ? ` display=${displayId}` : "";
    this.log("mouseDown", `(${x}, ${y})${suffix}`);
  }

  async mouseUp(x, y, displayId = "") {
    await this.computer.mouseUp(x, y, displayId);
    const suffix = displayId ? ` display=${displayId}` : "";
    this.log("mouseUp", `(${x}, ${y})${suffix}`);
  }

  async type(text) {
    await this.computer.type(text);
    this.log("type", `"${text}"`);
  }

  async key(combo) {
    await this.computer.key(combo);
    this.log("key", combo);
  }

  async scroll(x, y, dx, dy, displayId = "") {
    await this.computer.scroll(x, y, dx, dy, displayId);
    const suffix = displayId ? ` display=${displayId}` : "";
    this.log("scroll", `(${x}, ${y}) dx=${dx} dy=${dy}${suffix}`);
  }

  async cursorPosition(displayId = "") {
    const position = await this.computer.cursorPosition(displayId);
    const suffix = displayId ? ` display=${displayId}` : "";
    this.log("cursorPosition", `(${position.x}, ${position.y})${suffix}`);
    return position;
  }

  async display(displayId = "") {
    const size = await this.computer.display(displayId);
    const suffix = displayId ? ` display=${displayId}` : "";
    this.log("display", `${size.width}x${size.height}${suffix}`);
    return size;
  }

  async listDisplays() {
    const displays = await this.computer.listDisplays();
    const summary = displays
      .map((d) => `${d.isPrimary ? "*" : ""}${d.displayId} "${d.label}" ${d.width}x${d.height}`)
      .join("; ");
    this.log("listDisplays", `${displays.length} displays${summary ? `: ${summary}` : ""}`);
    return displays;
  }

  /** Logs per-display counts of visible and minimized windows, sorted by display id. */
  async listWindows() {
    const windows = await this.computer.listWindows();

    const countsByDisplay = new Map();
    for (const win of windows) {
      const displayKey = win.displayId || "(unknown)";
      const counts = countsByDisplay.get(displayKey) ?? { visible: 0, minimized: 0 };
      if (win.isMinimized) {
        counts.minimized += 1;
      } else {
        counts.visible += 1;
      }
      countsByDisplay.set(displayKey, counts);
    }

    const sortedEntries = [...countsByDisplay.entries()].sort(([left], [right]) => left.localeCompare(right));
    const summary = sortedEntries
      .map(([displayKey, counts]) => `${displayKey}: ${counts.visible} visible, ${counts.minimized} minimized`)
      .join("; ");
    this.log("listWindows", `${windows.length} windows${summary ? `: ${summary}` : ""}`);
    return windows;
  }

  async windowAtPoint(displayId, x, y) {
    const hit = await this.computer.windowAtPoint(displayId, x, y);
    const described = hit ? `${hit.windowId} title="${hit.title}"` : "null";
    this.log("windowAtPoint", `display=${displayId || "(primary)"} (${x}, ${y}) -> ${described}`);
    return hit;
  }

  async getActiveWindow() {
    const active = await this.computer.getActiveWindow();
    this.log("getActiveWindow", active ? `${active.windowId} title="${active.title}"` : "null");
    return active;
  }

  async activateApplication(appId) {
    const outcome = await this.computer.activateApplication(appId);
    this.log("activateApplication", `${appId} -> ${outcome}`);
    return outcome;
  }

  async concealApplication(appId) {
    const outcome = await this.computer.concealApplication(appId);
    this.log("concealApplication", `${appId} -> ${outcome}`);
    return outcome;
  }

  async restoreApplication(appId) {
    const outcome = await this.computer.restoreApplication(appId);
    this.log("restoreApplication", `${appId} -> ${outcome}`);
    return outcome;
  }

  async activateWindow(windowId) {
    const outcome = await this.computer.activateWindow(windowId);
    this.log("activateWindow", `${windowId} -> ${outcome}`);
    return outcome;
  }

  async concealWindow(windowId) {
    const outcome = await this.computer.concealWindow(windowId);
    this.log("concealWindow", `${windowId} -> ${outcome}`);
    return outcome;
  }

  async restoreWindow(windowId) {
    const outcome = await this.computer.restoreWindow(windowId);
    this.log("restoreWindow", `${windowId} -> ${outcome}`);
    return outcome;
  }

  /** Returns the full clipboard text; only the first 200 characters are logged. */
  async getClipboard() {
    const text = await this.computer.getClipboard();
    const preview = text.slice(0, 200) + (text.length > 200 ? "\u2026" : "");
    this.log("getClipboard", `"${preview}"`);
    return text;
  }

  /** Sets the clipboard; logs a 200-character preview plus the total length. */
  async setClipboard(text) {
    await this.computer.setClipboard(text);
    const preview = text.slice(0, 200) + (text.length > 200 ? "\u2026" : "");
    this.log("setClipboard", `"${preview}" (${text.length} chars)`);
  }

  // The window-scoped calls below forward all positional arguments untouched; only
  // the arguments that appear in the log line have descriptive names.

  async getWindowState(app, a2, a3, a4, a5, a6, diffSince) {
    const state = await this.computer.getWindowState(app, a2, a3, a4, a5, a6, diffSince);
    this.log(
      "getWindowState",
      `app=${app} windowId=${state.windowId ?? "(default)"} stateToken=${state.stateToken ?? "(none)"} diffSince=${diffSince ?? "(none)"}`,
    );
    return state;
  }

  async windowClick(app, a2, a3, a4, a5, a6, a7, a8, a9) {
    const result = await this.computer.windowClick(app, a2, a3, a4, a5, a6, a7, a8, a9);
    this.log("windowClick", `app=${app} ok=${result.ok}`);
    return result;
  }

  async invokeAction(app, element, action, a4, a5, a6) {
    const result = await this.computer.invokeAction(app, element, action, a4, a5, a6);
    this.log("invokeAction", `app=${app} element=${element} action=${action} ok=${result.ok}`);
    return result;
  }

  async windowScroll(app, element, direction, a4, a5, a6, a7) {
    const result = await this.computer.windowScroll(app, element, direction, a4, a5, a6, a7);
    this.log("windowScroll", `app=${app} element=${element} direction=${direction} ok=${result.ok}`);
    return result;
  }

  async windowDrag(app, fromX, fromY, toX, toY, a6, a7, a8) {
    const result = await this.computer.windowDrag(app, fromX, fromY, toX, toY, a6, a7, a8);
    this.log("windowDrag", `app=${app} (${fromX},${fromY}) -> (${toX},${toY}) ok=${result.ok}`);
    return result;
  }

  async typeText(app, text, a3, a4, a5, a6) {
    const result = await this.computer.typeText(app, text, a3, a4, a5, a6);
    this.log("typeText", `app=${app} chars=${text.length} ok=${result.ok}`);
    return result;
  }

  async pressKey(app, key, a3, a4, a5) {
    const result = await this.computer.pressKey(app, key, a3, a4, a5);
    this.log("pressKey", `app=${app} key=${key} ok=${result.ok}`);
    return result;
  }

  async setValue(app, element, value, a4, a5, a6) {
    const result = await this.computer.setValue(app, element, value, a4, a5, a6);
    this.log("setValue", `app=${app} element=${element} chars=${value.length} ok=${result.ok}`);
    return result;
  }

  lock(arg) {
    return this.computer.lock(arg);
  }

  unlock() {
    this.computer.unlock();
  }

  hideAppCursorOverlay() {
    this.computer.hideAppCursorOverlay();
  }

  /** Logs the app count ("all" when `allowedAppIds` is null) and the optional target point. */
  async prepareForInput(displayId, allowedAppIds, target) {
    const prepared = await this.computer.prepareForInput(displayId, allowedAppIds, target);
    const targetSuffix = target ? ` @ (${target.x},${target.y})` : "";
    const appCount = allowedAppIds === null ? "all" : allowedAppIds.length.toString();
    this.log("prepareForInput", `${displayId} -> ${prepared} (${appCount} apps)${targetSuffix}`);
    return prepared;
  }

  /**
   * Captures a screenshot and hands the image to `logger.logScreenshot` together with
   * a description of the display, target size, crop and quality that were requested.
   * `a2` and `a3` are forwarded untouched and are not logged.
   */
  async screenshot(displayId, a2, a3, width, height, crop, quality) {
    const image = await this.computer.screenshot(displayId, a2, a3, width, height, crop, quality);
    const sizePart = width && height ? ` (${width}x${height})` : "";
    const cropPart = crop ? ` crop=[${crop.join(",")}]` : "";
    const displayPart = `display=${displayId || "(primary)"}`;
    const qualityPart = quality === undefined ? "" : ` quality=${quality}`;
    const details = `${displayPart}${sizePart}${cropPart}${qualityPart}`;
    this.logger.logScreenshot("screenshot", details, image, quality);
    return image;
  }
};
/** Longest string (in UTF-16 code units) that `lt` keeps before truncating. */
var ct = 1e3;

/** True for any non-null value of type "object" that is not an array. */
function q(value) {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** Returns `source[key]` when `source` is an object and the value is a string. */
function N(source, key) {
  return q(source) && typeof source[key] === "string" ? source[key] : undefined;
}

/** Returns `source[key]` when `source` is an object and the value is a boolean. */
function Qt(source, key) {
  return q(source) && typeof source[key] === "boolean" ? source[key] : undefined;
}

/**
 * Walks `path` through nested objects and returns the final value if it is a string.
 * Returns undefined as soon as a step lands on a non-object or the leaf is not a string.
 */
function Q(source, path) {
  let cursor = source;
  for (const key of path) {
    if (!q(cursor)) {
      return undefined;
    }
    cursor = cursor[key];
  }
  return typeof cursor === "string" ? cursor : undefined;
}

/** Like `Q`, but the leaf must be an array; any miss yields an empty array. */
function Zt(source, path) {
  let cursor = source;
  for (const key of path) {
    if (!q(cursor)) {
      return [];
    }
    cursor = cursor[key];
  }
  return Array.isArray(cursor) ? cursor : [];
}

/**
 * Produces a copy of `value` suitable for debug output: strings longer than `ct`
 * are truncated with a marker, arrays and objects are processed recursively, and
 * every `imageBase64` property is replaced by undefined.
 */
function lt(value) {
  if (typeof value === "string") {
    return value.length > ct
      ? `${value.slice(0, ct)}...[truncated ${value.length - ct} chars]`
      : value;
  }
  if (Array.isArray(value)) {
    return value.map((item) => lt(item));
  }
  if (!q(value)) {
    return value;
  }
  const sanitized = {};
  for (const [key, entry] of Object.entries(value)) {
    sanitized[key] = key === "imageBase64" ? undefined : lt(entry);
  }
  return sanitized;
}

/**
 * Extracts `screenshot.image.{imageBase64,mimeType}` from a native payload.
 * Any mime type other than "image/jpeg" is reported as "image/png".
 * @returns {{data: string, mimeType: string} | undefined} undefined when no image data exists.
 */
function Gt(payload) {
  const data = Q(payload, ["screenshot", "image", "imageBase64"]);
  if (!data) {
    return undefined;
  }
  const declaredType = Q(payload, ["screenshot", "image", "mimeType"]);
  return { data, mimeType: declaredType === "image/jpeg" ? "image/jpeg" : "image/png" };
}

/**
 * Parses the JSON string returned by a native call and requires an object at the top level.
 *
 * Malformed JSON used to escape as a bare SyntaxError that did not say which native
 * call produced it; it is now reported with the same message as a non-object payload,
 * keeping the parser error as `cause`.
 *
 * @param {string} payload    Raw JSON text from the native binding.
 * @param {string} operation  Native method name, used only in the error message.
 * @returns {object} The parsed object.
 * @throws {Error} when the text is not valid JSON or does not decode to a plain object.
 */
function S(payload, operation) {
  let parsed;
  try {
    parsed = JSON.parse(payload);
  } catch (err) {
    throw new Error(`Native ${operation} returned an invalid JSON payload.`, { cause: err });
  }
  if (!q(parsed)) {
    throw new Error(`Native ${operation} returned an invalid JSON payload.`);
  }
  return parsed;
}

/**
 * Shapes a parsed `getWindowState` payload into the window-state result object.
 * @param {string} app      Application identifier echoed back in the result.
 * @param {object} payload  Parsed native payload (see `S`).
 */
function Xt(app, payload) {
  const image = Gt(payload);
  return {
    app,
    windowId: Q(payload, ["window", "windowID"]),
    stateToken: Q(payload, ["stateToken"]),
    text: N(payload, "text"),
    json: lt(payload),
    image,
    diagnostics: payload.diagnostics,
  };
}

/**
 * Shapes a parsed native action payload into an action result. `ok` defaults to false,
 * `stateToken` prefers `preStateToken`, `summary` falls back to `classification`, and
 * non-string warnings are dropped. The raw payload is kept as `diagnostics`.
 */
function R(payload) {
  return {
    ok: Qt(payload, "ok") ?? false,
    stateToken: N(payload, "preStateToken") ?? N(payload, "stateToken"),
    postStateToken: N(payload, "postStateToken"),
    classification: N(payload, "classification"),
    summary: N(payload, "summary") ?? N(payload, "classification"),
    warnings: Zt(payload, ["warnings"]).filter((warning) => typeof warning === "string"),
    diagnostics: payload,
  };
}
/**
 * Driver around the prebuilt native `computer.node` addon.
 *
 * Callers work in "target" coordinates: each display is scaled down (never up) so its
 * longest side is at most 1568 and its area at most 1,150,000 pixels, unless an explicit
 * target size is supplied. Every coordinate-taking method validates the point against
 * that target size and converts it to native coordinates before calling the addon.
 */
var tt = class l {
  native;

  /**
   * @param native  Loaded native addon.
   * @param logger  Optional logger; when the addon exposes `setLogger`, native log
   *                records are forwarded to `logger.log` with source "Driver".
   */
  constructor(native, logger) {
    this.native = native;
    native.setLogger?.((level, action, details) => {
      logger?.log(this.nativeLogLevel(level), "Driver", action, details);
    });
  }

  /**
   * Loads `prebuilds/<platform>-<arch>/computer.node` next to this module.
   * @throws {Error} (with `cause`) when the addon cannot be required.
   */
  static create(logger) {
    const moduleDir = Jt(Yt(import.meta.url));
    const addonPath = Kt(moduleDir, "prebuilds", `${process.platform}-${process.arch}`, "computer.node");
    let addon;
    try {
      addon = Vt(import.meta.url)(addonPath);
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new Error(
        `Native computer bindings not available for ${process.platform}-${process.arch}: ${reason} (path: ${addonPath})`,
        { cause: err },
      );
    }
    return new l(addon, logger);
  }

  async checkPermissions(kind) {
    return this.native.checkPermissions(kind);
  }

  async requestPermissions(kind) {
    this.native.requestPermissions(kind);
  }

  async click(x, y, button, count, displayId = "") {
    const space = this.resolveCoordinateSpace(displayId);
    this.assertPointInBounds(space, x, y);
    const [nativeX, nativeY] = this.toNative(space, x, y);
    this.native.click(nativeX, nativeY, button, count);
  }

  /** Maps the addon's numeric levels 0–4 to level names; anything else is "info". */
  nativeLogLevel(level) {
    const names = ["trace", "debug", "info", "warn", "error"];
    const name = typeof level === "number" ? names[level] : undefined;
    return name ?? "info";
  }

  async move(x, y, displayId = "") {
    const space = this.resolveCoordinateSpace(displayId);
    this.assertPointInBounds(space, x, y);
    const [nativeX, nativeY] = this.toNative(space, x, y);
    this.native.move(nativeX, nativeY);
  }

  async drag(fromX, fromY, toX, toY, displayId = "") {
    const space = this.resolveCoordinateSpace(displayId);
    this.assertPointInBounds(space, fromX, fromY);
    this.assertPointInBounds(space, toX, toY);
    const [startX, startY] = this.toNative(space, fromX, fromY);
    const [endX, endY] = this.toNative(space, toX, toY);
    this.native.drag(startX, startY, endX, endY);
  }

  async mouseDown(x, y, displayId = "") {
    const space = this.resolveCoordinateSpace(displayId);
    this.assertPointInBounds(space, x, y);
    const [nativeX, nativeY] = this.toNative(space, x, y);
    this.native.mouseDown(nativeX, nativeY);
  }

  async mouseUp(x, y, displayId = "") {
    const space = this.resolveCoordinateSpace(displayId);
    this.assertPointInBounds(space, x, y);
    const [nativeX, nativeY] = this.toNative(space, x, y);
    this.native.mouseUp(nativeX, nativeY);
  }

  async type(text) {
    this.native.type(text);
  }

  async key(combo) {
    this.native.key(combo);
  }

  async scroll(x, y, dx, dy, displayId = "") {
    const space = this.resolveCoordinateSpace(displayId);
    this.assertPointInBounds(space, x, y);
    const [nativeX, nativeY] = this.toNative(space, x, y);
    this.native.scroll(nativeX, nativeY, dx, dy);
  }

  /** Returns the cursor position converted into target coordinates of `displayId`. */
  async cursorPosition(displayId = "") {
    const nativePosition = this.native.cursorPosition();
    const space = this.resolveCoordinateSpace(displayId);
    const [x, y] = this.fromNative(space, nativePosition.x, nativePosition.y);
    return { x, y };
  }

  /** Returns the target (scaled) size of `displayId`. */
  async display(displayId = "") {
    const space = this.resolveCoordinateSpace(displayId);
    return { width: space.targetWidth, height: space.targetHeight };
  }

  /**
   * Captures a screenshot. With a crop `[x1, y1, x2, y2]` (target coordinates) the
   * crop is validated, converted to native pixels and the output size is passed as
   * 0x0; without one the full display is captured at the target size.
   * `a2`/`a3` are forwarded to the addon untouched. A nullish `quality` is sent as -1.
   */
  async screenshot(displayId, a2, a3, width, height, crop, quality) {
    const space = this.resolveCoordinateSpace(displayId, width, height);
    const nativeQuality = quality ?? -1;
    if (!crop) {
      return this.native.screenshot(displayId, a2, a3, space.targetWidth, space.targetHeight, 0, 0, 0, 0, nativeQuality);
    }
    this.assertRegionEdgeInBounds(space, crop[0], crop[1]);
    this.assertRegionEdgeInBounds(space, crop[2], crop[3]);
    const [left, top, right, bottom] = this.toNativeCrop(space, crop[0], crop[1], crop[2], crop[3]);
    return this.native.screenshot(displayId, a2, a3, 0, 0, left, top, right, bottom, nativeQuality);
  }

  /**
   * Returns the addon's capabilities with `discovery` and `state` coerced to booleans;
   * `discovery` additionally requires `getActiveWindow` and `windowAtPoint` to exist.
   */
  async capabilities() {
    const nativeCaps = this.native.capabilities();
    const hasActiveWindow = typeof this.native.getActiveWindow === "function";
    const hasWindowAtPoint = typeof this.native.windowAtPoint === "function";
    return {
      ...nativeCaps,
      discovery: Boolean(nativeCaps.discovery) && hasActiveWindow && hasWindowAtPoint,
      state: Boolean(nativeCaps.state),
    };
  }

  /** Lists displays with their sizes expressed in target (scaled) pixels. */
  async listDisplays() {
    const displays = this.native.listDisplays();
    return displays.map((display) => {
      const size = this.targetSizeForDisplay(display.bounds.width, display.bounds.height);
      return {
        displayId: display.displayId,
        label: display.label,
        width: size.targetWidth,
        height: size.targetHeight,
        isPrimary: display.isPrimary,
      };
    });
  }

  async listWindows() {
    return this.native.listWindows();
  }

  /** Returns null when the addon has no `windowAtPoint`. */
  async windowAtPoint(displayId, x, y) {
    if (!this.native.windowAtPoint) {
      return null;
    }
    const space = this.resolveCoordinateSpace(displayId);
    this.assertPointInBounds(space, x, y);
    const [nativeX, nativeY] = this.toNative(space, x, y);
    return this.native.windowAtPoint(displayId, nativeX, nativeY);
  }

  /** Returns null when the addon has no `getActiveWindow`. */
  async getActiveWindow() {
    if (!this.native.getActiveWindow) {
      return null;
    }
    return this.native.getActiveWindow();
  }

  async activateApplication(appId) {
    return this.native.activateApplication(appId);
  }

  async concealApplication(appId) {
    return this.native.concealApplication(appId);
  }

  async restoreApplication(appId) {
    return this.native.restoreApplication(appId);
  }

  async activateWindow(windowId) {
    return this.native.activateWindow(windowId);
  }

  async concealWindow(windowId) {
    return this.native.concealWindow(windowId);
  }

  async restoreWindow(windowId) {
    return this.native.restoreWindow(windowId);
  }

  async getClipboard() {
    return this.native.getClipboard();
  }

  async setClipboard(text) {
    this.native.setClipboard(text);
  }

  /** Returns false when the addon has no `lock`. */
  lock(arg) {
    return this.native.lock?.(arg) ?? false;
  }

  unlock() {
    this.native.unlock?.();
  }

  hideAppCursorOverlay() {
    this.native.hideAppCursorOverlay?.();
  }

  // Window-scoped calls: arguments are forwarded positionally, and the JSON string
  // returned by the addon is parsed (S) and shaped (Xt / R) before being returned.

  async getWindowState(app, a2, a3, a4, a5, a6, a7) {
    const raw = this.native.getWindowState(app, a2, a3, a4, a5, a6, a7);
    return Xt(app, S(raw, "getWindowState"));
  }

  async windowClick(app, a2, a3, a4, a5, a6, a7, a8, a9) {
    const raw = this.native.windowClick(app, a2, a3, a4, a5, a6, a7, a8, a9);
    return R(S(raw, "windowClick"));
  }

  async invokeAction(app, a2, a3, a4, a5, a6) {
    const raw = this.native.invokeAction(app, a2, a3, a4, a5, a6);
    return R(S(raw, "invokeAction"));
  }

  async windowScroll(app, a2, a3, a4, a5, a6, a7) {
    const raw = this.native.windowScroll(app, a2, a3, a4, a5, a6, a7);
    return R(S(raw, "windowScroll"));
  }

  async windowDrag(app, a2, a3, a4, a5, a6, a7, a8) {
    const raw = this.native.windowDrag(app, a2, a3, a4, a5, a6, a7, a8);
    return R(S(raw, "windowDrag"));
  }

  async typeText(app, a2, a3, a4, a5, a6) {
    const raw = this.native.typeText(app, a2, a3, a4, a5, a6);
    return R(S(raw, "typeText"));
  }

  async pressKey(app, a2, a3, a4, a5) {
    const raw = this.native.pressKey(app, a2, a3, a4, a5);
    return R(S(raw, "pressKey"));
  }

  async setValue(app, a2, a3, a4, a5, a6) {
    const raw = this.native.setValue(app, a2, a3, a4, a5, a6);
    return R(S(raw, "setValue"));
  }

  /**
   * Forwards to the addon's `prepareForInput`. When `target` is given its point is
   * validated and converted to native coordinates; otherwise NaN/NaN is passed.
   */
  async prepareForInput(displayId, allowedAppIds, target) {
    let nativeX = Number.NaN;
    let nativeY = Number.NaN;
    if (target) {
      const space = this.resolveCoordinateSpace(displayId);
      this.assertPointInBounds(space, target.x, target.y);
      [nativeX, nativeY] = this.toNative(space, target.x, target.y);
    }
    return this.native.prepareForInput(displayId, allowedAppIds, target?.blockedWindowIds ?? [], nativeX, nativeY);
  }

  /**
   * Finds a display by id; for the empty id returns the primary display, falling back
   * to the first listed one. May return undefined.
   */
  resolveDisplay(displayId) {
    const displays = this.native.listDisplays();
    if (displayId !== "") {
      return displays.find((display) => display.displayId === displayId);
    }
    return displays.find((display) => display.isPrimary) ?? displays[0];
  }

  /**
   * Computes the target size for a display. An explicit positive size wins; otherwise
   * the display is scaled by min(1, 1568 / longest side, sqrt(1,150,000 / area)).
   */
  targetSizeForDisplay(width, height, requestedWidth = 0, requestedHeight = 0) {
    if (requestedWidth > 0 && requestedHeight > 0) {
      return { targetWidth: requestedWidth, targetHeight: requestedHeight };
    }
    const longestSideLimit = 1568 / Math.max(width, height);
    const areaLimit = Math.sqrt(115e4 / (width * height));
    const scale = Math.min(1, longestSideLimit, areaLimit);
    return { targetWidth: Math.floor(width * scale), targetHeight: Math.floor(height * scale) };
  }

  /**
   * Builds the mapping between target and native coordinates for a display. When the
   * display cannot be resolved, the origin is (0, 0) and the size comes from
   * `native.display()` (or 1x1 if that yields nothing).
   */
  resolveCoordinateSpace(displayId, requestedWidth = 0, requestedHeight = 0) {
    const display = this.resolveDisplay(displayId);
    const originX = display?.bounds.x ?? 0;
    const originY = display?.bounds.y ?? 0;
    const fallback = display ? undefined : this.native.display();
    const displayWidth = display?.bounds.width ?? fallback?.width ?? 1;
    const displayHeight = display?.bounds.height ?? fallback?.height ?? 1;
    const { targetWidth, targetHeight } = this.targetSizeForDisplay(
      displayWidth,
      displayHeight,
      requestedWidth,
      requestedHeight,
    );
    return {
      originX,
      originY,
      displayWidth,
      displayHeight,
      scaleX: targetWidth / displayWidth,
      scaleY: targetHeight / displayHeight,
      targetWidth,
      targetHeight,
    };
  }

  /** Target point -> native point (display origin added). */
  toNative(space, x, y) {
    const nativeX = space.originX + x / space.scaleX;
    const nativeY = space.originY + y / space.scaleY;
    return [nativeX, nativeY];
  }

  /** Target crop rectangle -> native pixels; rounds outward, no origin offset. */
  toNativeCrop(space, x1, y1, x2, y2) {
    return [
      Math.floor(x1 / space.scaleX),
      Math.floor(y1 / space.scaleY),
      Math.ceil(x2 / space.scaleX),
      Math.ceil(y2 / space.scaleY),
    ];
  }

  /** @throws {RangeError} unless 0 <= x < targetWidth and 0 <= y < targetHeight (finite). */
  assertPointInBounds(space, x, y) {
    const finite = Number.isFinite(x) && Number.isFinite(y);
    if (!finite) {
      throw new RangeError(`Coordinates must be finite numbers: x=${x}, y=${y}`);
    }
    const inside = x >= 0 && y >= 0 && x < space.targetWidth && y < space.targetHeight;
    if (!inside) {
      throw new RangeError(`Coordinates out of bounds for target display: x=${x}, y=${y}, width=${space.targetWidth}, height=${space.targetHeight}`);
    }
  }

  /** Like `assertPointInBounds`, but the far edges (x = width, y = height) are allowed. */
  assertRegionEdgeInBounds(space, x, y) {
    const finite = Number.isFinite(x) && Number.isFinite(y);
    if (!finite) {
      throw new RangeError(`Coordinates must be finite numbers: x=${x}, y=${y}`);
    }
    const inside = x >= 0 && y >= 0 && x <= space.targetWidth && y <= space.targetHeight;
    if (!inside) {
      throw new RangeError(`Crop coordinates out of bounds for target display: x=${x}, y=${y}, width=${space.targetWidth}, height=${space.targetHeight}`);
    }
  }

  /** Native point -> target point, rounded to the nearest integer. */
  fromNative(space, nativeX, nativeY) {
    const x = Math.round((nativeX - space.originX) * space.scaleX);
    const y = Math.round((nativeY - space.originY) * space.scaleY);
    return [x, y];
  }
};
Clicks at current cursor position if omitted."),kt=d.enum(["left","right","middle"]).optional().describe("Mouse button to click (default left)."),At=d.coerce.number().int().min(1).max(3).optional().describe("Number of clicks: 1 single, 2 double, 3 triple (default 1)."),It=U.describe("{x, y} pixel coordinate to move the cursor to."),xt=U.describe("{x, y} pixel coordinate to start the drag from."),$t=U.describe("{x, y} pixel coordinate to drag to."),St=d.string().describe("The text to type."),Pt=d.string().describe("Key combo to press (e.g. 'Return', 'ctrl+s', 'alt+Tab')."),Ct=d.enum(["up","down","left","right"]).describe("Direction to scroll."),Wt=U.optional().describe("{x, y} pixel coordinate to scroll at. Scrolls at current cursor position if omitted."),Tt=d.number().int().nonnegative().max(100).optional().describe("Number of scroll clicks (default 3)."),ae=d.number().nonnegative().max(100).describe("Seconds to wait (max 100). Use after a screenshot shows the UI is not in the expected state yet, but an operation may still finish."),Rt=d.string().describe("The text to copy to the clipboard."),w={app:d.string().min(1).describe("App name or bundle identifier."),window_title_contains:d.string().min(1).optional().describe("Optional window title substring to choose a specific window."),window_id:d.string().min(1).optional().describe("Optional helper/native window id returned by a prior window-state response."),state_token:d.string().min(1).optional().describe("Optional state token returned by get_window_state. Pass it to detect stale UI state.")},H=d.string().min(1).describe("Element index or stable nodeId from get_window_state. Prefer nodeId when available."),ce=d.enum(["image","path","omit"]).describe("How to return screenshots. `image` returns MCP image content, `path` writes a local file and returns its path, `omit` skips capture. Defaults to omit; pass image or path when a visual snapshot is needed."),le=d.enum(["text","json","both"]).describe("Response format. 
`text` returns compact semantic state, `json` returns sanitized debug metadata, and `both` returns compact state plus sanitized JSON. Defaults to text."),de=d.enum(["none","text","image","path"]).describe("Post-action state to return. Defaults to none for low-latency action responses; pass text/image/path when a fresh snapshot is needed."),D={return_state:de.optional(),screenshot_out_file:d.string().min(1).optional().describe("Optional absolute path for return_state=path. Parent directories are created if needed."),max_nodes:d.number().int().positive().optional().describe("Maximum UI tree nodes for returned post-action state.")},pe=new Set(["modal_sheet_opened","window_closed","window_state_changed"]);function ue(l){if(l.classification)return l.classification;let t=l.diagnostics;if(typeof t!="object"||t===null||Array.isArray(t))return;let e=t.classification;return typeof e=="string"?e:void 0}function vt(l,t){if(t!=="window_closed")return!1;let e=l instanceof Error?l.message:String(l);return e.includes("No app window found")||e.includes("exposed no accessibility windows")||e.includes("Stale window_id")}var he=d.object({app:w.app,window_title_contains:w.window_title_contains,window_id:w.window_id,image_mode:ce.optional(),format:le.optional(),screenshot_out_file:d.string().min(1).optional().describe("Optional absolute path for image_mode=path. Parent directories are created if needed."),include_screenshot:d.boolean().optional().describe("Deprecated compatibility flag. Use image_mode instead. false maps to image_mode=omit."),include_tree:d.boolean().optional().describe("Whether to include the accessibility/UI tree. Defaults to true."),max_nodes:d.number().int().positive().optional().describe("Maximum UI tree nodes to return."),diff_since:d.string().min(1).optional().describe("Optional prior state_token. 
Returns compact semantic diff when native can compare against that snapshot."),diff_from_state_token:d.string().min(1).optional().describe("Alias for diff_since.")}),we=d.object({app:w.app,window_title_contains:w.window_title_contains,window_id:w.window_id,state_token:w.state_token,element_index:H.optional(),x:A.optional().describe("Window-local x coordinate in the screenshot returned by get_window_state. Origin is the target window's top-left, not the desktop."),y:A.optional().describe("Window-local y coordinate in the screenshot returned by get_window_state. Origin is the target window's top-left, not the desktop."),click_count:d.number().int().positive().max(2).optional().describe("Number of clicks. Defaults to 1."),mouse_button:d.enum(["left","right","middle"]).optional().describe("Mouse button. Defaults to left."),...D}),me=d.object({app:w.app,window_title_contains:w.window_title_contains,window_id:w.window_id,state_token:w.state_token,element_index:H,action:d.string().min(1).describe("Accessibility action label from get_window_state."),...D}),ge=d.object({app:w.app,window_title_contains:w.window_title_contains,window_id:w.window_id,state_token:w.state_token,element_index:H.describe("Scrollable element index or stable nodeId from get_window_state."),direction:d.enum(["up","down","left","right"]).describe("Scroll direction."),pages:d.number().int().positive().optional().describe("Number of pages to scroll. Defaults to 1."),...D}),fe=d.object({app:w.app,window_title_contains:w.window_title_contains,window_id:w.window_id,state_token:w.state_token,from_x:A.describe("Start window-local x coordinate in the screenshot returned by get_window_state. Origin is the target window's top-left, not the desktop."),from_y:A.describe("Start window-local y coordinate in the screenshot returned by get_window_state. Origin is the target window's top-left, not the desktop."),to_x:A.describe("End window-local x coordinate in the screenshot returned by get_window_state. 
Origin is the target window's top-left, not the desktop."),to_y:A.describe("End window-local y coordinate in the screenshot returned by get_window_state. Origin is the target window's top-left, not the desktop."),...D}),ye=d.object({app:w.app,window_title_contains:w.window_title_contains,window_id:w.window_id,state_token:w.state_token,element_index:H.optional().describe("Optional text element index or stable nodeId from get_window_state."),text:d.string().describe("Literal text to type."),...D}),be=d.object({app:w.app,window_title_contains:w.window_title_contains,window_id:w.window_id,state_token:w.state_token,key:d.string().min(1).optional().describe("Key or key-combination, e.g. command+a, Return, Escape, PageDown."),keys:d.string().min(1).optional().describe("Alias for `key`. Models sometimes emit `keys` instead; either name works."),...D}),ve=d.object({app:w.app,window_title_contains:w.window_title_contains,window_id:w.window_id,state_token:w.state_token,element_index:H.describe("Settable element index or stable nodeId from get_window_state."),value:d.string().describe("Value to set."),...D}),_e=d.object({action:d.literal("click"),coordinate:_t,button:kt,count:At}),ke=d.object({action:d.literal("mouse_move"),coordinate:It}),Ae=d.object({action:d.literal("left_click_drag"),start_coordinate:xt,coordinate:$t}),Ie=d.object({action:d.literal("left_mouse_down")}),xe=d.object({action:d.literal("left_mouse_up")}),$e=d.object({action:d.literal("type"),text:St}),Se=d.object({action:d.literal("key"),text:Pt}),Pe=d.object({action:d.literal("scroll"),scroll_direction:Ct,coordinate:Wt,scroll_amount:Tt}),Ce=d.object({action:d.literal("set_clipboard"),text:Rt}),We=d.discriminatedUnion("action",[_e,ke,Ae,Ie,xe,$e,Se,Pe,Ce]),Te=d.object({actions:d.array(We).min(1).describe("Ordered actions. 
Do not include waits; call the separate `wait` tool only after observing a screenshot that is not ready yet.")}),et=class l{_computer;_caps;_locked=null;_safetyTimer;static SAFETY_TIMEOUT_MS=180*1e3;server;logger;accessStore=new K;appNameResolver=new Y;lastUnsharedWindowsNote=null;selectedDisplay="";options;constructor(t={}){this.options={...t,window_state:t.window_state},process.env.DEBUG&&(this.logger=new G)}toText(t,e){let i=[{type:"text",text:t}];return e?{content:i,isError:!0}:{content:i}}toJson(t){return this.toText(JSON.stringify(t,null,2))}toImage(t,e,i){return t?{content:[{type:"image",data:t.toString("base64"),mimeType:this.imageMimeType(i),_meta:{screenshot:!0}}]}:this.toText(e,!0)}buildAccessMessage(t,e){let i=t.length===0?["","Computer Use runs on your actual desktop and can send mouse and keyboard input.","","Computer Use wants to control your desktop for this session.","","Apps that are not allowed may be hidden."]:["","Computer Use runs on your actual desktop and can send mouse and keyboard input to the apps you share.","","Computer Use wants to control these apps:","",...t.map(n=>`- ${n}`),"","Apps that are not allowed may be hidden."];return e&&i.push("","Reason:",e),i.join(`
|
|
41
|
+
`)}imageMimeType(t){return t!==void 0?"image/jpeg":"image/png"}collectDisplayIds(t,e){let i=new Set;for(let n of t)!n.isMinimized&&n.displayId&&(!e||n.applicationId===e)&&i.add(n.displayId);return[...i]}buildAllowedAppsInfo(t,e){return t.map(i=>({appId:i.id,name:i.displayName,displayIds:this.collectDisplayIds(e,i.id)}))}create(){let t=["This MCP server provides desktop automation tools (mouse, keyboard, screenshots, clipboard).",this.options.yolo?"YOLO mode is enabled. Call `request_access` once to auto-allow all current and future apps for this session.":"Before using access-gated tools (screenshot, click, type, clipboard, etc.), you MUST call `request_access` to start an access session.",this.options.window_state?"For a named target app, call `request_access` directly; it can find the app, select its display, and start the access session without capturing a screenshot. Use `list_applications` only for discovery or ambiguity.":"For a named target app, call `request_access` directly; it can find the app, select its display, and return the first screenshot. Use `list_applications` only for discovery or ambiguity.","Calling `request_access` with an empty apps array allows all visible apps on the selected display. If none are visible, it falls back to desktop access and empty desktop screenshots until apps are allowed.","Screenshots and zoom captures are composited to show ONLY the windows of allowed applications (plus system UI like the Dock). Disallowed app windows are not visible.",this.options.window_state?"For speed, use `get_window_state` after `request_access`; do not call `screenshot` unless visual pixels are needed.":"For speed, use returned screenshots instead of post-action `screenshot`, and batch predictable actions. 
Returned screenshots already include a small settle delay; do not add waits just for screenshot timing.","Prefer keyboard shortcuts, `type`, and `key` over visual menu navigation when reliable.","If `request_access` returns `allowAll=true`, all current and future apps are allowed for the rest of this session and you do not need to call `request_access` again.","Otherwise, if you launch a new app during the session, call `request_access` again to add it. Use `list_applications` first if you need to discover the app or its display."];this.options.window_state&&t.push("Call `get_window_state` once per turn before `click`, `scroll`, `drag`, `type_text`, `press_key`, `set_value`, or `invoke_action`. Pass the returned `state_token`, and prefer nodeId/element_index targets from `get_window_state` over screenshot coordinates.","`get_window_state` returns compact semantic text and is much faster than a desktop screenshot. Default `image_mode=omit` skips screenshot capture; pass `image` only when visual coordinates are needed. Its output labels action coordinates as window-local screenshot x/y with origin at the target window top-left; AX frames are screen/global and should not be passed directly to click/drag. Pass `diff_since` with a prior state_token for compact changes when useful, and use `format=json` or `format=both` only for sanitized debug metadata.","For background windows, close documents by pressing the close button element from `get_window_state`, then handle any sheet/dialog buttons from a refreshed app-level `get_window_state`. 
Do not use `press_key` for shortcuts like Cmd+W unless the app is foreground, and do not use `invoke_action` with an action that is not listed on that exact element.","Window-state actions show an animated non-interactive overlay cursor on macOS so the user can see what the window-scoped action is doing, even when the action uses Accessibility instead of a physical mouse click.","Raw coordinate/HID input tools (`left_click_drag`, `mouse_move`, `left_mouse_down`, `left_mouse_up`, `batch`) and the cross-platform `key`/`type` tools are intentionally NOT available on this platform. The window-scoped tools (`click`, `drag`, `scroll`, `type_text`, `press_key`, `set_value`, `invoke_action`) target windows directly, support stable element ids, and don't move the user's real cursor. For perception use `get_window_state`, `screenshot`, or `zoom`.");let e={name:"computer-use",version:"1.0.0"};this.server=new te(e,{instructions:t.join(`
|
|
42
|
+
`)});let i=d.object({method:d.literal("notifications/copilot"),params:d.object({type:d.string()}).passthrough()});return this.server.server.setNotificationHandler(i,n=>{switch(n.params.type){case"assistant.turn_start":this._locked===null&&(this._locked=!1),this.unlock();break;case"assistant.turn_end":this.unlock();break;case"user.abort":case"assistant.abort":this.unlock();break}}),this.server.registerTool("list_applications",{description:"List running apps and `selectedDisplay`. Use for discovery only; `request_access` can resolve named apps directly.",inputSchema:re.shape,annotations:P},n=>this.list_applications(n)),this.server.registerTool("request_access",{description:"Request app access by friendly name or appId; pass [] for all visible apps. For a named app, call this directly; it finds the app and selects its display. "+(this.options.window_state?"Returns JSON (`allowed`, `allowAll`, `message`, `allowedApps?`, `selectedDisplay?`); call `get_window_state` next for perception. ":"Returns JSON (`allowed`, `allowAll`, `message`, `allowedApps?`, `selectedDisplay?`) and, when allowed, a screenshot. ")+"`allowAll=true` covers future apps for this session.",inputSchema:{apps:d.array(se).describe('Applications to allow. Each entry can be a friendly name (e.g. "Outlook") or a stable appId, if already known. Pass an empty array to allow all apps on the selected display.'),reason:d.string().min(1).optional().describe("Optional reason text shown in the access dialog.")},annotations:dt},(n,s)=>this.request_access(n,s.signal)),this.server.registerTool("get_clipboard",{description:"Get the current text contents of the system clipboard."+v,annotations:P},()=>this.get_clipboard()),this.server.registerTool("set_clipboard",{description:"Set the system clipboard to the specified text."+v,inputSchema:{text:Rt},annotations:dt},n=>this.set_clipboard(n)),this.server.registerTool("wait",{description:"Pause, then return an updated screenshot if access is active. 
Use after a screenshot shows the UI is not ready yet.",inputSchema:{duration:ae.optional(),seconds:d.number().nonnegative().max(100).optional().describe("Alias for `duration`. Either name works.")},annotations:{...P,idempotentHint:!0}},n=>this.wait({duration:n.duration??n.seconds??0})),this.options.window_state?this.registerWindowStateTools():this.registerCoordinateTools(),this.server}registerCoordinateTools(){this.server.registerTool("list_displays",{description:"List available displays and the currently selected display used for screenshots, zoom, cursor position, and coordinate-based actions.",annotations:P},()=>this.list_displays()),this.server.registerTool("select_display",{description:"Select the active display used for screenshots, zoom, cursor position, and coordinate-based actions. Omit display_id or pass an empty string to use the default display.",inputSchema:oe.shape,annotations:dt},e=>this.select_display(e)),this.server.registerTool("screenshot",{description:"Capture the current filtered screen. "+bt+v,annotations:P},()=>this.screenshot()),this.server.registerTool("zoom",{description:"Capture a filtered screen region at full resolution."+bt+v,inputSchema:{region:ne.describe("{x1, y1, x2, y2} coordinates defining top-left and bottom-right corners of the region to capture.")},annotations:P},e=>this.zoom(e)),this.server.registerTool("cursor_position",{description:"Get the current cursor position in pixel coordinates. Returns {x, y}."+v,annotations:P},()=>this.cursor_position());let t={coordinate:_t,button:kt,count:At};this.server.registerTool("click",{description:"Click a mouse button. Defaults to a single left click; use `button` for right/middle click and `count` 2 or 3 for double/triple-click. 
Optionally move to a coordinate first."+v,inputSchema:t,annotations:b},e=>this.click(e)),this.server.registerTool("mouse_move",{description:"Move the mouse cursor to the specified pixel coordinate."+v,inputSchema:{coordinate:It},annotations:b},e=>this.mouse_move(e)),this.server.registerTool("left_click_drag",{description:"Click and drag from a start coordinate to an end coordinate."+v,inputSchema:{start_coordinate:xt,coordinate:$t},annotations:b},e=>this.left_click_drag(e)),this.server.registerTool("left_mouse_down",{description:"Press and hold the left mouse button at the current cursor position."+v,annotations:b},()=>this.left_mouse_down()),this.server.registerTool("left_mouse_up",{description:"Release the left mouse button at the current cursor position."+v,annotations:b},()=>this.left_mouse_up()),this.server.registerTool("type",{description:"Type a string for text input fields. Use `key` for physical-key controls."+v,inputSchema:{text:St},annotations:b},e=>this.type(e)),this.server.registerTool("key",{description:"Press a key or combo, e.g. 'Return', 'ctrl+s', 'alt+Tab', '1'."+v,inputSchema:{text:Pt},annotations:b},e=>this.key(e)),this.server.registerTool("scroll",{description:"Scroll the screen in a given direction at an optional coordinate."+v,inputSchema:{scroll_direction:Ct,coordinate:Wt,scroll_amount:Tt},annotations:b},e=>this.scroll(e)),this.server.registerTool("batch",{description:"Run ordered actions and return one final screenshot after a built-in settle delay. Use for predictable sequences; wait is intentionally not supported. Supported actions: click, mouse_move, left_click_drag, left_mouse_down, left_mouse_up, type, key, scroll, set_clipboard.",inputSchema:Te.shape,annotations:b},e=>this.batch(e))}registerWindowStateTools(){let t=" Call `get_window_state` once per turn before using window-state actions. 
Pass `state_token`, and prefer a nodeId/element_index target over screenshot coordinates; if coordinates are needed, use local_center/window-local hints, not screen@ AX frames. Actions show an animated overlay cursor on macOS.";this.server.registerTool("get_window_state",{description:"Get compact semantic state for a target app window. Defaults to text; use format=json/both only for sanitized debug metadata. Screenshots are returned according to `image_mode`: MCP image content, local file path, or omitted; base64 is never embedded in text. Output coordinate contract: click/drag x,y are window-local screenshot coordinates with origin at the target window top-left; screen@/AX frames are global screen coordinates and should not be used directly as action coordinates. Pass `diff_since` with a prior state_token for compact semantic changes when useful.",inputSchema:he.shape,annotations:P},e=>this.get_window_state(e)),this.server.registerTool("click",{description:"Click an element by numeric index or stable nodeId in `element_index`, or a window-local screenshot coordinate/local_center from `get_window_state` as fallback."+t,inputSchema:we.shape,annotations:b},e=>this.window_click(e)),this.server.registerTool("invoke_action",{description:"Invoke an accessibility action exposed by an element in `get_window_state`; the `action` string must come from that element's listed actions such as {press} or {confirm}, not an inferred command like `close` unless it is explicitly listed."+t,inputSchema:me.shape,annotations:b},e=>this.invoke_action(e)),this.server.registerTool("scroll",{description:"Scroll an element from `get_window_state` in an app window."+t,inputSchema:ge.shape,annotations:b},e=>this.scroll_window(e)),this.server.registerTool("drag",{description:"Drag between window-local screenshot coordinates from `get_window_state` (origin is the target window's top-left; do not use screen@ AX frame coordinates 
directly).",inputSchema:fe.shape,annotations:b},e=>this.drag_window(e)),this.server.registerTool("type_text",{description:"Type literal text into an app window, optionally targeting an element from `get_window_state`."+t,inputSchema:ye.shape,annotations:b},e=>this.type_text(e)),this.server.registerTool("press_key",{description:"Press a key or key-combination against an app window. Background apps cannot receive global shortcuts without activation; for background document close use the close button element from `get_window_state` instead of Cmd+W."+t,inputSchema:be.shape,annotations:b},e=>this.press_key(e)),this.server.registerTool("set_value",{description:"Set the value of a settable accessibility element from `get_window_state`."+t,inputSchema:ve.shape,annotations:b},e=>this.set_value(e))}lock(){this._locked===null||!this._computer||(this._locked||(this._locked=this._computer.lock(()=>{this.unlock(),this.server.server.notification({method:"notifications/copilot",params:{type:"user.abort"}}).catch(()=>{});let t=this.accessStore.getState().hostWindowId;t&&this._computer&&this._computer.activateWindow(t).catch(()=>{})})),this._safetyTimer&&clearTimeout(this._safetyTimer),this._safetyTimer=setTimeout(()=>this.unlock(),l.SAFETY_TIMEOUT_MS))}unlock(){this._computer&&this._computer.hideAppCursorOverlay(),!(this._locked===null||!this._computer)&&(this._safetyTimer&&(clearTimeout(this._safetyTimer),this._safetyTimer=void 0),this._locked&&this._computer.unlock(),this._locked=!1)}async computer(){return this._computer||(this._computer=tt.create(this.logger),this.logger&&(this._computer=new Z(this._computer,this.logger))),this._computer}logEvent(t,e){this.logger?.log("info","Server",t,e)}async capabilities(){if(!this._caps){let t=await this.computer();this._caps=await t.capabilities()}return this._caps}async requestPermission(){switch(process.platform){case"linux":{await(await this.computer()).display();break}case"darwin":{let t=await this.computer(),e=[],i=await 
t.checkPermissions("accessibility");i||e.push("Accessibility");let n=await t.checkPermissions("screen");if(n||e.push("Screen Recording"),e.length>0){if(!this.server.server.getClientCapabilities()?.elicitation?.form)break;let o=["",`Computer Use needs the following macOS permission${e.length>1?"s":""} to control your desktop:`,"",...e.map(a=>`\u2022 ${a}`),"",'Clicking "Open System Settings" will open the relevant settings page(s).',`Enable the permission${e.length>1?"s":""} for your terminal app, then click "I've granted the permissions".`,"","Note: You may need to restart your terminal after granting permissions for them to take effect."];await this.showHostWindow(t);let r=await this.server.server.elicitInput({mode:"form",message:o.join(`
|
|
43
|
+
`),requestedSchema:{type:"object",properties:{action:{type:"string",title:`Grant ${e.join(" and ")} permission${e.length>1?"s":""}`,oneOf:[{const:"open",title:"1. Open System Settings"},{const:"done",title:"2. I've granted the permissions"},{const:"skip",title:"3. Skip (things may not work)"}]}},required:["action"]}}),c=r.action==="accept"?r.content?.action:void 0;if(c==="open"){i||await t.requestPermissions("accessibility"),n||await t.requestPermissions("screen"),await this.showHostWindow(t);let a=await this.server.server.elicitInput({mode:"form",message:["","System Settings has been opened.","",`Enable ${e.join(" and ")} for your terminal app.`,e.length>1?"Both settings pages have been opened \u2014 check each one.":"","",`After granting permissions, click "I've granted the permissions" below.`,"If the toggle was already on, try removing and re-adding your terminal app."].filter(Boolean).join(`
|
|
44
|
+
`),requestedSchema:{type:"object",properties:{action:{type:"string",title:"Confirm permissions",oneOf:[{const:"done",title:"1. I've granted the permissions"},{const:"skip",title:"2. Skip (things may not work)"}]}},required:["action"]}});if((a.action==="accept"?a.content?.action:void 0)!=="done")break}else if(c!=="done")break;if(i=await t.checkPermissions("accessibility"),n=await t.checkPermissions("screen"),!i||!n){let a=[];throw i||a.push("Accessibility"),n||a.push("Screen Recording"),new Error(`${a.join(" and ")} permission${a.length>1?"s are":" is"} still not detected. This usually means you need to restart your terminal after granting the permission. Please quit and reopen your terminal, then try again.`)}}break}}}async activeDisplayId(t){return this.selectedDisplay!==""?this.selectedDisplay:(await t.listDisplays()).find(i=>i.isPrimary)?.displayId??""}async requireAccess(){let t=this.accessStore.getState();if(!t.accessActive)throw new Error("No access session is active. Call request_access first to start a session.");let e=await this.computer();if(t.allowAll&&(await this.capabilities()).discovery){let{windows:n}=await this.filterHostWindow(e),s=T(n);this.accessStore.rememberApplications(s),this.appNameResolver.updateApplications(s),t=this.accessStore.getState()}return{computer:e,allowedAppIds:t.allowAll?null:t.allowedAppIds,hostWindowId:t.hostWindowId}}async hostWindowId(t){let e=this.accessStore.getState().hostWindowId;if(e)return e;if(!(await this.capabilities()).discovery)return null;let n=await t.getActiveWindow();return n&&(await t.listWindows()).some(o=>o.windowId===n.windowId)?n.windowId:null}async showHostWindow(t){let e=await this.hostWindowId(t);if(e)try{await t.activateWindow(e)}catch{}}async filterHostWindow(t,e){let i=e??await t.listWindows(),n=await this.hostWindowId(t);return{windows:n?i.filter(s=>s.windowId!==n):i,hostWindowId:n}}async prepareForInput(t,e,i){if(!(await this.capabilities()).discovery)return;let s=await 
this.activeDisplayId(t),o=i!==void 0;if((e===null||e.length===0)&&!o)return;if(!await t.prepareForInput(s,e,o?i:void 0)){if(o)throw new Error("Input blocked: a disallowed app at the target could not be hidden. Call `list_applications` to see what's there, then `request_access` to allow it.");let c=await t.getActiveWindow();throw c&&e!==null&&!e.includes(c.applicationId)?new Error(this.keyboardInputBlockedMessage(c)):new Error("Keyboard input blocked: an allowed application could not be focused. Click an allowed application first, or call `request_access` to allow it.")}}keyboardInputBlockedMessage(t){return`Keyboard input blocked: the focused application ("${t.applicationName}") is not allowed. Click an allowed application first, or call \`request_access\` to allow it.`}async validateActiveWindow(t,e){if(e===null||e.length===0||!(await this.capabilities()).discovery)return;let n=await t.getActiveWindow();if(n&&!e.includes(n.applicationId))throw new Error(this.keyboardInputBlockedMessage(n))}pointerInputBlockedMessage(t){return`Pointer input blocked: the focused application ("${t.applicationName}") is not allowed. Focus an allowed application first, or call \`request_access\` to allow it.`}async validatePointerInput(t,e,i){if(e===null||e.length===0||!(await this.capabilities()).discovery)return;let s=await t.getActiveWindow();if(s&&!e.includes(s.applicationId))throw new Error(this.pointerInputBlockedMessage(s));let o=await this.activeDisplayId(t),r=await t.windowAtPoint(o,i.x,i.y);if(!r||e.includes(r.applicationId))return;let c=r.applicationName||r.title||r.applicationId;throw new Error(`Pointer input blocked: the target location is covered by disallowed application ("${c}"). 
Call \`list_applications\` to see what's there, then \`request_access\` to allow it.`)}async list_displays(){let t=await this.computer(),e=await t.listDisplays(),i=await this.activeDisplayId(t);return this.toJson({displays:e,selectedDisplay:i})}async select_display({display_id:t}){let e=t??"",i=await this.computer(),n=await i.listDisplays();if(e!==""&&!n.some(r=>r.displayId===e))throw new Error(`Unknown display id '${e}'. Call list_displays first.`);this.selectedDisplay=e;let s=await this.activeDisplayId(i),o=n.find(r=>r.displayId===s)??null;return this.toJson({selectedDisplay:s,status:"selected",message:e===""?"Using the default display for screenshots, zoom, cursor position, and coordinate-based actions.":`Selected display '${o?.label??e}' for screenshots, zoom, cursor position, and coordinate-based actions.`})}async list_applications({display_id:t}={}){let e=await this.computer(),{windows:i}=await this.filterHostWindow(e),n=T(i);this.accessStore.rememberApplications(n),this.appNameResolver.updateApplications(n);let s=t??"";if(s!==""&&!(await e.listDisplays()).some(u=>u.displayId===s))throw new Error(`Unknown display id '${s}'. 
Call list_displays first.`);let o=s===""?i:i.filter(a=>!a.isMinimized&&a.displayId===s),r=T(o),c=await this.activeDisplayId(e);return this.toJson({selectedDisplay:c,applications:r})}async request_access({apps:t,reason:e},i){this.logEvent("request_access",`Start: apps=[${t.join(", ")}]${e?` reason="${e}"`:""}`),await this.requestPermission();let n={const:"allow",title:"Allow"},s={const:"allow_all",title:"Allow all apps (don't ask again)"},o={const:"deny",title:"Deny (Esc)"},r,c=async(g,y,L)=>{this.logEvent("request_access",`End: allowed=${g} allowAll=${y.allowAll} selectedDisplay=${this.selectedDisplay||"(default)"} hostWindowId=${y.hostWindowId??"null"} message="${L}"`);let $=r??(y.allowAll?void 0:y.allowedAppIds.map(_=>{let C=this.accessStore.tryGetKnownApplication(_);return{appId:_,name:C?.displayName,displayIds:this.collectDisplayIds(C?.windows??[])}})),M=this.toJson({allowed:g,allowAll:y.allowAll,...$?{allowedApps:$}:{},selectedDisplay:this.selectedDisplay||void 0,message:L});if(!g||this.options.window_state)return M;let O=await this.computer();this.lock();let st=y.hostWindowId?[y.hostWindowId]:[];return this.addScreenshot(M,O,y.allowAll?null:y.allowedAppIds,st)};if(this.options.yolo){let g=this.accessStore.allowApplications([],!0);return await c(!0,g,"YOLO mode is enabled. Auto-allowing full desktop access for this session.")}let a=await this.capabilities();if(!this.server.server.getClientCapabilities()?.elicitation?.form){let g=this.accessStore.allowApplications([],!0);return await c(!0,g,"Elicitation is not supported by this client. 
Auto-allowing full desktop access for this session.")}let p,h,m,E,it;if(a.discovery){let g=await this.computer(),y=this.accessStore.getState().hostWindowId,L=await g.listWindows(),$=y?L.filter(f=>f.windowId!==y):L;it=$;let M=T($);this.accessStore.rememberApplications(M),this.appNameResolver.updateApplications(M);let O=t&&t.length>0?"":await this.activeDisplayId(g);O&&(this.selectedDisplay=O);let st=new Set(y?[y]:[]),_;if(t&&t.length>0){let f=new Map;for(let k of t){let z=this.appNameResolver.resolve(k);if(z.length===0)throw new Error(`No matching application found for '${k}'. The app may not be running. On macOS, launch it first (e.g. via 'open -a "${k}"' through a shell tool), then call request_access again. Or call list_applications to see what is currently running.`);for(let W of z)if(!f.has(W.appId)){let J=this.accessStore.tryGetKnownApplication(W.appId);f.set(W.appId,J??{id:W.appId,displayName:W.displayName,windows:[]})}}_=[...f.values()];let Mt=new Set(f.keys()),V=new Map;for(let k of $)Mt.has(k.applicationId)&&!k.isMinimized&&k.displayId&&V.set(k.displayId,(V.get(k.displayId)??0)+1);if(V.size>0){let k="",z=0;for(let[W,J]of V)J>z&&(k=W,z=J);this.selectedDisplay=k}r=this.buildAllowedAppsInfo(_,$)}else _=T($.filter(f=>!f.isMinimized&&(O===""||f.displayId===O)&&!st.has(f.windowId)));let C=_.map(f=>f.id),ot=this.accessStore.getState(),F=_.length;h=["allow","allow_all"],m=C.length===0?ot.allowAll||ot.accessActive:this.accessStore.areAllowedForAccess(C),E=C.length===0?ot.allowAll?"Access is already configured to auto-allow future requests for this session, so you do not need to call request_access again.":"Desktop access is already active for this session.":F===1?`'${_[0].displayName}' is already shared for this session.`:"The requested apps are already shared for this session.",p={choices:[n,s,o],message:this.buildAccessMessage(_.map(f=>f.displayName),e),denyMessage:F===0?"The user declined desktop access.":F===1?`The user declined to share 
'${_[0].displayName}'.`:"The user declined to share the requested apps.",allow:f=>this.accessStore.allowApplications(C,f==="allow_all"),allowedMessage:f=>f==="allow_all"?"All current and future apps are allowed for the rest of this session, so you do not need to call request_access again.":F===0?"Desktop access is active for this session.":F===1?`Access session started for '${_[0].displayName}'.`:"Access session started for the requested apps."}}else{let g=this.accessStore.getState();h=["allow_all"],m=g.allowAll||g.accessActive,E=g.allowAll?"Access is already configured to auto-allow future requests for this session, so you do not need to call request_access again.":"Desktop access is already active for this session.",p={choices:[s,o],message:this.buildAccessMessage([],e),denyMessage:"The user declined desktop access.",allow:()=>this.accessStore.allowApplications([],!0),allowedMessage:()=>"Access session started for the desktop. All current and future apps are allowed for the rest of this session, so you do not need to call request_access again."}}if(m){let g=this.accessStore.getState();return await c(!0,g,E)}this.accessStore.getState().hostWindowId&&await this.showHostWindow(await this.computer());let nt=await this.server.server.request({method:"elicitation/create",params:{mode:"form",message:p.message,requestedSchema:{type:"object",properties:{decision:{type:"string",title:"Allow access for this session?",oneOf:p.choices}},required:["decision"]}}},ee,{signal:i,timeout:300*1e3}),Dt=await this.computer(),pt=await this.hostWindowId(Dt);pt&&this.accessStore.setHostWindowId(pt);let x=nt.action==="accept"?nt.content?.decision:void 0;if(nt.action!=="accept"||!x||typeof x!="string"||!h.includes(x)){let g=this.accessStore.getState(),y=typeof x=="string"&&x!==o.const?`The user did not approve this access request. 
User response: ${x}`:p.denyMessage;return await c(!1,g,y)}let ut=p.allow(x);return ut.allowAll&&it&&(r=this.buildAllowedAppsInfo(this.accessStore.allKnownApplications(),it)),await c(!0,ut,p.allowedMessage(x))}windowStateAuthCache=new Map;windowStateAuthCacheKey(t,e,i){return`${e}\0${t.toLowerCase()}\0${i?.toLowerCase()??""}`}async requireWindowStateAccess(t,e,i){let{computer:n,allowedAppIds:s}=await this.requireAccess();if(e){let h=this.windowStateAuthCacheKey(t,e,i),m=this.windowStateAuthCache.get(h);if(m&&(s===null||s.includes(m.appId)))return n}let o=await n.listWindows();this.appNameResolver.updateApplications(T(o));let r=new Set(this.appNameResolver.resolve(t).map(h=>h.appId)),c=h=>r.has(h.applicationId),a=(e?o.filter(h=>h.windowId===e):o.filter(c)).filter(h=>!i||h.title.toLowerCase().includes(i.toLowerCase()));if(a.length===0)throw new Error(`Window-state target not found for "${t}". Call list_applications, then pass the intended app/window_id.`);if(e&&!a.some(c)){let h=a[0];throw new Error(`Window-state target blocked: window_id "${e}" belongs to "${h.applicationName}", not "${t}".`)}let u=[...new Set(a.map(h=>h.applicationId))];if(u.length!==1)throw new Error(`Window-state target is ambiguous for "${t}". Candidate apps: ${a.map(h=>`${h.applicationName} (${h.applicationId})`).join(", ")}.`);let p=u[0];if(s!==null&&!s.includes(p)){let h=a[0]?.applicationName??t;throw new Error(`Window-state input blocked: "${h}" is not shared. 
Call request_access to allow it.`)}return e&&this.windowStateAuthCache.set(this.windowStateAuthCacheKey(t,e,i),{appId:p}),n}async getWindowStateAction(t,e,i){let n=await e,s=n.ok?this.toJson(n):{...this.toJson(n),isError:!0},o=i.return_state??"none";if(!n.ok||o==="none")return s;await new Promise(p=>setTimeout(p,150));let r=o==="image"||o==="path"?o:"omit",c=ue(n),a=c!==void 0&&pe.has(c),u=async(p,h)=>{let m=await t.getWindowState(i.app,p,h,r!=="omit",!0,i.max_nodes),E=new B(m).toToolResult({imageMode:r,format:"text",screenshotOutFile:i.screenshot_out_file});return{content:[...s.content,...E.content],isError:s.isError}};try{return await u(a?void 0:i.window_title_contains,a?void 0:i.window_id)}catch(p){if(vt(p,c))return s;if(!a)try{return await u()}catch(m){if(vt(m,c))return s}let h=`post_state_error: ${p instanceof Error?p.message:String(p)}`;return this.toJson({...n,warnings:[...n.warnings??[],h]})}}async get_window_state(t){let e=await this.requireWindowStateAccess(t.app,t.window_id,t.window_title_contains),i=t.image_mode??(t.include_screenshot===!0?"image":"omit");if(t.include_screenshot===!1&&i!=="omit")throw new Error("get_window_state include_screenshot=false conflicts with image_mode. 
Use image_mode=omit.");if(t.include_screenshot===!0&&t.image_mode==="omit")throw new Error("get_window_state include_screenshot=true conflicts with image_mode=omit.");let n=i!=="omit";return new B(await e.getWindowState(t.app,t.window_title_contains,t.window_id,n,t.include_tree,t.max_nodes,t.diff_since??t.diff_from_state_token)).toToolResult({imageMode:i,format:t.format??"text",screenshotOutFile:t.screenshot_out_file})}async window_click(t){let e=await this.requireWindowStateAccess(t.app,t.window_id,t.window_title_contains);if(!t.element_index&&(t.x===void 0||t.y===void 0))throw new Error("click requires either element_index or both x and y from get_window_state.");if(t.element_index&&(t.x!==void 0||t.y!==void 0))throw new Error("click accepts either element_index or coordinates, not both.");return this.getWindowStateAction(e,e.windowClick(t.app,t.element_index,t.x,t.y,t.click_count,t.mouse_button,t.window_title_contains,t.window_id,t.state_token),t)}async invoke_action(t){let e=await this.requireWindowStateAccess(t.app,t.window_id,t.window_title_contains);return this.getWindowStateAction(e,e.invokeAction(t.app,t.element_index,t.action,t.window_title_contains,t.window_id,t.state_token),t)}async scroll_window(t){let e=await this.requireWindowStateAccess(t.app,t.window_id,t.window_title_contains);return this.getWindowStateAction(e,e.windowScroll(t.app,t.element_index,t.direction,t.pages,t.window_title_contains,t.window_id,t.state_token),t)}async drag_window(t){let e=await this.requireWindowStateAccess(t.app,t.window_id,t.window_title_contains);return this.getWindowStateAction(e,e.windowDrag(t.app,t.from_x,t.from_y,t.to_x,t.to_y,t.window_title_contains,t.window_id,t.state_token),t)}async type_text(t){let e=await this.requireWindowStateAccess(t.app,t.window_id,t.window_title_contains);return this.getWindowStateAction(e,e.typeText(t.app,t.text,t.element_index,t.window_title_contains,t.window_id,t.state_token),t)}async press_key(t){let e=await 
this.requireWindowStateAccess(t.app,t.window_id,t.window_title_contains),i=t.key??t.keys;if(!i)throw new Error("press_key requires `key` (or its alias `keys`).");return this.getWindowStateAction(e,e.pressKey(t.app,i,t.window_title_contains,t.window_id,t.state_token),t)}async set_value(t){let e=await this.requireWindowStateAccess(t.app,t.window_id,t.window_title_contains);return this.getWindowStateAction(e,e.setValue(t.app,t.element_index,t.value,t.window_title_contains,t.window_id,t.state_token),t)}async screenshot(){let{computer:t,allowedAppIds:e,hostWindowId:i}=await this.requireAccess();this.lock();let n=i?[i]:[];return this.captureScreenshot(t,e,n)}logScreenshotFilter(t,e){let i=r=>{let c=this.accessStore.tryGetKnownApplication(r);return c?`${c.displayName} (${r})`:r},n=r=>{let c=r.includes("|")?r.split("|")[1]:r;return i(c)},s=(r,c)=>c.length===1?`${r}: ${c[0]}`:`${r}:
|
|
36
45
|
${c.map(a=>`- ${a}`).join(`
|
|
37
|
-
`)}`,
|
|
38
|
-
`))}async captureScreenshot(t,e,i){this.logScreenshotFilter(e,i);let
|
|
46
|
+
`)}`,o=[t===null?"allowed: (all apps)":t.length?s("allowed",t.map(i)):"allowed: (no app windows)"];e.length&&o.push(s("blocked",e.map(n))),this.logEvent("screenshot filter",o.join(`
|
|
47
|
+
`))}async captureScreenshot(t,e,i){this.logScreenshotFilter(e,i);let n=await t.screenshot(this.selectedDisplay,e,i,0,0,void 0,j),s=this.toImage(n,"Screenshot failed",j);if(n){let o=await this.unsharedWindowsNote(t,e,i);this.appendUnsharedWindowsNote(s,o,!0)}return s}async addScreenshot(t,e,i,n){await new Promise(r=>setTimeout(r,ie)),this.logScreenshotFilter(i,n);let s=null;try{s=await e.screenshot(this.selectedDisplay,i,n,0,0,void 0,j)}catch{}if(!s)return t.content.push({type:"text",text:"Screenshot capture failed"}),t;t.content.push({type:"image",data:s.toString("base64"),mimeType:this.imageMimeType(j),_meta:{screenshot:!0}});let o=await this.unsharedWindowsNote(e,i,n);return this.appendUnsharedWindowsNote(t,o,!1),t}appendUnsharedWindowsNote(t,e,i){if(!e){this.lastUnsharedWindowsNote=null;return}(i||e!==this.lastUnsharedWindowsNote)&&t.content.push({type:"text",text:e}),this.lastUnsharedWindowsNote=e}async unsharedWindowsNote(t,e,i){if(e===null||!(await this.capabilities()).discovery)return null;let s=await this.activeDisplayId(t),o=new Set(e),r=new Set(i),{windows:c}=await this.filterHostWindow(t),a=c.filter(p=>!p.isMinimized&&(s===""||p.displayId===s)&&!o.has(p.applicationId)&&!r.has(p.windowId));if(a.length===0)return null;let u=a.length===1?"window":"windows";return`${a.length} other ${u} on this display ${a.length===1?"is":"are"} from an unshared app. The screenshot may not show everything that's running. Use \`list_applications\` to see what else is there if you need to.`}async cursor_position(){let e=await(await this.computer()).cursorPosition(this.selectedDisplay);return this.toText(`${e.x},${e.y}`)}async _click(t,e,i,n,s,o,r){let c;switch(o){case 1:c="";break;case 2:c="double ";break;case 3:c="triple ";break;default:throw new Error(`Invalid click count '${o}'. 
Expected 1, 2, or 3.`)}let a=r??await t.cursorPosition(n);await this.prepareForInput(t,e,{x:a.x,y:a.y,blockedWindowIds:i}),await this.validatePointerInput(t,e,a),r&&await t.move(a.x,a.y,n),await t.click(a.x,a.y,s,o,n);let u=r?` at (${r.x},${r.y})`:"",p=s==="left"?"":`${s} `;return`${c}${p}click${u}`}async click({coordinate:t,button:e,count:i}){let{computer:n,allowedAppIds:s,hostWindowId:o}=await this.requireAccess();this.lock();let r=o?[o]:[],c=await this._click(n,s,r,this.selectedDisplay,e??"left",i??1,t);return this.addScreenshot(this.toText(c),n,s,r)}async mouse_move({coordinate:t}){let{computer:e,allowedAppIds:i,hostWindowId:n}=await this.requireAccess();this.lock();let s=n?[n]:[];return await this.prepareForInput(e,i,{x:t.x,y:t.y,blockedWindowIds:s}),await this.validatePointerInput(e,i,t),await e.move(t.x,t.y,this.selectedDisplay),this.addScreenshot(this.toText(`Moved to (${t.x},${t.y})`),e,i,s)}async left_click_drag({start_coordinate:t,coordinate:e}){let{computer:i,allowedAppIds:n,hostWindowId:s}=await this.requireAccess();this.lock();let o=s?[s]:[];return await this.prepareForInput(i,n,{x:t.x,y:t.y,blockedWindowIds:o}),await this.prepareForInput(i,n,{x:e.x,y:e.y,blockedWindowIds:o}),await this.validatePointerInput(i,n,t),await this.validatePointerInput(i,n,e),await i.drag(t.x,t.y,e.x,e.y,this.selectedDisplay),this.addScreenshot(this.toText(`Dragged (${t.x},${t.y}) -> (${e.x},${e.y})`),i,n,o)}async left_mouse_down(){let{computer:t,allowedAppIds:e,hostWindowId:i}=await this.requireAccess();this.lock();let n=i?[i]:[],s=await t.cursorPosition(this.selectedDisplay);return await this.prepareForInput(t,e,{x:s.x,y:s.y,blockedWindowIds:n}),await this.validatePointerInput(t,e,s),await t.mouseDown(s.x,s.y,this.selectedDisplay),this.addScreenshot(this.toText(`Mouse down at (${s.x},${s.y})`),t,e,n)}async left_mouse_up(){let{computer:t,allowedAppIds:e,hostWindowId:i}=await this.requireAccess();this.lock();let n=i?[i]:[],s=await 
t.cursorPosition(this.selectedDisplay);return await this.prepareForInput(t,e,{x:s.x,y:s.y,blockedWindowIds:n}),await this.validatePointerInput(t,e,s),await t.mouseUp(s.x,s.y,this.selectedDisplay),this.addScreenshot(this.toText(`Mouse up at (${s.x},${s.y})`),t,e,n)}async type({text:t}){let{computer:e,allowedAppIds:i,hostWindowId:n}=await this.requireAccess();this.lock();let s=n?[n]:[];return await this.prepareForInput(e,i),await this.validateActiveWindow(e,i),await e.type(t),this.addScreenshot(this.toText(`Typed ${t.length} chars`),e,i,s)}async key({text:t}){let{computer:e,allowedAppIds:i,hostWindowId:n}=await this.requireAccess();this.lock();let s=n?[n]:[];await this.prepareForInput(e,i),await this.validateActiveWindow(e,i);let o=rt(t);return await e.key(o),this.addScreenshot(this.toText(`Pressed ${o}`),e,i,s)}async scroll({scroll_direction:t,coordinate:e,scroll_amount:i}){let{computer:n,allowedAppIds:s,hostWindowId:o}=await this.requireAccess();this.lock();let r=o?[o]:[],c=i??3,a=await n.cursorPosition(this.selectedDisplay),u=e?.x??a.x,p=e?.y??a.y;await this.prepareForInput(n,s,{x:u,y:p,blockedWindowIds:r}),await this.validatePointerInput(n,s,{x:u,y:p});let h=t==="left"?-c:t==="right"?c:0,m=t==="down"?c:t==="up"?-c:0;return await n.scroll(u,p,h,m,this.selectedDisplay),this.addScreenshot(this.toText(`Scrolled ${t} ${c} at (${u},${p})`),n,s,r)}async wait({duration:t}){this.logEvent("wait",`duration=${String(t)}s`),await new Promise(r=>setTimeout(r,t*1e3));let e=this.toText(`Waited ${t}s`);if(!this.accessStore.getState().accessActive)return e;let{computer:i,allowedAppIds:n,hostWindowId:s}=await this.requireAccess(),o=s?[s]:[];return this.addScreenshot(e,i,n,o)}async get_clipboard(){let{computer:t}=await this.requireAccess();this.lock();let e=await t.getClipboard();return this.toText(e)}async set_clipboard({text:t}){let{computer:e,allowedAppIds:i,hostWindowId:n}=await this.requireAccess();this.lock();let s=n?[n]:[];return await 
e.setClipboard(t),this.addScreenshot(this.toText(`Clipboard set (${t.length} chars)`),e,i,s)}async zoom({region:t}){let{computer:e,allowedAppIds:i,hostWindowId:n}=await this.requireAccess();this.lock();let s=n?[n]:[],o=await e.screenshot(this.selectedDisplay,i,s,0,0,[t.x1,t.y1,t.x2,t.y2],j);return this.toImage(o,"Zoom screenshot failed",j)}async batch({actions:t}){let{computer:e,allowedAppIds:i,hostWindowId:n}=await this.requireAccess();this.lock(),this.logEvent("batch",`${t.length} actions: ${t.map(c=>c.action).join(", ")}`);let s=this.selectedDisplay,o=n?[n]:[],r=[];for(let c=0;c<t.length;c++){let a=t[c];try{switch(a.action){case"click":{let u=await this._click(e,i,o,s,a.button??"left",a.count??1,a.coordinate);r.push(`[${c}] ${u}`);break}case"mouse_move":await this.prepareForInput(e,i,{x:a.coordinate.x,y:a.coordinate.y,blockedWindowIds:o}),await this.validatePointerInput(e,i,a.coordinate),await e.move(a.coordinate.x,a.coordinate.y,s),r.push(`[${c}] moved to (${a.coordinate.x},${a.coordinate.y})`);break;case"left_click_drag":await this.prepareForInput(e,i,{x:a.start_coordinate.x,y:a.start_coordinate.y,blockedWindowIds:o}),await this.prepareForInput(e,i,{x:a.coordinate.x,y:a.coordinate.y,blockedWindowIds:o}),await this.validatePointerInput(e,i,a.start_coordinate),await this.validatePointerInput(e,i,a.coordinate),await e.drag(a.start_coordinate.x,a.start_coordinate.y,a.coordinate.x,a.coordinate.y,s),r.push(`[${c}] dragged (${a.start_coordinate.x},${a.start_coordinate.y}) -> (${a.coordinate.x},${a.coordinate.y})`);break;case"left_mouse_down":{let u=await e.cursorPosition(s);await this.prepareForInput(e,i,{x:u.x,y:u.y,blockedWindowIds:o}),await this.validatePointerInput(e,i,u),await e.mouseDown(u.x,u.y,s),r.push(`[${c}] mouse down at (${u.x},${u.y})`);break}case"left_mouse_up":{let u=await e.cursorPosition(s);await this.prepareForInput(e,i,{x:u.x,y:u.y,blockedWindowIds:o}),await this.validatePointerInput(e,i,u),await e.mouseUp(u.x,u.y,s),r.push(`[${c}] mouse up at 
(${u.x},${u.y})`);break}case"type":await this.prepareForInput(e,i),await this.validateActiveWindow(e,i),await e.type(a.text),r.push(`[${c}] typed ${a.text.length} chars`);break;case"key":{await this.prepareForInput(e,i),await this.validateActiveWindow(e,i);let u=rt(a.text);await e.key(u),r.push(`[${c}] key ${a.text}`);break}case"scroll":{let u=a.scroll_amount??3,p=a.coordinate??await e.cursorPosition(s);await this.prepareForInput(e,i,{x:p.x,y:p.y,blockedWindowIds:o}),await this.validatePointerInput(e,i,p);let h=a.scroll_direction==="left"?-u:a.scroll_direction==="right"?u:0,m=a.scroll_direction==="down"?u:a.scroll_direction==="up"?-u:0;await e.scroll(p.x,p.y,h,m,s),r.push(`[${c}] scroll ${a.scroll_direction}${a.coordinate?` at (${p.x},${p.y})`:""}`);break}case"set_clipboard":await e.setClipboard(a.text),r.push(`[${c}] clipboard set`);break}}catch(u){return r.push(`[${c}] ${a.action}: FAILED - ${u instanceof Error?u.message:String(u)}`),this.toText(r.join(`
|
|
39
48
|
`),!0)}}return this.addScreenshot(this.toText(r.join(`
|
|
40
|
-
`)),e,i,
|
|
41
|
-
`)
|
|
42
|
-
`)),process.exit(s==="-h"||s==="--help"?0:1));let t=vt({yolo:d});async function e(){await t.close(),process.exit(0)}process.on("SIGINT",()=>{e()}),process.on("SIGTERM",()=>{e()});let i=new Xt;await t.connect(i)}Kt().catch(d=>{console.error(d),process.exit(1)});
|
|
49
|
+
`)),e,i,o)}};function Nt(l={}){return process.env.COPILOT_COMPUTER_USE_WINDOW_STATE&&(l.window_state=!0),process.env.COPILOT_COMPUTER_USE_YOLO&&(l.yolo=!0),new et(l).create()}async function Ne(){let l=!1;for(let n of process.argv.slice(2))n==="--yolo"?l=!0:(console.log(["Usage: computer-use-mcp [--yolo]","","Options:"," --yolo Auto-allow full desktop access when request_access is called."," -h, --help Show this help message."].join(`
|
|
50
|
+
`)),process.exit(n==="-h"||n==="--help"?0:1));let t=Nt({yolo:l});async function e(){await t.close(),process.exit(0)}process.on("SIGINT",()=>{e()}),process.on("SIGTERM",()=>{e()});let i=new Re;await t.connect(i)}Ne().catch(l=>{console.error(l),process.exit(1)});
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@github/computer-use-mcp",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.30",
|
|
4
4
|
"description": "Computer Use MCP Server",
|
|
5
5
|
"author": "GitHub",
|
|
6
6
|
"repository": {
|
|
@@ -24,14 +24,16 @@
|
|
|
24
24
|
"dist/prebuilds"
|
|
25
25
|
],
|
|
26
26
|
"scripts": {
|
|
27
|
-
"
|
|
28
|
-
"
|
|
29
|
-
"
|
|
27
|
+
"clean": "node package.js clean",
|
|
28
|
+
"build": "node package.js build",
|
|
29
|
+
"start": "node package.js start",
|
|
30
|
+
"serve": "npm run build:js && node dist/main.js",
|
|
31
|
+
"build:js": "esbuild src/index.ts src/main.ts --bundle --minify --legal-comments=none --format=esm --platform=node --packages=external --outdir=dist && dts-bundle-generator src/index.ts -o dist/index.d.ts --no-banner --export-referenced-types false",
|
|
30
32
|
"build:linux": "cd computer/linux && bash build.sh",
|
|
31
33
|
"build:mac": "cd computer/mac && bash build.sh",
|
|
32
34
|
"build:win": "cd computer/win && powershell -ExecutionPolicy Bypass -File build.ps1",
|
|
33
|
-
"lint": "
|
|
34
|
-
"test": "
|
|
35
|
+
"lint": "node package.js lint",
|
|
36
|
+
"test": "node package.js test",
|
|
35
37
|
"publish:patch": "npm version patch -m \"v%s\" && git push && git push --tags",
|
|
36
38
|
"publish:minor": "npm version minor -m \"v%s\" && git push && git push --tags",
|
|
37
39
|
"publish:major": "npm version major -m \"v%s\" && git push && git push --tags"
|