polarisagi-computer-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +310 -0
- package/package.json +24 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
"use strict";
|
|
3
|
+
// TypeScript-emitted CommonJS interop helpers. Each one reuses an existing
// implementation found on `this` when there is one, otherwise it defines its own.

/**
 * Re-export property `key` of `source` on `target` under the name `alias`
 * (defaults to `key`). When property descriptors are available the export is
 * installed as an enumerable getter so later changes to `source[key]` stay visible.
 */
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        if (alias === undefined) {
            alias = key;
        }
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        // Reuse the source descriptor only when it is an accessor on a real ES
        // module, or a data property that is neither writable nor configurable.
        var needsGetter = !descriptor ||
            ("get" in descriptor ? !source.__esModule : descriptor.writable || descriptor.configurable);
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, alias, descriptor);
    }
    : function (target, source, key, alias) {
        if (alias === undefined) {
            alias = key;
        }
        target[alias] = source[key];
    });

/** Attach `value` to `target` as its enumerable `default` export. */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        Object.defineProperty(target, "default", { enumerable: true, value: value });
    }
    : function (target, value) {
        target["default"] = value;
    });

/**
 * Emulate `import * as ns from "mod"` for a CommonJS module: ES modules are
 * returned untouched; anything else is wrapped in a namespace object that
 * re-exports every own property except `default` and exposes the module
 * itself as `default`.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Picks the key-listing strategy on first use, then replaces itself with it.
    var listOwnKeys = function (obj) {
        listOwnKeys = Object.getOwnPropertyNames || function (o) {
            var names = [];
            for (var name in o) {
                if (Object.prototype.hasOwnProperty.call(o, name)) {
                    names[names.length] = name;
                }
            }
            return names;
        };
        return listOwnKeys(obj);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var i = 0; i < keys.length; i++) {
                if (keys[i] !== "default") {
                    __createBinding(namespace, mod, keys[i]);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
|
|
36
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
37
|
+
const readline = __importStar(require("readline"));
|
|
38
|
+
const nut_js_1 = require("@nut-tree-fork/nut-js");
|
|
39
|
+
const child_process_1 = require("child_process");
|
|
40
|
+
const util_1 = require("util");
|
|
41
|
+
const fs = __importStar(require("fs/promises"));
|
|
42
|
+
const os = __importStar(require("os"));
|
|
43
|
+
const path = __importStar(require("path"));
|
|
44
|
+
const execFileAsync = (0, util_1.promisify)(child_process_1.execFile);
|
|
45
|
+
/**
 * Capture the screen with a platform-specific command-line tool and return
 * the image as a base64-encoded PNG string.
 *
 * - macOS: `screencapture -x`
 * - Windows: a PowerShell script that copies the virtual screen into a bitmap
 * - anything else: `scrot`, falling back to ImageMagick's `import`
 *
 * The image is written into a freshly created private temporary directory
 * (so overlapping requests cannot collide on the file name) which is removed
 * again before the function returns or throws.
 *
 * @returns {Promise<string>} base64 contents of the captured PNG file
 * @throws {Error} when the capture command fails or the file cannot be read
 */
async function captureScreen() {
    const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'screenshot-'));
    const tmpPath = path.join(tmpDir, 'screen.png');
    try {
        if (process.platform === 'darwin') {
            await execFileAsync('screencapture', ['-x', tmpPath]);
        }
        else if (process.platform === 'win32') {
            // Inside a single-quoted PowerShell literal the only special
            // character is the quote itself, escaped by doubling it.
            const psPath = tmpPath.replace(/'/g, "''");
            const psScript = `
                Add-Type -AssemblyName System.Windows.Forms;
                Add-Type -AssemblyName System.Drawing;
                $Screen = [System.Windows.Forms.SystemInformation]::VirtualScreen;
                $Width = $Screen.Width; $Height = $Screen.Height;
                $Left = $Screen.Left; $Top = $Screen.Top;
                $bitmap = New-Object System.Drawing.Bitmap $Width, $Height;
                $graphic = [System.Drawing.Graphics]::FromImage($bitmap);
                $graphic.CopyFromScreen($Left, $Top, 0, 0, $bitmap.Size);
                $bitmap.Save('${psPath}');
            `;
            await execFileAsync('powershell', ['-NoProfile', '-Command', psScript]);
        }
        else {
            try {
                // Try scrot first
                await execFileAsync('scrot', [tmpPath]);
            }
            catch (scrotErr) {
                try {
                    // Fallback to ImageMagick (import)
                    await execFileAsync('import', ['-window', 'root', tmpPath]);
                }
                catch (importErr) {
                    // Report both failures so the caller can see which tools were tried.
                    throw new Error(`Screen capture failed. scrot: ${scrotErr.message}; import: ${importErr.message}`);
                }
            }
        }
        const buffer = await fs.readFile(tmpPath);
        return buffer.toString('base64');
    }
    finally {
        try {
            await fs.rm(tmpDir, { recursive: true, force: true });
        }
        catch {
            // Best-effort cleanup: a leftover temp directory must not mask
            // the capture result or the original error.
        }
    }
}
|
|
87
|
+
/**
 * Line reader over stdin. `terminal: false` makes readline treat the streams
 * as plain pipes rather than an interactive TTY.
 */
const rl = readline.createInterface({
    terminal: false,
    output: process.stdout,
    input: process.stdin
});
|
|
92
|
+
/**
 * Print a JSON-RPC 2.0 success response as one line on stdout.
 * Nothing is printed when `id` is undefined or null.
 *
 * @param {string|number|null|undefined} id - id copied from the request
 * @param {*} result - JSON-serialisable result payload
 */
function sendResult(id, result) {
    // `== null` is true for both undefined and null.
    if (id == null) {
        return;
    }
    const payload = JSON.stringify({ jsonrpc: "2.0", id, result });
    console.log(payload);
}
|
|
102
|
+
/**
 * Print a JSON-RPC 2.0 error response as one line on stdout.
 *
 * @param {string|number|null|undefined} id - id of the failing request;
 *   undefined is sent as null
 * @param {number} code - JSON-RPC error code
 * @param {string} message - human-readable error description
 */
function sendError(id, code, message) {
    const resp = {
        jsonrpc: "2.0",
        // JSON.stringify omits members whose value is undefined, which would
        // yield a response with no `id` at all; send an explicit null instead.
        id: id ?? null,
        error: { code, message }
    };
    console.log(JSON.stringify(resp));
}
|
|
110
|
+
/**
 * Request loop: every non-blank stdin line is parsed as one JSON-RPC message
 * and dispatched on its `method`:
 *
 * - `initialize`   -> protocol version, capabilities and server info
 * - `prompts/list` -> the single `computer_use_guidelines` prompt
 * - `prompts/get`  -> the guideline text, or -32602 for an unknown prompt
 * - `tools/list`   -> the single `computer` tool and its input schema
 * - `tools/call`   -> runs handleComputerUse, or -32601 for an unknown tool
 * - anything else  -> -32601, but only when the message carries an `id`
 *
 * Unparseable lines get -32700; JSON that is not an object gets -32600.
 * Exceptions thrown while handling a request are reported as -32603.
 */
rl.on('line', async (line) => {
    if (!line.trim())
        return;
    let req;
    try {
        req = JSON.parse(line);
    }
    catch (e) {
        sendError(null, -32700, "Parse error: " + e);
        return;
    }
    // JSON.parse can legitimately return null or a primitive. Reading `.id`
    // from null would throw outside the try block below and surface as an
    // unhandled promise rejection, so reject such input explicitly.
    if (req === null || typeof req !== "object") {
        sendError(null, -32600, "Invalid Request");
        return;
    }
    const id = req.id;
    try {
        switch (req.method) {
            case "initialize":
                sendResult(id, {
                    protocolVersion: "2024-11-05",
                    capabilities: { tools: {}, prompts: {} },
                    serverInfo: {
                        name: "polaris-computer-mcp",
                        version: "0.1.0"
                    }
                });
                break;
            case "prompts/list":
                sendResult(id, {
                    prompts: [{
                            name: "computer_use_guidelines",
                            description: "Standard system guidelines for AI agents using the computer plugin.",
                            arguments: []
                        }]
                });
                break;
            case "prompts/get": {
                const promptName = req.params?.name;
                if (promptName === "computer_use_guidelines") {
                    sendResult(id, {
                        description: "System Prompt for Computer Use",
                        messages: [
                            {
                                role: "user",
                                content: {
                                    type: "text",
                                    text: "You have permission to operate a real computer. Please strictly follow these visual recognition and operation guidelines:\n\n1. Visual Feedback Loop: Before executing any click (`left_click`) or input (`type`/`paste`), you MUST call `screenshot` to get the current screen state. Analyze the screenshot carefully to find the absolute coordinates `[x, y]` of the target UI element before clicking.\n\n2. Handling UI States: UI elements may have loading animations or network delays. After interacting with an element (e.g. clicking 'search'), do not assume the action completed instantly. You MUST call `screenshot` again to verify the new UI state (like checking if a dropdown menu appeared) before proceeding.\n\n3. Text Input Rules: When inputting Chinese characters or long text, you MUST prioritize the `paste` action. This bypasses input method editor (IME) interference and prevents text truncation. Only use `type` or `key` for pure English characters or shortcuts.\n\n4. Error Recovery: If the screenshot does not match your expectations (e.g. click failed or text was typed in the wrong place), analyze the screen, use `mouse_move` to move the focus away, or send `key: escape` to cancel the current state, and retry."
                                }
                            }
                        ]
                    });
                }
                else {
                    sendError(id, -32602, "Prompt not found");
                }
                break;
            }
            case "tools/list":
                sendResult(id, {
                    tools: [{
                            name: "computer",
                            description: "Execute computer actions like clicking, typing, and taking screenshots.",
                            inputSchema: {
                                type: "object",
                                properties: {
                                    action: {
                                        type: "string",
                                        enum: ["screenshot", "left_click", "right_click", "middle_click", "double_click", "mouse_move", "left_click_drag", "cursor_position", "type", "key", "paste"],
                                        description: "The action to perform. left_click_drag: press at current position, move to coordinate, release. paste: copy text to clipboard and simulate Cmd+V/Ctrl+V."
                                    },
                                    coordinate: {
                                        type: "array",
                                        items: { type: "number" },
                                        description: "[x, y] coordinates for mouse actions. For left_click_drag, this is the drag destination."
                                    },
                                    text: {
                                        type: "string",
                                        description: "Text to type (action=type) or key name to press (action=key)."
                                    }
                                },
                                required: ["action"]
                            }
                        }]
                });
                break;
            case "tools/call": {
                const params = req.params || {};
                if (params.name === "computer") {
                    const args = params.arguments || {};
                    const content = await handleComputerUse(args);
                    sendResult(id, { content });
                }
                else {
                    sendError(id, -32601, "Tool not found");
                }
                break;
            }
            default:
                // Messages without an id are notifications and get no reply.
                if (id !== undefined) {
                    sendError(id, -32601, "Method not found: " + req.method);
                }
        }
    }
    catch (e) {
        if (id !== undefined) {
            // A thrown value is not guaranteed to be an Error instance.
            const reason = e instanceof Error ? e.message : String(e);
            sendError(id, -32603, "Execution error: " + reason);
        }
    }
});
|
|
213
|
+
async function handleComputerUse(args) {
|
|
214
|
+
const action = args.action;
|
|
215
|
+
if (!action)
|
|
216
|
+
throw new Error("Missing action parameter");
|
|
217
|
+
let x = 0, y = 0;
|
|
218
|
+
if (Array.isArray(args.coordinate) && args.coordinate.length >= 2) {
|
|
219
|
+
x = Math.round(Number(args.coordinate[0]) || 0);
|
|
220
|
+
y = Math.round(Number(args.coordinate[1]) || 0);
|
|
221
|
+
}
|
|
222
|
+
switch (action) {
|
|
223
|
+
case "screenshot":
|
|
224
|
+
const b64 = await captureScreen();
|
|
225
|
+
return [{
|
|
226
|
+
type: "image",
|
|
227
|
+
data: b64,
|
|
228
|
+
mimeType: "image/png"
|
|
229
|
+
}];
|
|
230
|
+
case "mouse_move":
|
|
231
|
+
await nut_js_1.mouse.setPosition(new nut_js_1.Point(x, y));
|
|
232
|
+
return [{ type: "text", text: "success" }];
|
|
233
|
+
case "left_click":
|
|
234
|
+
await nut_js_1.mouse.setPosition(new nut_js_1.Point(x, y));
|
|
235
|
+
await nut_js_1.mouse.leftClick();
|
|
236
|
+
return [{ type: "text", text: "success" }];
|
|
237
|
+
case "right_click":
|
|
238
|
+
await nut_js_1.mouse.setPosition(new nut_js_1.Point(x, y));
|
|
239
|
+
await nut_js_1.mouse.rightClick();
|
|
240
|
+
return [{ type: "text", text: "success" }];
|
|
241
|
+
case "middle_click":
|
|
242
|
+
await nut_js_1.mouse.setPosition(new nut_js_1.Point(x, y));
|
|
243
|
+
await nut_js_1.mouse.pressButton(nut_js_1.Button.MIDDLE);
|
|
244
|
+
await nut_js_1.mouse.releaseButton(nut_js_1.Button.MIDDLE);
|
|
245
|
+
return [{ type: "text", text: "success" }];
|
|
246
|
+
case "double_click":
|
|
247
|
+
await nut_js_1.mouse.setPosition(new nut_js_1.Point(x, y));
|
|
248
|
+
await nut_js_1.mouse.leftClick();
|
|
249
|
+
await nut_js_1.mouse.leftClick();
|
|
250
|
+
return [{ type: "text", text: "success" }];
|
|
251
|
+
case "left_click_drag":
|
|
252
|
+
await nut_js_1.mouse.pressButton(nut_js_1.Button.LEFT);
|
|
253
|
+
await nut_js_1.mouse.setPosition(new nut_js_1.Point(x, y));
|
|
254
|
+
await nut_js_1.mouse.releaseButton(nut_js_1.Button.LEFT);
|
|
255
|
+
return [{ type: "text", text: "success" }];
|
|
256
|
+
case "cursor_position":
|
|
257
|
+
const pos = await nut_js_1.mouse.getPosition();
|
|
258
|
+
return [{ type: "text", text: `X: ${pos.x}, Y: ${pos.y}` }];
|
|
259
|
+
case "type":
|
|
260
|
+
if (!args.text)
|
|
261
|
+
throw new Error("Missing text parameter");
|
|
262
|
+
await nut_js_1.keyboard.type(args.text);
|
|
263
|
+
return [{ type: "text", text: "success" }];
|
|
264
|
+
case "paste":
|
|
265
|
+
if (!args.text)
|
|
266
|
+
throw new Error("Missing text parameter");
|
|
267
|
+
await nut_js_1.clipboard.setContent(args.text);
|
|
268
|
+
// wait a tiny bit to ensure clipboard is synced
|
|
269
|
+
await new Promise(r => setTimeout(r, 200));
|
|
270
|
+
if (process.platform === 'darwin') {
|
|
271
|
+
await nut_js_1.keyboard.pressKey(nut_js_1.Key.LeftSuper, nut_js_1.Key.V);
|
|
272
|
+
await nut_js_1.keyboard.releaseKey(nut_js_1.Key.LeftSuper, nut_js_1.Key.V);
|
|
273
|
+
}
|
|
274
|
+
else {
|
|
275
|
+
await nut_js_1.keyboard.pressKey(nut_js_1.Key.LeftControl, nut_js_1.Key.V);
|
|
276
|
+
await nut_js_1.keyboard.releaseKey(nut_js_1.Key.LeftControl, nut_js_1.Key.V);
|
|
277
|
+
}
|
|
278
|
+
return [{ type: "text", text: "success" }];
|
|
279
|
+
case "key":
|
|
280
|
+
if (!args.text)
|
|
281
|
+
throw new Error("Missing text parameter");
|
|
282
|
+
const keyText = args.text.toLowerCase();
|
|
283
|
+
const keyMap = {
|
|
284
|
+
"return": nut_js_1.Key.Enter, "enter": nut_js_1.Key.Enter,
|
|
285
|
+
"escape": nut_js_1.Key.Escape, "esc": nut_js_1.Key.Escape,
|
|
286
|
+
"tab": nut_js_1.Key.Tab, "space": nut_js_1.Key.Space,
|
|
287
|
+
"backspace": nut_js_1.Key.Backspace, "delete": nut_js_1.Key.Delete, "del": nut_js_1.Key.Delete,
|
|
288
|
+
"up": nut_js_1.Key.Up, "down": nut_js_1.Key.Down, "left": nut_js_1.Key.Left, "right": nut_js_1.Key.Right,
|
|
289
|
+
"home": nut_js_1.Key.Home, "end": nut_js_1.Key.End,
|
|
290
|
+
"pageup": nut_js_1.Key.PageUp, "page_up": nut_js_1.Key.PageUp,
|
|
291
|
+
"pagedown": nut_js_1.Key.PageDown, "page_down": nut_js_1.Key.PageDown,
|
|
292
|
+
"ctrl": nut_js_1.Key.LeftControl, "control": nut_js_1.Key.LeftControl,
|
|
293
|
+
"alt": nut_js_1.Key.LeftAlt, "shift": nut_js_1.Key.LeftShift,
|
|
294
|
+
"meta": nut_js_1.Key.LeftSuper, "command": nut_js_1.Key.LeftSuper, "win": nut_js_1.Key.LeftSuper
|
|
295
|
+
};
|
|
296
|
+
let nutKey = keyMap[keyText];
|
|
297
|
+
if (!nutKey && keyText.length === 1) {
|
|
298
|
+
await nut_js_1.keyboard.type(keyText);
|
|
299
|
+
return [{ type: "text", text: "success" }];
|
|
300
|
+
}
|
|
301
|
+
else if (!nutKey) {
|
|
302
|
+
throw new Error(`Unsupported key: ${args.text}`);
|
|
303
|
+
}
|
|
304
|
+
await nut_js_1.keyboard.pressKey(nutKey);
|
|
305
|
+
await nut_js_1.keyboard.releaseKey(nutKey);
|
|
306
|
+
return [{ type: "text", text: "success" }];
|
|
307
|
+
default:
|
|
308
|
+
throw new Error(`Action '${action}' is not fully implemented yet`);
|
|
309
|
+
}
|
|
310
|
+
}
|
package/package.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "polarisagi-computer-mcp",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "TypeScript MCP server for computer automation",
|
|
5
|
+
"files": [
|
|
6
|
+
"dist"
|
|
7
|
+
],
|
|
8
|
+
"main": "dist/index.js",
|
|
9
|
+
"bin": {
|
|
10
|
+
"polarisagi-computer-mcp": "dist/index.js"
|
|
11
|
+
},
|
|
12
|
+
"scripts": {
|
|
13
|
+
"build": "tsc",
|
|
14
|
+
"start": "node dist/index.js"
|
|
15
|
+
},
|
|
16
|
+
"dependencies": {
|
|
17
|
+
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
18
|
+
"@nut-tree-fork/nut-js": "^4.2.6"
|
|
19
|
+
},
|
|
20
|
+
"devDependencies": {
|
|
21
|
+
"@types/node": "^20.0.0",
|
|
22
|
+
"typescript": "^5.0.0"
|
|
23
|
+
}
|
|
24
|
+
}
|