screenpipe-mcp 0.5.0 → 0.6.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/dist/index.js +1 -558
  2. package/package.json +2 -2
  3. package/src/index.ts +1 -605
package/dist/index.js CHANGED
@@ -54,11 +54,6 @@ function getCurrentDateInfo() {
54
54
  }),
55
55
  };
56
56
  }
57
- // Detect OS
58
- const CURRENT_OS = process.platform;
59
- const IS_MACOS = CURRENT_OS === "darwin";
60
- const IS_WINDOWS = CURRENT_OS === "win32";
61
- const IS_LINUX = CURRENT_OS === "linux";
62
57
  // Parse command line arguments
63
58
  const args = process.argv.slice(2);
64
59
  let port = 3030;
@@ -148,54 +143,6 @@ const BASE_TOOLS = [
148
143
  },
149
144
  },
150
145
  },
151
- {
152
- name: "pixel-control",
153
- description: "Control mouse and keyboard at the pixel level. This is a cross-platform tool that works on all operating systems. " +
154
- "Use this to type text, press keys, move the mouse, and click buttons.",
155
- annotations: {
156
- title: "Pixel Control",
157
- destructiveHint: true,
158
- },
159
- inputSchema: {
160
- type: "object",
161
- properties: {
162
- action_type: {
163
- type: "string",
164
- enum: ["WriteText", "KeyPress", "MouseMove", "MouseClick"],
165
- description: "Type of input action to perform",
166
- },
167
- data: {
168
- oneOf: [
169
- {
170
- type: "string",
171
- description: "Text to type or key to press (for WriteText and KeyPress)",
172
- },
173
- {
174
- type: "object",
175
- properties: {
176
- x: {
177
- type: "integer",
178
- description: "X coordinate for mouse movement",
179
- },
180
- y: {
181
- type: "integer",
182
- description: "Y coordinate for mouse movement",
183
- },
184
- },
185
- description: "Coordinates for MouseMove",
186
- },
187
- {
188
- type: "string",
189
- enum: ["left", "right", "middle"],
190
- description: "Button to click for MouseClick",
191
- },
192
- ],
193
- description: "Action-specific data",
194
- },
195
- },
196
- required: ["action_type", "data"],
197
- },
198
- },
199
146
  {
200
147
  name: "export-video",
201
148
  description: "Export a video of screen recordings for a specific time range. " +
@@ -231,229 +178,9 @@ const BASE_TOOLS = [
231
178
  },
232
179
  },
233
180
  ];
234
- const MACOS_TOOLS = [
235
- {
236
- name: "find-elements",
237
- description: "Find UI elements with a specific role in an application. " +
238
- "This tool is especially useful for identifying interactive elements. " +
239
- "\n\nMacOS Accessibility Roles Guide:\n" +
240
- "- Basic roles: 'button', 'textfield', 'checkbox', 'menu', 'list'\n" +
241
- "- MacOS specific roles: 'AXButton', 'AXTextField', 'AXCheckBox', 'AXMenu', etc.\n" +
242
- "- Text inputs can be: 'AXTextField', 'AXTextArea', 'AXComboBox', 'AXSearchField'\n" +
243
- "- Clickable items: 'AXButton', 'AXMenuItem', 'AXMenuBarItem', 'AXImage', 'AXStaticText'\n" +
244
- "- Web content may use: 'AXWebArea', 'AXLink', 'AXHeading', 'AXRadioButton'\n\n" +
245
- "Use MacOS Accessibility Inspector app to identify the exact roles in your target application.",
246
- annotations: {
247
- title: "Find Elements",
248
- readOnlyHint: true,
249
- },
250
- inputSchema: {
251
- type: "object",
252
- properties: {
253
- app: {
254
- type: "string",
255
- description: "The name of the application (e.g., 'Chrome', 'Finder', 'Terminal')",
256
- },
257
- window: {
258
- type: "string",
259
- description: "The window name or title (optional)",
260
- },
261
- role: {
262
- type: "string",
263
- description: "The role to search for (e.g., 'button', 'textfield', 'AXButton', 'AXTextField'). For best results, use MacOS AX prefixed roles.",
264
- },
265
- max_results: {
266
- type: "integer",
267
- description: "Maximum number of elements to return",
268
- default: 10,
269
- },
270
- max_depth: {
271
- type: "integer",
272
- description: "Maximum depth of element tree to search",
273
- },
274
- use_background_apps: {
275
- type: "boolean",
276
- description: "Whether to look in background apps",
277
- default: true,
278
- },
279
- activate_app: {
280
- type: "boolean",
281
- description: "Whether to activate the app before searching",
282
- default: true,
283
- },
284
- },
285
- required: ["app", "role"],
286
- },
287
- },
288
- {
289
- name: "click-element",
290
- description: "Click an element in an application using its id (MacOS only)",
291
- annotations: {
292
- title: "Click Element",
293
- destructiveHint: true,
294
- },
295
- inputSchema: {
296
- type: "object",
297
- properties: {
298
- app: {
299
- type: "string",
300
- description: "The name of the application",
301
- },
302
- window: {
303
- type: "string",
304
- description: "The window name (optional)",
305
- },
306
- id: {
307
- type: "string",
308
- description: "The id of the element to click",
309
- },
310
- use_background_apps: {
311
- type: "boolean",
312
- description: "Whether to look in background apps",
313
- default: true,
314
- },
315
- activate_app: {
316
- type: "boolean",
317
- description: "Whether to activate the app before clicking",
318
- default: true,
319
- },
320
- },
321
- required: ["app", "id"],
322
- },
323
- },
324
- {
325
- name: "fill-element",
326
- description: "Type text into an element in an application (MacOS only)",
327
- annotations: {
328
- title: "Fill Element",
329
- destructiveHint: true,
330
- },
331
- inputSchema: {
332
- type: "object",
333
- properties: {
334
- app: {
335
- type: "string",
336
- description: "The name of the application",
337
- },
338
- window: {
339
- type: "string",
340
- description: "The window name (optional)",
341
- },
342
- id: {
343
- type: "string",
344
- description: "The id of the element to fill",
345
- },
346
- text: {
347
- type: "string",
348
- description: "The text to type into the element",
349
- },
350
- use_background_apps: {
351
- type: "boolean",
352
- description: "Whether to look in background apps",
353
- default: true,
354
- },
355
- activate_app: {
356
- type: "boolean",
357
- description: "Whether to activate the app before typing",
358
- default: true,
359
- },
360
- },
361
- required: ["app", "id", "text"],
362
- },
363
- },
364
- {
365
- name: "scroll-element",
366
- description: "Scroll an element in a specific direction (MacOS only)",
367
- annotations: {
368
- title: "Scroll Element",
369
- destructiveHint: true,
370
- },
371
- inputSchema: {
372
- type: "object",
373
- properties: {
374
- app: {
375
- type: "string",
376
- description: "The name of the application",
377
- },
378
- window: {
379
- type: "string",
380
- description: "The window name (optional)",
381
- },
382
- id: {
383
- type: "string",
384
- description: "The id of the element to scroll",
385
- },
386
- direction: {
387
- type: "string",
388
- enum: ["up", "down", "left", "right"],
389
- description: "The direction to scroll",
390
- },
391
- amount: {
392
- type: "integer",
393
- description: "The amount to scroll in pixels",
394
- },
395
- use_background_apps: {
396
- type: "boolean",
397
- description: "Whether to look in background apps",
398
- default: true,
399
- },
400
- activate_app: {
401
- type: "boolean",
402
- description: "Whether to activate the app before scrolling",
403
- default: true,
404
- },
405
- },
406
- required: ["app", "id", "direction", "amount"],
407
- },
408
- },
409
- {
410
- name: "open-application",
411
- description: "Open an application by name",
412
- annotations: {
413
- title: "Open Application",
414
- destructiveHint: true,
415
- },
416
- inputSchema: {
417
- type: "object",
418
- properties: {
419
- app_name: {
420
- type: "string",
421
- description: "The name of the application to open",
422
- },
423
- },
424
- required: ["app_name"],
425
- },
426
- },
427
- {
428
- name: "open-url",
429
- description: "Open a URL in a browser",
430
- annotations: {
431
- title: "Open URL",
432
- destructiveHint: true,
433
- },
434
- inputSchema: {
435
- type: "object",
436
- properties: {
437
- url: {
438
- type: "string",
439
- description: "The URL to open",
440
- },
441
- browser: {
442
- type: "string",
443
- description: "The browser to use (optional)",
444
- },
445
- },
446
- required: ["url"],
447
- },
448
- },
449
- ];
450
181
  // List tools handler
451
182
  server.setRequestHandler(types_js_1.ListToolsRequestSchema, async () => {
452
- const tools = [...BASE_TOOLS];
453
- if (IS_MACOS) {
454
- tools.push(...MACOS_TOOLS);
455
- }
456
- return { tools };
183
+ return { tools: BASE_TOOLS };
457
184
  });
458
185
  // MCP Resources - provide dynamic context data
459
186
  const RESOURCES = [
@@ -731,25 +458,6 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
731
458
  if (!args) {
732
459
  throw new Error("Missing arguments");
733
460
  }
734
- // Check if the tool is MacOS-only and we're not on MacOS
735
- const macosOnlyTools = [
736
- "click-element",
737
- "fill-element",
738
- "find-elements",
739
- "scroll-element",
740
- "open-application",
741
- "open-url",
742
- ];
743
- if (macosOnlyTools.includes(name) && !IS_MACOS) {
744
- return {
745
- content: [
746
- {
747
- type: "text",
748
- text: `The '${name}' tool is only available on MacOS. Current platform: ${CURRENT_OS}`,
749
- },
750
- ],
751
- };
752
- }
753
461
  try {
754
462
  switch (name) {
755
463
  case "search-content": {
@@ -821,47 +529,6 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
821
529
  }
822
530
  return { content: contentItems };
823
531
  }
824
- case "pixel-control": {
825
- const action = {
826
- type: args.action_type,
827
- data: args.data,
828
- };
829
- const response = await fetchAPI("/experimental/operator/pixel", {
830
- method: "POST",
831
- body: JSON.stringify({ action }),
832
- });
833
- if (!response.ok) {
834
- throw new Error(`HTTP error: ${response.status}`);
835
- }
836
- const data = await response.json();
837
- if (!data.success) {
838
- return {
839
- content: [
840
- {
841
- type: "text",
842
- text: `Failed to perform input control: ${data.error || "unknown error"}`,
843
- },
844
- ],
845
- };
846
- }
847
- let resultText = "Successfully performed input control action";
848
- if (args.action_type === "WriteText") {
849
- resultText = `Successfully typed text: '${args.data}'`;
850
- }
851
- else if (args.action_type === "KeyPress") {
852
- resultText = `Successfully pressed key: '${args.data}'`;
853
- }
854
- else if (args.action_type === "MouseMove") {
855
- const coords = args.data;
856
- resultText = `Successfully moved mouse to coordinates: x=${coords.x}, y=${coords.y}`;
857
- }
858
- else if (args.action_type === "MouseClick") {
859
- resultText = `Successfully clicked ${args.data} mouse button`;
860
- }
861
- return {
862
- content: [{ type: "text", text: resultText }],
863
- };
864
- }
865
532
  case "export-video": {
866
533
  const startTime = args.start_time;
867
534
  const endTime = args.end_time;
@@ -1007,230 +674,6 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1007
674
  };
1008
675
  }
1009
676
  }
1010
- case "click-element": {
1011
- const selector = {
1012
- app_name: args.app,
1013
- window_name: args.window,
1014
- locator: `#${args.id}`,
1015
- use_background_apps: args.use_background_apps ?? true,
1016
- activate_app: args.activate_app ?? true,
1017
- };
1018
- const response = await fetchAPI("/experimental/operator/click", {
1019
- method: "POST",
1020
- body: JSON.stringify({ selector }),
1021
- });
1022
- if (!response.ok) {
1023
- throw new Error(`HTTP error: ${response.status}`);
1024
- }
1025
- const data = await response.json();
1026
- if (!data.success) {
1027
- return {
1028
- content: [
1029
- {
1030
- type: "text",
1031
- text: `Failed to click element: ${data.error || "unknown error"}`,
1032
- },
1033
- ],
1034
- };
1035
- }
1036
- const result = data.result || {};
1037
- const method = result.method || "unknown";
1038
- const details = result.details || "click operation completed";
1039
- return {
1040
- content: [
1041
- {
1042
- type: "text",
1043
- text: `Successfully clicked element using ${method}. ${details}`,
1044
- },
1045
- ],
1046
- };
1047
- }
1048
- case "fill-element": {
1049
- const selector = {
1050
- app_name: args.app,
1051
- window_name: args.window,
1052
- locator: `#${args.id}`,
1053
- use_background_apps: args.use_background_apps ?? true,
1054
- activate_app: args.activate_app ?? true,
1055
- };
1056
- const response = await fetchAPI("/experimental/operator/type", {
1057
- method: "POST",
1058
- body: JSON.stringify({ selector, text: args.text || "" }),
1059
- });
1060
- if (!response.ok) {
1061
- throw new Error(`HTTP error: ${response.status}`);
1062
- }
1063
- const data = await response.json();
1064
- if (!data.success) {
1065
- return {
1066
- content: [
1067
- {
1068
- type: "text",
1069
- text: `Failed to fill element: ${data.error || "unknown error"}`,
1070
- },
1071
- ],
1072
- };
1073
- }
1074
- return {
1075
- content: [
1076
- { type: "text", text: "Successfully filled element with text" },
1077
- ],
1078
- };
1079
- }
1080
- case "find-elements": {
1081
- const selector = {
1082
- app_name: args.app,
1083
- window_name: args.window,
1084
- locator: args.role || "",
1085
- use_background_apps: args.use_background_apps ?? true,
1086
- activate_app: args.activate_app ?? true,
1087
- };
1088
- const response = await fetchAPI("/experimental/operator", {
1089
- method: "POST",
1090
- body: JSON.stringify({
1091
- selector,
1092
- max_results: args.max_results || 10,
1093
- max_depth: args.max_depth,
1094
- }),
1095
- });
1096
- if (!response.ok) {
1097
- throw new Error(`HTTP error: ${response.status}`);
1098
- }
1099
- const data = await response.json();
1100
- if (!data.success) {
1101
- return {
1102
- content: [
1103
- {
1104
- type: "text",
1105
- text: `Failed to find elements: ${data.error || "unknown error"}`,
1106
- },
1107
- ],
1108
- };
1109
- }
1110
- const elements = data.data || [];
1111
- if (elements.length === 0) {
1112
- return {
1113
- content: [
1114
- {
1115
- type: "text",
1116
- text: `No elements found matching role '${args.role}' in app '${args.app}'`,
1117
- },
1118
- ],
1119
- };
1120
- }
1121
- let resultText = `Found ${elements.length} elements matching role '${args.role}' in app '${args.app}':\n\n`;
1122
- elements.forEach((element, i) => {
1123
- resultText +=
1124
- `Element ${i + 1}:\n` +
1125
- `ID: ${element.id || "N/A"}\n` +
1126
- `Role: ${element.role || "N/A"}\n` +
1127
- `Text: ${element.text || "N/A"}\n` +
1128
- `Description: ${element.description || "N/A"}\n` +
1129
- "---\n";
1130
- });
1131
- return {
1132
- content: [{ type: "text", text: resultText }],
1133
- };
1134
- }
1135
- case "scroll-element": {
1136
- const selector = {
1137
- app_name: args.app,
1138
- window_name: args.window,
1139
- locator: `#${args.id}`,
1140
- use_background_apps: args.use_background_apps ?? true,
1141
- activate_app: args.activate_app ?? true,
1142
- };
1143
- const response = await fetchAPI("/experimental/operator/scroll", {
1144
- method: "POST",
1145
- body: JSON.stringify({
1146
- selector,
1147
- direction: args.direction,
1148
- amount: args.amount,
1149
- }),
1150
- });
1151
- if (!response.ok) {
1152
- throw new Error(`HTTP error: ${response.status}`);
1153
- }
1154
- const data = await response.json();
1155
- if (!data.success) {
1156
- return {
1157
- content: [
1158
- {
1159
- type: "text",
1160
- text: `Failed to scroll element: ${data.error || "unknown error"}`,
1161
- },
1162
- ],
1163
- };
1164
- }
1165
- return {
1166
- content: [
1167
- {
1168
- type: "text",
1169
- text: `Successfully scrolled element ${args.direction} by ${args.amount} pixels`,
1170
- },
1171
- ],
1172
- };
1173
- }
1174
- case "open-application": {
1175
- const response = await fetchAPI("/experimental/operator/open-application", {
1176
- method: "POST",
1177
- body: JSON.stringify({ app_name: args.app_name || "" }),
1178
- });
1179
- if (!response.ok) {
1180
- throw new Error(`HTTP error: ${response.status}`);
1181
- }
1182
- const data = await response.json();
1183
- if (!data.success) {
1184
- return {
1185
- content: [
1186
- {
1187
- type: "text",
1188
- text: `Failed to open application: ${data.error || "unknown error"}`,
1189
- },
1190
- ],
1191
- };
1192
- }
1193
- return {
1194
- content: [
1195
- {
1196
- type: "text",
1197
- text: `Successfully opened application '${args.app_name}'`,
1198
- },
1199
- ],
1200
- };
1201
- }
1202
- case "open-url": {
1203
- const response = await fetchAPI("/experimental/operator/open-url", {
1204
- method: "POST",
1205
- body: JSON.stringify({
1206
- url: args.url || "",
1207
- browser: args.browser,
1208
- }),
1209
- });
1210
- if (!response.ok) {
1211
- throw new Error(`HTTP error: ${response.status}`);
1212
- }
1213
- const data = await response.json();
1214
- if (!data.success) {
1215
- return {
1216
- content: [
1217
- {
1218
- type: "text",
1219
- text: `Failed to open URL: ${data.error || "unknown error"}`,
1220
- },
1221
- ],
1222
- };
1223
- }
1224
- const browserInfo = args.browser ? ` using ${args.browser}` : "";
1225
- return {
1226
- content: [
1227
- {
1228
- type: "text",
1229
- text: `Successfully opened URL '${args.url}'${browserInfo}`,
1230
- },
1231
- ],
1232
- };
1233
- }
1234
677
  default:
1235
678
  throw new Error(`Unknown tool: ${name}`);
1236
679
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "screenpipe-mcp",
3
- "version": "0.5.0",
4
- "description": "MCP server for screenpipe - search your screen recordings, audio transcriptions, and control your computer",
3
+ "version": "0.6.0",
4
+ "description": "MCP server for screenpipe - search your screen recordings and audio transcriptions",
5
5
  "main": "dist/index.js",
6
6
  "bin": {
7
7
  "screenpipe-mcp": "dist/index.js"
package/src/index.ts CHANGED
@@ -30,12 +30,6 @@ function getCurrentDateInfo(): { isoDate: string; localDate: string } {
30
30
  };
31
31
  }
32
32
 
33
- // Detect OS
34
- const CURRENT_OS = process.platform;
35
- const IS_MACOS = CURRENT_OS === "darwin";
36
- const IS_WINDOWS = CURRENT_OS === "win32";
37
- const IS_LINUX = CURRENT_OS === "linux";
38
-
39
33
  // Parse command line arguments
40
34
  const args = process.argv.slice(2);
41
35
  let port = 3030;
@@ -132,56 +126,6 @@ const BASE_TOOLS: Tool[] = [
132
126
  },
133
127
  },
134
128
  },
135
- {
136
- name: "pixel-control",
137
- description:
138
- "Control mouse and keyboard at the pixel level. This is a cross-platform tool that works on all operating systems. " +
139
- "Use this to type text, press keys, move the mouse, and click buttons.",
140
- annotations: {
141
- title: "Pixel Control",
142
- destructiveHint: true,
143
- },
144
- inputSchema: {
145
- type: "object",
146
- properties: {
147
- action_type: {
148
- type: "string",
149
- enum: ["WriteText", "KeyPress", "MouseMove", "MouseClick"],
150
- description: "Type of input action to perform",
151
- },
152
- data: {
153
- oneOf: [
154
- {
155
- type: "string",
156
- description:
157
- "Text to type or key to press (for WriteText and KeyPress)",
158
- },
159
- {
160
- type: "object",
161
- properties: {
162
- x: {
163
- type: "integer",
164
- description: "X coordinate for mouse movement",
165
- },
166
- y: {
167
- type: "integer",
168
- description: "Y coordinate for mouse movement",
169
- },
170
- },
171
- description: "Coordinates for MouseMove",
172
- },
173
- {
174
- type: "string",
175
- enum: ["left", "right", "middle"],
176
- description: "Button to click for MouseClick",
177
- },
178
- ],
179
- description: "Action-specific data",
180
- },
181
- },
182
- required: ["action_type", "data"],
183
- },
184
- },
185
129
  {
186
130
  name: "export-video",
187
131
  description:
@@ -222,234 +166,9 @@ const BASE_TOOLS: Tool[] = [
222
166
  },
223
167
  ];
224
168
 
225
- const MACOS_TOOLS: Tool[] = [
226
- {
227
- name: "find-elements",
228
- description:
229
- "Find UI elements with a specific role in an application. " +
230
- "This tool is especially useful for identifying interactive elements. " +
231
- "\n\nMacOS Accessibility Roles Guide:\n" +
232
- "- Basic roles: 'button', 'textfield', 'checkbox', 'menu', 'list'\n" +
233
- "- MacOS specific roles: 'AXButton', 'AXTextField', 'AXCheckBox', 'AXMenu', etc.\n" +
234
- "- Text inputs can be: 'AXTextField', 'AXTextArea', 'AXComboBox', 'AXSearchField'\n" +
235
- "- Clickable items: 'AXButton', 'AXMenuItem', 'AXMenuBarItem', 'AXImage', 'AXStaticText'\n" +
236
- "- Web content may use: 'AXWebArea', 'AXLink', 'AXHeading', 'AXRadioButton'\n\n" +
237
- "Use MacOS Accessibility Inspector app to identify the exact roles in your target application.",
238
- annotations: {
239
- title: "Find Elements",
240
- readOnlyHint: true,
241
- },
242
- inputSchema: {
243
- type: "object",
244
- properties: {
245
- app: {
246
- type: "string",
247
- description:
248
- "The name of the application (e.g., 'Chrome', 'Finder', 'Terminal')",
249
- },
250
- window: {
251
- type: "string",
252
- description: "The window name or title (optional)",
253
- },
254
- role: {
255
- type: "string",
256
- description:
257
- "The role to search for (e.g., 'button', 'textfield', 'AXButton', 'AXTextField'). For best results, use MacOS AX prefixed roles.",
258
- },
259
- max_results: {
260
- type: "integer",
261
- description: "Maximum number of elements to return",
262
- default: 10,
263
- },
264
- max_depth: {
265
- type: "integer",
266
- description: "Maximum depth of element tree to search",
267
- },
268
- use_background_apps: {
269
- type: "boolean",
270
- description: "Whether to look in background apps",
271
- default: true,
272
- },
273
- activate_app: {
274
- type: "boolean",
275
- description: "Whether to activate the app before searching",
276
- default: true,
277
- },
278
- },
279
- required: ["app", "role"],
280
- },
281
- },
282
- {
283
- name: "click-element",
284
- description:
285
- "Click an element in an application using its id (MacOS only)",
286
- annotations: {
287
- title: "Click Element",
288
- destructiveHint: true,
289
- },
290
- inputSchema: {
291
- type: "object",
292
- properties: {
293
- app: {
294
- type: "string",
295
- description: "The name of the application",
296
- },
297
- window: {
298
- type: "string",
299
- description: "The window name (optional)",
300
- },
301
- id: {
302
- type: "string",
303
- description: "The id of the element to click",
304
- },
305
- use_background_apps: {
306
- type: "boolean",
307
- description: "Whether to look in background apps",
308
- default: true,
309
- },
310
- activate_app: {
311
- type: "boolean",
312
- description: "Whether to activate the app before clicking",
313
- default: true,
314
- },
315
- },
316
- required: ["app", "id"],
317
- },
318
- },
319
- {
320
- name: "fill-element",
321
- description: "Type text into an element in an application (MacOS only)",
322
- annotations: {
323
- title: "Fill Element",
324
- destructiveHint: true,
325
- },
326
- inputSchema: {
327
- type: "object",
328
- properties: {
329
- app: {
330
- type: "string",
331
- description: "The name of the application",
332
- },
333
- window: {
334
- type: "string",
335
- description: "The window name (optional)",
336
- },
337
- id: {
338
- type: "string",
339
- description: "The id of the element to fill",
340
- },
341
- text: {
342
- type: "string",
343
- description: "The text to type into the element",
344
- },
345
- use_background_apps: {
346
- type: "boolean",
347
- description: "Whether to look in background apps",
348
- default: true,
349
- },
350
- activate_app: {
351
- type: "boolean",
352
- description: "Whether to activate the app before typing",
353
- default: true,
354
- },
355
- },
356
- required: ["app", "id", "text"],
357
- },
358
- },
359
- {
360
- name: "scroll-element",
361
- description: "Scroll an element in a specific direction (MacOS only)",
362
- annotations: {
363
- title: "Scroll Element",
364
- destructiveHint: true,
365
- },
366
- inputSchema: {
367
- type: "object",
368
- properties: {
369
- app: {
370
- type: "string",
371
- description: "The name of the application",
372
- },
373
- window: {
374
- type: "string",
375
- description: "The window name (optional)",
376
- },
377
- id: {
378
- type: "string",
379
- description: "The id of the element to scroll",
380
- },
381
- direction: {
382
- type: "string",
383
- enum: ["up", "down", "left", "right"],
384
- description: "The direction to scroll",
385
- },
386
- amount: {
387
- type: "integer",
388
- description: "The amount to scroll in pixels",
389
- },
390
- use_background_apps: {
391
- type: "boolean",
392
- description: "Whether to look in background apps",
393
- default: true,
394
- },
395
- activate_app: {
396
- type: "boolean",
397
- description: "Whether to activate the app before scrolling",
398
- default: true,
399
- },
400
- },
401
- required: ["app", "id", "direction", "amount"],
402
- },
403
- },
404
- {
405
- name: "open-application",
406
- description: "Open an application by name",
407
- annotations: {
408
- title: "Open Application",
409
- destructiveHint: true,
410
- },
411
- inputSchema: {
412
- type: "object",
413
- properties: {
414
- app_name: {
415
- type: "string",
416
- description: "The name of the application to open",
417
- },
418
- },
419
- required: ["app_name"],
420
- },
421
- },
422
- {
423
- name: "open-url",
424
- description: "Open a URL in a browser",
425
- annotations: {
426
- title: "Open URL",
427
- destructiveHint: true,
428
- },
429
- inputSchema: {
430
- type: "object",
431
- properties: {
432
- url: {
433
- type: "string",
434
- description: "The URL to open",
435
- },
436
- browser: {
437
- type: "string",
438
- description: "The browser to use (optional)",
439
- },
440
- },
441
- required: ["url"],
442
- },
443
- },
444
- ];
445
-
446
169
  // List tools handler
447
170
  server.setRequestHandler(ListToolsRequestSchema, async () => {
448
- const tools = [...BASE_TOOLS];
449
- if (IS_MACOS) {
450
- tools.push(...MACOS_TOOLS);
451
- }
452
- return { tools };
171
+ return { tools: BASE_TOOLS };
453
172
  });
454
173
 
455
174
  // MCP Resources - provide dynamic context data
@@ -750,27 +469,6 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
750
469
  throw new Error("Missing arguments");
751
470
  }
752
471
 
753
- // Check if the tool is MacOS-only and we're not on MacOS
754
- const macosOnlyTools = [
755
- "click-element",
756
- "fill-element",
757
- "find-elements",
758
- "scroll-element",
759
- "open-application",
760
- "open-url",
761
- ];
762
-
763
- if (macosOnlyTools.includes(name) && !IS_MACOS) {
764
- return {
765
- content: [
766
- {
767
- type: "text",
768
- text: `The '${name}' tool is only available on MacOS. Current platform: ${CURRENT_OS}`,
769
- },
770
- ],
771
- };
772
- }
773
-
774
472
  try {
775
473
  switch (name) {
776
474
  case "search-content": {
@@ -860,50 +558,6 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
860
558
  return { content: contentItems };
861
559
  }
862
560
 
863
- case "pixel-control": {
864
- const action = {
865
- type: args.action_type,
866
- data: args.data,
867
- };
868
-
869
- const response = await fetchAPI("/experimental/operator/pixel", {
870
- method: "POST",
871
- body: JSON.stringify({ action }),
872
- });
873
-
874
- if (!response.ok) {
875
- throw new Error(`HTTP error: ${response.status}`);
876
- }
877
-
878
- const data = await response.json();
879
- if (!data.success) {
880
- return {
881
- content: [
882
- {
883
- type: "text",
884
- text: `Failed to perform input control: ${data.error || "unknown error"}`,
885
- },
886
- ],
887
- };
888
- }
889
-
890
- let resultText = "Successfully performed input control action";
891
- if (args.action_type === "WriteText") {
892
- resultText = `Successfully typed text: '${args.data}'`;
893
- } else if (args.action_type === "KeyPress") {
894
- resultText = `Successfully pressed key: '${args.data}'`;
895
- } else if (args.action_type === "MouseMove") {
896
- const coords = args.data as { x: number; y: number };
897
- resultText = `Successfully moved mouse to coordinates: x=${coords.x}, y=${coords.y}`;
898
- } else if (args.action_type === "MouseClick") {
899
- resultText = `Successfully clicked ${args.data} mouse button`;
900
- }
901
-
902
- return {
903
- content: [{ type: "text", text: resultText }],
904
- };
905
- }
906
-
907
561
  case "export-video": {
908
562
  const startTime = args.start_time as string;
909
563
  const endTime = args.end_time as string;
@@ -1070,264 +724,6 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1070
724
  }
1071
725
  }
1072
726
 
1073
- case "click-element": {
1074
- const selector = {
1075
- app_name: args.app,
1076
- window_name: args.window,
1077
- locator: `#${args.id}`,
1078
- use_background_apps: args.use_background_apps ?? true,
1079
- activate_app: args.activate_app ?? true,
1080
- };
1081
-
1082
- const response = await fetchAPI("/experimental/operator/click", {
1083
- method: "POST",
1084
- body: JSON.stringify({ selector }),
1085
- });
1086
-
1087
- if (!response.ok) {
1088
- throw new Error(`HTTP error: ${response.status}`);
1089
- }
1090
-
1091
- const data = await response.json();
1092
- if (!data.success) {
1093
- return {
1094
- content: [
1095
- {
1096
- type: "text",
1097
- text: `Failed to click element: ${data.error || "unknown error"}`,
1098
- },
1099
- ],
1100
- };
1101
- }
1102
-
1103
- const result = data.result || {};
1104
- const method = result.method || "unknown";
1105
- const details = result.details || "click operation completed";
1106
-
1107
- return {
1108
- content: [
1109
- {
1110
- type: "text",
1111
- text: `Successfully clicked element using ${method}. ${details}`,
1112
- },
1113
- ],
1114
- };
1115
- }
1116
-
1117
- case "fill-element": {
1118
- const selector = {
1119
- app_name: args.app,
1120
- window_name: args.window,
1121
- locator: `#${args.id}`,
1122
- use_background_apps: args.use_background_apps ?? true,
1123
- activate_app: args.activate_app ?? true,
1124
- };
1125
-
1126
- const response = await fetchAPI("/experimental/operator/type", {
1127
- method: "POST",
1128
- body: JSON.stringify({ selector, text: args.text || "" }),
1129
- });
1130
-
1131
- if (!response.ok) {
1132
- throw new Error(`HTTP error: ${response.status}`);
1133
- }
1134
-
1135
- const data = await response.json();
1136
- if (!data.success) {
1137
- return {
1138
- content: [
1139
- {
1140
- type: "text",
1141
- text: `Failed to fill element: ${data.error || "unknown error"}`,
1142
- },
1143
- ],
1144
- };
1145
- }
1146
-
1147
- return {
1148
- content: [
1149
- { type: "text", text: "Successfully filled element with text" },
1150
- ],
1151
- };
1152
- }
1153
-
1154
- case "find-elements": {
1155
- const selector = {
1156
- app_name: args.app,
1157
- window_name: args.window,
1158
- locator: args.role || "",
1159
- use_background_apps: args.use_background_apps ?? true,
1160
- activate_app: args.activate_app ?? true,
1161
- };
1162
-
1163
- const response = await fetchAPI("/experimental/operator", {
1164
- method: "POST",
1165
- body: JSON.stringify({
1166
- selector,
1167
- max_results: args.max_results || 10,
1168
- max_depth: args.max_depth,
1169
- }),
1170
- });
1171
-
1172
- if (!response.ok) {
1173
- throw new Error(`HTTP error: ${response.status}`);
1174
- }
1175
-
1176
- const data = await response.json();
1177
- if (!data.success) {
1178
- return {
1179
- content: [
1180
- {
1181
- type: "text",
1182
- text: `Failed to find elements: ${data.error || "unknown error"}`,
1183
- },
1184
- ],
1185
- };
1186
- }
1187
-
1188
- const elements = data.data || [];
1189
- if (elements.length === 0) {
1190
- return {
1191
- content: [
1192
- {
1193
- type: "text",
1194
- text: `No elements found matching role '${args.role}' in app '${args.app}'`,
1195
- },
1196
- ],
1197
- };
1198
- }
1199
-
1200
- let resultText = `Found ${elements.length} elements matching role '${args.role}' in app '${args.app}':\n\n`;
1201
- elements.forEach((element: any, i: number) => {
1202
- resultText +=
1203
- `Element ${i + 1}:\n` +
1204
- `ID: ${element.id || "N/A"}\n` +
1205
- `Role: ${element.role || "N/A"}\n` +
1206
- `Text: ${element.text || "N/A"}\n` +
1207
- `Description: ${element.description || "N/A"}\n` +
1208
- "---\n";
1209
- });
1210
-
1211
- return {
1212
- content: [{ type: "text", text: resultText }],
1213
- };
1214
- }
1215
-
1216
- case "scroll-element": {
1217
- const selector = {
1218
- app_name: args.app,
1219
- window_name: args.window,
1220
- locator: `#${args.id}`,
1221
- use_background_apps: args.use_background_apps ?? true,
1222
- activate_app: args.activate_app ?? true,
1223
- };
1224
-
1225
- const response = await fetchAPI("/experimental/operator/scroll", {
1226
- method: "POST",
1227
- body: JSON.stringify({
1228
- selector,
1229
- direction: args.direction,
1230
- amount: args.amount,
1231
- }),
1232
- });
1233
-
1234
- if (!response.ok) {
1235
- throw new Error(`HTTP error: ${response.status}`);
1236
- }
1237
-
1238
- const data = await response.json();
1239
- if (!data.success) {
1240
- return {
1241
- content: [
1242
- {
1243
- type: "text",
1244
- text: `Failed to scroll element: ${data.error || "unknown error"}`,
1245
- },
1246
- ],
1247
- };
1248
- }
1249
-
1250
- return {
1251
- content: [
1252
- {
1253
- type: "text",
1254
- text: `Successfully scrolled element ${args.direction} by ${args.amount} pixels`,
1255
- },
1256
- ],
1257
- };
1258
- }
1259
-
1260
- case "open-application": {
1261
- const response = await fetchAPI(
1262
- "/experimental/operator/open-application",
1263
- {
1264
- method: "POST",
1265
- body: JSON.stringify({ app_name: args.app_name || "" }),
1266
- }
1267
- );
1268
-
1269
- if (!response.ok) {
1270
- throw new Error(`HTTP error: ${response.status}`);
1271
- }
1272
-
1273
- const data = await response.json();
1274
- if (!data.success) {
1275
- return {
1276
- content: [
1277
- {
1278
- type: "text",
1279
- text: `Failed to open application: ${data.error || "unknown error"}`,
1280
- },
1281
- ],
1282
- };
1283
- }
1284
-
1285
- return {
1286
- content: [
1287
- {
1288
- type: "text",
1289
- text: `Successfully opened application '${args.app_name}'`,
1290
- },
1291
- ],
1292
- };
1293
- }
1294
-
1295
- case "open-url": {
1296
- const response = await fetchAPI("/experimental/operator/open-url", {
1297
- method: "POST",
1298
- body: JSON.stringify({
1299
- url: args.url || "",
1300
- browser: args.browser,
1301
- }),
1302
- });
1303
-
1304
- if (!response.ok) {
1305
- throw new Error(`HTTP error: ${response.status}`);
1306
- }
1307
-
1308
- const data = await response.json();
1309
- if (!data.success) {
1310
- return {
1311
- content: [
1312
- {
1313
- type: "text",
1314
- text: `Failed to open URL: ${data.error || "unknown error"}`,
1315
- },
1316
- ],
1317
- };
1318
- }
1319
-
1320
- const browserInfo = args.browser ? ` using ${args.browser}` : "";
1321
- return {
1322
- content: [
1323
- {
1324
- type: "text",
1325
- text: `Successfully opened URL '${args.url}'${browserInfo}`,
1326
- },
1327
- ],
1328
- };
1329
- }
1330
-
1331
727
  default:
1332
728
  throw new Error(`Unknown tool: ${name}`);
1333
729
  }