screenpipe-mcp 0.4.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -54,11 +54,6 @@ function getCurrentDateInfo() {
54
54
  }),
55
55
  };
56
56
  }
57
- // Detect OS
58
- const CURRENT_OS = process.platform;
59
- const IS_MACOS = CURRENT_OS === "darwin";
60
- const IS_WINDOWS = CURRENT_OS === "win32";
61
- const IS_LINUX = CURRENT_OS === "linux";
62
57
  // Parse command line arguments
63
58
  const args = process.argv.slice(2);
64
59
  let port = 3030;
@@ -71,7 +66,7 @@ const SCREENPIPE_API = `http://localhost:${port}`;
71
66
  // Initialize server
72
67
  const server = new index_js_1.Server({
73
68
  name: "screenpipe",
74
- version: "0.4.0",
69
+ version: "0.5.0",
75
70
  }, {
76
71
  capabilities: {
77
72
  tools: {},
@@ -148,54 +143,6 @@ const BASE_TOOLS = [
148
143
  },
149
144
  },
150
145
  },
151
- {
152
- name: "pixel-control",
153
- description: "Control mouse and keyboard at the pixel level. This is a cross-platform tool that works on all operating systems. " +
154
- "Use this to type text, press keys, move the mouse, and click buttons.",
155
- annotations: {
156
- title: "Pixel Control",
157
- destructiveHint: true,
158
- },
159
- inputSchema: {
160
- type: "object",
161
- properties: {
162
- action_type: {
163
- type: "string",
164
- enum: ["WriteText", "KeyPress", "MouseMove", "MouseClick"],
165
- description: "Type of input action to perform",
166
- },
167
- data: {
168
- oneOf: [
169
- {
170
- type: "string",
171
- description: "Text to type or key to press (for WriteText and KeyPress)",
172
- },
173
- {
174
- type: "object",
175
- properties: {
176
- x: {
177
- type: "integer",
178
- description: "X coordinate for mouse movement",
179
- },
180
- y: {
181
- type: "integer",
182
- description: "Y coordinate for mouse movement",
183
- },
184
- },
185
- description: "Coordinates for MouseMove",
186
- },
187
- {
188
- type: "string",
189
- enum: ["left", "right", "middle"],
190
- description: "Button to click for MouseClick",
191
- },
192
- ],
193
- description: "Action-specific data",
194
- },
195
- },
196
- required: ["action_type", "data"],
197
- },
198
- },
199
146
  {
200
147
  name: "export-video",
201
148
  description: "Export a video of screen recordings for a specific time range. " +
@@ -231,229 +178,9 @@ const BASE_TOOLS = [
231
178
  },
232
179
  },
233
180
  ];
234
- const MACOS_TOOLS = [
235
- {
236
- name: "find-elements",
237
- description: "Find UI elements with a specific role in an application. " +
238
- "This tool is especially useful for identifying interactive elements. " +
239
- "\n\nMacOS Accessibility Roles Guide:\n" +
240
- "- Basic roles: 'button', 'textfield', 'checkbox', 'menu', 'list'\n" +
241
- "- MacOS specific roles: 'AXButton', 'AXTextField', 'AXCheckBox', 'AXMenu', etc.\n" +
242
- "- Text inputs can be: 'AXTextField', 'AXTextArea', 'AXComboBox', 'AXSearchField'\n" +
243
- "- Clickable items: 'AXButton', 'AXMenuItem', 'AXMenuBarItem', 'AXImage', 'AXStaticText'\n" +
244
- "- Web content may use: 'AXWebArea', 'AXLink', 'AXHeading', 'AXRadioButton'\n\n" +
245
- "Use MacOS Accessibility Inspector app to identify the exact roles in your target application.",
246
- annotations: {
247
- title: "Find Elements",
248
- readOnlyHint: true,
249
- },
250
- inputSchema: {
251
- type: "object",
252
- properties: {
253
- app: {
254
- type: "string",
255
- description: "The name of the application (e.g., 'Chrome', 'Finder', 'Terminal')",
256
- },
257
- window: {
258
- type: "string",
259
- description: "The window name or title (optional)",
260
- },
261
- role: {
262
- type: "string",
263
- description: "The role to search for (e.g., 'button', 'textfield', 'AXButton', 'AXTextField'). For best results, use MacOS AX prefixed roles.",
264
- },
265
- max_results: {
266
- type: "integer",
267
- description: "Maximum number of elements to return",
268
- default: 10,
269
- },
270
- max_depth: {
271
- type: "integer",
272
- description: "Maximum depth of element tree to search",
273
- },
274
- use_background_apps: {
275
- type: "boolean",
276
- description: "Whether to look in background apps",
277
- default: true,
278
- },
279
- activate_app: {
280
- type: "boolean",
281
- description: "Whether to activate the app before searching",
282
- default: true,
283
- },
284
- },
285
- required: ["app", "role"],
286
- },
287
- },
288
- {
289
- name: "click-element",
290
- description: "Click an element in an application using its id (MacOS only)",
291
- annotations: {
292
- title: "Click Element",
293
- destructiveHint: true,
294
- },
295
- inputSchema: {
296
- type: "object",
297
- properties: {
298
- app: {
299
- type: "string",
300
- description: "The name of the application",
301
- },
302
- window: {
303
- type: "string",
304
- description: "The window name (optional)",
305
- },
306
- id: {
307
- type: "string",
308
- description: "The id of the element to click",
309
- },
310
- use_background_apps: {
311
- type: "boolean",
312
- description: "Whether to look in background apps",
313
- default: true,
314
- },
315
- activate_app: {
316
- type: "boolean",
317
- description: "Whether to activate the app before clicking",
318
- default: true,
319
- },
320
- },
321
- required: ["app", "id"],
322
- },
323
- },
324
- {
325
- name: "fill-element",
326
- description: "Type text into an element in an application (MacOS only)",
327
- annotations: {
328
- title: "Fill Element",
329
- destructiveHint: true,
330
- },
331
- inputSchema: {
332
- type: "object",
333
- properties: {
334
- app: {
335
- type: "string",
336
- description: "The name of the application",
337
- },
338
- window: {
339
- type: "string",
340
- description: "The window name (optional)",
341
- },
342
- id: {
343
- type: "string",
344
- description: "The id of the element to fill",
345
- },
346
- text: {
347
- type: "string",
348
- description: "The text to type into the element",
349
- },
350
- use_background_apps: {
351
- type: "boolean",
352
- description: "Whether to look in background apps",
353
- default: true,
354
- },
355
- activate_app: {
356
- type: "boolean",
357
- description: "Whether to activate the app before typing",
358
- default: true,
359
- },
360
- },
361
- required: ["app", "id", "text"],
362
- },
363
- },
364
- {
365
- name: "scroll-element",
366
- description: "Scroll an element in a specific direction (MacOS only)",
367
- annotations: {
368
- title: "Scroll Element",
369
- destructiveHint: true,
370
- },
371
- inputSchema: {
372
- type: "object",
373
- properties: {
374
- app: {
375
- type: "string",
376
- description: "The name of the application",
377
- },
378
- window: {
379
- type: "string",
380
- description: "The window name (optional)",
381
- },
382
- id: {
383
- type: "string",
384
- description: "The id of the element to scroll",
385
- },
386
- direction: {
387
- type: "string",
388
- enum: ["up", "down", "left", "right"],
389
- description: "The direction to scroll",
390
- },
391
- amount: {
392
- type: "integer",
393
- description: "The amount to scroll in pixels",
394
- },
395
- use_background_apps: {
396
- type: "boolean",
397
- description: "Whether to look in background apps",
398
- default: true,
399
- },
400
- activate_app: {
401
- type: "boolean",
402
- description: "Whether to activate the app before scrolling",
403
- default: true,
404
- },
405
- },
406
- required: ["app", "id", "direction", "amount"],
407
- },
408
- },
409
- {
410
- name: "open-application",
411
- description: "Open an application by name",
412
- annotations: {
413
- title: "Open Application",
414
- destructiveHint: true,
415
- },
416
- inputSchema: {
417
- type: "object",
418
- properties: {
419
- app_name: {
420
- type: "string",
421
- description: "The name of the application to open",
422
- },
423
- },
424
- required: ["app_name"],
425
- },
426
- },
427
- {
428
- name: "open-url",
429
- description: "Open a URL in a browser",
430
- annotations: {
431
- title: "Open URL",
432
- destructiveHint: true,
433
- },
434
- inputSchema: {
435
- type: "object",
436
- properties: {
437
- url: {
438
- type: "string",
439
- description: "The URL to open",
440
- },
441
- browser: {
442
- type: "string",
443
- description: "The browser to use (optional)",
444
- },
445
- },
446
- required: ["url"],
447
- },
448
- },
449
- ];
450
181
  // List tools handler
451
182
  server.setRequestHandler(types_js_1.ListToolsRequestSchema, async () => {
452
- const tools = [...BASE_TOOLS];
453
- if (IS_MACOS) {
454
- tools.push(...MACOS_TOOLS);
455
- }
456
- return { tools };
183
+ return { tools: BASE_TOOLS };
457
184
  });
458
185
  // MCP Resources - provide dynamic context data
459
186
  const RESOURCES = [
@@ -469,6 +196,12 @@ const RESOURCES = [
469
196
  description: "How to use screenpipe search effectively",
470
197
  mimeType: "text/markdown",
471
198
  },
199
+ {
200
+ uri: "ui://search",
201
+ name: "Search Dashboard",
202
+ description: "Interactive search UI for exploring screen recordings and audio transcriptions",
203
+ mimeType: "text/html",
204
+ },
472
205
  ];
473
206
  // List resources handler
474
207
  server.setRequestHandler(types_js_1.ListResourcesRequestSchema, async () => {
@@ -541,6 +274,56 @@ server.setRequestHandler(types_js_1.ReadResourceRequestSchema, async (request) =
541
274
  },
542
275
  ],
543
276
  };
277
+ case "ui://search": {
278
+ // MCP App UI - Interactive search dashboard
279
+ const uiHtmlPath = path.join(__dirname, "..", "ui", "search.html");
280
+ let htmlContent;
281
+ try {
282
+ htmlContent = fs.readFileSync(uiHtmlPath, "utf-8");
283
+ }
284
+ catch {
285
+ // Fallback: serve the embedded minimal UI whenever search.html cannot be read (the bare catch covers any read error, not only a missing file)
286
+ htmlContent = `<!DOCTYPE html>
287
+ <html>
288
+ <head>
289
+ <style>
290
+ body { font-family: system-ui; background: #0a0a0a; color: #fff; padding: 20px; }
291
+ input { width: 100%; padding: 10px; margin-bottom: 10px; background: #1a1a1a; border: 1px solid #333; color: #fff; border-radius: 6px; }
292
+ button { padding: 10px 20px; background: #fff; color: #000; border: none; border-radius: 6px; cursor: pointer; }
293
+ #results { margin-top: 20px; }
294
+ .result { background: #1a1a1a; padding: 12px; margin: 8px 0; border-radius: 8px; border: 1px solid #333; }
295
+ </style>
296
+ </head>
297
+ <body>
298
+ <h2>screenpipe search</h2>
299
+ <input id="q" placeholder="search..." onkeydown="if(event.key==='Enter')search()"/>
300
+ <button onclick="search()">search</button>
301
+ <div id="results"></div>
302
+ <script>
303
+ function search() {
304
+ window.parent.postMessage({jsonrpc:'2.0',method:'tools/call',params:{name:'search-content',arguments:{q:document.getElementById('q').value,limit:20}}},'*');
305
+ }
306
+ window.addEventListener('message',e=>{
307
+ if(e.data?.result||e.data?.method==='tool/result'){
308
+ const r=e.data.result||e.data.params?.result;
309
+ const d=r?.data||r||[];
310
+ document.getElementById('results').innerHTML=d.map(x=>'<div class="result"><b>'+((x.type||'')+'</b> '+(x.content?.app_name||'')+': '+(x.content?.text||x.content?.transcription||'').substring(0,200))+'</div>').join('');
311
+ }
312
+ });
313
+ </script>
314
+ </body>
315
+ </html>`;
316
+ }
317
+ return {
318
+ contents: [
319
+ {
320
+ uri,
321
+ mimeType: "text/html",
322
+ text: htmlContent,
323
+ },
324
+ ],
325
+ };
326
+ }
544
327
  default:
545
328
  throw new Error(`Unknown resource: ${uri}`);
546
329
  }
@@ -675,25 +458,6 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
675
458
  if (!args) {
676
459
  throw new Error("Missing arguments");
677
460
  }
678
- // Check if the tool is MacOS-only and we're not on MacOS
679
- const macosOnlyTools = [
680
- "click-element",
681
- "fill-element",
682
- "find-elements",
683
- "scroll-element",
684
- "open-application",
685
- "open-url",
686
- ];
687
- if (macosOnlyTools.includes(name) && !IS_MACOS) {
688
- return {
689
- content: [
690
- {
691
- type: "text",
692
- text: `The '${name}' tool is only available on MacOS. Current platform: ${CURRENT_OS}`,
693
- },
694
- ],
695
- };
696
- }
697
461
  try {
698
462
  switch (name) {
699
463
  case "search-content": {
@@ -765,47 +529,6 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
765
529
  }
766
530
  return { content: contentItems };
767
531
  }
768
- case "pixel-control": {
769
- const action = {
770
- type: args.action_type,
771
- data: args.data,
772
- };
773
- const response = await fetchAPI("/experimental/operator/pixel", {
774
- method: "POST",
775
- body: JSON.stringify({ action }),
776
- });
777
- if (!response.ok) {
778
- throw new Error(`HTTP error: ${response.status}`);
779
- }
780
- const data = await response.json();
781
- if (!data.success) {
782
- return {
783
- content: [
784
- {
785
- type: "text",
786
- text: `Failed to perform input control: ${data.error || "unknown error"}`,
787
- },
788
- ],
789
- };
790
- }
791
- let resultText = "Successfully performed input control action";
792
- if (args.action_type === "WriteText") {
793
- resultText = `Successfully typed text: '${args.data}'`;
794
- }
795
- else if (args.action_type === "KeyPress") {
796
- resultText = `Successfully pressed key: '${args.data}'`;
797
- }
798
- else if (args.action_type === "MouseMove") {
799
- const coords = args.data;
800
- resultText = `Successfully moved mouse to coordinates: x=${coords.x}, y=${coords.y}`;
801
- }
802
- else if (args.action_type === "MouseClick") {
803
- resultText = `Successfully clicked ${args.data} mouse button`;
804
- }
805
- return {
806
- content: [{ type: "text", text: resultText }],
807
- };
808
- }
809
532
  case "export-video": {
810
533
  const startTime = args.start_time;
811
534
  const endTime = args.end_time;
@@ -951,230 +674,6 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
951
674
  };
952
675
  }
953
676
  }
954
- case "click-element": {
955
- const selector = {
956
- app_name: args.app,
957
- window_name: args.window,
958
- locator: `#${args.id}`,
959
- use_background_apps: args.use_background_apps ?? true,
960
- activate_app: args.activate_app ?? true,
961
- };
962
- const response = await fetchAPI("/experimental/operator/click", {
963
- method: "POST",
964
- body: JSON.stringify({ selector }),
965
- });
966
- if (!response.ok) {
967
- throw new Error(`HTTP error: ${response.status}`);
968
- }
969
- const data = await response.json();
970
- if (!data.success) {
971
- return {
972
- content: [
973
- {
974
- type: "text",
975
- text: `Failed to click element: ${data.error || "unknown error"}`,
976
- },
977
- ],
978
- };
979
- }
980
- const result = data.result || {};
981
- const method = result.method || "unknown";
982
- const details = result.details || "click operation completed";
983
- return {
984
- content: [
985
- {
986
- type: "text",
987
- text: `Successfully clicked element using ${method}. ${details}`,
988
- },
989
- ],
990
- };
991
- }
992
- case "fill-element": {
993
- const selector = {
994
- app_name: args.app,
995
- window_name: args.window,
996
- locator: `#${args.id}`,
997
- use_background_apps: args.use_background_apps ?? true,
998
- activate_app: args.activate_app ?? true,
999
- };
1000
- const response = await fetchAPI("/experimental/operator/type", {
1001
- method: "POST",
1002
- body: JSON.stringify({ selector, text: args.text || "" }),
1003
- });
1004
- if (!response.ok) {
1005
- throw new Error(`HTTP error: ${response.status}`);
1006
- }
1007
- const data = await response.json();
1008
- if (!data.success) {
1009
- return {
1010
- content: [
1011
- {
1012
- type: "text",
1013
- text: `Failed to fill element: ${data.error || "unknown error"}`,
1014
- },
1015
- ],
1016
- };
1017
- }
1018
- return {
1019
- content: [
1020
- { type: "text", text: "Successfully filled element with text" },
1021
- ],
1022
- };
1023
- }
1024
- case "find-elements": {
1025
- const selector = {
1026
- app_name: args.app,
1027
- window_name: args.window,
1028
- locator: args.role || "",
1029
- use_background_apps: args.use_background_apps ?? true,
1030
- activate_app: args.activate_app ?? true,
1031
- };
1032
- const response = await fetchAPI("/experimental/operator", {
1033
- method: "POST",
1034
- body: JSON.stringify({
1035
- selector,
1036
- max_results: args.max_results || 10,
1037
- max_depth: args.max_depth,
1038
- }),
1039
- });
1040
- if (!response.ok) {
1041
- throw new Error(`HTTP error: ${response.status}`);
1042
- }
1043
- const data = await response.json();
1044
- if (!data.success) {
1045
- return {
1046
- content: [
1047
- {
1048
- type: "text",
1049
- text: `Failed to find elements: ${data.error || "unknown error"}`,
1050
- },
1051
- ],
1052
- };
1053
- }
1054
- const elements = data.data || [];
1055
- if (elements.length === 0) {
1056
- return {
1057
- content: [
1058
- {
1059
- type: "text",
1060
- text: `No elements found matching role '${args.role}' in app '${args.app}'`,
1061
- },
1062
- ],
1063
- };
1064
- }
1065
- let resultText = `Found ${elements.length} elements matching role '${args.role}' in app '${args.app}':\n\n`;
1066
- elements.forEach((element, i) => {
1067
- resultText +=
1068
- `Element ${i + 1}:\n` +
1069
- `ID: ${element.id || "N/A"}\n` +
1070
- `Role: ${element.role || "N/A"}\n` +
1071
- `Text: ${element.text || "N/A"}\n` +
1072
- `Description: ${element.description || "N/A"}\n` +
1073
- "---\n";
1074
- });
1075
- return {
1076
- content: [{ type: "text", text: resultText }],
1077
- };
1078
- }
1079
- case "scroll-element": {
1080
- const selector = {
1081
- app_name: args.app,
1082
- window_name: args.window,
1083
- locator: `#${args.id}`,
1084
- use_background_apps: args.use_background_apps ?? true,
1085
- activate_app: args.activate_app ?? true,
1086
- };
1087
- const response = await fetchAPI("/experimental/operator/scroll", {
1088
- method: "POST",
1089
- body: JSON.stringify({
1090
- selector,
1091
- direction: args.direction,
1092
- amount: args.amount,
1093
- }),
1094
- });
1095
- if (!response.ok) {
1096
- throw new Error(`HTTP error: ${response.status}`);
1097
- }
1098
- const data = await response.json();
1099
- if (!data.success) {
1100
- return {
1101
- content: [
1102
- {
1103
- type: "text",
1104
- text: `Failed to scroll element: ${data.error || "unknown error"}`,
1105
- },
1106
- ],
1107
- };
1108
- }
1109
- return {
1110
- content: [
1111
- {
1112
- type: "text",
1113
- text: `Successfully scrolled element ${args.direction} by ${args.amount} pixels`,
1114
- },
1115
- ],
1116
- };
1117
- }
1118
- case "open-application": {
1119
- const response = await fetchAPI("/experimental/operator/open-application", {
1120
- method: "POST",
1121
- body: JSON.stringify({ app_name: args.app_name || "" }),
1122
- });
1123
- if (!response.ok) {
1124
- throw new Error(`HTTP error: ${response.status}`);
1125
- }
1126
- const data = await response.json();
1127
- if (!data.success) {
1128
- return {
1129
- content: [
1130
- {
1131
- type: "text",
1132
- text: `Failed to open application: ${data.error || "unknown error"}`,
1133
- },
1134
- ],
1135
- };
1136
- }
1137
- return {
1138
- content: [
1139
- {
1140
- type: "text",
1141
- text: `Successfully opened application '${args.app_name}'`,
1142
- },
1143
- ],
1144
- };
1145
- }
1146
- case "open-url": {
1147
- const response = await fetchAPI("/experimental/operator/open-url", {
1148
- method: "POST",
1149
- body: JSON.stringify({
1150
- url: args.url || "",
1151
- browser: args.browser,
1152
- }),
1153
- });
1154
- if (!response.ok) {
1155
- throw new Error(`HTTP error: ${response.status}`);
1156
- }
1157
- const data = await response.json();
1158
- if (!data.success) {
1159
- return {
1160
- content: [
1161
- {
1162
- type: "text",
1163
- text: `Failed to open URL: ${data.error || "unknown error"}`,
1164
- },
1165
- ],
1166
- };
1167
- }
1168
- const browserInfo = args.browser ? ` using ${args.browser}` : "";
1169
- return {
1170
- content: [
1171
- {
1172
- type: "text",
1173
- text: `Successfully opened URL '${args.url}'${browserInfo}`,
1174
- },
1175
- ],
1176
- };
1177
- }
1178
677
  default:
1179
678
  throw new Error(`Unknown tool: ${name}`);
1180
679
  }
package/manifest.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "manifest_version": "0.3",
3
3
  "name": "screenpipe",
4
4
  "display_name": "Screenpipe",
5
- "version": "0.3.1",
5
+ "version": "0.5.0",
6
6
  "description": "Search your screen recordings, audio transcriptions, and control your computer with AI",
7
7
  "long_description": "Screenpipe is a 24/7 screen and audio recorder that lets you search everything you've seen or heard. This extension connects Claude to your local screenpipe instance, enabling AI-powered search through your digital memory and computer control capabilities.",
8
8
  "author": {
@@ -30,6 +30,10 @@
30
30
  "name": "search-content",
31
31
  "description": "Search through recorded screen content, audio transcriptions, and UI elements"
32
32
  },
33
+ {
34
+ "name": "export-video",
35
+ "description": "Export screen recordings as MP4 video for a specific time range"
36
+ },
33
37
  {
34
38
  "name": "pixel-control",
35
39
  "description": "Control mouse and keyboard (type text, press keys, move mouse, click)"