screenpipe-mcp 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -21,7 +21,7 @@ const SCREENPIPE_API = `http://localhost:${port}`;
21
21
  // Initialize server
22
22
  const server = new index_js_1.Server({
23
23
  name: "screenpipe",
24
- version: "0.2.0",
24
+ version: "0.3.1",
25
25
  }, {
26
26
  capabilities: {
27
27
  tools: {},
@@ -33,7 +33,8 @@ const BASE_TOOLS = [
33
33
  name: "search-content",
34
34
  description: "Search through screenpipe recorded content (OCR text, audio transcriptions, UI elements). " +
35
35
  "Use this to find specific content that has appeared on your screen or been spoken. " +
36
- "Results include timestamps, app context, and the content itself.",
36
+ "Results include timestamps, app context, and the content itself. " +
37
+ "Set include_frames=true to get screenshot images for visual analysis (OCR results only).",
37
38
  inputSchema: {
38
39
  type: "object",
39
40
  properties: {
@@ -83,6 +84,13 @@ const BASE_TOOLS = [
83
84
  type: "integer",
84
85
  description: "Maximum content length in characters",
85
86
  },
87
+ include_frames: {
88
+ type: "boolean",
89
+ description: "Include screenshot images in results for visual analysis. Only applies to OCR results. " +
90
+ "When true, returns base64-encoded images that can be analyzed with vision capabilities. " +
91
+ "Note: Images are limited to ~1MB each. Default: false",
92
+ default: false,
93
+ },
86
94
  },
87
95
  },
88
96
  },
@@ -370,6 +378,7 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
370
378
  try {
371
379
  switch (name) {
372
380
  case "search-content": {
381
+ const includeFrames = args.include_frames === true;
373
382
  const params = new URLSearchParams();
374
383
  for (const [key, value] of Object.entries(args)) {
375
384
  if (value !== null && value !== undefined) {
@@ -387,42 +396,68 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
387
396
  content: [{ type: "text", text: "No results found" }],
388
397
  };
389
398
  }
390
- const formattedResults = results
391
- .map((result) => {
399
+ // Build content array with text and optional images
400
+ const contentItems = [];
401
+ const formattedResults = [];
402
+ const images = [];
403
+ for (const result of results) {
392
404
  const content = result.content;
393
405
  if (!content)
394
- return null;
406
+ continue;
395
407
  if (result.type === "OCR") {
396
- return (`OCR Text: ${content.text || "N/A"}\n` +
408
+ const textResult = `OCR Text: ${content.text || "N/A"}\n` +
397
409
  `App: ${content.app_name || "N/A"}\n` +
398
410
  `Window: ${content.window_name || "N/A"}\n` +
399
411
  `Time: ${content.timestamp || "N/A"}\n` +
400
- "---");
412
+ `Frame ID: ${content.frame_id || "N/A"}\n` +
413
+ "---";
414
+ formattedResults.push(textResult);
415
+ // Collect frame if available and requested
416
+ if (includeFrames && content.frame) {
417
+ images.push({
418
+ data: content.frame,
419
+ context: `Screenshot from ${content.app_name || "unknown"} - ${content.window_name || "unknown"} at ${content.timestamp || "unknown"}`,
420
+ });
421
+ }
401
422
  }
402
423
  else if (result.type === "Audio") {
403
- return (`Audio Transcription: ${content.transcription || "N/A"}\n` +
424
+ formattedResults.push(`Audio Transcription: ${content.transcription || "N/A"}\n` +
404
425
  `Device: ${content.device_name || "N/A"}\n` +
405
426
  `Time: ${content.timestamp || "N/A"}\n` +
406
427
  "---");
407
428
  }
408
429
  else if (result.type === "UI") {
409
- return (`UI Text: ${content.text || "N/A"}\n` +
430
+ formattedResults.push(`UI Text: ${content.text || "N/A"}\n` +
410
431
  `App: ${content.app_name || "N/A"}\n` +
411
432
  `Window: ${content.window_name || "N/A"}\n` +
412
433
  `Time: ${content.timestamp || "N/A"}\n` +
413
434
  "---");
414
435
  }
415
- return null;
416
- })
417
- .filter(Boolean);
418
- return {
419
- content: [
420
- {
421
- type: "text",
422
- text: "Search Results:\n\n" + formattedResults.join("\n"),
423
- },
424
- ],
425
- };
436
+ }
437
+ // Add text results
438
+ contentItems.push({
439
+ type: "text",
440
+ text: "Search Results:\n\n" +
441
+ formattedResults.join("\n") +
442
+ (images.length > 0
443
+ ? `\n\n${images.length} screenshot(s) included below for visual analysis:`
444
+ : ""),
445
+ });
446
+ // Add images if requested and available
447
+ for (const img of images) {
448
+ // Add context for the image
449
+ contentItems.push({
450
+ type: "text",
451
+ text: `\n📷 ${img.context}`,
452
+ });
453
+ // Add the image itself
454
+ contentItems.push({
455
+ type: "image",
456
+ data: img.data,
457
+ mimeType: "image/png",
458
+ });
459
+ }
460
+ return { content: contentItems };
426
461
  }
427
462
  case "pixel-control": {
428
463
  const action = {
package/manifest.json CHANGED
@@ -1,19 +1,29 @@
1
1
  {
2
+ "manifest_version": "0.3",
2
3
  "name": "screenpipe",
3
- "version": "0.2.0",
4
+ "display_name": "Screenpipe",
5
+ "version": "0.3.1",
4
6
  "description": "Search your screen recordings, audio transcriptions, and control your computer with AI",
7
+ "long_description": "Screenpipe is a 24/7 screen and audio recorder that lets you search everything you've seen or heard. This extension connects Claude to your local screenpipe instance, enabling AI-powered search through your digital memory and computer control capabilities.",
5
8
  "author": {
6
- "name": "Mediar AI",
9
+ "name": "screenpipe",
7
10
  "url": "https://screenpi.pe"
8
11
  },
9
12
  "repository": {
10
13
  "type": "git",
11
14
  "url": "https://github.com/mediar-ai/screenpipe"
12
15
  },
16
+ "homepage": "https://screenpi.pe",
17
+ "documentation": "https://github.com/mediar-ai/screenpipe/tree/main/screenpipe-integrations/screenpipe-mcp",
18
+ "support": "https://github.com/mediar-ai/screenpipe/issues",
13
19
  "license": "MIT",
14
20
  "server": {
15
21
  "type": "node",
16
- "entry": "dist/index.js"
22
+ "entry_point": "dist/index.js",
23
+ "mcp_config": {
24
+ "command": "node",
25
+ "args": ["${__dirname}/dist/index.js"]
26
+ }
17
27
  },
18
28
  "tools": [
19
29
  {
@@ -49,10 +59,11 @@
49
59
  "description": "Open URLs in browser (macOS only)"
50
60
  }
51
61
  ],
52
- "requirements": {
53
- "screenpipe": "Requires screenpipe to be running on localhost:3030"
62
+ "compatibility": {
63
+ "platforms": ["darwin", "win32", "linux"],
64
+ "runtimes": {
65
+ "node": ">=18.0.0"
66
+ }
54
67
  },
55
- "icon": "icon.png",
56
- "categories": ["productivity", "automation", "search"],
57
- "keywords": ["screen-recording", "ocr", "audio-transcription", "automation", "memory"]
68
+ "keywords": ["screen-recording", "ocr", "audio-transcription", "automation", "memory", "search"]
58
69
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "screenpipe-mcp",
3
- "version": "0.2.0",
3
+ "version": "0.3.1",
4
4
  "description": "MCP server for screenpipe - search your screen recordings, audio transcriptions, and control your computer",
5
5
  "main": "dist/index.js",
6
6
  "bin": {
Binary file
package/src/index.ts CHANGED
@@ -29,7 +29,7 @@ const SCREENPIPE_API = `http://localhost:${port}`;
29
29
  const server = new Server(
30
30
  {
31
31
  name: "screenpipe",
32
- version: "0.2.0",
32
+ version: "0.3.1",
33
33
  },
34
34
  {
35
35
  capabilities: {
@@ -45,7 +45,8 @@ const BASE_TOOLS: Tool[] = [
45
45
  description:
46
46
  "Search through screenpipe recorded content (OCR text, audio transcriptions, UI elements). " +
47
47
  "Use this to find specific content that has appeared on your screen or been spoken. " +
48
- "Results include timestamps, app context, and the content itself.",
48
+ "Results include timestamps, app context, and the content itself. " +
49
+ "Set include_frames=true to get screenshot images for visual analysis (OCR results only).",
49
50
  inputSchema: {
50
51
  type: "object",
51
52
  properties: {
@@ -99,6 +100,14 @@ const BASE_TOOLS: Tool[] = [
99
100
  type: "integer",
100
101
  description: "Maximum content length in characters",
101
102
  },
103
+ include_frames: {
104
+ type: "boolean",
105
+ description:
106
+ "Include screenshot images in results for visual analysis. Only applies to OCR results. " +
107
+ "When true, returns base64-encoded images that can be analyzed with vision capabilities. " +
108
+ "Note: Images are limited to ~1MB each. Default: false",
109
+ default: false,
110
+ },
102
111
  },
103
112
  },
104
113
  },
@@ -403,6 +412,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
403
412
  try {
404
413
  switch (name) {
405
414
  case "search-content": {
415
+ const includeFrames = args.include_frames === true;
406
416
  const params = new URLSearchParams();
407
417
  for (const [key, value] of Object.entries(args)) {
408
418
  if (value !== null && value !== undefined) {
@@ -424,47 +434,81 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
424
434
  };
425
435
  }
426
436
 
427
- const formattedResults = results
428
- .map((result: any) => {
429
- const content = result.content;
430
- if (!content) return null;
431
-
432
- if (result.type === "OCR") {
433
- return (
434
- `OCR Text: ${content.text || "N/A"}\n` +
435
- `App: ${content.app_name || "N/A"}\n` +
436
- `Window: ${content.window_name || "N/A"}\n` +
437
- `Time: ${content.timestamp || "N/A"}\n` +
438
- "---"
439
- );
440
- } else if (result.type === "Audio") {
441
- return (
442
- `Audio Transcription: ${content.transcription || "N/A"}\n` +
437
+ // Build content array with text and optional images
438
+ const contentItems: Array<
439
+ | { type: "text"; text: string }
440
+ | { type: "image"; data: string; mimeType: string }
441
+ > = [];
442
+
443
+ const formattedResults: string[] = [];
444
+ const images: Array<{ data: string; context: string }> = [];
445
+
446
+ for (const result of results) {
447
+ const content = result.content;
448
+ if (!content) continue;
449
+
450
+ if (result.type === "OCR") {
451
+ const textResult =
452
+ `OCR Text: ${content.text || "N/A"}\n` +
453
+ `App: ${content.app_name || "N/A"}\n` +
454
+ `Window: ${content.window_name || "N/A"}\n` +
455
+ `Time: ${content.timestamp || "N/A"}\n` +
456
+ `Frame ID: ${content.frame_id || "N/A"}\n` +
457
+ "---";
458
+ formattedResults.push(textResult);
459
+
460
+ // Collect frame if available and requested
461
+ if (includeFrames && content.frame) {
462
+ images.push({
463
+ data: content.frame,
464
+ context: `Screenshot from ${content.app_name || "unknown"} - ${content.window_name || "unknown"} at ${content.timestamp || "unknown"}`,
465
+ });
466
+ }
467
+ } else if (result.type === "Audio") {
468
+ formattedResults.push(
469
+ `Audio Transcription: ${content.transcription || "N/A"}\n` +
443
470
  `Device: ${content.device_name || "N/A"}\n` +
444
471
  `Time: ${content.timestamp || "N/A"}\n` +
445
472
  "---"
446
- );
447
- } else if (result.type === "UI") {
448
- return (
449
- `UI Text: ${content.text || "N/A"}\n` +
473
+ );
474
+ } else if (result.type === "UI") {
475
+ formattedResults.push(
476
+ `UI Text: ${content.text || "N/A"}\n` +
450
477
  `App: ${content.app_name || "N/A"}\n` +
451
478
  `Window: ${content.window_name || "N/A"}\n` +
452
479
  `Time: ${content.timestamp || "N/A"}\n` +
453
480
  "---"
454
- );
455
- }
456
- return null;
457
- })
458
- .filter(Boolean);
481
+ );
482
+ }
483
+ }
459
484
 
460
- return {
461
- content: [
462
- {
463
- type: "text",
464
- text: "Search Results:\n\n" + formattedResults.join("\n"),
465
- },
466
- ],
467
- };
485
+ // Add text results
486
+ contentItems.push({
487
+ type: "text",
488
+ text:
489
+ "Search Results:\n\n" +
490
+ formattedResults.join("\n") +
491
+ (images.length > 0
492
+ ? `\n\n${images.length} screenshot(s) included below for visual analysis:`
493
+ : ""),
494
+ });
495
+
496
+ // Add images if requested and available
497
+ for (const img of images) {
498
+ // Add context for the image
499
+ contentItems.push({
500
+ type: "text",
501
+ text: `\n📷 ${img.context}`,
502
+ });
503
+ // Add the image itself
504
+ contentItems.push({
505
+ type: "image",
506
+ data: img.data,
507
+ mimeType: "image/png",
508
+ });
509
+ }
510
+
511
+ return { content: contentItems };
468
512
  }
469
513
 
470
514
  case "pixel-control": {