npm - screenpipe-mcp - Versions diffs - 0.2.0 → 0.3.1 - Mend

screenpipe-mcp 0.2.0 → 0.3.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/index.js CHANGED Viewed

@@ -21,7 +21,7 @@ const SCREENPIPE_API = `http://localhost:${port}`;
 // Initialize server
 const server = new index_js_1.Server({
     name: "screenpipe",
-    version: "0.2.0",
+    version: "0.3.1",
 }, {
     capabilities: {
         tools: {},
@@ -33,7 +33,8 @@ const BASE_TOOLS = [
         name: "search-content",
         description: "Search through screenpipe recorded content (OCR text, audio transcriptions, UI elements). " +
             "Use this to find specific content that has appeared on your screen or been spoken. " +
-            "Results include timestamps, app context, and the content itself.",
+            "Results include timestamps, app context, and the content itself. " +
+            "Set include_frames=true to get screenshot images for visual analysis (OCR results only).",
         inputSchema: {
             type: "object",
             properties: {
@@ -83,6 +84,13 @@ const BASE_TOOLS = [
                     type: "integer",
                     description: "Maximum content length in characters",
                 },
+                include_frames: {
+                    type: "boolean",
+                    description: "Include screenshot images in results for visual analysis. Only applies to OCR results. " +
+                        "When true, returns base64-encoded images that can be analyzed with vision capabilities. " +
+                        "Note: Images are limited to ~1MB each. Default: false",
+                    default: false,
+                },
             },
         },
     },
@@ -370,6 +378,7 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
     try {
         switch (name) {
             case "search-content": {
+                const includeFrames = args.include_frames === true;
                 const params = new URLSearchParams();
                 for (const [key, value] of Object.entries(args)) {
                     if (value !== null && value !== undefined) {
@@ -387,42 +396,68 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
                         content: [{ type: "text", text: "No results found" }],
                     };
                 }
-                const formattedResults = results
-                    .map((result) => {
+                // Build content array with text and optional images
+                const contentItems = [];
+                const formattedResults = [];
+                const images = [];
+                for (const result of results) {
                     const content = result.content;
                     if (!content)
-                        return null;
+                        continue;
                     if (result.type === "OCR") {
-                        return (`OCR Text: ${content.text || "N/A"}\n` +
+                        const textResult = `OCR Text: ${content.text || "N/A"}\n` +
                             `App: ${content.app_name || "N/A"}\n` +
                             `Window: ${content.window_name || "N/A"}\n` +
                             `Time: ${content.timestamp || "N/A"}\n` +
-                            "---");
+                            `Frame ID: ${content.frame_id || "N/A"}\n` +
+                            "---";
+                        formattedResults.push(textResult);
+                        // Collect frame if available and requested
+                        if (includeFrames && content.frame) {
+                            images.push({
+                                data: content.frame,
+                                context: `Screenshot from ${content.app_name || "unknown"} - ${content.window_name || "unknown"} at ${content.timestamp || "unknown"}`,
+                            });
+                        }
                     }
                     else if (result.type === "Audio") {
-                        return (`Audio Transcription: ${content.transcription || "N/A"}\n` +
+                        formattedResults.push(`Audio Transcription: ${content.transcription || "N/A"}\n` +
                             `Device: ${content.device_name || "N/A"}\n` +
                             `Time: ${content.timestamp || "N/A"}\n` +
                             "---");
                     }
                     else if (result.type === "UI") {
-                        return (`UI Text: ${content.text || "N/A"}\n` +
+                        formattedResults.push(`UI Text: ${content.text || "N/A"}\n` +
                             `App: ${content.app_name || "N/A"}\n` +
                             `Window: ${content.window_name || "N/A"}\n` +
                             `Time: ${content.timestamp || "N/A"}\n` +
                             "---");
                     }
-                    return null;
-                })
-                    .filter(Boolean);
-                return {
-                    content: [
-                        {
-                            type: "text",
-                            text: "Search Results:\n\n" + formattedResults.join("\n"),
-                        },
-                    ],
-                };
+                }
+                // Add text results
+                contentItems.push({
+                    type: "text",
+                    text: "Search Results:\n\n" +
+                        formattedResults.join("\n") +
+                        (images.length > 0
+                            ? `\n\n${images.length} screenshot(s) included below for visual analysis:`
+                            : ""),
+                });
+                // Add images if requested and available
+                for (const img of images) {
+                    // Add context for the image
+                    contentItems.push({
+                        type: "text",
+                        text: `\n📷 ${img.context}`,
+                    });
+                    // Add the image itself
+                    contentItems.push({
+                        type: "image",
+                        data: img.data,
+                        mimeType: "image/png",
+                    });
+                }
+                return { content: contentItems };
             }
             case "pixel-control": {
                 const action = {

package/manifest.json CHANGED Viewed

@@ -1,19 +1,29 @@
 {
+  "manifest_version": "0.3",
   "name": "screenpipe",
-  "version": "0.2.0",
+  "display_name": "Screenpipe",
+  "version": "0.3.1",
   "description": "Search your screen recordings, audio transcriptions, and control your computer with AI",
+  "long_description": "Screenpipe is a 24/7 screen and audio recorder that lets you search everything you've seen or heard. This extension connects Claude to your local screenpipe instance, enabling AI-powered search through your digital memory and computer control capabilities.",
   "author": {
-    "name": "Mediar AI",
+    "name": "screenpipe",
     "url": "https://screenpi.pe"
   },
   "repository": {
     "type": "git",
     "url": "https://github.com/mediar-ai/screenpipe"
   },
+  "homepage": "https://screenpi.pe",
+  "documentation": "https://github.com/mediar-ai/screenpipe/tree/main/screenpipe-integrations/screenpipe-mcp",
+  "support": "https://github.com/mediar-ai/screenpipe/issues",
   "license": "MIT",
   "server": {
     "type": "node",
-    "entry": "dist/index.js"
+    "entry_point": "dist/index.js",
+    "mcp_config": {
+      "command": "node",
+      "args": ["${__dirname}/dist/index.js"]
+    }
   },
   "tools": [
     {
@@ -49,10 +59,11 @@
       "description": "Open URLs in browser (macOS only)"
     }
   ],
-  "requirements": {
-    "screenpipe": "Requires screenpipe to be running on localhost:3030"
+  "compatibility": {
+    "platforms": ["darwin", "win32", "linux"],
+    "runtimes": {
+      "node": ">=18.0.0"
+    }
   },
-  "icon": "icon.png",
-  "categories": ["productivity", "automation", "search"],
-  "keywords": ["screen-recording", "ocr", "audio-transcription", "automation", "memory"]
+  "keywords": ["screen-recording", "ocr", "audio-transcription", "automation", "memory", "search"]
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "screenpipe-mcp",
-  "version": "0.2.0",
+  "version": "0.3.1",
   "description": "MCP server for screenpipe - search your screen recordings, audio transcriptions, and control your computer",
   "main": "dist/index.js",
   "bin": {

package/screenpipe-mcp.mcpb ADDED Viewed

Binary file

package/src/index.ts CHANGED Viewed

@@ -29,7 +29,7 @@ const SCREENPIPE_API = `http://localhost:${port}`;
 const server = new Server(
   {
     name: "screenpipe",
-    version: "0.2.0",
+    version: "0.3.1",
   },
   {
     capabilities: {
@@ -45,7 +45,8 @@ const BASE_TOOLS: Tool[] = [
     description:
       "Search through screenpipe recorded content (OCR text, audio transcriptions, UI elements). " +
       "Use this to find specific content that has appeared on your screen or been spoken. " +
-      "Results include timestamps, app context, and the content itself.",
+      "Results include timestamps, app context, and the content itself. " +
+      "Set include_frames=true to get screenshot images for visual analysis (OCR results only).",
     inputSchema: {
       type: "object",
       properties: {
@@ -99,6 +100,14 @@ const BASE_TOOLS: Tool[] = [
           type: "integer",
           description: "Maximum content length in characters",
         },
+        include_frames: {
+          type: "boolean",
+          description:
+            "Include screenshot images in results for visual analysis. Only applies to OCR results. " +
+            "When true, returns base64-encoded images that can be analyzed with vision capabilities. " +
+            "Note: Images are limited to ~1MB each. Default: false",
+          default: false,
+        },
       },
     },
   },
@@ -403,6 +412,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
   try {
     switch (name) {
       case "search-content": {
+        const includeFrames = args.include_frames === true;
         const params = new URLSearchParams();
         for (const [key, value] of Object.entries(args)) {
           if (value !== null && value !== undefined) {
@@ -424,47 +434,81 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
           };
         }
-        const formattedResults = results
-          .map((result: any) => {
-            const content = result.content;
-            if (!content) return null;
-            if (result.type === "OCR") {
-              return (
-                `OCR Text: ${content.text || "N/A"}\n` +
-                `App: ${content.app_name || "N/A"}\n` +
-                `Window: ${content.window_name || "N/A"}\n` +
-                `Time: ${content.timestamp || "N/A"}\n` +
-                "---"
-              );
-            } else if (result.type === "Audio") {
-              return (
-                `Audio Transcription: ${content.transcription || "N/A"}\n` +
+        // Build content array with text and optional images
+        const contentItems: Array<
+          | { type: "text"; text: string }
+          | { type: "image"; data: string; mimeType: string }
+        > = [];
+        const formattedResults: string[] = [];
+        const images: Array<{ data: string; context: string }> = [];
+        for (const result of results) {
+          const content = result.content;
+          if (!content) continue;
+          if (result.type === "OCR") {
+            const textResult =
+              `OCR Text: ${content.text || "N/A"}\n` +
+              `App: ${content.app_name || "N/A"}\n` +
+              `Window: ${content.window_name || "N/A"}\n` +
+              `Time: ${content.timestamp || "N/A"}\n` +
+              `Frame ID: ${content.frame_id || "N/A"}\n` +
+              "---";
+            formattedResults.push(textResult);
+            // Collect frame if available and requested
+            if (includeFrames && content.frame) {
+              images.push({
+                data: content.frame,
+                context: `Screenshot from ${content.app_name || "unknown"} - ${content.window_name || "unknown"} at ${content.timestamp || "unknown"}`,
+              });
+            }
+          } else if (result.type === "Audio") {
+            formattedResults.push(
+              `Audio Transcription: ${content.transcription || "N/A"}\n` +
                 `Device: ${content.device_name || "N/A"}\n` +
                 `Time: ${content.timestamp || "N/A"}\n` +
                 "---"
-              );
-            } else if (result.type === "UI") {
-              return (
-                `UI Text: ${content.text || "N/A"}\n` +
+            );
+          } else if (result.type === "UI") {
+            formattedResults.push(
+              `UI Text: ${content.text || "N/A"}\n` +
                 `App: ${content.app_name || "N/A"}\n` +
                 `Window: ${content.window_name || "N/A"}\n` +
                 `Time: ${content.timestamp || "N/A"}\n` +
                 "---"
-              );
-            }
-            return null;
-          })
-          .filter(Boolean);
+            );
+          }
+        }
-        return {
-          content: [
-            {
-              type: "text",
-              text: "Search Results:\n\n" + formattedResults.join("\n"),
-            },
-          ],
-        };
+        // Add text results
+        contentItems.push({
+          type: "text",
+          text:
+            "Search Results:\n\n" +
+            formattedResults.join("\n") +
+            (images.length > 0
+              ? `\n\n${images.length} screenshot(s) included below for visual analysis:`
+              : ""),
+        });
+        // Add images if requested and available
+        for (const img of images) {
+          // Add context for the image
+          contentItems.push({
+            type: "text",
+            text: `\n📷 ${img.context}`,
+          });
+          // Add the image itself
+          contentItems.push({
+            type: "image",
+            data: img.data,
+            mimeType: "image/png",
+          });
+        }
+        return { content: contentItems };
       }
       case "pixel-control": {