screenpipe-mcp 0.6.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -23
- package/dist/index.js +257 -15
- package/manifest.json +7 -35
- package/package.json +1 -1
- package/src/index.ts +273 -15
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Screenpipe MCP Server
|
|
2
2
|
|
|
3
|
-
<a href="https://www.pulsemcp.com/servers/
|
|
3
|
+
<a href="https://www.pulsemcp.com/servers/screenpipe-screenpipe"><img src="https://www.pulsemcp.com/badge/top-pick/screenpipe-screenpipe" width="400" alt="PulseMCP Badge"></a>
|
|
4
4
|
|
|
5
5
|
<br/>
|
|
6
6
|
|
|
@@ -33,8 +33,8 @@ The easiest way to use screenpipe-mcp is with npx. Edit your Claude Desktop conf
|
|
|
33
33
|
Clone and build from source:
|
|
34
34
|
|
|
35
35
|
```bash
|
|
36
|
-
git clone https://github.com/
|
|
37
|
-
cd screenpipe/screenpipe-integrations/screenpipe-mcp
|
|
36
|
+
git clone https://github.com/screenpipe/screenpipe
|
|
37
|
+
cd screenpipe/crates/screenpipe-integrations/screenpipe-mcp
|
|
38
38
|
npm install
|
|
39
39
|
npm run build
|
|
40
40
|
```
|
|
@@ -64,32 +64,42 @@ npx @modelcontextprotocol/inspector npx screenpipe-mcp
|
|
|
64
64
|
|
|
65
65
|
## Available Tools
|
|
66
66
|
|
|
67
|
-
###
|
|
68
|
-
|
|
69
|
-
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
-
|
|
80
|
-
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
-
|
|
67
|
+
### search-content
|
|
68
|
+
Search through recorded screen content (OCR) and audio transcriptions:
|
|
69
|
+
- Full text search with content type filtering (OCR/Audio/UI)
|
|
70
|
+
- Time range and app/window filtering
|
|
71
|
+
- Speaker filtering (by ID or name)
|
|
72
|
+
- Pagination support
|
|
73
|
+
|
|
74
|
+
### search-ui-events (macOS)
|
|
75
|
+
Search UI input events captured via accessibility APIs. This is the third data modality alongside vision and audio:
|
|
76
|
+
- **Event types**: `click`, `text`, `scroll`, `key`, `app_switch`, `window_focus`, `clipboard`
|
|
77
|
+
- Filter by app, window, time range
|
|
78
|
+
- `text` events show aggregated keyboard input (what was typed)
|
|
79
|
+
- `click` events include accessibility element labels
|
|
80
|
+
- `clipboard` events show copy/paste content
|
|
81
|
+
|
|
82
|
+
### get-ui-event-stats (macOS)
|
|
83
|
+
Get aggregated statistics of UI events:
|
|
84
|
+
- Event counts grouped by app and event type
|
|
85
|
+
- Useful for productivity analysis and app usage tracking
|
|
86
|
+
|
|
87
|
+
### export-video
|
|
88
|
+
Export screen recordings as video files:
|
|
89
|
+
- Specify time range with start/end times
|
|
90
|
+
- Configurable FPS for output video
|
|
85
91
|
|
|
86
92
|
## Example Queries in Claude
|
|
87
93
|
|
|
88
94
|
- "Search for any mentions of 'rust' in my screen recordings"
|
|
89
95
|
- "Find audio transcriptions from the last hour"
|
|
90
96
|
- "Show me what was on my screen in VSCode yesterday"
|
|
91
|
-
- "
|
|
92
|
-
- "Find
|
|
97
|
+
- "Export a video of my screen from 2-3pm today"
|
|
98
|
+
- "Find what John said in our meeting about the database"
|
|
99
|
+
- "What did I type in Slack today?" (uses search-ui-events)
|
|
100
|
+
- "Show me my app usage statistics for the past 3 hours"
|
|
101
|
+
- "What did I copy to clipboard recently?"
|
|
102
|
+
- "Which apps did I switch between most today?"
|
|
93
103
|
|
|
94
104
|
## Requirements
|
|
95
105
|
|
package/dist/index.js
CHANGED
|
@@ -66,7 +66,7 @@ const SCREENPIPE_API = `http://localhost:${port}`;
|
|
|
66
66
|
// Initialize server
|
|
67
67
|
const server = new index_js_1.Server({
|
|
68
68
|
name: "screenpipe",
|
|
69
|
-
version: "0.
|
|
69
|
+
version: "0.7.0",
|
|
70
70
|
}, {
|
|
71
71
|
capabilities: {
|
|
72
72
|
tools: {},
|
|
@@ -81,7 +81,11 @@ const BASE_TOOLS = [
|
|
|
81
81
|
description: "Search screenpipe's recorded content: screen text (OCR), audio transcriptions, and UI elements. " +
|
|
82
82
|
"Returns timestamped results with app context. " +
|
|
83
83
|
"Call with no parameters to get recent activity. " +
|
|
84
|
-
"Use the 'screenpipe://context' resource for current time when building time-based queries
|
|
84
|
+
"Use the 'screenpipe://context' resource for current time when building time-based queries.\n\n" +
|
|
85
|
+
"DEEP LINKS: When referencing specific moments in results, create clickable timeline links:\n" +
|
|
86
|
+
"Format: [readable time](screenpipe://timeline?timestamp=ISO8601_TIMESTAMP)\n" +
|
|
87
|
+
"Example: [10:30 AM](screenpipe://timeline?timestamp=2024-01-15T18:30:00Z)\n" +
|
|
88
|
+
"Users can click these links to jump directly to that moment in their timeline.",
|
|
85
89
|
annotations: {
|
|
86
90
|
title: "Search Content",
|
|
87
91
|
readOnlyHint: true,
|
|
@@ -96,7 +100,7 @@ const BASE_TOOLS = [
|
|
|
96
100
|
content_type: {
|
|
97
101
|
type: "string",
|
|
98
102
|
enum: ["all", "ocr", "audio", "ui"],
|
|
99
|
-
description: "Content type filter. Default: 'all'",
|
|
103
|
+
description: "Content type filter: 'ocr' (screen text), 'audio' (transcriptions), 'ui' (legacy UI monitoring), 'all'. Default: 'all'. For keyboard/mouse/accessibility events, use search-ui-events tool instead.",
|
|
100
104
|
default: "all",
|
|
101
105
|
},
|
|
102
106
|
limit: {
|
|
@@ -140,6 +144,14 @@ const BASE_TOOLS = [
|
|
|
140
144
|
description: "Include base64 screenshots (OCR only). Default: false",
|
|
141
145
|
default: false,
|
|
142
146
|
},
|
|
147
|
+
speaker_ids: {
|
|
148
|
+
type: "string",
|
|
149
|
+
description: "Comma-separated speaker IDs to filter audio results (e.g., '1,2,3')",
|
|
150
|
+
},
|
|
151
|
+
speaker_name: {
|
|
152
|
+
type: "string",
|
|
153
|
+
description: "Filter audio by speaker name (case-insensitive partial match)",
|
|
154
|
+
},
|
|
143
155
|
},
|
|
144
156
|
},
|
|
145
157
|
},
|
|
@@ -177,6 +189,84 @@ const BASE_TOOLS = [
|
|
|
177
189
|
required: ["start_time", "end_time"],
|
|
178
190
|
},
|
|
179
191
|
},
|
|
192
|
+
{
|
|
193
|
+
name: "search-ui-events",
|
|
194
|
+
description: "Search UI input events captured via accessibility APIs (macOS). " +
|
|
195
|
+
"This is the third modality alongside vision (OCR) and audio. " +
|
|
196
|
+
"Captures: mouse clicks, keyboard text input, scroll events, app/window switches, clipboard operations. " +
|
|
197
|
+
"Events include app context, element info (accessibility labels), and precise timestamps. " +
|
|
198
|
+
"Great for understanding user workflow, what was typed, clicked, or copied.",
|
|
199
|
+
annotations: {
|
|
200
|
+
title: "Search UI Events (Accessibility)",
|
|
201
|
+
readOnlyHint: true,
|
|
202
|
+
},
|
|
203
|
+
inputSchema: {
|
|
204
|
+
type: "object",
|
|
205
|
+
properties: {
|
|
206
|
+
q: {
|
|
207
|
+
type: "string",
|
|
208
|
+
description: "Search query for text content, app name, window title. Optional - omit to return recent events.",
|
|
209
|
+
},
|
|
210
|
+
event_type: {
|
|
211
|
+
type: "string",
|
|
212
|
+
enum: ["click", "text", "scroll", "key", "app_switch", "window_focus", "clipboard"],
|
|
213
|
+
description: "Filter by event type. 'text' = aggregated keyboard input, 'click' = mouse clicks with element context, 'app_switch'/'window_focus' = app usage tracking, 'clipboard' = copy/paste events.",
|
|
214
|
+
},
|
|
215
|
+
app_name: {
|
|
216
|
+
type: "string",
|
|
217
|
+
description: "Filter by application name (e.g., 'Google Chrome', 'Slack', 'Code')",
|
|
218
|
+
},
|
|
219
|
+
window_name: {
|
|
220
|
+
type: "string",
|
|
221
|
+
description: "Filter by window title",
|
|
222
|
+
},
|
|
223
|
+
start_time: {
|
|
224
|
+
type: "string",
|
|
225
|
+
format: "date-time",
|
|
226
|
+
description: "ISO 8601 UTC start time (e.g., 2024-01-15T10:00:00Z)",
|
|
227
|
+
},
|
|
228
|
+
end_time: {
|
|
229
|
+
type: "string",
|
|
230
|
+
format: "date-time",
|
|
231
|
+
description: "ISO 8601 UTC end time (e.g., 2024-01-15T18:00:00Z)",
|
|
232
|
+
},
|
|
233
|
+
limit: {
|
|
234
|
+
type: "integer",
|
|
235
|
+
description: "Max results. Default: 50",
|
|
236
|
+
default: 50,
|
|
237
|
+
},
|
|
238
|
+
offset: {
|
|
239
|
+
type: "integer",
|
|
240
|
+
description: "Skip N results for pagination. Default: 0",
|
|
241
|
+
default: 0,
|
|
242
|
+
},
|
|
243
|
+
},
|
|
244
|
+
},
|
|
245
|
+
},
|
|
246
|
+
{
|
|
247
|
+
name: "get-ui-event-stats",
|
|
248
|
+
description: "Get aggregated statistics of UI events by app and event type. " +
|
|
249
|
+
"Useful for understanding app usage patterns, productivity analysis, or finding which apps were used most.",
|
|
250
|
+
annotations: {
|
|
251
|
+
title: "UI Event Statistics",
|
|
252
|
+
readOnlyHint: true,
|
|
253
|
+
},
|
|
254
|
+
inputSchema: {
|
|
255
|
+
type: "object",
|
|
256
|
+
properties: {
|
|
257
|
+
start_time: {
|
|
258
|
+
type: "string",
|
|
259
|
+
format: "date-time",
|
|
260
|
+
description: "ISO 8601 UTC start time for stats period",
|
|
261
|
+
},
|
|
262
|
+
end_time: {
|
|
263
|
+
type: "string",
|
|
264
|
+
format: "date-time",
|
|
265
|
+
description: "ISO 8601 UTC end time for stats period",
|
|
266
|
+
},
|
|
267
|
+
},
|
|
268
|
+
},
|
|
269
|
+
},
|
|
180
270
|
];
|
|
181
271
|
// List tools handler
|
|
182
272
|
server.setRequestHandler(types_js_1.ListToolsRequestSchema, async () => {
|
|
@@ -244,18 +334,20 @@ server.setRequestHandler(types_js_1.ReadResourceRequestSchema, async (request) =
|
|
|
244
334
|
mimeType: "text/markdown",
|
|
245
335
|
text: `# Screenpipe Search Guide
|
|
246
336
|
|
|
337
|
+
## Three Data Modalities
|
|
338
|
+
|
|
339
|
+
Screenpipe captures three types of data:
|
|
340
|
+
1. **Vision (OCR)** - Screen text from screenshots
|
|
341
|
+
2. **Audio** - Transcribed speech from microphone/system audio
|
|
342
|
+
3. **UI Events (Accessibility)** - Keyboard input, mouse clicks, app switches, clipboard (macOS)
|
|
343
|
+
|
|
247
344
|
## Quick Start
|
|
248
345
|
- **Get recent activity**: Call search-content with no parameters
|
|
249
346
|
- **Search text**: \`{"q": "search term", "content_type": "ocr"}\`
|
|
250
|
-
- **
|
|
251
|
-
|
|
252
|
-
## Content Types
|
|
253
|
-
- \`ocr\`: Screen text (what you see)
|
|
254
|
-
- \`audio\`: Transcribed speech
|
|
255
|
-
- \`ui\`: UI element interactions
|
|
256
|
-
- \`all\`: Everything (default)
|
|
347
|
+
- **Get keyboard input**: Use search-ui-events with \`event_type: "text"\`
|
|
348
|
+
- **Track app usage**: Use get-ui-event-stats for aggregated data
|
|
257
349
|
|
|
258
|
-
##
|
|
350
|
+
## search-content (Vision + Audio)
|
|
259
351
|
| Parameter | Description | Default |
|
|
260
352
|
|-----------|-------------|---------|
|
|
261
353
|
| q | Search query | (none - returns all) |
|
|
@@ -266,11 +358,39 @@ server.setRequestHandler(types_js_1.ReadResourceRequestSchema, async (request) =
|
|
|
266
358
|
| app_name | Filter by app | (no filter) |
|
|
267
359
|
| include_frames | Include screenshots | false |
|
|
268
360
|
|
|
361
|
+
## search-ui-events (Accessibility Data)
|
|
362
|
+
| Parameter | Description | Default |
|
|
363
|
+
|-----------|-------------|---------|
|
|
364
|
+
| q | Search text content, app, window | (none) |
|
|
365
|
+
| event_type | click/text/scroll/key/app_switch/window_focus/clipboard | (all types) |
|
|
366
|
+
| app_name | Filter by application | (no filter) |
|
|
367
|
+
| limit | Max results | 50 |
|
|
368
|
+
|
|
369
|
+
### Event Types
|
|
370
|
+
- \`text\`: Aggregated keyboard input (what was typed)
|
|
371
|
+
- \`click\`: Mouse clicks with element context (accessibility labels)
|
|
372
|
+
- \`app_switch\`: When user switched applications
|
|
373
|
+
- \`window_focus\`: When window focus changed
|
|
374
|
+
- \`clipboard\`: Copy/paste operations
|
|
375
|
+
- \`scroll\`: Scroll events with delta values
|
|
376
|
+
|
|
269
377
|
## Tips
|
|
270
378
|
1. Read screenpipe://context first to get current timestamps
|
|
271
|
-
2.
|
|
272
|
-
3. Use
|
|
273
|
-
4. Combine
|
|
379
|
+
2. Use search-ui-events for "what did I type?" queries
|
|
380
|
+
3. Use get-ui-event-stats to understand app usage patterns
|
|
381
|
+
4. Combine search-content (what was on screen) with search-ui-events (what was done)
|
|
382
|
+
|
|
383
|
+
## Timeline Deep Links
|
|
384
|
+
When showing search results to users, create clickable links to specific moments:
|
|
385
|
+
|
|
386
|
+
**Format:** \`[readable time](screenpipe://timeline?timestamp=ISO8601_TIMESTAMP)\`
|
|
387
|
+
|
|
388
|
+
**Examples:**
|
|
389
|
+
- \`[10:30 AM](screenpipe://timeline?timestamp=2024-01-15T18:30:00Z)\`
|
|
390
|
+
- \`[yesterday at 3pm](screenpipe://timeline?timestamp=2024-01-14T15:00:00Z)\`
|
|
391
|
+
|
|
392
|
+
Users can click these links to jump directly to that moment in their screenpipe timeline.
|
|
393
|
+
Always use the exact timestamp from search results when creating these links.`,
|
|
274
394
|
},
|
|
275
395
|
],
|
|
276
396
|
};
|
|
@@ -592,7 +712,8 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
|
|
|
592
712
|
// Sort frame IDs
|
|
593
713
|
frameIds.sort((a, b) => a - b);
|
|
594
714
|
// Step 2: Connect to WebSocket and export video
|
|
595
|
-
|
|
715
|
+
// Send frame_ids in message body to avoid URL length limits
|
|
716
|
+
const wsUrl = `ws://localhost:${port}/frames/export?fps=${fps}`;
|
|
596
717
|
const exportResult = await new Promise((resolve) => {
|
|
597
718
|
const ws = new ws_1.WebSocket(wsUrl);
|
|
598
719
|
let resolved = false;
|
|
@@ -603,6 +724,10 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
|
|
|
603
724
|
resolve({ success: false, error: "Export timed out after 5 minutes" });
|
|
604
725
|
}
|
|
605
726
|
}, 5 * 60 * 1000); // 5 minute timeout
|
|
727
|
+
ws.on("open", () => {
|
|
728
|
+
// Send frame_ids in message body to avoid URL length limits
|
|
729
|
+
ws.send(JSON.stringify({ frame_ids: frameIds }));
|
|
730
|
+
});
|
|
606
731
|
ws.on("error", (error) => {
|
|
607
732
|
if (!resolved) {
|
|
608
733
|
resolved = true;
|
|
@@ -674,6 +799,123 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
|
|
|
674
799
|
};
|
|
675
800
|
}
|
|
676
801
|
}
|
|
802
|
+
case "search-ui-events": {
|
|
803
|
+
const params = new URLSearchParams();
|
|
804
|
+
for (const [key, value] of Object.entries(args)) {
|
|
805
|
+
if (value !== null && value !== undefined) {
|
|
806
|
+
// Map event_type to the API parameter
|
|
807
|
+
params.append(key, String(value));
|
|
808
|
+
}
|
|
809
|
+
}
|
|
810
|
+
const response = await fetchAPI(`/ui-events?${params.toString()}`);
|
|
811
|
+
if (!response.ok) {
|
|
812
|
+
throw new Error(`HTTP error: ${response.status}`);
|
|
813
|
+
}
|
|
814
|
+
const data = await response.json();
|
|
815
|
+
const events = data.data || [];
|
|
816
|
+
const pagination = data.pagination || {};
|
|
817
|
+
if (events.length === 0) {
|
|
818
|
+
return {
|
|
819
|
+
content: [
|
|
820
|
+
{
|
|
821
|
+
type: "text",
|
|
822
|
+
text: "No UI events found. This feature requires:\n" +
|
|
823
|
+
"1. macOS with Accessibility permissions granted\n" +
|
|
824
|
+
"2. UI Events enabled in screenpipe settings\n" +
|
|
825
|
+
"Try: broader time range or different event_type filter.",
|
|
826
|
+
},
|
|
827
|
+
],
|
|
828
|
+
};
|
|
829
|
+
}
|
|
830
|
+
const formattedEvents = [];
|
|
831
|
+
for (const event of events) {
|
|
832
|
+
const parts = [
|
|
833
|
+
`[${event.event_type?.toUpperCase() || "?"}]`,
|
|
834
|
+
event.app_name || "?",
|
|
835
|
+
event.window_title ? `| ${event.window_title}` : "",
|
|
836
|
+
];
|
|
837
|
+
let details = "";
|
|
838
|
+
if (event.event_type === "text" && event.text_content) {
|
|
839
|
+
details = `Text: "${event.text_content}"`;
|
|
840
|
+
}
|
|
841
|
+
else if (event.event_type === "click") {
|
|
842
|
+
details = `Click at (${event.x || 0}, ${event.y || 0})`;
|
|
843
|
+
if (event.element?.label) {
|
|
844
|
+
details += ` on "${event.element.label}"`;
|
|
845
|
+
}
|
|
846
|
+
}
|
|
847
|
+
else if (event.event_type === "clipboard" && event.text_content) {
|
|
848
|
+
details = `Clipboard: "${event.text_content.substring(0, 100)}${event.text_content.length > 100 ? "..." : ""}"`;
|
|
849
|
+
}
|
|
850
|
+
else if (event.event_type === "app_switch" || event.event_type === "window_focus") {
|
|
851
|
+
details = `Switched to: ${event.app_name}${event.window_title ? ` - ${event.window_title}` : ""}`;
|
|
852
|
+
}
|
|
853
|
+
else if (event.event_type === "scroll") {
|
|
854
|
+
details = `Scroll: dx=${event.delta_x || 0}, dy=${event.delta_y || 0}`;
|
|
855
|
+
}
|
|
856
|
+
formattedEvents.push(`${parts.join(" ")}\n` +
|
|
857
|
+
`${event.timestamp || ""}\n` +
|
|
858
|
+
`${details}`);
|
|
859
|
+
}
|
|
860
|
+
const header = `UI Events: ${events.length}/${pagination.total || "?"}` +
|
|
861
|
+
(pagination.total > events.length ? ` (use offset=${(pagination.offset || 0) + events.length} for more)` : "");
|
|
862
|
+
return {
|
|
863
|
+
content: [
|
|
864
|
+
{
|
|
865
|
+
type: "text",
|
|
866
|
+
text: header + "\n\n" + formattedEvents.join("\n---\n"),
|
|
867
|
+
},
|
|
868
|
+
],
|
|
869
|
+
};
|
|
870
|
+
}
|
|
871
|
+
case "get-ui-event-stats": {
|
|
872
|
+
const params = new URLSearchParams();
|
|
873
|
+
if (args.start_time)
|
|
874
|
+
params.append("start_time", String(args.start_time));
|
|
875
|
+
if (args.end_time)
|
|
876
|
+
params.append("end_time", String(args.end_time));
|
|
877
|
+
const response = await fetchAPI(`/ui-events/stats?${params.toString()}`);
|
|
878
|
+
if (!response.ok) {
|
|
879
|
+
throw new Error(`HTTP error: ${response.status}`);
|
|
880
|
+
}
|
|
881
|
+
const stats = await response.json();
|
|
882
|
+
if (!stats || stats.length === 0) {
|
|
883
|
+
return {
|
|
884
|
+
content: [
|
|
885
|
+
{
|
|
886
|
+
type: "text",
|
|
887
|
+
text: "No UI event statistics available. UI Events may not be enabled or no events have been captured yet.",
|
|
888
|
+
},
|
|
889
|
+
],
|
|
890
|
+
};
|
|
891
|
+
}
|
|
892
|
+
// Group by app
|
|
893
|
+
const byApp = {};
|
|
894
|
+
for (const stat of stats) {
|
|
895
|
+
const app = stat.app_name || "Unknown";
|
|
896
|
+
if (!byApp[app]) {
|
|
897
|
+
byApp[app] = { app, events: {}, total: 0 };
|
|
898
|
+
}
|
|
899
|
+
byApp[app].events[stat.event_type] = stat.count;
|
|
900
|
+
byApp[app].total += stat.count;
|
|
901
|
+
}
|
|
902
|
+
// Sort by total events
|
|
903
|
+
const sorted = Object.values(byApp).sort((a, b) => b.total - a.total);
|
|
904
|
+
const lines = sorted.map(({ app, events, total }) => {
|
|
905
|
+
const eventDetails = Object.entries(events)
|
|
906
|
+
.map(([type, count]) => `${type}: ${count}`)
|
|
907
|
+
.join(", ");
|
|
908
|
+
return `${app}: ${total} events (${eventDetails})`;
|
|
909
|
+
});
|
|
910
|
+
return {
|
|
911
|
+
content: [
|
|
912
|
+
{
|
|
913
|
+
type: "text",
|
|
914
|
+
text: `UI Event Statistics:\n\n${lines.join("\n")}`,
|
|
915
|
+
},
|
|
916
|
+
],
|
|
917
|
+
};
|
|
918
|
+
}
|
|
677
919
|
default:
|
|
678
920
|
throw new Error(`Unknown tool: ${name}`);
|
|
679
921
|
}
|
package/manifest.json
CHANGED
|
@@ -2,20 +2,20 @@
|
|
|
2
2
|
"manifest_version": "0.3",
|
|
3
3
|
"name": "screenpipe",
|
|
4
4
|
"display_name": "Screenpipe",
|
|
5
|
-
"version": "0.
|
|
6
|
-
"description": "Search your screen recordings
|
|
7
|
-
"long_description": "Screenpipe is a 24/7 screen and audio recorder that lets you search everything you've seen or heard. This extension connects Claude to your local screenpipe instance, enabling AI-powered search through your digital memory
|
|
5
|
+
"version": "0.8.0",
|
|
6
|
+
"description": "Search your screen recordings and audio transcriptions with AI",
|
|
7
|
+
"long_description": "Screenpipe is a 24/7 screen and audio recorder that lets you search everything you've seen or heard. This extension connects Claude to your local screenpipe instance, enabling AI-powered search through your digital memory.",
|
|
8
8
|
"author": {
|
|
9
9
|
"name": "screenpipe",
|
|
10
10
|
"url": "https://screenpi.pe"
|
|
11
11
|
},
|
|
12
12
|
"repository": {
|
|
13
13
|
"type": "git",
|
|
14
|
-
"url": "https://github.com/
|
|
14
|
+
"url": "https://github.com/screenpipe/screenpipe"
|
|
15
15
|
},
|
|
16
16
|
"homepage": "https://screenpi.pe",
|
|
17
|
-
"documentation": "https://github.com/
|
|
18
|
-
"support": "https://github.com/
|
|
17
|
+
"documentation": "https://github.com/screenpipe/screenpipe/tree/main/crates/screenpipe-integrations/screenpipe-mcp",
|
|
18
|
+
"support": "https://github.com/screenpipe/screenpipe/issues",
|
|
19
19
|
"license": "MIT",
|
|
20
20
|
"server": {
|
|
21
21
|
"type": "node",
|
|
@@ -28,39 +28,11 @@
|
|
|
28
28
|
"tools": [
|
|
29
29
|
{
|
|
30
30
|
"name": "search-content",
|
|
31
|
-
"description": "Search through recorded screen content, audio transcriptions, and UI elements"
|
|
31
|
+
"description": "Search through recorded screen content, audio transcriptions, and UI elements with speaker filtering"
|
|
32
32
|
},
|
|
33
33
|
{
|
|
34
34
|
"name": "export-video",
|
|
35
35
|
"description": "Export screen recordings as MP4 video for a specific time range"
|
|
36
|
-
},
|
|
37
|
-
{
|
|
38
|
-
"name": "pixel-control",
|
|
39
|
-
"description": "Control mouse and keyboard (type text, press keys, move mouse, click)"
|
|
40
|
-
},
|
|
41
|
-
{
|
|
42
|
-
"name": "find-elements",
|
|
43
|
-
"description": "Find UI elements in applications by role (macOS only)"
|
|
44
|
-
},
|
|
45
|
-
{
|
|
46
|
-
"name": "click-element",
|
|
47
|
-
"description": "Click UI elements by ID (macOS only)"
|
|
48
|
-
},
|
|
49
|
-
{
|
|
50
|
-
"name": "fill-element",
|
|
51
|
-
"description": "Type text into UI elements (macOS only)"
|
|
52
|
-
},
|
|
53
|
-
{
|
|
54
|
-
"name": "scroll-element",
|
|
55
|
-
"description": "Scroll UI elements (macOS only)"
|
|
56
|
-
},
|
|
57
|
-
{
|
|
58
|
-
"name": "open-application",
|
|
59
|
-
"description": "Open applications by name (macOS only)"
|
|
60
|
-
},
|
|
61
|
-
{
|
|
62
|
-
"name": "open-url",
|
|
63
|
-
"description": "Open URLs in browser (macOS only)"
|
|
64
36
|
}
|
|
65
37
|
],
|
|
66
38
|
"compatibility": {
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -45,7 +45,7 @@ const SCREENPIPE_API = `http://localhost:${port}`;
|
|
|
45
45
|
const server = new Server(
|
|
46
46
|
{
|
|
47
47
|
name: "screenpipe",
|
|
48
|
-
version: "0.
|
|
48
|
+
version: "0.7.0",
|
|
49
49
|
},
|
|
50
50
|
{
|
|
51
51
|
capabilities: {
|
|
@@ -64,7 +64,11 @@ const BASE_TOOLS: Tool[] = [
|
|
|
64
64
|
"Search screenpipe's recorded content: screen text (OCR), audio transcriptions, and UI elements. " +
|
|
65
65
|
"Returns timestamped results with app context. " +
|
|
66
66
|
"Call with no parameters to get recent activity. " +
|
|
67
|
-
"Use the 'screenpipe://context' resource for current time when building time-based queries
|
|
67
|
+
"Use the 'screenpipe://context' resource for current time when building time-based queries.\n\n" +
|
|
68
|
+
"DEEP LINKS: When referencing specific moments in results, create clickable timeline links:\n" +
|
|
69
|
+
"Format: [readable time](screenpipe://timeline?timestamp=ISO8601_TIMESTAMP)\n" +
|
|
70
|
+
"Example: [10:30 AM](screenpipe://timeline?timestamp=2024-01-15T18:30:00Z)\n" +
|
|
71
|
+
"Users can click these links to jump directly to that moment in their timeline.",
|
|
68
72
|
annotations: {
|
|
69
73
|
title: "Search Content",
|
|
70
74
|
readOnlyHint: true,
|
|
@@ -79,7 +83,7 @@ const BASE_TOOLS: Tool[] = [
|
|
|
79
83
|
content_type: {
|
|
80
84
|
type: "string",
|
|
81
85
|
enum: ["all", "ocr", "audio", "ui"],
|
|
82
|
-
description: "Content type filter. Default: 'all'",
|
|
86
|
+
description: "Content type filter: 'ocr' (screen text), 'audio' (transcriptions), 'ui' (legacy UI monitoring), 'all'. Default: 'all'. For keyboard/mouse/accessibility events, use search-ui-events tool instead.",
|
|
83
87
|
default: "all",
|
|
84
88
|
},
|
|
85
89
|
limit: {
|
|
@@ -123,6 +127,14 @@ const BASE_TOOLS: Tool[] = [
|
|
|
123
127
|
description: "Include base64 screenshots (OCR only). Default: false",
|
|
124
128
|
default: false,
|
|
125
129
|
},
|
|
130
|
+
speaker_ids: {
|
|
131
|
+
type: "string",
|
|
132
|
+
description: "Comma-separated speaker IDs to filter audio results (e.g., '1,2,3')",
|
|
133
|
+
},
|
|
134
|
+
speaker_name: {
|
|
135
|
+
type: "string",
|
|
136
|
+
description: "Filter audio by speaker name (case-insensitive partial match)",
|
|
137
|
+
},
|
|
126
138
|
},
|
|
127
139
|
},
|
|
128
140
|
},
|
|
@@ -164,6 +176,86 @@ const BASE_TOOLS: Tool[] = [
|
|
|
164
176
|
required: ["start_time", "end_time"],
|
|
165
177
|
},
|
|
166
178
|
},
|
|
179
|
+
{
|
|
180
|
+
name: "search-ui-events",
|
|
181
|
+
description:
|
|
182
|
+
"Search UI input events captured via accessibility APIs (macOS). " +
|
|
183
|
+
"This is the third modality alongside vision (OCR) and audio. " +
|
|
184
|
+
"Captures: mouse clicks, keyboard text input, scroll events, app/window switches, clipboard operations. " +
|
|
185
|
+
"Events include app context, element info (accessibility labels), and precise timestamps. " +
|
|
186
|
+
"Great for understanding user workflow, what was typed, clicked, or copied.",
|
|
187
|
+
annotations: {
|
|
188
|
+
title: "Search UI Events (Accessibility)",
|
|
189
|
+
readOnlyHint: true,
|
|
190
|
+
},
|
|
191
|
+
inputSchema: {
|
|
192
|
+
type: "object",
|
|
193
|
+
properties: {
|
|
194
|
+
q: {
|
|
195
|
+
type: "string",
|
|
196
|
+
description: "Search query for text content, app name, window title. Optional - omit to return recent events.",
|
|
197
|
+
},
|
|
198
|
+
event_type: {
|
|
199
|
+
type: "string",
|
|
200
|
+
enum: ["click", "text", "scroll", "key", "app_switch", "window_focus", "clipboard"],
|
|
201
|
+
description: "Filter by event type. 'text' = aggregated keyboard input, 'click' = mouse clicks with element context, 'app_switch'/'window_focus' = app usage tracking, 'clipboard' = copy/paste events.",
|
|
202
|
+
},
|
|
203
|
+
app_name: {
|
|
204
|
+
type: "string",
|
|
205
|
+
description: "Filter by application name (e.g., 'Google Chrome', 'Slack', 'Code')",
|
|
206
|
+
},
|
|
207
|
+
window_name: {
|
|
208
|
+
type: "string",
|
|
209
|
+
description: "Filter by window title",
|
|
210
|
+
},
|
|
211
|
+
start_time: {
|
|
212
|
+
type: "string",
|
|
213
|
+
format: "date-time",
|
|
214
|
+
description: "ISO 8601 UTC start time (e.g., 2024-01-15T10:00:00Z)",
|
|
215
|
+
},
|
|
216
|
+
end_time: {
|
|
217
|
+
type: "string",
|
|
218
|
+
format: "date-time",
|
|
219
|
+
description: "ISO 8601 UTC end time (e.g., 2024-01-15T18:00:00Z)",
|
|
220
|
+
},
|
|
221
|
+
limit: {
|
|
222
|
+
type: "integer",
|
|
223
|
+
description: "Max results. Default: 50",
|
|
224
|
+
default: 50,
|
|
225
|
+
},
|
|
226
|
+
offset: {
|
|
227
|
+
type: "integer",
|
|
228
|
+
description: "Skip N results for pagination. Default: 0",
|
|
229
|
+
default: 0,
|
|
230
|
+
},
|
|
231
|
+
},
|
|
232
|
+
},
|
|
233
|
+
},
|
|
234
|
+
{
|
|
235
|
+
name: "get-ui-event-stats",
|
|
236
|
+
description:
|
|
237
|
+
"Get aggregated statistics of UI events by app and event type. " +
|
|
238
|
+
"Useful for understanding app usage patterns, productivity analysis, or finding which apps were used most.",
|
|
239
|
+
annotations: {
|
|
240
|
+
title: "UI Event Statistics",
|
|
241
|
+
readOnlyHint: true,
|
|
242
|
+
},
|
|
243
|
+
inputSchema: {
|
|
244
|
+
type: "object",
|
|
245
|
+
properties: {
|
|
246
|
+
start_time: {
|
|
247
|
+
type: "string",
|
|
248
|
+
format: "date-time",
|
|
249
|
+
description: "ISO 8601 UTC start time for stats period",
|
|
250
|
+
},
|
|
251
|
+
end_time: {
|
|
252
|
+
type: "string",
|
|
253
|
+
format: "date-time",
|
|
254
|
+
description: "ISO 8601 UTC end time for stats period",
|
|
255
|
+
},
|
|
256
|
+
},
|
|
257
|
+
},
|
|
258
|
+
},
|
|
167
259
|
];
|
|
168
260
|
|
|
169
261
|
// List tools handler
|
|
@@ -237,18 +329,20 @@ server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
|
|
|
237
329
|
mimeType: "text/markdown",
|
|
238
330
|
text: `# Screenpipe Search Guide
|
|
239
331
|
|
|
332
|
+
## Three Data Modalities
|
|
333
|
+
|
|
334
|
+
Screenpipe captures three types of data:
|
|
335
|
+
1. **Vision (OCR)** - Screen text from screenshots
|
|
336
|
+
2. **Audio** - Transcribed speech from microphone/system audio
|
|
337
|
+
3. **UI Events (Accessibility)** - Keyboard input, mouse clicks, app switches, clipboard (macOS)
|
|
338
|
+
|
|
240
339
|
## Quick Start
|
|
241
340
|
- **Get recent activity**: Call search-content with no parameters
|
|
242
341
|
- **Search text**: \`{"q": "search term", "content_type": "ocr"}\`
|
|
243
|
-
- **
|
|
244
|
-
|
|
245
|
-
## Content Types
|
|
246
|
-
- \`ocr\`: Screen text (what you see)
|
|
247
|
-
- \`audio\`: Transcribed speech
|
|
248
|
-
- \`ui\`: UI element interactions
|
|
249
|
-
- \`all\`: Everything (default)
|
|
342
|
+
- **Get keyboard input**: Use search-ui-events with \`event_type: "text"\`
|
|
343
|
+
- **Track app usage**: Use get-ui-event-stats for aggregated data
|
|
250
344
|
|
|
251
|
-
##
|
|
345
|
+
## search-content (Vision + Audio)
|
|
252
346
|
| Parameter | Description | Default |
|
|
253
347
|
|-----------|-------------|---------|
|
|
254
348
|
| q | Search query | (none - returns all) |
|
|
@@ -259,11 +353,39 @@ server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
|
|
|
259
353
|
| app_name | Filter by app | (no filter) |
|
|
260
354
|
| include_frames | Include screenshots | false |
|
|
261
355
|
|
|
356
|
+
## search-ui-events (Accessibility Data)
|
|
357
|
+
| Parameter | Description | Default |
|
|
358
|
+
|-----------|-------------|---------|
|
|
359
|
+
| q | Search text content, app, window | (none) |
|
|
360
|
+
| event_type | click/text/scroll/key/app_switch/window_focus/clipboard | (all types) |
|
|
361
|
+
| app_name | Filter by application | (no filter) |
|
|
362
|
+
| limit | Max results | 50 |
|
|
363
|
+
|
|
364
|
+
### Event Types
|
|
365
|
+
- \`text\`: Aggregated keyboard input (what was typed)
|
|
366
|
+
- \`click\`: Mouse clicks with element context (accessibility labels)
|
|
367
|
+
- \`app_switch\`: When user switched applications
|
|
368
|
+
- \`window_focus\`: When window focus changed
|
|
369
|
+
- \`clipboard\`: Copy/paste operations
|
|
370
|
+
- \`scroll\`: Scroll events with delta values
|
|
371
|
+
|
|
262
372
|
## Tips
|
|
263
373
|
1. Read screenpipe://context first to get current timestamps
|
|
264
|
-
2.
|
|
265
|
-
3. Use
|
|
266
|
-
4. Combine
|
|
374
|
+
2. Use search-ui-events for "what did I type?" queries
|
|
375
|
+
3. Use get-ui-event-stats to understand app usage patterns
|
|
376
|
+
4. Combine search-content (what was on screen) with search-ui-events (what was done)
|
|
377
|
+
|
|
378
|
+
## Timeline Deep Links
|
|
379
|
+
When showing search results to users, create clickable links to specific moments:
|
|
380
|
+
|
|
381
|
+
**Format:** \`[readable time](screenpipe://timeline?timestamp=ISO8601_TIMESTAMP)\`
|
|
382
|
+
|
|
383
|
+
**Examples:**
|
|
384
|
+
- \`[10:30 AM](screenpipe://timeline?timestamp=2024-01-15T18:30:00Z)\`
|
|
385
|
+
- \`[yesterday at 3pm](screenpipe://timeline?timestamp=2024-01-14T15:00:00Z)\`
|
|
386
|
+
|
|
387
|
+
Users can click these links to jump directly to that moment in their screenpipe timeline.
|
|
388
|
+
Always use the exact timestamp from search results when creating these links.`,
|
|
267
389
|
},
|
|
268
390
|
],
|
|
269
391
|
};
|
|
@@ -630,7 +752,8 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
630
752
|
frameIds.sort((a, b) => a - b);
|
|
631
753
|
|
|
632
754
|
// Step 2: Connect to WebSocket and export video
|
|
633
|
-
|
|
755
|
+
// Send frame_ids in message body to avoid URL length limits
|
|
756
|
+
const wsUrl = `ws://localhost:${port}/frames/export?fps=${fps}`;
|
|
634
757
|
|
|
635
758
|
const exportResult = await new Promise<{
|
|
636
759
|
success: boolean;
|
|
@@ -649,6 +772,11 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
649
772
|
}
|
|
650
773
|
}, 5 * 60 * 1000); // 5 minute timeout
|
|
651
774
|
|
|
775
|
+
ws.on("open", () => {
|
|
776
|
+
// Send frame_ids in message body to avoid URL length limits
|
|
777
|
+
ws.send(JSON.stringify({ frame_ids: frameIds }));
|
|
778
|
+
});
|
|
779
|
+
|
|
652
780
|
ws.on("error", (error) => {
|
|
653
781
|
if (!resolved) {
|
|
654
782
|
resolved = true;
|
|
@@ -724,6 +852,136 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
724
852
|
}
|
|
725
853
|
}
|
|
726
854
|
|
|
855
|
+
case "search-ui-events": {
|
|
856
|
+
const params = new URLSearchParams();
|
|
857
|
+
for (const [key, value] of Object.entries(args)) {
|
|
858
|
+
if (value !== null && value !== undefined) {
|
|
859
|
+
// Map event_type to the API parameter
|
|
860
|
+
params.append(key, String(value));
|
|
861
|
+
}
|
|
862
|
+
}
|
|
863
|
+
|
|
864
|
+
const response = await fetchAPI(`/ui-events?${params.toString()}`);
|
|
865
|
+
if (!response.ok) {
|
|
866
|
+
throw new Error(`HTTP error: ${response.status}`);
|
|
867
|
+
}
|
|
868
|
+
|
|
869
|
+
const data = await response.json();
|
|
870
|
+
const events = data.data || [];
|
|
871
|
+
const pagination = data.pagination || {};
|
|
872
|
+
|
|
873
|
+
if (events.length === 0) {
|
|
874
|
+
return {
|
|
875
|
+
content: [
|
|
876
|
+
{
|
|
877
|
+
type: "text",
|
|
878
|
+
text: "No UI events found. This feature requires:\n" +
|
|
879
|
+
"1. macOS with Accessibility permissions granted\n" +
|
|
880
|
+
"2. UI Events enabled in screenpipe settings\n" +
|
|
881
|
+
"Try: broader time range or different event_type filter.",
|
|
882
|
+
},
|
|
883
|
+
],
|
|
884
|
+
};
|
|
885
|
+
}
|
|
886
|
+
|
|
887
|
+
const formattedEvents: string[] = [];
|
|
888
|
+
for (const event of events) {
|
|
889
|
+
const parts = [
|
|
890
|
+
`[${event.event_type?.toUpperCase() || "?"}]`,
|
|
891
|
+
event.app_name || "?",
|
|
892
|
+
event.window_title ? `| ${event.window_title}` : "",
|
|
893
|
+
];
|
|
894
|
+
|
|
895
|
+
let details = "";
|
|
896
|
+
if (event.event_type === "text" && event.text_content) {
|
|
897
|
+
details = `Text: "${event.text_content}"`;
|
|
898
|
+
} else if (event.event_type === "click") {
|
|
899
|
+
details = `Click at (${event.x || 0}, ${event.y || 0})`;
|
|
900
|
+
if (event.element?.label) {
|
|
901
|
+
details += ` on "${event.element.label}"`;
|
|
902
|
+
}
|
|
903
|
+
} else if (event.event_type === "clipboard" && event.text_content) {
|
|
904
|
+
details = `Clipboard: "${event.text_content.substring(0, 100)}${event.text_content.length > 100 ? "..." : ""}"`;
|
|
905
|
+
} else if (event.event_type === "app_switch" || event.event_type === "window_focus") {
|
|
906
|
+
details = `Switched to: ${event.app_name}${event.window_title ? ` - ${event.window_title}` : ""}`;
|
|
907
|
+
} else if (event.event_type === "scroll") {
|
|
908
|
+
details = `Scroll: dx=${event.delta_x || 0}, dy=${event.delta_y || 0}`;
|
|
909
|
+
}
|
|
910
|
+
|
|
911
|
+
formattedEvents.push(
|
|
912
|
+
`${parts.join(" ")}\n` +
|
|
913
|
+
`${event.timestamp || ""}\n` +
|
|
914
|
+
`${details}`
|
|
915
|
+
);
|
|
916
|
+
}
|
|
917
|
+
|
|
918
|
+
const header = `UI Events: ${events.length}/${pagination.total || "?"}` +
|
|
919
|
+
(pagination.total > events.length ? ` (use offset=${(pagination.offset || 0) + events.length} for more)` : "");
|
|
920
|
+
|
|
921
|
+
return {
|
|
922
|
+
content: [
|
|
923
|
+
{
|
|
924
|
+
type: "text",
|
|
925
|
+
text: header + "\n\n" + formattedEvents.join("\n---\n"),
|
|
926
|
+
},
|
|
927
|
+
],
|
|
928
|
+
};
|
|
929
|
+
}
|
|
930
|
+
|
|
931
|
+
case "get-ui-event-stats": {
|
|
932
|
+
const params = new URLSearchParams();
|
|
933
|
+
if (args.start_time) params.append("start_time", String(args.start_time));
|
|
934
|
+
if (args.end_time) params.append("end_time", String(args.end_time));
|
|
935
|
+
|
|
936
|
+
const response = await fetchAPI(`/ui-events/stats?${params.toString()}`);
|
|
937
|
+
if (!response.ok) {
|
|
938
|
+
throw new Error(`HTTP error: ${response.status}`);
|
|
939
|
+
}
|
|
940
|
+
|
|
941
|
+
const stats = await response.json();
|
|
942
|
+
|
|
943
|
+
if (!stats || stats.length === 0) {
|
|
944
|
+
return {
|
|
945
|
+
content: [
|
|
946
|
+
{
|
|
947
|
+
type: "text",
|
|
948
|
+
text: "No UI event statistics available. UI Events may not be enabled or no events have been captured yet.",
|
|
949
|
+
},
|
|
950
|
+
],
|
|
951
|
+
};
|
|
952
|
+
}
|
|
953
|
+
|
|
954
|
+
// Group by app
|
|
955
|
+
const byApp: Record<string, { app: string; events: Record<string, number>; total: number }> = {};
|
|
956
|
+
for (const stat of stats) {
|
|
957
|
+
const app = stat.app_name || "Unknown";
|
|
958
|
+
if (!byApp[app]) {
|
|
959
|
+
byApp[app] = { app, events: {}, total: 0 };
|
|
960
|
+
}
|
|
961
|
+
byApp[app].events[stat.event_type] = stat.count;
|
|
962
|
+
byApp[app].total += stat.count;
|
|
963
|
+
}
|
|
964
|
+
|
|
965
|
+
// Sort by total events
|
|
966
|
+
const sorted = Object.values(byApp).sort((a, b) => b.total - a.total);
|
|
967
|
+
|
|
968
|
+
const lines = sorted.map(({ app, events, total }) => {
|
|
969
|
+
const eventDetails = Object.entries(events)
|
|
970
|
+
.map(([type, count]) => `${type}: ${count}`)
|
|
971
|
+
.join(", ");
|
|
972
|
+
return `${app}: ${total} events (${eventDetails})`;
|
|
973
|
+
});
|
|
974
|
+
|
|
975
|
+
return {
|
|
976
|
+
content: [
|
|
977
|
+
{
|
|
978
|
+
type: "text",
|
|
979
|
+
text: `UI Event Statistics:\n\n${lines.join("\n")}`,
|
|
980
|
+
},
|
|
981
|
+
],
|
|
982
|
+
};
|
|
983
|
+
}
|
|
984
|
+
|
|
727
985
|
default:
|
|
728
986
|
throw new Error(`Unknown tool: ${name}`);
|
|
729
987
|
}
|