screenpipe-mcp 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -20
- package/dist/index.js +240 -14
- package/manifest.json +3 -31
- package/package.json +1 -1
- package/src/index.ts +256 -14
package/README.md
CHANGED
|
@@ -64,32 +64,42 @@ npx @modelcontextprotocol/inspector npx screenpipe-mcp
|
|
|
64
64
|
|
|
65
65
|
## Available Tools
|
|
66
66
|
|
|
67
|
-
###
|
|
68
|
-
|
|
69
|
-
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
-
|
|
80
|
-
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
-
|
|
67
|
+
### search-content
|
|
68
|
+
Search through recorded screen content (OCR) and audio transcriptions:
|
|
69
|
+
- Full text search with content type filtering (OCR/Audio/UI)
|
|
70
|
+
- Time range and app/window filtering
|
|
71
|
+
- Speaker filtering (by ID or name)
|
|
72
|
+
- Pagination support
|
|
73
|
+
|
|
74
|
+
### search-ui-events (macOS)
|
|
75
|
+
Search UI input events captured via accessibility APIs. This is the third data modality alongside vision and audio:
|
|
76
|
+
- **Event types**: `click`, `text`, `scroll`, `key`, `app_switch`, `window_focus`, `clipboard`
|
|
77
|
+
- Filter by app, window, time range
|
|
78
|
+
- `text` events show aggregated keyboard input (what was typed)
|
|
79
|
+
- `click` events include accessibility element labels
|
|
80
|
+
- `clipboard` events show copy/paste content
|
|
81
|
+
|
|
82
|
+
### get-ui-event-stats (macOS)
|
|
83
|
+
Get aggregated statistics of UI events:
|
|
84
|
+
- Event counts grouped by app and event type
|
|
85
|
+
- Useful for productivity analysis and app usage tracking
|
|
86
|
+
|
|
87
|
+
### export-video
|
|
88
|
+
Export screen recordings as video files:
|
|
89
|
+
- Specify time range with start/end times
|
|
90
|
+
- Configurable FPS for output video
|
|
85
91
|
|
|
86
92
|
## Example Queries in Claude
|
|
87
93
|
|
|
88
94
|
- "Search for any mentions of 'rust' in my screen recordings"
|
|
89
95
|
- "Find audio transcriptions from the last hour"
|
|
90
96
|
- "Show me what was on my screen in VSCode yesterday"
|
|
91
|
-
- "
|
|
92
|
-
- "Find
|
|
97
|
+
- "Export a video of my screen from 2-3pm today"
|
|
98
|
+
- "Find what John said in our meeting about the database"
|
|
99
|
+
- "What did I type in Slack today?" (uses search-ui-events)
|
|
100
|
+
- "Show me my app usage statistics for the past 3 hours"
|
|
101
|
+
- "What did I copy to clipboard recently?"
|
|
102
|
+
- "Which apps did I switch between most today?"
|
|
93
103
|
|
|
94
104
|
## Requirements
|
|
95
105
|
|
package/dist/index.js
CHANGED
|
@@ -66,7 +66,7 @@ const SCREENPIPE_API = `http://localhost:${port}`;
|
|
|
66
66
|
// Initialize server
|
|
67
67
|
const server = new index_js_1.Server({
|
|
68
68
|
name: "screenpipe",
|
|
69
|
-
version: "0.
|
|
69
|
+
version: "0.7.0",
|
|
70
70
|
}, {
|
|
71
71
|
capabilities: {
|
|
72
72
|
tools: {},
|
|
@@ -96,7 +96,7 @@ const BASE_TOOLS = [
|
|
|
96
96
|
content_type: {
|
|
97
97
|
type: "string",
|
|
98
98
|
enum: ["all", "ocr", "audio", "ui"],
|
|
99
|
-
description: "Content type filter. Default: 'all'",
|
|
99
|
+
description: "Content type filter: 'ocr' (screen text), 'audio' (transcriptions), 'ui' (legacy UI monitoring), 'all'. Default: 'all'. For keyboard/mouse/accessibility events, use search-ui-events tool instead.",
|
|
100
100
|
default: "all",
|
|
101
101
|
},
|
|
102
102
|
limit: {
|
|
@@ -140,6 +140,14 @@ const BASE_TOOLS = [
|
|
|
140
140
|
description: "Include base64 screenshots (OCR only). Default: false",
|
|
141
141
|
default: false,
|
|
142
142
|
},
|
|
143
|
+
speaker_ids: {
|
|
144
|
+
type: "string",
|
|
145
|
+
description: "Comma-separated speaker IDs to filter audio results (e.g., '1,2,3')",
|
|
146
|
+
},
|
|
147
|
+
speaker_name: {
|
|
148
|
+
type: "string",
|
|
149
|
+
description: "Filter audio by speaker name (case-insensitive partial match)",
|
|
150
|
+
},
|
|
143
151
|
},
|
|
144
152
|
},
|
|
145
153
|
},
|
|
@@ -177,6 +185,84 @@ const BASE_TOOLS = [
|
|
|
177
185
|
required: ["start_time", "end_time"],
|
|
178
186
|
},
|
|
179
187
|
},
|
|
188
|
+
{
|
|
189
|
+
name: "search-ui-events",
|
|
190
|
+
description: "Search UI input events captured via accessibility APIs (macOS). " +
|
|
191
|
+
"This is the third modality alongside vision (OCR) and audio. " +
|
|
192
|
+
"Captures: mouse clicks, keyboard text input, scroll events, app/window switches, clipboard operations. " +
|
|
193
|
+
"Events include app context, element info (accessibility labels), and precise timestamps. " +
|
|
194
|
+
"Great for understanding user workflow, what was typed, clicked, or copied.",
|
|
195
|
+
annotations: {
|
|
196
|
+
title: "Search UI Events (Accessibility)",
|
|
197
|
+
readOnlyHint: true,
|
|
198
|
+
},
|
|
199
|
+
inputSchema: {
|
|
200
|
+
type: "object",
|
|
201
|
+
properties: {
|
|
202
|
+
q: {
|
|
203
|
+
type: "string",
|
|
204
|
+
description: "Search query for text content, app name, window title. Optional - omit to return recent events.",
|
|
205
|
+
},
|
|
206
|
+
event_type: {
|
|
207
|
+
type: "string",
|
|
208
|
+
enum: ["click", "text", "scroll", "key", "app_switch", "window_focus", "clipboard"],
|
|
209
|
+
description: "Filter by event type. 'text' = aggregated keyboard input, 'click' = mouse clicks with element context, 'app_switch'/'window_focus' = app usage tracking, 'clipboard' = copy/paste events.",
|
|
210
|
+
},
|
|
211
|
+
app_name: {
|
|
212
|
+
type: "string",
|
|
213
|
+
description: "Filter by application name (e.g., 'Google Chrome', 'Slack', 'Code')",
|
|
214
|
+
},
|
|
215
|
+
window_name: {
|
|
216
|
+
type: "string",
|
|
217
|
+
description: "Filter by window title",
|
|
218
|
+
},
|
|
219
|
+
start_time: {
|
|
220
|
+
type: "string",
|
|
221
|
+
format: "date-time",
|
|
222
|
+
description: "ISO 8601 UTC start time (e.g., 2024-01-15T10:00:00Z)",
|
|
223
|
+
},
|
|
224
|
+
end_time: {
|
|
225
|
+
type: "string",
|
|
226
|
+
format: "date-time",
|
|
227
|
+
description: "ISO 8601 UTC end time (e.g., 2024-01-15T18:00:00Z)",
|
|
228
|
+
},
|
|
229
|
+
limit: {
|
|
230
|
+
type: "integer",
|
|
231
|
+
description: "Max results. Default: 50",
|
|
232
|
+
default: 50,
|
|
233
|
+
},
|
|
234
|
+
offset: {
|
|
235
|
+
type: "integer",
|
|
236
|
+
description: "Skip N results for pagination. Default: 0",
|
|
237
|
+
default: 0,
|
|
238
|
+
},
|
|
239
|
+
},
|
|
240
|
+
},
|
|
241
|
+
},
|
|
242
|
+
{
|
|
243
|
+
name: "get-ui-event-stats",
|
|
244
|
+
description: "Get aggregated statistics of UI events by app and event type. " +
|
|
245
|
+
"Useful for understanding app usage patterns, productivity analysis, or finding which apps were used most.",
|
|
246
|
+
annotations: {
|
|
247
|
+
title: "UI Event Statistics",
|
|
248
|
+
readOnlyHint: true,
|
|
249
|
+
},
|
|
250
|
+
inputSchema: {
|
|
251
|
+
type: "object",
|
|
252
|
+
properties: {
|
|
253
|
+
start_time: {
|
|
254
|
+
type: "string",
|
|
255
|
+
format: "date-time",
|
|
256
|
+
description: "ISO 8601 UTC start time for stats period",
|
|
257
|
+
},
|
|
258
|
+
end_time: {
|
|
259
|
+
type: "string",
|
|
260
|
+
format: "date-time",
|
|
261
|
+
description: "ISO 8601 UTC end time for stats period",
|
|
262
|
+
},
|
|
263
|
+
},
|
|
264
|
+
},
|
|
265
|
+
},
|
|
180
266
|
];
|
|
181
267
|
// List tools handler
|
|
182
268
|
server.setRequestHandler(types_js_1.ListToolsRequestSchema, async () => {
|
|
@@ -244,18 +330,20 @@ server.setRequestHandler(types_js_1.ReadResourceRequestSchema, async (request) =
|
|
|
244
330
|
mimeType: "text/markdown",
|
|
245
331
|
text: `# Screenpipe Search Guide
|
|
246
332
|
|
|
333
|
+
## Three Data Modalities
|
|
334
|
+
|
|
335
|
+
Screenpipe captures three types of data:
|
|
336
|
+
1. **Vision (OCR)** - Screen text from screenshots
|
|
337
|
+
2. **Audio** - Transcribed speech from microphone/system audio
|
|
338
|
+
3. **UI Events (Accessibility)** - Keyboard input, mouse clicks, app switches, clipboard (macOS)
|
|
339
|
+
|
|
247
340
|
## Quick Start
|
|
248
341
|
- **Get recent activity**: Call search-content with no parameters
|
|
249
342
|
- **Search text**: \`{"q": "search term", "content_type": "ocr"}\`
|
|
250
|
-
- **
|
|
251
|
-
|
|
252
|
-
## Content Types
|
|
253
|
-
- \`ocr\`: Screen text (what you see)
|
|
254
|
-
- \`audio\`: Transcribed speech
|
|
255
|
-
- \`ui\`: UI element interactions
|
|
256
|
-
- \`all\`: Everything (default)
|
|
343
|
+
- **Get keyboard input**: Use search-ui-events with \`event_type: "text"\`
|
|
344
|
+
- **Track app usage**: Use get-ui-event-stats for aggregated data
|
|
257
345
|
|
|
258
|
-
##
|
|
346
|
+
## search-content (Vision + Audio)
|
|
259
347
|
| Parameter | Description | Default |
|
|
260
348
|
|-----------|-------------|---------|
|
|
261
349
|
| q | Search query | (none - returns all) |
|
|
@@ -266,11 +354,27 @@ server.setRequestHandler(types_js_1.ReadResourceRequestSchema, async (request) =
|
|
|
266
354
|
| app_name | Filter by app | (no filter) |
|
|
267
355
|
| include_frames | Include screenshots | false |
|
|
268
356
|
|
|
357
|
+
## search-ui-events (Accessibility Data)
|
|
358
|
+
| Parameter | Description | Default |
|
|
359
|
+
|-----------|-------------|---------|
|
|
360
|
+
| q | Search text content, app, window | (none) |
|
|
361
|
+
| event_type | click/text/scroll/key/app_switch/window_focus/clipboard | (all types) |
|
|
362
|
+
| app_name | Filter by application | (no filter) |
|
|
363
|
+
| limit | Max results | 50 |
|
|
364
|
+
|
|
365
|
+
### Event Types
|
|
366
|
+
- \`text\`: Aggregated keyboard input (what was typed)
|
|
367
|
+
- \`click\`: Mouse clicks with element context (accessibility labels)
|
|
368
|
+
- \`app_switch\`: When user switched applications
|
|
369
|
+
- \`window_focus\`: When window focus changed
|
|
370
|
+
- \`clipboard\`: Copy/paste operations
|
|
371
|
+
- \`scroll\`: Scroll events with delta values
|
|
372
|
+
|
|
269
373
|
## Tips
|
|
270
374
|
1. Read screenpipe://context first to get current timestamps
|
|
271
|
-
2.
|
|
272
|
-
3. Use
|
|
273
|
-
4. Combine
|
|
375
|
+
2. Use search-ui-events for "what did I type?" queries
|
|
376
|
+
3. Use get-ui-event-stats to understand app usage patterns
|
|
377
|
+
4. Combine search-content (what was on screen) with search-ui-events (what was done)`,
|
|
274
378
|
},
|
|
275
379
|
],
|
|
276
380
|
};
|
|
@@ -592,7 +696,8 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
|
|
|
592
696
|
// Sort frame IDs
|
|
593
697
|
frameIds.sort((a, b) => a - b);
|
|
594
698
|
// Step 2: Connect to WebSocket and export video
|
|
595
|
-
|
|
699
|
+
// Send frame_ids in message body to avoid URL length limits
|
|
700
|
+
const wsUrl = `ws://localhost:${port}/frames/export?fps=${fps}`;
|
|
596
701
|
const exportResult = await new Promise((resolve) => {
|
|
597
702
|
const ws = new ws_1.WebSocket(wsUrl);
|
|
598
703
|
let resolved = false;
|
|
@@ -603,6 +708,10 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
|
|
|
603
708
|
resolve({ success: false, error: "Export timed out after 5 minutes" });
|
|
604
709
|
}
|
|
605
710
|
}, 5 * 60 * 1000); // 5 minute timeout
|
|
711
|
+
ws.on("open", () => {
|
|
712
|
+
// Send frame_ids in message body to avoid URL length limits
|
|
713
|
+
ws.send(JSON.stringify({ frame_ids: frameIds }));
|
|
714
|
+
});
|
|
606
715
|
ws.on("error", (error) => {
|
|
607
716
|
if (!resolved) {
|
|
608
717
|
resolved = true;
|
|
@@ -674,6 +783,123 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
|
|
|
674
783
|
};
|
|
675
784
|
}
|
|
676
785
|
}
|
|
786
|
+
case "search-ui-events": {
|
|
787
|
+
const params = new URLSearchParams();
|
|
788
|
+
for (const [key, value] of Object.entries(args)) {
|
|
789
|
+
if (value !== null && value !== undefined) {
|
|
790
|
+
// Map event_type to the API parameter
|
|
791
|
+
params.append(key, String(value));
|
|
792
|
+
}
|
|
793
|
+
}
|
|
794
|
+
const response = await fetchAPI(`/ui-events?${params.toString()}`);
|
|
795
|
+
if (!response.ok) {
|
|
796
|
+
throw new Error(`HTTP error: ${response.status}`);
|
|
797
|
+
}
|
|
798
|
+
const data = await response.json();
|
|
799
|
+
const events = data.data || [];
|
|
800
|
+
const pagination = data.pagination || {};
|
|
801
|
+
if (events.length === 0) {
|
|
802
|
+
return {
|
|
803
|
+
content: [
|
|
804
|
+
{
|
|
805
|
+
type: "text",
|
|
806
|
+
text: "No UI events found. This feature requires:\n" +
|
|
807
|
+
"1. macOS with Accessibility permissions granted\n" +
|
|
808
|
+
"2. UI Events enabled in screenpipe settings\n" +
|
|
809
|
+
"Try: broader time range or different event_type filter.",
|
|
810
|
+
},
|
|
811
|
+
],
|
|
812
|
+
};
|
|
813
|
+
}
|
|
814
|
+
const formattedEvents = [];
|
|
815
|
+
for (const event of events) {
|
|
816
|
+
const parts = [
|
|
817
|
+
`[${event.event_type?.toUpperCase() || "?"}]`,
|
|
818
|
+
event.app_name || "?",
|
|
819
|
+
event.window_title ? `| ${event.window_title}` : "",
|
|
820
|
+
];
|
|
821
|
+
let details = "";
|
|
822
|
+
if (event.event_type === "text" && event.text_content) {
|
|
823
|
+
details = `Text: "${event.text_content}"`;
|
|
824
|
+
}
|
|
825
|
+
else if (event.event_type === "click") {
|
|
826
|
+
details = `Click at (${event.x || 0}, ${event.y || 0})`;
|
|
827
|
+
if (event.element?.label) {
|
|
828
|
+
details += ` on "${event.element.label}"`;
|
|
829
|
+
}
|
|
830
|
+
}
|
|
831
|
+
else if (event.event_type === "clipboard" && event.text_content) {
|
|
832
|
+
details = `Clipboard: "${event.text_content.substring(0, 100)}${event.text_content.length > 100 ? "..." : ""}"`;
|
|
833
|
+
}
|
|
834
|
+
else if (event.event_type === "app_switch" || event.event_type === "window_focus") {
|
|
835
|
+
details = `Switched to: ${event.app_name}${event.window_title ? ` - ${event.window_title}` : ""}`;
|
|
836
|
+
}
|
|
837
|
+
else if (event.event_type === "scroll") {
|
|
838
|
+
details = `Scroll: dx=${event.delta_x || 0}, dy=${event.delta_y || 0}`;
|
|
839
|
+
}
|
|
840
|
+
formattedEvents.push(`${parts.join(" ")}\n` +
|
|
841
|
+
`${event.timestamp || ""}\n` +
|
|
842
|
+
`${details}`);
|
|
843
|
+
}
|
|
844
|
+
const header = `UI Events: ${events.length}/${pagination.total || "?"}` +
|
|
845
|
+
(pagination.total > events.length ? ` (use offset=${(pagination.offset || 0) + events.length} for more)` : "");
|
|
846
|
+
return {
|
|
847
|
+
content: [
|
|
848
|
+
{
|
|
849
|
+
type: "text",
|
|
850
|
+
text: header + "\n\n" + formattedEvents.join("\n---\n"),
|
|
851
|
+
},
|
|
852
|
+
],
|
|
853
|
+
};
|
|
854
|
+
}
|
|
855
|
+
case "get-ui-event-stats": {
|
|
856
|
+
const params = new URLSearchParams();
|
|
857
|
+
if (args.start_time)
|
|
858
|
+
params.append("start_time", String(args.start_time));
|
|
859
|
+
if (args.end_time)
|
|
860
|
+
params.append("end_time", String(args.end_time));
|
|
861
|
+
const response = await fetchAPI(`/ui-events/stats?${params.toString()}`);
|
|
862
|
+
if (!response.ok) {
|
|
863
|
+
throw new Error(`HTTP error: ${response.status}`);
|
|
864
|
+
}
|
|
865
|
+
const stats = await response.json();
|
|
866
|
+
if (!stats || stats.length === 0) {
|
|
867
|
+
return {
|
|
868
|
+
content: [
|
|
869
|
+
{
|
|
870
|
+
type: "text",
|
|
871
|
+
text: "No UI event statistics available. UI Events may not be enabled or no events have been captured yet.",
|
|
872
|
+
},
|
|
873
|
+
],
|
|
874
|
+
};
|
|
875
|
+
}
|
|
876
|
+
// Group by app
|
|
877
|
+
const byApp = {};
|
|
878
|
+
for (const stat of stats) {
|
|
879
|
+
const app = stat.app_name || "Unknown";
|
|
880
|
+
if (!byApp[app]) {
|
|
881
|
+
byApp[app] = { app, events: {}, total: 0 };
|
|
882
|
+
}
|
|
883
|
+
byApp[app].events[stat.event_type] = stat.count;
|
|
884
|
+
byApp[app].total += stat.count;
|
|
885
|
+
}
|
|
886
|
+
// Sort by total events
|
|
887
|
+
const sorted = Object.values(byApp).sort((a, b) => b.total - a.total);
|
|
888
|
+
const lines = sorted.map(({ app, events, total }) => {
|
|
889
|
+
const eventDetails = Object.entries(events)
|
|
890
|
+
.map(([type, count]) => `${type}: ${count}`)
|
|
891
|
+
.join(", ");
|
|
892
|
+
return `${app}: ${total} events (${eventDetails})`;
|
|
893
|
+
});
|
|
894
|
+
return {
|
|
895
|
+
content: [
|
|
896
|
+
{
|
|
897
|
+
type: "text",
|
|
898
|
+
text: `UI Event Statistics:\n\n${lines.join("\n")}`,
|
|
899
|
+
},
|
|
900
|
+
],
|
|
901
|
+
};
|
|
902
|
+
}
|
|
677
903
|
default:
|
|
678
904
|
throw new Error(`Unknown tool: ${name}`);
|
|
679
905
|
}
|
package/manifest.json
CHANGED
|
@@ -3,8 +3,8 @@
|
|
|
3
3
|
"name": "screenpipe",
|
|
4
4
|
"display_name": "Screenpipe",
|
|
5
5
|
"version": "0.5.0",
|
|
6
|
-
"description": "Search your screen recordings
|
|
7
|
-
"long_description": "Screenpipe is a 24/7 screen and audio recorder that lets you search everything you've seen or heard. This extension connects Claude to your local screenpipe instance, enabling AI-powered search through your digital memory
|
|
6
|
+
"description": "Search your screen recordings and audio transcriptions with AI",
|
|
7
|
+
"long_description": "Screenpipe is a 24/7 screen and audio recorder that lets you search everything you've seen or heard. This extension connects Claude to your local screenpipe instance, enabling AI-powered search through your digital memory.",
|
|
8
8
|
"author": {
|
|
9
9
|
"name": "screenpipe",
|
|
10
10
|
"url": "https://screenpi.pe"
|
|
@@ -28,39 +28,11 @@
|
|
|
28
28
|
"tools": [
|
|
29
29
|
{
|
|
30
30
|
"name": "search-content",
|
|
31
|
-
"description": "Search through recorded screen content, audio transcriptions, and UI elements"
|
|
31
|
+
"description": "Search through recorded screen content, audio transcriptions, and UI elements with speaker filtering"
|
|
32
32
|
},
|
|
33
33
|
{
|
|
34
34
|
"name": "export-video",
|
|
35
35
|
"description": "Export screen recordings as MP4 video for a specific time range"
|
|
36
|
-
},
|
|
37
|
-
{
|
|
38
|
-
"name": "pixel-control",
|
|
39
|
-
"description": "Control mouse and keyboard (type text, press keys, move mouse, click)"
|
|
40
|
-
},
|
|
41
|
-
{
|
|
42
|
-
"name": "find-elements",
|
|
43
|
-
"description": "Find UI elements in applications by role (macOS only)"
|
|
44
|
-
},
|
|
45
|
-
{
|
|
46
|
-
"name": "click-element",
|
|
47
|
-
"description": "Click UI elements by ID (macOS only)"
|
|
48
|
-
},
|
|
49
|
-
{
|
|
50
|
-
"name": "fill-element",
|
|
51
|
-
"description": "Type text into UI elements (macOS only)"
|
|
52
|
-
},
|
|
53
|
-
{
|
|
54
|
-
"name": "scroll-element",
|
|
55
|
-
"description": "Scroll UI elements (macOS only)"
|
|
56
|
-
},
|
|
57
|
-
{
|
|
58
|
-
"name": "open-application",
|
|
59
|
-
"description": "Open applications by name (macOS only)"
|
|
60
|
-
},
|
|
61
|
-
{
|
|
62
|
-
"name": "open-url",
|
|
63
|
-
"description": "Open URLs in browser (macOS only)"
|
|
64
36
|
}
|
|
65
37
|
],
|
|
66
38
|
"compatibility": {
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -45,7 +45,7 @@ const SCREENPIPE_API = `http://localhost:${port}`;
|
|
|
45
45
|
const server = new Server(
|
|
46
46
|
{
|
|
47
47
|
name: "screenpipe",
|
|
48
|
-
version: "0.
|
|
48
|
+
version: "0.7.0",
|
|
49
49
|
},
|
|
50
50
|
{
|
|
51
51
|
capabilities: {
|
|
@@ -79,7 +79,7 @@ const BASE_TOOLS: Tool[] = [
|
|
|
79
79
|
content_type: {
|
|
80
80
|
type: "string",
|
|
81
81
|
enum: ["all", "ocr", "audio", "ui"],
|
|
82
|
-
description: "Content type filter. Default: 'all'",
|
|
82
|
+
description: "Content type filter: 'ocr' (screen text), 'audio' (transcriptions), 'ui' (legacy UI monitoring), 'all'. Default: 'all'. For keyboard/mouse/accessibility events, use search-ui-events tool instead.",
|
|
83
83
|
default: "all",
|
|
84
84
|
},
|
|
85
85
|
limit: {
|
|
@@ -123,6 +123,14 @@ const BASE_TOOLS: Tool[] = [
|
|
|
123
123
|
description: "Include base64 screenshots (OCR only). Default: false",
|
|
124
124
|
default: false,
|
|
125
125
|
},
|
|
126
|
+
speaker_ids: {
|
|
127
|
+
type: "string",
|
|
128
|
+
description: "Comma-separated speaker IDs to filter audio results (e.g., '1,2,3')",
|
|
129
|
+
},
|
|
130
|
+
speaker_name: {
|
|
131
|
+
type: "string",
|
|
132
|
+
description: "Filter audio by speaker name (case-insensitive partial match)",
|
|
133
|
+
},
|
|
126
134
|
},
|
|
127
135
|
},
|
|
128
136
|
},
|
|
@@ -164,6 +172,86 @@ const BASE_TOOLS: Tool[] = [
|
|
|
164
172
|
required: ["start_time", "end_time"],
|
|
165
173
|
},
|
|
166
174
|
},
|
|
175
|
+
{
|
|
176
|
+
name: "search-ui-events",
|
|
177
|
+
description:
|
|
178
|
+
"Search UI input events captured via accessibility APIs (macOS). " +
|
|
179
|
+
"This is the third modality alongside vision (OCR) and audio. " +
|
|
180
|
+
"Captures: mouse clicks, keyboard text input, scroll events, app/window switches, clipboard operations. " +
|
|
181
|
+
"Events include app context, element info (accessibility labels), and precise timestamps. " +
|
|
182
|
+
"Great for understanding user workflow, what was typed, clicked, or copied.",
|
|
183
|
+
annotations: {
|
|
184
|
+
title: "Search UI Events (Accessibility)",
|
|
185
|
+
readOnlyHint: true,
|
|
186
|
+
},
|
|
187
|
+
inputSchema: {
|
|
188
|
+
type: "object",
|
|
189
|
+
properties: {
|
|
190
|
+
q: {
|
|
191
|
+
type: "string",
|
|
192
|
+
description: "Search query for text content, app name, window title. Optional - omit to return recent events.",
|
|
193
|
+
},
|
|
194
|
+
event_type: {
|
|
195
|
+
type: "string",
|
|
196
|
+
enum: ["click", "text", "scroll", "key", "app_switch", "window_focus", "clipboard"],
|
|
197
|
+
description: "Filter by event type. 'text' = aggregated keyboard input, 'click' = mouse clicks with element context, 'app_switch'/'window_focus' = app usage tracking, 'clipboard' = copy/paste events.",
|
|
198
|
+
},
|
|
199
|
+
app_name: {
|
|
200
|
+
type: "string",
|
|
201
|
+
description: "Filter by application name (e.g., 'Google Chrome', 'Slack', 'Code')",
|
|
202
|
+
},
|
|
203
|
+
window_name: {
|
|
204
|
+
type: "string",
|
|
205
|
+
description: "Filter by window title",
|
|
206
|
+
},
|
|
207
|
+
start_time: {
|
|
208
|
+
type: "string",
|
|
209
|
+
format: "date-time",
|
|
210
|
+
description: "ISO 8601 UTC start time (e.g., 2024-01-15T10:00:00Z)",
|
|
211
|
+
},
|
|
212
|
+
end_time: {
|
|
213
|
+
type: "string",
|
|
214
|
+
format: "date-time",
|
|
215
|
+
description: "ISO 8601 UTC end time (e.g., 2024-01-15T18:00:00Z)",
|
|
216
|
+
},
|
|
217
|
+
limit: {
|
|
218
|
+
type: "integer",
|
|
219
|
+
description: "Max results. Default: 50",
|
|
220
|
+
default: 50,
|
|
221
|
+
},
|
|
222
|
+
offset: {
|
|
223
|
+
type: "integer",
|
|
224
|
+
description: "Skip N results for pagination. Default: 0",
|
|
225
|
+
default: 0,
|
|
226
|
+
},
|
|
227
|
+
},
|
|
228
|
+
},
|
|
229
|
+
},
|
|
230
|
+
{
|
|
231
|
+
name: "get-ui-event-stats",
|
|
232
|
+
description:
|
|
233
|
+
"Get aggregated statistics of UI events by app and event type. " +
|
|
234
|
+
"Useful for understanding app usage patterns, productivity analysis, or finding which apps were used most.",
|
|
235
|
+
annotations: {
|
|
236
|
+
title: "UI Event Statistics",
|
|
237
|
+
readOnlyHint: true,
|
|
238
|
+
},
|
|
239
|
+
inputSchema: {
|
|
240
|
+
type: "object",
|
|
241
|
+
properties: {
|
|
242
|
+
start_time: {
|
|
243
|
+
type: "string",
|
|
244
|
+
format: "date-time",
|
|
245
|
+
description: "ISO 8601 UTC start time for stats period",
|
|
246
|
+
},
|
|
247
|
+
end_time: {
|
|
248
|
+
type: "string",
|
|
249
|
+
format: "date-time",
|
|
250
|
+
description: "ISO 8601 UTC end time for stats period",
|
|
251
|
+
},
|
|
252
|
+
},
|
|
253
|
+
},
|
|
254
|
+
},
|
|
167
255
|
];
|
|
168
256
|
|
|
169
257
|
// List tools handler
|
|
@@ -237,18 +325,20 @@ server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
|
|
|
237
325
|
mimeType: "text/markdown",
|
|
238
326
|
text: `# Screenpipe Search Guide
|
|
239
327
|
|
|
328
|
+
## Three Data Modalities
|
|
329
|
+
|
|
330
|
+
Screenpipe captures three types of data:
|
|
331
|
+
1. **Vision (OCR)** - Screen text from screenshots
|
|
332
|
+
2. **Audio** - Transcribed speech from microphone/system audio
|
|
333
|
+
3. **UI Events (Accessibility)** - Keyboard input, mouse clicks, app switches, clipboard (macOS)
|
|
334
|
+
|
|
240
335
|
## Quick Start
|
|
241
336
|
- **Get recent activity**: Call search-content with no parameters
|
|
242
337
|
- **Search text**: \`{"q": "search term", "content_type": "ocr"}\`
|
|
243
|
-
- **
|
|
244
|
-
|
|
245
|
-
## Content Types
|
|
246
|
-
- \`ocr\`: Screen text (what you see)
|
|
247
|
-
- \`audio\`: Transcribed speech
|
|
248
|
-
- \`ui\`: UI element interactions
|
|
249
|
-
- \`all\`: Everything (default)
|
|
338
|
+
- **Get keyboard input**: Use search-ui-events with \`event_type: "text"\`
|
|
339
|
+
- **Track app usage**: Use get-ui-event-stats for aggregated data
|
|
250
340
|
|
|
251
|
-
##
|
|
341
|
+
## search-content (Vision + Audio)
|
|
252
342
|
| Parameter | Description | Default |
|
|
253
343
|
|-----------|-------------|---------|
|
|
254
344
|
| q | Search query | (none - returns all) |
|
|
@@ -259,11 +349,27 @@ server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
|
|
|
259
349
|
| app_name | Filter by app | (no filter) |
|
|
260
350
|
| include_frames | Include screenshots | false |
|
|
261
351
|
|
|
352
|
+
## search-ui-events (Accessibility Data)
|
|
353
|
+
| Parameter | Description | Default |
|
|
354
|
+
|-----------|-------------|---------|
|
|
355
|
+
| q | Search text content, app, window | (none) |
|
|
356
|
+
| event_type | click/text/scroll/key/app_switch/window_focus/clipboard | (all types) |
|
|
357
|
+
| app_name | Filter by application | (no filter) |
|
|
358
|
+
| limit | Max results | 50 |
|
|
359
|
+
|
|
360
|
+
### Event Types
|
|
361
|
+
- \`text\`: Aggregated keyboard input (what was typed)
|
|
362
|
+
- \`click\`: Mouse clicks with element context (accessibility labels)
|
|
363
|
+
- \`app_switch\`: When user switched applications
|
|
364
|
+
- \`window_focus\`: When window focus changed
|
|
365
|
+
- \`clipboard\`: Copy/paste operations
|
|
366
|
+
- \`scroll\`: Scroll events with delta values
|
|
367
|
+
|
|
262
368
|
## Tips
|
|
263
369
|
1. Read screenpipe://context first to get current timestamps
|
|
264
|
-
2.
|
|
265
|
-
3. Use
|
|
266
|
-
4. Combine
|
|
370
|
+
2. Use search-ui-events for "what did I type?" queries
|
|
371
|
+
3. Use get-ui-event-stats to understand app usage patterns
|
|
372
|
+
4. Combine search-content (what was on screen) with search-ui-events (what was done)`,
|
|
267
373
|
},
|
|
268
374
|
],
|
|
269
375
|
};
|
|
@@ -630,7 +736,8 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
630
736
|
frameIds.sort((a, b) => a - b);
|
|
631
737
|
|
|
632
738
|
// Step 2: Connect to WebSocket and export video
|
|
633
|
-
|
|
739
|
+
// Send frame_ids in message body to avoid URL length limits
|
|
740
|
+
const wsUrl = `ws://localhost:${port}/frames/export?fps=${fps}`;
|
|
634
741
|
|
|
635
742
|
const exportResult = await new Promise<{
|
|
636
743
|
success: boolean;
|
|
@@ -649,6 +756,11 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
649
756
|
}
|
|
650
757
|
}, 5 * 60 * 1000); // 5 minute timeout
|
|
651
758
|
|
|
759
|
+
ws.on("open", () => {
|
|
760
|
+
// Send frame_ids in message body to avoid URL length limits
|
|
761
|
+
ws.send(JSON.stringify({ frame_ids: frameIds }));
|
|
762
|
+
});
|
|
763
|
+
|
|
652
764
|
ws.on("error", (error) => {
|
|
653
765
|
if (!resolved) {
|
|
654
766
|
resolved = true;
|
|
@@ -724,6 +836,136 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
724
836
|
}
|
|
725
837
|
}
|
|
726
838
|
|
|
839
|
+
case "search-ui-events": {
|
|
840
|
+
const params = new URLSearchParams();
|
|
841
|
+
for (const [key, value] of Object.entries(args)) {
|
|
842
|
+
if (value !== null && value !== undefined) {
|
|
843
|
+
// Map event_type to the API parameter
|
|
844
|
+
params.append(key, String(value));
|
|
845
|
+
}
|
|
846
|
+
}
|
|
847
|
+
|
|
848
|
+
const response = await fetchAPI(`/ui-events?${params.toString()}`);
|
|
849
|
+
if (!response.ok) {
|
|
850
|
+
throw new Error(`HTTP error: ${response.status}`);
|
|
851
|
+
}
|
|
852
|
+
|
|
853
|
+
const data = await response.json();
|
|
854
|
+
const events = data.data || [];
|
|
855
|
+
const pagination = data.pagination || {};
|
|
856
|
+
|
|
857
|
+
if (events.length === 0) {
|
|
858
|
+
return {
|
|
859
|
+
content: [
|
|
860
|
+
{
|
|
861
|
+
type: "text",
|
|
862
|
+
text: "No UI events found. This feature requires:\n" +
|
|
863
|
+
"1. macOS with Accessibility permissions granted\n" +
|
|
864
|
+
"2. UI Events enabled in screenpipe settings\n" +
|
|
865
|
+
"Try: broader time range or different event_type filter.",
|
|
866
|
+
},
|
|
867
|
+
],
|
|
868
|
+
};
|
|
869
|
+
}
|
|
870
|
+
|
|
871
|
+
const formattedEvents: string[] = [];
|
|
872
|
+
for (const event of events) {
|
|
873
|
+
const parts = [
|
|
874
|
+
`[${event.event_type?.toUpperCase() || "?"}]`,
|
|
875
|
+
event.app_name || "?",
|
|
876
|
+
event.window_title ? `| ${event.window_title}` : "",
|
|
877
|
+
];
|
|
878
|
+
|
|
879
|
+
let details = "";
|
|
880
|
+
if (event.event_type === "text" && event.text_content) {
|
|
881
|
+
details = `Text: "${event.text_content}"`;
|
|
882
|
+
} else if (event.event_type === "click") {
|
|
883
|
+
details = `Click at (${event.x || 0}, ${event.y || 0})`;
|
|
884
|
+
if (event.element?.label) {
|
|
885
|
+
details += ` on "${event.element.label}"`;
|
|
886
|
+
}
|
|
887
|
+
} else if (event.event_type === "clipboard" && event.text_content) {
|
|
888
|
+
details = `Clipboard: "${event.text_content.substring(0, 100)}${event.text_content.length > 100 ? "..." : ""}"`;
|
|
889
|
+
} else if (event.event_type === "app_switch" || event.event_type === "window_focus") {
|
|
890
|
+
details = `Switched to: ${event.app_name}${event.window_title ? ` - ${event.window_title}` : ""}`;
|
|
891
|
+
} else if (event.event_type === "scroll") {
|
|
892
|
+
details = `Scroll: dx=${event.delta_x || 0}, dy=${event.delta_y || 0}`;
|
|
893
|
+
}
|
|
894
|
+
|
|
895
|
+
formattedEvents.push(
|
|
896
|
+
`${parts.join(" ")}\n` +
|
|
897
|
+
`${event.timestamp || ""}\n` +
|
|
898
|
+
`${details}`
|
|
899
|
+
);
|
|
900
|
+
}
|
|
901
|
+
|
|
902
|
+
const header = `UI Events: ${events.length}/${pagination.total || "?"}` +
|
|
903
|
+
(pagination.total > events.length ? ` (use offset=${(pagination.offset || 0) + events.length} for more)` : "");
|
|
904
|
+
|
|
905
|
+
return {
|
|
906
|
+
content: [
|
|
907
|
+
{
|
|
908
|
+
type: "text",
|
|
909
|
+
text: header + "\n\n" + formattedEvents.join("\n---\n"),
|
|
910
|
+
},
|
|
911
|
+
],
|
|
912
|
+
};
|
|
913
|
+
}
|
|
914
|
+
|
|
915
|
+
case "get-ui-event-stats": {
|
|
916
|
+
const params = new URLSearchParams();
|
|
917
|
+
if (args.start_time) params.append("start_time", String(args.start_time));
|
|
918
|
+
if (args.end_time) params.append("end_time", String(args.end_time));
|
|
919
|
+
|
|
920
|
+
const response = await fetchAPI(`/ui-events/stats?${params.toString()}`);
|
|
921
|
+
if (!response.ok) {
|
|
922
|
+
throw new Error(`HTTP error: ${response.status}`);
|
|
923
|
+
}
|
|
924
|
+
|
|
925
|
+
const stats = await response.json();
|
|
926
|
+
|
|
927
|
+
if (!stats || stats.length === 0) {
|
|
928
|
+
return {
|
|
929
|
+
content: [
|
|
930
|
+
{
|
|
931
|
+
type: "text",
|
|
932
|
+
text: "No UI event statistics available. UI Events may not be enabled or no events have been captured yet.",
|
|
933
|
+
},
|
|
934
|
+
],
|
|
935
|
+
};
|
|
936
|
+
}
|
|
937
|
+
|
|
938
|
+
// Group by app
|
|
939
|
+
const byApp: Record<string, { app: string; events: Record<string, number>; total: number }> = {};
|
|
940
|
+
for (const stat of stats) {
|
|
941
|
+
const app = stat.app_name || "Unknown";
|
|
942
|
+
if (!byApp[app]) {
|
|
943
|
+
byApp[app] = { app, events: {}, total: 0 };
|
|
944
|
+
}
|
|
945
|
+
byApp[app].events[stat.event_type] = stat.count;
|
|
946
|
+
byApp[app].total += stat.count;
|
|
947
|
+
}
|
|
948
|
+
|
|
949
|
+
// Sort by total events
|
|
950
|
+
const sorted = Object.values(byApp).sort((a, b) => b.total - a.total);
|
|
951
|
+
|
|
952
|
+
const lines = sorted.map(({ app, events, total }) => {
|
|
953
|
+
const eventDetails = Object.entries(events)
|
|
954
|
+
.map(([type, count]) => `${type}: ${count}`)
|
|
955
|
+
.join(", ");
|
|
956
|
+
return `${app}: ${total} events (${eventDetails})`;
|
|
957
|
+
});
|
|
958
|
+
|
|
959
|
+
return {
|
|
960
|
+
content: [
|
|
961
|
+
{
|
|
962
|
+
type: "text",
|
|
963
|
+
text: `UI Event Statistics:\n\n${lines.join("\n")}`,
|
|
964
|
+
},
|
|
965
|
+
],
|
|
966
|
+
};
|
|
967
|
+
}
|
|
968
|
+
|
|
727
969
|
default:
|
|
728
970
|
throw new Error(`Unknown tool: ${name}`);
|
|
729
971
|
}
|