screenpipe-mcp 0.8.2 → 0.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +340 -20
- package/manifest.json +17 -1
- package/package.json +1 -1
- package/src/index.ts +392 -20
package/dist/index.js
CHANGED
|
@@ -69,7 +69,7 @@ const SCREENPIPE_API = `http://localhost:${port}`;
|
|
|
69
69
|
// Initialize server
|
|
70
70
|
const server = new index_js_1.Server({
|
|
71
71
|
name: "screenpipe",
|
|
72
|
-
version: "0.8.
|
|
72
|
+
version: "0.8.3",
|
|
73
73
|
}, {
|
|
74
74
|
capabilities: {
|
|
75
75
|
tools: {},
|
|
@@ -81,10 +81,14 @@ const server = new index_js_1.Server({
|
|
|
81
81
|
const BASE_TOOLS = [
|
|
82
82
|
{
|
|
83
83
|
name: "search-content",
|
|
84
|
-
description: "Search screenpipe's recorded content: screen text (OCR), audio transcriptions, and UI elements. " +
|
|
84
|
+
description: "Search screenpipe's recorded content: screen text (accessibility APIs, with OCR fallback), audio transcriptions, and UI elements. " +
|
|
85
85
|
"Returns timestamped results with app context. " +
|
|
86
86
|
"Call with no parameters to get recent activity. " +
|
|
87
87
|
"Use the 'screenpipe://context' resource for current time when building time-based queries.\n\n" +
|
|
88
|
+
"SEARCH STRATEGY: First search with ONLY time params (start_time/end_time) — no q, no app_name, no content_type. " +
|
|
89
|
+
"This gives ground truth of what's recorded. Scan results to find correct app_name values, then narrow with filters using exact observed values. " +
|
|
90
|
+
"App names are case-sensitive and may differ from user input (e.g. 'Discord' vs 'Discord.exe'). " +
|
|
91
|
+
"The q param searches captured text (accessibility/OCR), NOT app names. NEVER report 'no data' after one filtered search — verify with unfiltered time-only search first.\n\n" +
|
|
88
92
|
"DEEP LINKS: When referencing specific moments, create clickable links using IDs from search results:\n" +
|
|
89
93
|
"- OCR results (PREFERRED): [10:30 AM — Chrome](screenpipe://frame/12345) — use content.frame_id from the result\n" +
|
|
90
94
|
"- Audio results: [meeting at 3pm](screenpipe://timeline?timestamp=2024-01-15T15:00:00Z) — use exact timestamp from result\n" +
|
|
@@ -103,7 +107,7 @@ const BASE_TOOLS = [
|
|
|
103
107
|
content_type: {
|
|
104
108
|
type: "string",
|
|
105
109
|
enum: ["all", "ocr", "audio", "input", "accessibility"],
|
|
106
|
-
description: "Content type filter: 'ocr' (screen text), 'audio' (transcriptions), 'input' (clicks, keystrokes, clipboard, app switches), 'accessibility' (accessibility tree text), 'all'. Default: 'all'.",
|
|
110
|
+
description: "Content type filter: 'ocr' (screen text via OCR, legacy fallback), 'audio' (transcriptions), 'input' (clicks, keystrokes, clipboard, app switches), 'accessibility' (accessibility tree text, preferred for screen content), 'all'. Default: 'all'.",
|
|
107
111
|
default: "all",
|
|
108
112
|
},
|
|
109
113
|
limit: {
|
|
@@ -119,12 +123,12 @@ const BASE_TOOLS = [
|
|
|
119
123
|
start_time: {
|
|
120
124
|
type: "string",
|
|
121
125
|
format: "date-time",
|
|
122
|
-
description: "ISO 8601 UTC
|
|
126
|
+
description: "Start time: ISO 8601 UTC (e.g., 2024-01-15T10:00:00Z) or relative (e.g., '16h ago', '2d ago', 'now')",
|
|
123
127
|
},
|
|
124
128
|
end_time: {
|
|
125
129
|
type: "string",
|
|
126
130
|
format: "date-time",
|
|
127
|
-
description: "ISO 8601 UTC
|
|
131
|
+
description: "End time: ISO 8601 UTC (e.g., 2024-01-15T18:00:00Z) or relative (e.g., 'now', '1h ago')",
|
|
128
132
|
},
|
|
129
133
|
app_name: {
|
|
130
134
|
type: "string",
|
|
@@ -155,6 +159,10 @@ const BASE_TOOLS = [
|
|
|
155
159
|
type: "string",
|
|
156
160
|
description: "Filter audio by speaker name (case-insensitive partial match)",
|
|
157
161
|
},
|
|
162
|
+
max_content_length: {
|
|
163
|
+
type: "integer",
|
|
164
|
+
description: "Truncate each result's text/transcription to this many characters using middle-truncation (keeps first half + last half). Useful for limiting token usage with small-context models.",
|
|
165
|
+
},
|
|
158
166
|
},
|
|
159
167
|
},
|
|
160
168
|
},
|
|
@@ -162,7 +170,7 @@ const BASE_TOOLS = [
|
|
|
162
170
|
name: "export-video",
|
|
163
171
|
description: "Export a video of screen recordings for a specific time range. " +
|
|
164
172
|
"Creates an MP4 video from the recorded frames between the start and end times.\n\n" +
|
|
165
|
-
"IMPORTANT: Use ISO 8601 UTC timestamps (e.g., 2024-01-15T10:00:00Z)\n\n" +
|
|
173
|
+
"IMPORTANT: Use ISO 8601 UTC timestamps (e.g., 2024-01-15T10:00:00Z) or relative times (e.g., '16h ago', 'now')\n\n" +
|
|
166
174
|
"EXAMPLES:\n" +
|
|
167
175
|
"- Last 30 minutes: Calculate timestamps from current time\n" +
|
|
168
176
|
"- Specific meeting: Use the meeting's start and end times in UTC",
|
|
@@ -176,12 +184,12 @@ const BASE_TOOLS = [
|
|
|
176
184
|
start_time: {
|
|
177
185
|
type: "string",
|
|
178
186
|
format: "date-time",
|
|
179
|
-
description: "Start time
|
|
187
|
+
description: "Start time: ISO 8601 UTC (e.g., '2024-01-15T10:00:00Z') or relative (e.g., '16h ago', 'now')",
|
|
180
188
|
},
|
|
181
189
|
end_time: {
|
|
182
190
|
type: "string",
|
|
183
191
|
format: "date-time",
|
|
184
|
-
description: "End time
|
|
192
|
+
description: "End time: ISO 8601 UTC (e.g., '2024-01-15T10:30:00Z') or relative (e.g., 'now', '1h ago')",
|
|
185
193
|
},
|
|
186
194
|
fps: {
|
|
187
195
|
type: "number",
|
|
@@ -192,6 +200,152 @@ const BASE_TOOLS = [
|
|
|
192
200
|
required: ["start_time", "end_time"],
|
|
193
201
|
},
|
|
194
202
|
},
|
|
203
|
+
{
|
|
204
|
+
name: "list-meetings",
|
|
205
|
+
description: "List detected meetings with duration, app, and attendees. " +
|
|
206
|
+
"Returns meetings detected via app focus (Zoom, Meet, Teams) and audio. " +
|
|
207
|
+
"Only available when screenpipe runs in smart transcription mode.",
|
|
208
|
+
annotations: {
|
|
209
|
+
title: "List Meetings",
|
|
210
|
+
readOnlyHint: true,
|
|
211
|
+
},
|
|
212
|
+
inputSchema: {
|
|
213
|
+
type: "object",
|
|
214
|
+
properties: {
|
|
215
|
+
start_time: {
|
|
216
|
+
type: "string",
|
|
217
|
+
format: "date-time",
|
|
218
|
+
description: "Start filter: ISO 8601 UTC (e.g., 2024-01-15T10:00:00Z) or relative (e.g., '16h ago', 'now')",
|
|
219
|
+
},
|
|
220
|
+
end_time: {
|
|
221
|
+
type: "string",
|
|
222
|
+
format: "date-time",
|
|
223
|
+
description: "End filter: ISO 8601 UTC (e.g., 2024-01-15T18:00:00Z) or relative (e.g., 'now', '1h ago')",
|
|
224
|
+
},
|
|
225
|
+
limit: {
|
|
226
|
+
type: "integer",
|
|
227
|
+
description: "Max results. Default: 20",
|
|
228
|
+
default: 20,
|
|
229
|
+
},
|
|
230
|
+
offset: {
|
|
231
|
+
type: "integer",
|
|
232
|
+
description: "Skip N results for pagination. Default: 0",
|
|
233
|
+
default: 0,
|
|
234
|
+
},
|
|
235
|
+
},
|
|
236
|
+
},
|
|
237
|
+
},
|
|
238
|
+
{
|
|
239
|
+
name: "activity-summary",
|
|
240
|
+
description: "Get a lightweight compressed activity overview for a time range (~200-500 tokens). " +
|
|
241
|
+
"Returns app usage (name, frame count, active minutes, first/last seen), recent accessibility texts, and audio speaker summary. " +
|
|
242
|
+
"Minutes are based on active session time (consecutive frames with gaps < 5min count as active). " +
|
|
243
|
+
"first_seen/last_seen show the wall-clock span per app. " +
|
|
244
|
+
"Use this FIRST for broad questions like 'what was I doing?' before drilling into search-content or search-elements. " +
|
|
245
|
+
"Much cheaper than search-content for getting an overview.",
|
|
246
|
+
annotations: {
|
|
247
|
+
title: "Activity Summary",
|
|
248
|
+
readOnlyHint: true,
|
|
249
|
+
},
|
|
250
|
+
inputSchema: {
|
|
251
|
+
type: "object",
|
|
252
|
+
properties: {
|
|
253
|
+
start_time: {
|
|
254
|
+
type: "string",
|
|
255
|
+
format: "date-time",
|
|
256
|
+
description: "Start of time range: ISO 8601 UTC (e.g., 2024-01-15T10:00:00Z) or relative (e.g., '16h ago', 'now')",
|
|
257
|
+
},
|
|
258
|
+
end_time: {
|
|
259
|
+
type: "string",
|
|
260
|
+
format: "date-time",
|
|
261
|
+
description: "End of time range: ISO 8601 UTC (e.g., 2024-01-15T18:00:00Z) or relative (e.g., 'now', '1h ago')",
|
|
262
|
+
},
|
|
263
|
+
app_name: {
|
|
264
|
+
type: "string",
|
|
265
|
+
description: "Optional app name filter (e.g., 'Google Chrome', 'VS Code')",
|
|
266
|
+
},
|
|
267
|
+
},
|
|
268
|
+
required: ["start_time", "end_time"],
|
|
269
|
+
},
|
|
270
|
+
},
|
|
271
|
+
{
|
|
272
|
+
name: "search-elements",
|
|
273
|
+
description: "Search structured UI elements (accessibility tree nodes and OCR text blocks). " +
|
|
274
|
+
"Returns ~100-500 bytes per element — much lighter than search-content for targeted lookups. " +
|
|
275
|
+
"Each element has: id, frame_id, source (accessibility/ocr), role (AXButton, AXStaticText, AXLink, etc.), text, bounds, depth.\n\n" +
|
|
276
|
+
"Use for: finding specific buttons, links, text fields, or UI components. " +
|
|
277
|
+
"Prefer this over search-content when you need structural UI detail rather than full screen text.",
|
|
278
|
+
annotations: {
|
|
279
|
+
title: "Search Elements",
|
|
280
|
+
readOnlyHint: true,
|
|
281
|
+
},
|
|
282
|
+
inputSchema: {
|
|
283
|
+
type: "object",
|
|
284
|
+
properties: {
|
|
285
|
+
q: {
|
|
286
|
+
type: "string",
|
|
287
|
+
description: "Full-text search query across element text. Optional.",
|
|
288
|
+
},
|
|
289
|
+
frame_id: {
|
|
290
|
+
type: "integer",
|
|
291
|
+
description: "Filter to elements from a specific frame",
|
|
292
|
+
},
|
|
293
|
+
source: {
|
|
294
|
+
type: "string",
|
|
295
|
+
enum: ["accessibility", "ocr"],
|
|
296
|
+
description: "Filter by element source: 'accessibility' (structured tree) or 'ocr' (text blocks)",
|
|
297
|
+
},
|
|
298
|
+
role: {
|
|
299
|
+
type: "string",
|
|
300
|
+
description: "Filter by element role (e.g., 'AXButton', 'AXStaticText', 'AXLink', 'AXTextField', 'line')",
|
|
301
|
+
},
|
|
302
|
+
start_time: {
|
|
303
|
+
type: "string",
|
|
304
|
+
format: "date-time",
|
|
305
|
+
description: "Start time: ISO 8601 UTC or relative (e.g., '16h ago', 'now')",
|
|
306
|
+
},
|
|
307
|
+
end_time: {
|
|
308
|
+
type: "string",
|
|
309
|
+
format: "date-time",
|
|
310
|
+
description: "End time: ISO 8601 UTC or relative (e.g., 'now', '1h ago')",
|
|
311
|
+
},
|
|
312
|
+
app_name: {
|
|
313
|
+
type: "string",
|
|
314
|
+
description: "Filter by app name",
|
|
315
|
+
},
|
|
316
|
+
limit: {
|
|
317
|
+
type: "integer",
|
|
318
|
+
description: "Max results. Default: 50",
|
|
319
|
+
default: 50,
|
|
320
|
+
},
|
|
321
|
+
offset: {
|
|
322
|
+
type: "integer",
|
|
323
|
+
description: "Skip N results for pagination. Default: 0",
|
|
324
|
+
default: 0,
|
|
325
|
+
},
|
|
326
|
+
},
|
|
327
|
+
},
|
|
328
|
+
},
|
|
329
|
+
{
|
|
330
|
+
name: "frame-context",
|
|
331
|
+
description: "Get accessibility text, parsed tree nodes, and extracted URLs for a specific frame. " +
|
|
332
|
+
"Falls back to OCR data for legacy frames without accessibility data. " +
|
|
333
|
+
"Use after finding a frame_id from search-content or search-elements to get full structural detail and URLs.",
|
|
334
|
+
annotations: {
|
|
335
|
+
title: "Frame Context",
|
|
336
|
+
readOnlyHint: true,
|
|
337
|
+
},
|
|
338
|
+
inputSchema: {
|
|
339
|
+
type: "object",
|
|
340
|
+
properties: {
|
|
341
|
+
frame_id: {
|
|
342
|
+
type: "integer",
|
|
343
|
+
description: "The frame ID to get context for (from search results)",
|
|
344
|
+
},
|
|
345
|
+
},
|
|
346
|
+
required: ["frame_id"],
|
|
347
|
+
},
|
|
348
|
+
},
|
|
195
349
|
];
|
|
196
350
|
// List tools handler
|
|
197
351
|
server.setRequestHandler(types_js_1.ListToolsRequestSchema, async () => {
|
|
@@ -262,16 +416,16 @@ server.setRequestHandler(types_js_1.ReadResourceRequestSchema, async (request) =
|
|
|
262
416
|
## Data Modalities
|
|
263
417
|
|
|
264
418
|
Screenpipe captures four types of data:
|
|
265
|
-
1. **
|
|
266
|
-
2. **
|
|
267
|
-
3. **
|
|
268
|
-
4. **
|
|
419
|
+
1. **Accessibility** - Screen text via accessibility APIs (primary, preferred for screen content)
|
|
420
|
+
2. **OCR** - Screen text from screenshots (legacy fallback for apps without accessibility support)
|
|
421
|
+
3. **Audio** - Transcribed speech from microphone/system audio
|
|
422
|
+
4. **Input** - Keyboard input, mouse clicks, app switches, clipboard (macOS)
|
|
269
423
|
|
|
270
424
|
## Quick Start
|
|
271
425
|
- **Get recent activity**: Call search-content with no parameters
|
|
272
|
-
- **Search text**: \`{"q": "search term", "content_type": "
|
|
426
|
+
- **Search screen text**: \`{"q": "search term", "content_type": "all"}\`
|
|
273
427
|
- **Get keyboard input**: \`{"content_type": "input"}\`
|
|
274
|
-
- **Get
|
|
428
|
+
- **Get audio only**: \`{"content_type": "audio"}\`
|
|
275
429
|
|
|
276
430
|
## search-content
|
|
277
431
|
| Parameter | Description | Default |
|
|
@@ -279,16 +433,32 @@ Screenpipe captures four types of data:
|
|
|
279
433
|
| q | Search query | (none - returns all) |
|
|
280
434
|
| content_type | all/ocr/audio/input/accessibility | all |
|
|
281
435
|
| limit | Max results | 10 |
|
|
282
|
-
| start_time | ISO 8601 UTC | (no filter) |
|
|
283
|
-
| end_time | ISO 8601 UTC | (no filter) |
|
|
436
|
+
| start_time | ISO 8601 UTC or relative (e.g. '16h ago') | (no filter) |
|
|
437
|
+
| end_time | ISO 8601 UTC or relative (e.g. 'now') | (no filter) |
|
|
284
438
|
| app_name | Filter by app | (no filter) |
|
|
285
439
|
| include_frames | Include screenshots | false |
|
|
286
440
|
|
|
441
|
+
## Search Strategy (MANDATORY)
|
|
442
|
+
1. First search: ONLY use time params (start_time/end_time). No q, no app_name, no content_type. This gives ground truth of what's recorded.
|
|
443
|
+
2. Scan results to find correct app_name values and content patterns.
|
|
444
|
+
3. Only THEN narrow with filters using exact observed values. App names are case-sensitive and may differ from user input (e.g. "Discord" vs "Discord.exe").
|
|
445
|
+
4. The q param searches captured text (accessibility/OCR), NOT app names — an app can be visible without its name in the captured text.
|
|
446
|
+
5. NEVER report "no data found" after one filtered search. Verify with unfiltered time-only search first.
|
|
447
|
+
|
|
448
|
+
## Progressive Disclosure (Token-Efficient Strategy)
|
|
449
|
+
1. **Start with activity-summary** (~200 tokens) for broad questions ("what was I doing?")
|
|
450
|
+
2. **Narrow with search-content** (~500-1000 tokens) using filters from step 1
|
|
451
|
+
3. **Drill into search-elements** (~200 tokens each) for structural UI detail (buttons, links)
|
|
452
|
+
4. **Fetch frame-context** for URLs and accessibility tree of specific frames
|
|
453
|
+
5. **Screenshots** (include_frames=true) only when text isn't enough
|
|
454
|
+
|
|
287
455
|
## Tips
|
|
288
456
|
1. Read screenpipe://context first to get current timestamps
|
|
289
|
-
2. Use
|
|
290
|
-
3. Use
|
|
291
|
-
4.
|
|
457
|
+
2. Use activity-summary before search-content for broad overview questions
|
|
458
|
+
3. Use search-elements instead of search-content for targeted UI lookups (10x lighter)
|
|
459
|
+
4. Use content_type=input for "what did I type?" queries
|
|
460
|
+
5. Use content_type=accessibility for accessibility tree text
|
|
461
|
+
6. For large aggregations (e.g. "what apps did I use today?"), paginate with offset or suggest the user run raw SQL via \`curl -X POST http://localhost:3030/raw_sql\` for efficient GROUP BY queries
|
|
292
462
|
|
|
293
463
|
## Deep Links (Clickable References)
|
|
294
464
|
When showing search results to users, create clickable links so they can jump to that exact moment.
|
|
@@ -434,7 +604,7 @@ Current time: ${dateInfo.isoDate}
|
|
|
434
604
|
Use search-content with:
|
|
435
605
|
- app_name: "${app}"
|
|
436
606
|
${query ? `- q: "${query}"` : "- No query filter"}
|
|
437
|
-
- content_type: "
|
|
607
|
+
- content_type: "all"
|
|
438
608
|
- limit: 50`,
|
|
439
609
|
},
|
|
440
610
|
},
|
|
@@ -712,6 +882,156 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
|
|
|
712
882
|
};
|
|
713
883
|
}
|
|
714
884
|
}
|
|
885
|
+
case "list-meetings": {
|
|
886
|
+
const params = new URLSearchParams();
|
|
887
|
+
for (const [key, value] of Object.entries(args)) {
|
|
888
|
+
if (value !== null && value !== undefined) {
|
|
889
|
+
params.append(key, String(value));
|
|
890
|
+
}
|
|
891
|
+
}
|
|
892
|
+
const response = await fetchAPI(`/meetings?${params.toString()}`);
|
|
893
|
+
if (!response.ok) {
|
|
894
|
+
throw new Error(`HTTP error: ${response.status}`);
|
|
895
|
+
}
|
|
896
|
+
const meetings = await response.json();
|
|
897
|
+
if (!Array.isArray(meetings) || meetings.length === 0) {
|
|
898
|
+
return {
|
|
899
|
+
content: [
|
|
900
|
+
{
|
|
901
|
+
type: "text",
|
|
902
|
+
text: "No meetings found. Make sure screenpipe is running in smart transcription mode.",
|
|
903
|
+
},
|
|
904
|
+
],
|
|
905
|
+
};
|
|
906
|
+
}
|
|
907
|
+
const formatted = meetings.map((m) => {
|
|
908
|
+
const start = m.meeting_start;
|
|
909
|
+
const end = m.meeting_end || "ongoing";
|
|
910
|
+
const app = m.meeting_app;
|
|
911
|
+
const title = m.title ? ` — ${m.title}` : "";
|
|
912
|
+
const attendees = m.attendees ? `\nAttendees: ${m.attendees}` : "";
|
|
913
|
+
return `[${m.detection_source}] ${app}${title}\n ${start} → ${end}${attendees}`;
|
|
914
|
+
});
|
|
915
|
+
return {
|
|
916
|
+
content: [
|
|
917
|
+
{
|
|
918
|
+
type: "text",
|
|
919
|
+
text: `Meetings: ${meetings.length}\n\n${formatted.join("\n---\n")}`,
|
|
920
|
+
},
|
|
921
|
+
],
|
|
922
|
+
};
|
|
923
|
+
}
|
|
924
|
+
case "activity-summary": {
|
|
925
|
+
const params = new URLSearchParams();
|
|
926
|
+
for (const [key, value] of Object.entries(args)) {
|
|
927
|
+
if (value !== null && value !== undefined) {
|
|
928
|
+
params.append(key, String(value));
|
|
929
|
+
}
|
|
930
|
+
}
|
|
931
|
+
const response = await fetchAPI(`/activity-summary?${params.toString()}`);
|
|
932
|
+
if (!response.ok) {
|
|
933
|
+
throw new Error(`HTTP error: ${response.status}`);
|
|
934
|
+
}
|
|
935
|
+
const data = await response.json();
|
|
936
|
+
// Format apps
|
|
937
|
+
const appsLines = (data.apps || []).map((a) => {
|
|
938
|
+
const timeSpan = a.first_seen && a.last_seen
|
|
939
|
+
? `, ${a.first_seen.slice(11, 16)}–${a.last_seen.slice(11, 16)} UTC`
|
|
940
|
+
: "";
|
|
941
|
+
return ` ${a.name}: ${a.minutes} min (${a.frame_count} frames${timeSpan})`;
|
|
942
|
+
});
|
|
943
|
+
// Format audio
|
|
944
|
+
const speakerLines = (data.audio_summary?.speakers || []).map((s) => ` ${s.name}: ${s.segment_count} segments`);
|
|
945
|
+
// Format recent texts
|
|
946
|
+
const textLines = (data.recent_texts || []).map((t) => ` [${t.app_name}] ${t.text}`);
|
|
947
|
+
const summary = [
|
|
948
|
+
`Activity Summary (${data.time_range?.start} → ${data.time_range?.end})`,
|
|
949
|
+
`Total frames: ${data.total_frames}`,
|
|
950
|
+
"",
|
|
951
|
+
"Apps:",
|
|
952
|
+
...(appsLines.length ? appsLines : [" (none)"]),
|
|
953
|
+
"",
|
|
954
|
+
`Audio: ${data.audio_summary?.segment_count || 0} segments`,
|
|
955
|
+
...(speakerLines.length ? speakerLines : []),
|
|
956
|
+
"",
|
|
957
|
+
"Recent texts:",
|
|
958
|
+
...(textLines.length ? textLines.slice(0, 10) : [" (none)"]),
|
|
959
|
+
].join("\n");
|
|
960
|
+
return { content: [{ type: "text", text: summary }] };
|
|
961
|
+
}
|
|
962
|
+
case "search-elements": {
|
|
963
|
+
const params = new URLSearchParams();
|
|
964
|
+
for (const [key, value] of Object.entries(args)) {
|
|
965
|
+
if (value !== null && value !== undefined) {
|
|
966
|
+
params.append(key, String(value));
|
|
967
|
+
}
|
|
968
|
+
}
|
|
969
|
+
const response = await fetchAPI(`/elements?${params.toString()}`);
|
|
970
|
+
if (!response.ok) {
|
|
971
|
+
throw new Error(`HTTP error: ${response.status}`);
|
|
972
|
+
}
|
|
973
|
+
const data = await response.json();
|
|
974
|
+
const elements = data.data || [];
|
|
975
|
+
const pagination = data.pagination || {};
|
|
976
|
+
if (elements.length === 0) {
|
|
977
|
+
return {
|
|
978
|
+
content: [
|
|
979
|
+
{
|
|
980
|
+
type: "text",
|
|
981
|
+
text: "No elements found. Try: broader search, different role/source, or wider time range.",
|
|
982
|
+
},
|
|
983
|
+
],
|
|
984
|
+
};
|
|
985
|
+
}
|
|
986
|
+
const formatted = elements.map((e) => {
|
|
987
|
+
const boundsStr = e.bounds
|
|
988
|
+
? ` [${e.bounds.left.toFixed(2)},${e.bounds.top.toFixed(2)} ${e.bounds.width.toFixed(2)}x${e.bounds.height.toFixed(2)}]`
|
|
989
|
+
: "";
|
|
990
|
+
return `[${e.source}] ${e.role} (frame:${e.frame_id}, depth:${e.depth})${boundsStr}\n ${e.text || "(no text)"}`;
|
|
991
|
+
});
|
|
992
|
+
const header = `Elements: ${elements.length}/${pagination.total || "?"}` +
|
|
993
|
+
(pagination.total > elements.length
|
|
994
|
+
? ` (use offset=${(pagination.offset || 0) + elements.length} for more)`
|
|
995
|
+
: "");
|
|
996
|
+
return {
|
|
997
|
+
content: [{ type: "text", text: header + "\n\n" + formatted.join("\n---\n") }],
|
|
998
|
+
};
|
|
999
|
+
}
|
|
1000
|
+
case "frame-context": {
|
|
1001
|
+
const frameId = args.frame_id;
|
|
1002
|
+
if (!frameId) {
|
|
1003
|
+
return {
|
|
1004
|
+
content: [{ type: "text", text: "Error: frame_id is required" }],
|
|
1005
|
+
};
|
|
1006
|
+
}
|
|
1007
|
+
const response = await fetchAPI(`/frames/${frameId}/context`);
|
|
1008
|
+
if (!response.ok) {
|
|
1009
|
+
throw new Error(`HTTP error: ${response.status}`);
|
|
1010
|
+
}
|
|
1011
|
+
const data = await response.json();
|
|
1012
|
+
const lines = [
|
|
1013
|
+
`Frame ${data.frame_id} (source: ${data.text_source})`,
|
|
1014
|
+
];
|
|
1015
|
+
if (data.urls?.length) {
|
|
1016
|
+
lines.push("", "URLs:", ...data.urls.map((u) => ` ${u}`));
|
|
1017
|
+
}
|
|
1018
|
+
if (data.nodes?.length) {
|
|
1019
|
+
lines.push("", `Nodes: ${data.nodes.length}`);
|
|
1020
|
+
for (const node of data.nodes.slice(0, 50)) {
|
|
1021
|
+
const indent = " ".repeat(Math.min(node.depth, 5));
|
|
1022
|
+
lines.push(`${indent}[${node.role}] ${node.text}`);
|
|
1023
|
+
}
|
|
1024
|
+
if (data.nodes.length > 50) {
|
|
1025
|
+
lines.push(` ... and ${data.nodes.length - 50} more nodes`);
|
|
1026
|
+
}
|
|
1027
|
+
}
|
|
1028
|
+
if (data.text) {
|
|
1029
|
+
// Truncate to avoid massive outputs
|
|
1030
|
+
const truncated = data.text.length > 2000 ? data.text.substring(0, 2000) + "..." : data.text;
|
|
1031
|
+
lines.push("", "Full text:", truncated);
|
|
1032
|
+
}
|
|
1033
|
+
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
1034
|
+
}
|
|
715
1035
|
default:
|
|
716
1036
|
throw new Error(`Unknown tool: ${name}`);
|
|
717
1037
|
}
|
package/manifest.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"manifest_version": "0.3",
|
|
3
3
|
"name": "screenpipe",
|
|
4
4
|
"display_name": "Screenpipe",
|
|
5
|
-
"version": "0.8.
|
|
5
|
+
"version": "0.8.4",
|
|
6
6
|
"description": "Search your screen recordings and audio transcriptions with AI",
|
|
7
7
|
"long_description": "Screenpipe is a 24/7 screen and audio recorder that lets you search everything you've seen or heard. This extension connects Claude to your local screenpipe instance, enabling AI-powered search through your digital memory.",
|
|
8
8
|
"author": {
|
|
@@ -33,6 +33,22 @@
|
|
|
33
33
|
{
|
|
34
34
|
"name": "export-video",
|
|
35
35
|
"description": "Export screen recordings as MP4 video for a specific time range"
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
"name": "list-meetings",
|
|
39
|
+
"description": "List detected meetings with duration, app, and attendees"
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
"name": "activity-summary",
|
|
43
|
+
"description": "Lightweight compressed activity overview for a time range (apps, recent texts, audio summary)"
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
"name": "search-elements",
|
|
47
|
+
"description": "Search structured UI elements (accessibility tree nodes and OCR text blocks)"
|
|
48
|
+
},
|
|
49
|
+
{
|
|
50
|
+
"name": "frame-context",
|
|
51
|
+
"description": "Get accessibility text, parsed tree nodes, and extracted URLs for a specific frame"
|
|
36
52
|
}
|
|
37
53
|
],
|
|
38
54
|
"compatibility": {
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -48,7 +48,7 @@ const SCREENPIPE_API = `http://localhost:${port}`;
|
|
|
48
48
|
const server = new Server(
|
|
49
49
|
{
|
|
50
50
|
name: "screenpipe",
|
|
51
|
-
version: "0.8.
|
|
51
|
+
version: "0.8.3",
|
|
52
52
|
},
|
|
53
53
|
{
|
|
54
54
|
capabilities: {
|
|
@@ -64,10 +64,14 @@ const BASE_TOOLS: Tool[] = [
|
|
|
64
64
|
{
|
|
65
65
|
name: "search-content",
|
|
66
66
|
description:
|
|
67
|
-
"Search screenpipe's recorded content: screen text (OCR), audio transcriptions, and UI elements. " +
|
|
67
|
+
"Search screenpipe's recorded content: screen text (accessibility APIs, with OCR fallback), audio transcriptions, and UI elements. " +
|
|
68
68
|
"Returns timestamped results with app context. " +
|
|
69
69
|
"Call with no parameters to get recent activity. " +
|
|
70
70
|
"Use the 'screenpipe://context' resource for current time when building time-based queries.\n\n" +
|
|
71
|
+
"SEARCH STRATEGY: First search with ONLY time params (start_time/end_time) — no q, no app_name, no content_type. " +
|
|
72
|
+
"This gives ground truth of what's recorded. Scan results to find correct app_name values, then narrow with filters using exact observed values. " +
|
|
73
|
+
"App names are case-sensitive and may differ from user input (e.g. 'Discord' vs 'Discord.exe'). " +
|
|
74
|
+
"The q param searches captured text (accessibility/OCR), NOT app names. NEVER report 'no data' after one filtered search — verify with unfiltered time-only search first.\n\n" +
|
|
71
75
|
"DEEP LINKS: When referencing specific moments, create clickable links using IDs from search results:\n" +
|
|
72
76
|
"- OCR results (PREFERRED): [10:30 AM — Chrome](screenpipe://frame/12345) — use content.frame_id from the result\n" +
|
|
73
77
|
"- Audio results: [meeting at 3pm](screenpipe://timeline?timestamp=2024-01-15T15:00:00Z) — use exact timestamp from result\n" +
|
|
@@ -86,7 +90,7 @@ const BASE_TOOLS: Tool[] = [
|
|
|
86
90
|
content_type: {
|
|
87
91
|
type: "string",
|
|
88
92
|
enum: ["all", "ocr", "audio", "input", "accessibility"],
|
|
89
|
-
description: "Content type filter: 'ocr' (screen text), 'audio' (transcriptions), 'input' (clicks, keystrokes, clipboard, app switches), 'accessibility' (accessibility tree text), 'all'. Default: 'all'.",
|
|
93
|
+
description: "Content type filter: 'ocr' (screen text via OCR, legacy fallback), 'audio' (transcriptions), 'input' (clicks, keystrokes, clipboard, app switches), 'accessibility' (accessibility tree text, preferred for screen content), 'all'. Default: 'all'.",
|
|
90
94
|
default: "all",
|
|
91
95
|
},
|
|
92
96
|
limit: {
|
|
@@ -102,12 +106,12 @@ const BASE_TOOLS: Tool[] = [
|
|
|
102
106
|
start_time: {
|
|
103
107
|
type: "string",
|
|
104
108
|
format: "date-time",
|
|
105
|
-
description: "ISO 8601 UTC
|
|
109
|
+
description: "Start time: ISO 8601 UTC (e.g., 2024-01-15T10:00:00Z) or relative (e.g., '16h ago', '2d ago', 'now')",
|
|
106
110
|
},
|
|
107
111
|
end_time: {
|
|
108
112
|
type: "string",
|
|
109
113
|
format: "date-time",
|
|
110
|
-
description: "ISO 8601 UTC
|
|
114
|
+
description: "End time: ISO 8601 UTC (e.g., 2024-01-15T18:00:00Z) or relative (e.g., 'now', '1h ago')",
|
|
111
115
|
},
|
|
112
116
|
app_name: {
|
|
113
117
|
type: "string",
|
|
@@ -138,6 +142,10 @@ const BASE_TOOLS: Tool[] = [
|
|
|
138
142
|
type: "string",
|
|
139
143
|
description: "Filter audio by speaker name (case-insensitive partial match)",
|
|
140
144
|
},
|
|
145
|
+
max_content_length: {
|
|
146
|
+
type: "integer",
|
|
147
|
+
description: "Truncate each result's text/transcription to this many characters using middle-truncation (keeps first half + last half). Useful for limiting token usage with small-context models.",
|
|
148
|
+
},
|
|
141
149
|
},
|
|
142
150
|
},
|
|
143
151
|
},
|
|
@@ -146,7 +154,7 @@ const BASE_TOOLS: Tool[] = [
|
|
|
146
154
|
description:
|
|
147
155
|
"Export a video of screen recordings for a specific time range. " +
|
|
148
156
|
"Creates an MP4 video from the recorded frames between the start and end times.\n\n" +
|
|
149
|
-
"IMPORTANT: Use ISO 8601 UTC timestamps (e.g., 2024-01-15T10:00:00Z)\n\n" +
|
|
157
|
+
"IMPORTANT: Use ISO 8601 UTC timestamps (e.g., 2024-01-15T10:00:00Z) or relative times (e.g., '16h ago', 'now')\n\n" +
|
|
150
158
|
"EXAMPLES:\n" +
|
|
151
159
|
"- Last 30 minutes: Calculate timestamps from current time\n" +
|
|
152
160
|
"- Specific meeting: Use the meeting's start and end times in UTC",
|
|
@@ -161,13 +169,13 @@ const BASE_TOOLS: Tool[] = [
|
|
|
161
169
|
type: "string",
|
|
162
170
|
format: "date-time",
|
|
163
171
|
description:
|
|
164
|
-
"Start time
|
|
172
|
+
"Start time: ISO 8601 UTC (e.g., '2024-01-15T10:00:00Z') or relative (e.g., '16h ago', 'now')",
|
|
165
173
|
},
|
|
166
174
|
end_time: {
|
|
167
175
|
type: "string",
|
|
168
176
|
format: "date-time",
|
|
169
177
|
description:
|
|
170
|
-
"End time
|
|
178
|
+
"End time: ISO 8601 UTC (e.g., '2024-01-15T10:30:00Z') or relative (e.g., 'now', '1h ago')",
|
|
171
179
|
},
|
|
172
180
|
fps: {
|
|
173
181
|
type: "number",
|
|
@@ -179,6 +187,156 @@ const BASE_TOOLS: Tool[] = [
|
|
|
179
187
|
required: ["start_time", "end_time"],
|
|
180
188
|
},
|
|
181
189
|
},
|
|
190
|
+
{
|
|
191
|
+
name: "list-meetings",
|
|
192
|
+
description:
|
|
193
|
+
"List detected meetings with duration, app, and attendees. " +
|
|
194
|
+
"Returns meetings detected via app focus (Zoom, Meet, Teams) and audio. " +
|
|
195
|
+
"Only available when screenpipe runs in smart transcription mode.",
|
|
196
|
+
annotations: {
|
|
197
|
+
title: "List Meetings",
|
|
198
|
+
readOnlyHint: true,
|
|
199
|
+
},
|
|
200
|
+
inputSchema: {
|
|
201
|
+
type: "object",
|
|
202
|
+
properties: {
|
|
203
|
+
start_time: {
|
|
204
|
+
type: "string",
|
|
205
|
+
format: "date-time",
|
|
206
|
+
description: "Start filter: ISO 8601 UTC (e.g., 2024-01-15T10:00:00Z) or relative (e.g., '16h ago', 'now')",
|
|
207
|
+
},
|
|
208
|
+
end_time: {
|
|
209
|
+
type: "string",
|
|
210
|
+
format: "date-time",
|
|
211
|
+
description: "End filter: ISO 8601 UTC (e.g., 2024-01-15T18:00:00Z) or relative (e.g., 'now', '1h ago')",
|
|
212
|
+
},
|
|
213
|
+
limit: {
|
|
214
|
+
type: "integer",
|
|
215
|
+
description: "Max results. Default: 20",
|
|
216
|
+
default: 20,
|
|
217
|
+
},
|
|
218
|
+
offset: {
|
|
219
|
+
type: "integer",
|
|
220
|
+
description: "Skip N results for pagination. Default: 0",
|
|
221
|
+
default: 0,
|
|
222
|
+
},
|
|
223
|
+
},
|
|
224
|
+
},
|
|
225
|
+
},
|
|
226
|
+
{
|
|
227
|
+
name: "activity-summary",
|
|
228
|
+
description:
|
|
229
|
+
"Get a lightweight compressed activity overview for a time range (~200-500 tokens). " +
|
|
230
|
+
"Returns app usage (name, frame count, active minutes, first/last seen), recent accessibility texts, and audio speaker summary. " +
|
|
231
|
+
"Minutes are based on active session time (consecutive frames with gaps < 5min count as active). " +
|
|
232
|
+
"first_seen/last_seen show the wall-clock span per app. " +
|
|
233
|
+
"Use this FIRST for broad questions like 'what was I doing?' before drilling into search-content or search-elements. " +
|
|
234
|
+
"Much cheaper than search-content for getting an overview.",
|
|
235
|
+
annotations: {
|
|
236
|
+
title: "Activity Summary",
|
|
237
|
+
readOnlyHint: true,
|
|
238
|
+
},
|
|
239
|
+
inputSchema: {
|
|
240
|
+
type: "object",
|
|
241
|
+
properties: {
|
|
242
|
+
start_time: {
|
|
243
|
+
type: "string",
|
|
244
|
+
format: "date-time",
|
|
245
|
+
description: "Start of time range: ISO 8601 UTC (e.g., 2024-01-15T10:00:00Z) or relative (e.g., '16h ago', 'now')",
|
|
246
|
+
},
|
|
247
|
+
end_time: {
|
|
248
|
+
type: "string",
|
|
249
|
+
format: "date-time",
|
|
250
|
+
description: "End of time range: ISO 8601 UTC (e.g., 2024-01-15T18:00:00Z) or relative (e.g., 'now', '1h ago')",
|
|
251
|
+
},
|
|
252
|
+
app_name: {
|
|
253
|
+
type: "string",
|
|
254
|
+
description: "Optional app name filter (e.g., 'Google Chrome', 'VS Code')",
|
|
255
|
+
},
|
|
256
|
+
},
|
|
257
|
+
required: ["start_time", "end_time"],
|
|
258
|
+
},
|
|
259
|
+
},
|
|
260
|
+
{
|
|
261
|
+
name: "search-elements",
|
|
262
|
+
description:
|
|
263
|
+
"Search structured UI elements (accessibility tree nodes and OCR text blocks). " +
|
|
264
|
+
"Returns ~100-500 bytes per element — much lighter than search-content for targeted lookups. " +
|
|
265
|
+
"Each element has: id, frame_id, source (accessibility/ocr), role (AXButton, AXStaticText, AXLink, etc.), text, bounds, depth.\n\n" +
|
|
266
|
+
"Use for: finding specific buttons, links, text fields, or UI components. " +
|
|
267
|
+
"Prefer this over search-content when you need structural UI detail rather than full screen text.",
|
|
268
|
+
annotations: {
|
|
269
|
+
title: "Search Elements",
|
|
270
|
+
readOnlyHint: true,
|
|
271
|
+
},
|
|
272
|
+
inputSchema: {
|
|
273
|
+
type: "object",
|
|
274
|
+
properties: {
|
|
275
|
+
q: {
|
|
276
|
+
type: "string",
|
|
277
|
+
description: "Full-text search query across element text. Optional.",
|
|
278
|
+
},
|
|
279
|
+
frame_id: {
|
|
280
|
+
type: "integer",
|
|
281
|
+
description: "Filter to elements from a specific frame",
|
|
282
|
+
},
|
|
283
|
+
source: {
|
|
284
|
+
type: "string",
|
|
285
|
+
enum: ["accessibility", "ocr"],
|
|
286
|
+
description: "Filter by element source: 'accessibility' (structured tree) or 'ocr' (text blocks)",
|
|
287
|
+
},
|
|
288
|
+
role: {
|
|
289
|
+
type: "string",
|
|
290
|
+
description: "Filter by element role (e.g., 'AXButton', 'AXStaticText', 'AXLink', 'AXTextField', 'line')",
|
|
291
|
+
},
|
|
292
|
+
start_time: {
|
|
293
|
+
type: "string",
|
|
294
|
+
format: "date-time",
|
|
295
|
+
description: "Start time: ISO 8601 UTC or relative (e.g., '16h ago', 'now')",
|
|
296
|
+
},
|
|
297
|
+
end_time: {
|
|
298
|
+
type: "string",
|
|
299
|
+
format: "date-time",
|
|
300
|
+
description: "End time: ISO 8601 UTC or relative (e.g., 'now', '1h ago')",
|
|
301
|
+
},
|
|
302
|
+
app_name: {
|
|
303
|
+
type: "string",
|
|
304
|
+
description: "Filter by app name",
|
|
305
|
+
},
|
|
306
|
+
limit: {
|
|
307
|
+
type: "integer",
|
|
308
|
+
description: "Max results. Default: 50",
|
|
309
|
+
default: 50,
|
|
310
|
+
},
|
|
311
|
+
offset: {
|
|
312
|
+
type: "integer",
|
|
313
|
+
description: "Skip N results for pagination. Default: 0",
|
|
314
|
+
default: 0,
|
|
315
|
+
},
|
|
316
|
+
},
|
|
317
|
+
},
|
|
318
|
+
},
|
|
319
|
+
{
|
|
320
|
+
name: "frame-context",
|
|
321
|
+
description:
|
|
322
|
+
"Get accessibility text, parsed tree nodes, and extracted URLs for a specific frame. " +
|
|
323
|
+
"Falls back to OCR data for legacy frames without accessibility data. " +
|
|
324
|
+
"Use after finding a frame_id from search-content or search-elements to get full structural detail and URLs.",
|
|
325
|
+
annotations: {
|
|
326
|
+
title: "Frame Context",
|
|
327
|
+
readOnlyHint: true,
|
|
328
|
+
},
|
|
329
|
+
inputSchema: {
|
|
330
|
+
type: "object",
|
|
331
|
+
properties: {
|
|
332
|
+
frame_id: {
|
|
333
|
+
type: "integer",
|
|
334
|
+
description: "The frame ID to get context for (from search results)",
|
|
335
|
+
},
|
|
336
|
+
},
|
|
337
|
+
required: ["frame_id"],
|
|
338
|
+
},
|
|
339
|
+
},
|
|
182
340
|
];
|
|
183
341
|
|
|
184
342
|
// List tools handler
|
|
@@ -255,16 +413,16 @@ server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
|
|
|
255
413
|
## Data Modalities
|
|
256
414
|
|
|
257
415
|
Screenpipe captures four types of data:
|
|
258
|
-
1. **
|
|
259
|
-
2. **
|
|
260
|
-
3. **
|
|
261
|
-
4. **
|
|
416
|
+
1. **Accessibility** - Screen text via accessibility APIs (primary, preferred for screen content)
|
|
417
|
+
2. **OCR** - Screen text from screenshots (legacy fallback for apps without accessibility support)
|
|
418
|
+
3. **Audio** - Transcribed speech from microphone/system audio
|
|
419
|
+
4. **Input** - Keyboard input, mouse clicks, app switches, clipboard (macOS)
|
|
262
420
|
|
|
263
421
|
## Quick Start
|
|
264
422
|
- **Get recent activity**: Call search-content with no parameters
|
|
265
|
-
- **Search text**: \`{"q": "search term", "content_type": "
|
|
423
|
+
- **Search screen text**: \`{"q": "search term", "content_type": "all"}\`
|
|
266
424
|
- **Get keyboard input**: \`{"content_type": "input"}\`
|
|
267
|
-
- **Get
|
|
425
|
+
- **Get audio only**: \`{"content_type": "audio"}\`
|
|
268
426
|
|
|
269
427
|
## search-content
|
|
270
428
|
| Parameter | Description | Default |
|
|
@@ -272,16 +430,32 @@ Screenpipe captures four types of data:
|
|
|
272
430
|
| q | Search query | (none - returns all) |
|
|
273
431
|
| content_type | all/ocr/audio/input/accessibility | all |
|
|
274
432
|
| limit | Max results | 10 |
|
|
275
|
-
| start_time | ISO 8601 UTC | (no filter) |
|
|
276
|
-
| end_time | ISO 8601 UTC | (no filter) |
|
|
433
|
+
| start_time | ISO 8601 UTC or relative (e.g. '16h ago') | (no filter) |
|
|
434
|
+
| end_time | ISO 8601 UTC or relative (e.g. 'now') | (no filter) |
|
|
277
435
|
| app_name | Filter by app | (no filter) |
|
|
278
436
|
| include_frames | Include screenshots | false |
|
|
279
437
|
|
|
438
|
+
## Search Strategy (MANDATORY)
|
|
439
|
+
1. First search: ONLY use time params (start_time/end_time). No q, no app_name, no content_type. This gives ground truth of what's recorded.
|
|
440
|
+
2. Scan results to find correct app_name values and content patterns.
|
|
441
|
+
3. Only THEN narrow with filters using exact observed values. App names are case-sensitive and may differ from user input (e.g. "Discord" vs "Discord.exe").
|
|
442
|
+
4. The q param searches captured text (accessibility/OCR), NOT app names — an app can be visible without its name in the captured text.
|
|
443
|
+
5. NEVER report "no data found" after one filtered search. Verify with unfiltered time-only search first.
|
|
444
|
+
|
|
445
|
+
## Progressive Disclosure (Token-Efficient Strategy)
|
|
446
|
+
1. **Start with activity-summary** (~200 tokens) for broad questions ("what was I doing?")
|
|
447
|
+
2. **Narrow with search-content** (~500-1000 tokens) using filters from step 1
|
|
448
|
+
3. **Drill into search-elements** (~200 tokens each) for structural UI detail (buttons, links)
|
|
449
|
+
4. **Fetch frame-context** for URLs and accessibility tree of specific frames
|
|
450
|
+
5. **Screenshots** (include_frames=true) only when text isn't enough
|
|
451
|
+
|
|
280
452
|
## Tips
|
|
281
453
|
1. Read screenpipe://context first to get current timestamps
|
|
282
|
-
2. Use
|
|
283
|
-
3. Use
|
|
284
|
-
4.
|
|
454
|
+
2. Use activity-summary before search-content for broad overview questions
|
|
455
|
+
3. Use search-elements instead of search-content for targeted UI lookups (10x lighter)
|
|
456
|
+
4. Use content_type=input for "what did I type?" queries
|
|
457
|
+
5. Use content_type=accessibility for accessibility tree text
|
|
458
|
+
6. For large aggregations (e.g. "what apps did I use today?"), paginate with offset or suggest the user run raw SQL via \`curl -X POST http://localhost:3030/raw_sql\` for efficient GROUP BY queries
|
|
285
459
|
|
|
286
460
|
## Deep Links (Clickable References)
|
|
287
461
|
When showing search results to users, create clickable links so they can jump to that exact moment.
|
|
@@ -435,7 +609,7 @@ Current time: ${dateInfo.isoDate}
|
|
|
435
609
|
Use search-content with:
|
|
436
610
|
- app_name: "${app}"
|
|
437
611
|
${query ? `- q: "${query}"` : "- No query filter"}
|
|
438
|
-
- content_type: "
|
|
612
|
+
- content_type: "all"
|
|
439
613
|
- limit: 50`,
|
|
440
614
|
},
|
|
441
615
|
},
|
|
@@ -763,6 +937,204 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
763
937
|
}
|
|
764
938
|
}
|
|
765
939
|
|
|
940
|
+
case "list-meetings": {
|
|
941
|
+
const params = new URLSearchParams();
|
|
942
|
+
for (const [key, value] of Object.entries(args)) {
|
|
943
|
+
if (value !== null && value !== undefined) {
|
|
944
|
+
params.append(key, String(value));
|
|
945
|
+
}
|
|
946
|
+
}
|
|
947
|
+
|
|
948
|
+
const response = await fetchAPI(`/meetings?${params.toString()}`);
|
|
949
|
+
if (!response.ok) {
|
|
950
|
+
throw new Error(`HTTP error: ${response.status}`);
|
|
951
|
+
}
|
|
952
|
+
|
|
953
|
+
const meetings = await response.json();
|
|
954
|
+
|
|
955
|
+
if (!Array.isArray(meetings) || meetings.length === 0) {
|
|
956
|
+
return {
|
|
957
|
+
content: [
|
|
958
|
+
{
|
|
959
|
+
type: "text",
|
|
960
|
+
text: "No meetings found. Make sure screenpipe is running in smart transcription mode.",
|
|
961
|
+
},
|
|
962
|
+
],
|
|
963
|
+
};
|
|
964
|
+
}
|
|
965
|
+
|
|
966
|
+
const formatted = meetings.map((m: Record<string, unknown>) => {
|
|
967
|
+
const start = m.meeting_start as string;
|
|
968
|
+
const end = (m.meeting_end as string) || "ongoing";
|
|
969
|
+
const app = m.meeting_app as string;
|
|
970
|
+
const title = m.title ? ` — ${m.title}` : "";
|
|
971
|
+
const attendees = m.attendees ? `\nAttendees: ${m.attendees}` : "";
|
|
972
|
+
return `[${m.detection_source}] ${app}${title}\n ${start} → ${end}${attendees}`;
|
|
973
|
+
});
|
|
974
|
+
|
|
975
|
+
return {
|
|
976
|
+
content: [
|
|
977
|
+
{
|
|
978
|
+
type: "text",
|
|
979
|
+
text: `Meetings: ${meetings.length}\n\n${formatted.join("\n---\n")}`,
|
|
980
|
+
},
|
|
981
|
+
],
|
|
982
|
+
};
|
|
983
|
+
}
|
|
984
|
+
|
|
985
|
+
case "activity-summary": {
|
|
986
|
+
const params = new URLSearchParams();
|
|
987
|
+
for (const [key, value] of Object.entries(args)) {
|
|
988
|
+
if (value !== null && value !== undefined) {
|
|
989
|
+
params.append(key, String(value));
|
|
990
|
+
}
|
|
991
|
+
}
|
|
992
|
+
|
|
993
|
+
const response = await fetchAPI(`/activity-summary?${params.toString()}`);
|
|
994
|
+
if (!response.ok) {
|
|
995
|
+
throw new Error(`HTTP error: ${response.status}`);
|
|
996
|
+
}
|
|
997
|
+
|
|
998
|
+
const data = await response.json();
|
|
999
|
+
|
|
1000
|
+
// Format apps
|
|
1001
|
+
const appsLines = (data.apps || []).map(
|
|
1002
|
+
(a: { name: string; frame_count: number; minutes: number; first_seen?: string; last_seen?: string }) => {
|
|
1003
|
+
const timeSpan = a.first_seen && a.last_seen
|
|
1004
|
+
? `, ${a.first_seen.slice(11, 16)}–${a.last_seen.slice(11, 16)} UTC`
|
|
1005
|
+
: "";
|
|
1006
|
+
return ` ${a.name}: ${a.minutes} min (${a.frame_count} frames${timeSpan})`;
|
|
1007
|
+
}
|
|
1008
|
+
);
|
|
1009
|
+
|
|
1010
|
+
// Format audio
|
|
1011
|
+
const speakerLines = (data.audio_summary?.speakers || []).map(
|
|
1012
|
+
(s: { name: string; segment_count: number }) =>
|
|
1013
|
+
` ${s.name}: ${s.segment_count} segments`
|
|
1014
|
+
);
|
|
1015
|
+
|
|
1016
|
+
// Format recent texts
|
|
1017
|
+
const textLines = (data.recent_texts || []).map(
|
|
1018
|
+
(t: { text: string; app_name: string; timestamp: string }) =>
|
|
1019
|
+
` [${t.app_name}] ${t.text}`
|
|
1020
|
+
);
|
|
1021
|
+
|
|
1022
|
+
const summary = [
|
|
1023
|
+
`Activity Summary (${data.time_range?.start} → ${data.time_range?.end})`,
|
|
1024
|
+
`Total frames: ${data.total_frames}`,
|
|
1025
|
+
"",
|
|
1026
|
+
"Apps:",
|
|
1027
|
+
...(appsLines.length ? appsLines : [" (none)"]),
|
|
1028
|
+
"",
|
|
1029
|
+
`Audio: ${data.audio_summary?.segment_count || 0} segments`,
|
|
1030
|
+
...(speakerLines.length ? speakerLines : []),
|
|
1031
|
+
"",
|
|
1032
|
+
"Recent texts:",
|
|
1033
|
+
...(textLines.length ? textLines.slice(0, 10) : [" (none)"]),
|
|
1034
|
+
].join("\n");
|
|
1035
|
+
|
|
1036
|
+
return { content: [{ type: "text", text: summary }] };
|
|
1037
|
+
}
|
|
1038
|
+
|
|
1039
|
+
case "search-elements": {
|
|
1040
|
+
const params = new URLSearchParams();
|
|
1041
|
+
for (const [key, value] of Object.entries(args)) {
|
|
1042
|
+
if (value !== null && value !== undefined) {
|
|
1043
|
+
params.append(key, String(value));
|
|
1044
|
+
}
|
|
1045
|
+
}
|
|
1046
|
+
|
|
1047
|
+
const response = await fetchAPI(`/elements?${params.toString()}`);
|
|
1048
|
+
if (!response.ok) {
|
|
1049
|
+
throw new Error(`HTTP error: ${response.status}`);
|
|
1050
|
+
}
|
|
1051
|
+
|
|
1052
|
+
const data = await response.json();
|
|
1053
|
+
const elements = data.data || [];
|
|
1054
|
+
const pagination = data.pagination || {};
|
|
1055
|
+
|
|
1056
|
+
if (elements.length === 0) {
|
|
1057
|
+
return {
|
|
1058
|
+
content: [
|
|
1059
|
+
{
|
|
1060
|
+
type: "text",
|
|
1061
|
+
text: "No elements found. Try: broader search, different role/source, or wider time range.",
|
|
1062
|
+
},
|
|
1063
|
+
],
|
|
1064
|
+
};
|
|
1065
|
+
}
|
|
1066
|
+
|
|
1067
|
+
const formatted = elements.map(
|
|
1068
|
+
(e: {
|
|
1069
|
+
id: number;
|
|
1070
|
+
frame_id: number;
|
|
1071
|
+
source: string;
|
|
1072
|
+
role: string;
|
|
1073
|
+
text: string | null;
|
|
1074
|
+
depth: number;
|
|
1075
|
+
bounds: { left: number; top: number; width: number; height: number } | null;
|
|
1076
|
+
}) => {
|
|
1077
|
+
const boundsStr = e.bounds
|
|
1078
|
+
? ` [${e.bounds.left.toFixed(2)},${e.bounds.top.toFixed(2)} ${e.bounds.width.toFixed(2)}x${e.bounds.height.toFixed(2)}]`
|
|
1079
|
+
: "";
|
|
1080
|
+
return `[${e.source}] ${e.role} (frame:${e.frame_id}, depth:${e.depth})${boundsStr}\n ${e.text || "(no text)"}`;
|
|
1081
|
+
}
|
|
1082
|
+
);
|
|
1083
|
+
|
|
1084
|
+
const header =
|
|
1085
|
+
`Elements: ${elements.length}/${pagination.total || "?"}` +
|
|
1086
|
+
(pagination.total > elements.length
|
|
1087
|
+
? ` (use offset=${(pagination.offset || 0) + elements.length} for more)`
|
|
1088
|
+
: "");
|
|
1089
|
+
|
|
1090
|
+
return {
|
|
1091
|
+
content: [{ type: "text", text: header + "\n\n" + formatted.join("\n---\n") }],
|
|
1092
|
+
};
|
|
1093
|
+
}
|
|
1094
|
+
|
|
1095
|
+
case "frame-context": {
|
|
1096
|
+
const frameId = args.frame_id as number;
|
|
1097
|
+
if (!frameId) {
|
|
1098
|
+
return {
|
|
1099
|
+
content: [{ type: "text", text: "Error: frame_id is required" }],
|
|
1100
|
+
};
|
|
1101
|
+
}
|
|
1102
|
+
|
|
1103
|
+
const response = await fetchAPI(`/frames/${frameId}/context`);
|
|
1104
|
+
if (!response.ok) {
|
|
1105
|
+
throw new Error(`HTTP error: ${response.status}`);
|
|
1106
|
+
}
|
|
1107
|
+
|
|
1108
|
+
const data = await response.json();
|
|
1109
|
+
|
|
1110
|
+
const lines = [
|
|
1111
|
+
`Frame ${data.frame_id} (source: ${data.text_source})`,
|
|
1112
|
+
];
|
|
1113
|
+
|
|
1114
|
+
if (data.urls?.length) {
|
|
1115
|
+
lines.push("", "URLs:", ...data.urls.map((u: string) => ` ${u}`));
|
|
1116
|
+
}
|
|
1117
|
+
|
|
1118
|
+
if (data.nodes?.length) {
|
|
1119
|
+
lines.push("", `Nodes: ${data.nodes.length}`);
|
|
1120
|
+
for (const node of data.nodes.slice(0, 50)) {
|
|
1121
|
+
const indent = " ".repeat(Math.min(node.depth, 5));
|
|
1122
|
+
lines.push(`${indent}[${node.role}] ${node.text}`);
|
|
1123
|
+
}
|
|
1124
|
+
if (data.nodes.length > 50) {
|
|
1125
|
+
lines.push(` ... and ${data.nodes.length - 50} more nodes`);
|
|
1126
|
+
}
|
|
1127
|
+
}
|
|
1128
|
+
|
|
1129
|
+
if (data.text) {
|
|
1130
|
+
// Truncate to avoid massive outputs
|
|
1131
|
+
const truncated = data.text.length > 2000 ? data.text.substring(0, 2000) + "..." : data.text;
|
|
1132
|
+
lines.push("", "Full text:", truncated);
|
|
1133
|
+
}
|
|
1134
|
+
|
|
1135
|
+
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
1136
|
+
}
|
|
1137
|
+
|
|
766
1138
|
default:
|
|
767
1139
|
throw new Error(`Unknown tool: ${name}`);
|
|
768
1140
|
}
|