screenpipe-mcp 0.8.1 → 0.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -21
- package/dist/http-server.d.ts +0 -6
- package/dist/http-server.js +10 -6
- package/dist/index.js +250 -152
- package/manifest.json +17 -1
- package/package.json +1 -1
- package/src/http-server.ts +9 -5
- package/src/index.ts +289 -153
package/dist/index.js
CHANGED
|
@@ -69,7 +69,7 @@ const SCREENPIPE_API = `http://localhost:${port}`;
|
|
|
69
69
|
// Initialize server
|
|
70
70
|
const server = new index_js_1.Server({
|
|
71
71
|
name: "screenpipe",
|
|
72
|
-
version: "0.
|
|
72
|
+
version: "0.8.3",
|
|
73
73
|
}, {
|
|
74
74
|
capabilities: {
|
|
75
75
|
tools: {},
|
|
@@ -81,14 +81,18 @@ const server = new index_js_1.Server({
|
|
|
81
81
|
const BASE_TOOLS = [
|
|
82
82
|
{
|
|
83
83
|
name: "search-content",
|
|
84
|
-
description: "Search screenpipe's recorded content: screen text (OCR), audio transcriptions, and UI elements. " +
|
|
84
|
+
description: "Search screenpipe's recorded content: screen text (accessibility APIs, with OCR fallback), audio transcriptions, and UI elements. " +
|
|
85
85
|
"Returns timestamped results with app context. " +
|
|
86
86
|
"Call with no parameters to get recent activity. " +
|
|
87
87
|
"Use the 'screenpipe://context' resource for current time when building time-based queries.\n\n" +
|
|
88
|
-
"
|
|
89
|
-
"
|
|
90
|
-
"
|
|
91
|
-
"
|
|
88
|
+
"SEARCH STRATEGY: First search with ONLY time params (start_time/end_time) — no q, no app_name, no content_type. " +
|
|
89
|
+
"This gives ground truth of what's recorded. Scan results to find correct app_name values, then narrow with filters using exact observed values. " +
|
|
90
|
+
"App names are case-sensitive and may differ from user input (e.g. 'Discord' vs 'Discord.exe'). " +
|
|
91
|
+
"The q param searches captured text (accessibility/OCR), NOT app names. NEVER report 'no data' after one filtered search — verify with unfiltered time-only search first.\n\n" +
|
|
92
|
+
"DEEP LINKS: When referencing specific moments, create clickable links using IDs from search results:\n" +
|
|
93
|
+
"- OCR results (PREFERRED): [10:30 AM — Chrome](screenpipe://frame/12345) — use content.frame_id from the result\n" +
|
|
94
|
+
"- Audio results: [meeting at 3pm](screenpipe://timeline?timestamp=2024-01-15T15:00:00Z) — use exact timestamp from result\n" +
|
|
95
|
+
"NEVER fabricate frame IDs or timestamps — only use values from actual search results.",
|
|
92
96
|
annotations: {
|
|
93
97
|
title: "Search Content",
|
|
94
98
|
readOnlyHint: true,
|
|
@@ -102,8 +106,8 @@ const BASE_TOOLS = [
|
|
|
102
106
|
},
|
|
103
107
|
content_type: {
|
|
104
108
|
type: "string",
|
|
105
|
-
enum: ["all", "ocr", "audio", "
|
|
106
|
-
description: "Content type filter: 'ocr' (screen text), 'audio' (transcriptions), '
|
|
109
|
+
enum: ["all", "ocr", "audio", "input", "accessibility"],
|
|
110
|
+
description: "Content type filter: 'ocr' (screen text via OCR, legacy fallback), 'audio' (transcriptions), 'input' (clicks, keystrokes, clipboard, app switches), 'accessibility' (accessibility tree text, preferred for screen content), 'all'. Default: 'all'.",
|
|
107
111
|
default: "all",
|
|
108
112
|
},
|
|
109
113
|
limit: {
|
|
@@ -193,45 +197,115 @@ const BASE_TOOLS = [
|
|
|
193
197
|
},
|
|
194
198
|
},
|
|
195
199
|
{
|
|
196
|
-
name: "
|
|
197
|
-
description: "
|
|
198
|
-
"
|
|
199
|
-
"
|
|
200
|
-
"Events include app context, element info (accessibility labels), and precise timestamps. " +
|
|
201
|
-
"Great for understanding user workflow, what was typed, clicked, or copied.",
|
|
200
|
+
name: "list-meetings",
|
|
201
|
+
description: "List detected meetings with duration, app, and attendees. " +
|
|
202
|
+
"Returns meetings detected via app focus (Zoom, Meet, Teams) and audio. " +
|
|
203
|
+
"Only available when screenpipe runs in smart transcription mode.",
|
|
202
204
|
annotations: {
|
|
203
|
-
title: "
|
|
205
|
+
title: "List Meetings",
|
|
204
206
|
readOnlyHint: true,
|
|
205
207
|
},
|
|
206
208
|
inputSchema: {
|
|
207
209
|
type: "object",
|
|
208
210
|
properties: {
|
|
209
|
-
|
|
211
|
+
start_time: {
|
|
210
212
|
type: "string",
|
|
211
|
-
|
|
213
|
+
format: "date-time",
|
|
214
|
+
description: "ISO 8601 UTC start filter (e.g., 2024-01-15T10:00:00Z)",
|
|
212
215
|
},
|
|
213
|
-
|
|
216
|
+
end_time: {
|
|
214
217
|
type: "string",
|
|
215
|
-
|
|
216
|
-
description: "
|
|
218
|
+
format: "date-time",
|
|
219
|
+
description: "ISO 8601 UTC end filter (e.g., 2024-01-15T18:00:00Z)",
|
|
220
|
+
},
|
|
221
|
+
limit: {
|
|
222
|
+
type: "integer",
|
|
223
|
+
description: "Max results. Default: 20",
|
|
224
|
+
default: 20,
|
|
225
|
+
},
|
|
226
|
+
offset: {
|
|
227
|
+
type: "integer",
|
|
228
|
+
description: "Skip N results for pagination. Default: 0",
|
|
229
|
+
default: 0,
|
|
230
|
+
},
|
|
231
|
+
},
|
|
232
|
+
},
|
|
233
|
+
},
|
|
234
|
+
{
|
|
235
|
+
name: "activity-summary",
|
|
236
|
+
description: "Get a lightweight compressed activity overview for a time range (~200-500 tokens). " +
|
|
237
|
+
"Returns app usage (name, frame count, minutes), recent accessibility texts, and audio speaker summary. " +
|
|
238
|
+
"Use this FIRST for broad questions like 'what was I doing?' before drilling into search-content or search-elements. " +
|
|
239
|
+
"Much cheaper than search-content for getting an overview.",
|
|
240
|
+
annotations: {
|
|
241
|
+
title: "Activity Summary",
|
|
242
|
+
readOnlyHint: true,
|
|
243
|
+
},
|
|
244
|
+
inputSchema: {
|
|
245
|
+
type: "object",
|
|
246
|
+
properties: {
|
|
247
|
+
start_time: {
|
|
248
|
+
type: "string",
|
|
249
|
+
format: "date-time",
|
|
250
|
+
description: "Start of time range in ISO 8601 UTC (e.g., 2024-01-15T10:00:00Z)",
|
|
251
|
+
},
|
|
252
|
+
end_time: {
|
|
253
|
+
type: "string",
|
|
254
|
+
format: "date-time",
|
|
255
|
+
description: "End of time range in ISO 8601 UTC (e.g., 2024-01-15T18:00:00Z)",
|
|
217
256
|
},
|
|
218
257
|
app_name: {
|
|
219
258
|
type: "string",
|
|
220
|
-
description: "
|
|
259
|
+
description: "Optional app name filter (e.g., 'Google Chrome', 'VS Code')",
|
|
221
260
|
},
|
|
222
|
-
|
|
261
|
+
},
|
|
262
|
+
required: ["start_time", "end_time"],
|
|
263
|
+
},
|
|
264
|
+
},
|
|
265
|
+
{
|
|
266
|
+
name: "search-elements",
|
|
267
|
+
description: "Search structured UI elements (accessibility tree nodes and OCR text blocks). " +
|
|
268
|
+
"Returns ~100-500 bytes per element — much lighter than search-content for targeted lookups. " +
|
|
269
|
+
"Each element has: id, frame_id, source (accessibility/ocr), role (AXButton, AXStaticText, AXLink, etc.), text, bounds, depth.\n\n" +
|
|
270
|
+
"Use for: finding specific buttons, links, text fields, or UI components. " +
|
|
271
|
+
"Prefer this over search-content when you need structural UI detail rather than full screen text.",
|
|
272
|
+
annotations: {
|
|
273
|
+
title: "Search Elements",
|
|
274
|
+
readOnlyHint: true,
|
|
275
|
+
},
|
|
276
|
+
inputSchema: {
|
|
277
|
+
type: "object",
|
|
278
|
+
properties: {
|
|
279
|
+
q: {
|
|
223
280
|
type: "string",
|
|
224
|
-
description: "
|
|
281
|
+
description: "Full-text search query across element text. Optional.",
|
|
282
|
+
},
|
|
283
|
+
frame_id: {
|
|
284
|
+
type: "integer",
|
|
285
|
+
description: "Filter to elements from a specific frame",
|
|
286
|
+
},
|
|
287
|
+
source: {
|
|
288
|
+
type: "string",
|
|
289
|
+
enum: ["accessibility", "ocr"],
|
|
290
|
+
description: "Filter by element source: 'accessibility' (structured tree) or 'ocr' (text blocks)",
|
|
291
|
+
},
|
|
292
|
+
role: {
|
|
293
|
+
type: "string",
|
|
294
|
+
description: "Filter by element role (e.g., 'AXButton', 'AXStaticText', 'AXLink', 'AXTextField', 'line')",
|
|
225
295
|
},
|
|
226
296
|
start_time: {
|
|
227
297
|
type: "string",
|
|
228
298
|
format: "date-time",
|
|
229
|
-
description: "ISO 8601 UTC start time
|
|
299
|
+
description: "ISO 8601 UTC start time",
|
|
230
300
|
},
|
|
231
301
|
end_time: {
|
|
232
302
|
type: "string",
|
|
233
303
|
format: "date-time",
|
|
234
|
-
description: "ISO 8601 UTC end time
|
|
304
|
+
description: "ISO 8601 UTC end time",
|
|
305
|
+
},
|
|
306
|
+
app_name: {
|
|
307
|
+
type: "string",
|
|
308
|
+
description: "Filter by app name",
|
|
235
309
|
},
|
|
236
310
|
limit: {
|
|
237
311
|
type: "integer",
|
|
@@ -247,27 +321,23 @@ const BASE_TOOLS = [
|
|
|
247
321
|
},
|
|
248
322
|
},
|
|
249
323
|
{
|
|
250
|
-
name: "
|
|
251
|
-
description: "Get
|
|
252
|
-
"
|
|
324
|
+
name: "frame-context",
|
|
325
|
+
description: "Get accessibility text, parsed tree nodes, and extracted URLs for a specific frame. " +
|
|
326
|
+
"Falls back to OCR data for legacy frames without accessibility data. " +
|
|
327
|
+
"Use after finding a frame_id from search-content or search-elements to get full structural detail and URLs.",
|
|
253
328
|
annotations: {
|
|
254
|
-
title: "
|
|
329
|
+
title: "Frame Context",
|
|
255
330
|
readOnlyHint: true,
|
|
256
331
|
},
|
|
257
332
|
inputSchema: {
|
|
258
333
|
type: "object",
|
|
259
334
|
properties: {
|
|
260
|
-
|
|
261
|
-
type: "
|
|
262
|
-
|
|
263
|
-
description: "ISO 8601 UTC start time for stats period",
|
|
264
|
-
},
|
|
265
|
-
end_time: {
|
|
266
|
-
type: "string",
|
|
267
|
-
format: "date-time",
|
|
268
|
-
description: "ISO 8601 UTC end time for stats period",
|
|
335
|
+
frame_id: {
|
|
336
|
+
type: "integer",
|
|
337
|
+
description: "The frame ID to get context for (from search results)",
|
|
269
338
|
},
|
|
270
339
|
},
|
|
340
|
+
required: ["frame_id"],
|
|
271
341
|
},
|
|
272
342
|
},
|
|
273
343
|
];
|
|
@@ -337,63 +407,63 @@ server.setRequestHandler(types_js_1.ReadResourceRequestSchema, async (request) =
|
|
|
337
407
|
mimeType: "text/markdown",
|
|
338
408
|
text: `# Screenpipe Search Guide
|
|
339
409
|
|
|
340
|
-
##
|
|
410
|
+
## Data Modalities
|
|
341
411
|
|
|
342
|
-
Screenpipe captures
|
|
343
|
-
1. **
|
|
344
|
-
2. **
|
|
345
|
-
3. **
|
|
412
|
+
Screenpipe captures four types of data:
|
|
413
|
+
1. **Accessibility** - Screen text via accessibility APIs (primary, preferred for screen content)
|
|
414
|
+
2. **OCR** - Screen text from screenshots (legacy fallback for apps without accessibility support)
|
|
415
|
+
3. **Audio** - Transcribed speech from microphone/system audio
|
|
416
|
+
4. **Input** - Keyboard input, mouse clicks, app switches, clipboard (macOS)
|
|
346
417
|
|
|
347
418
|
## Quick Start
|
|
348
419
|
- **Get recent activity**: Call search-content with no parameters
|
|
349
|
-
- **Search text**: \`{"q": "search term", "content_type": "
|
|
350
|
-
- **Get keyboard input**:
|
|
351
|
-
- **
|
|
420
|
+
- **Search screen text**: \`{"q": "search term", "content_type": "all"}\`
|
|
421
|
+
- **Get keyboard input**: \`{"content_type": "input"}\`
|
|
422
|
+
- **Get audio only**: \`{"content_type": "audio"}\`
|
|
352
423
|
|
|
353
|
-
## search-content
|
|
424
|
+
## search-content
|
|
354
425
|
| Parameter | Description | Default |
|
|
355
426
|
|-----------|-------------|---------|
|
|
356
427
|
| q | Search query | (none - returns all) |
|
|
357
|
-
| content_type | ocr/audio/
|
|
428
|
+
| content_type | all/ocr/audio/input/accessibility | all |
|
|
358
429
|
| limit | Max results | 10 |
|
|
359
430
|
| start_time | ISO 8601 UTC | (no filter) |
|
|
360
431
|
| end_time | ISO 8601 UTC | (no filter) |
|
|
361
432
|
| app_name | Filter by app | (no filter) |
|
|
362
433
|
| include_frames | Include screenshots | false |
|
|
363
434
|
|
|
364
|
-
##
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
| limit | Max results | 50 |
|
|
435
|
+
## Search Strategy (MANDATORY)
|
|
436
|
+
1. First search: ONLY use time params (start_time/end_time). No q, no app_name, no content_type. This gives ground truth of what's recorded.
|
|
437
|
+
2. Scan results to find correct app_name values and content patterns.
|
|
438
|
+
3. Only THEN narrow with filters using exact observed values. App names are case-sensitive and may differ from user input (e.g. "Discord" vs "Discord.exe").
|
|
439
|
+
4. The q param searches captured text (accessibility/OCR), NOT app names — an app can be visible without its name in the captured text.
|
|
440
|
+
5. NEVER report "no data found" after one filtered search. Verify with unfiltered time-only search first.
|
|
371
441
|
|
|
372
|
-
|
|
373
|
-
-
|
|
374
|
-
-
|
|
375
|
-
-
|
|
376
|
-
-
|
|
377
|
-
|
|
378
|
-
- \`scroll\`: Scroll events with delta values
|
|
442
|
+
## Progressive Disclosure (Token-Efficient Strategy)
|
|
443
|
+
1. **Start with activity-summary** (~200 tokens) for broad questions ("what was I doing?")
|
|
444
|
+
2. **Narrow with search-content** (~500-1000 tokens) using filters from step 1
|
|
445
|
+
3. **Drill into search-elements** (~200 tokens each) for structural UI detail (buttons, links)
|
|
446
|
+
4. **Fetch frame-context** for URLs and accessibility tree of specific frames
|
|
447
|
+
5. **Screenshots** (include_frames=true) only when text isn't enough
|
|
379
448
|
|
|
380
449
|
## Tips
|
|
381
450
|
1. Read screenpipe://context first to get current timestamps
|
|
382
|
-
2. Use search-
|
|
383
|
-
3. Use
|
|
384
|
-
4.
|
|
451
|
+
2. Use activity-summary before search-content for broad overview questions
|
|
452
|
+
3. Use search-elements instead of search-content for targeted UI lookups (10x lighter)
|
|
453
|
+
4. Use content_type=input for "what did I type?" queries
|
|
454
|
+
5. Use content_type=accessibility for accessibility tree text
|
|
455
|
+
6. For large aggregations (e.g. "what apps did I use today?"), paginate with offset or suggest the user run raw SQL via \`curl -X POST http://localhost:3030/raw_sql\` for efficient GROUP BY queries
|
|
385
456
|
|
|
386
|
-
##
|
|
387
|
-
When showing search results to users, create clickable links to
|
|
457
|
+
## Deep Links (Clickable References)
|
|
458
|
+
When showing search results to users, create clickable links so they can jump to that exact moment.
|
|
388
459
|
|
|
389
|
-
**
|
|
460
|
+
**ALWAYS prefer frame-based links for OCR results** (frame IDs are exact DB keys):
|
|
461
|
+
- \`[10:30 AM — Chrome](screenpipe://frame/12345)\` — use \`content.frame_id\` from OCR results
|
|
390
462
|
|
|
391
|
-
**
|
|
392
|
-
- \`[
|
|
393
|
-
- \`[yesterday at 3pm](screenpipe://timeline?timestamp=2024-01-14T15:00:00Z)\`
|
|
463
|
+
**Use timestamp links only for audio results** (which have no frame_id):
|
|
464
|
+
- \`[meeting at 3pm](screenpipe://timeline?timestamp=2024-01-15T15:00:00Z)\` — use exact \`timestamp\` from audio results
|
|
394
465
|
|
|
395
|
-
|
|
396
|
-
Always use the exact timestamp from search results when creating these links.`,
|
|
466
|
+
**NEVER fabricate frame IDs or timestamps.** Only use values copied from actual search results.`,
|
|
397
467
|
},
|
|
398
468
|
],
|
|
399
469
|
};
|
|
@@ -528,7 +598,7 @@ Current time: ${dateInfo.isoDate}
|
|
|
528
598
|
Use search-content with:
|
|
529
599
|
- app_name: "${app}"
|
|
530
600
|
${query ? `- q: "${query}"` : "- No query filter"}
|
|
531
|
-
- content_type: "
|
|
601
|
+
- content_type: "all"
|
|
532
602
|
- limit: 50`,
|
|
533
603
|
},
|
|
534
604
|
},
|
|
@@ -636,8 +706,8 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
|
|
|
636
706
|
`${content.transcription || ""}` +
|
|
637
707
|
tagsStr);
|
|
638
708
|
}
|
|
639
|
-
else if (result.type === "UI") {
|
|
640
|
-
formattedResults.push(`[
|
|
709
|
+
else if (result.type === "UI" || result.type === "Accessibility") {
|
|
710
|
+
formattedResults.push(`[Accessibility] ${content.app_name || "?"} | ${content.window_name || "?"}\n` +
|
|
641
711
|
`${content.timestamp || ""}\n` +
|
|
642
712
|
`${content.text || ""}`);
|
|
643
713
|
}
|
|
@@ -806,123 +876,151 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
|
|
|
806
876
|
};
|
|
807
877
|
}
|
|
808
878
|
}
|
|
809
|
-
case "
|
|
879
|
+
case "list-meetings": {
|
|
810
880
|
const params = new URLSearchParams();
|
|
811
881
|
for (const [key, value] of Object.entries(args)) {
|
|
812
882
|
if (value !== null && value !== undefined) {
|
|
813
|
-
// Map event_type to the API parameter
|
|
814
883
|
params.append(key, String(value));
|
|
815
884
|
}
|
|
816
885
|
}
|
|
817
|
-
const response = await fetchAPI(`/
|
|
886
|
+
const response = await fetchAPI(`/meetings?${params.toString()}`);
|
|
818
887
|
if (!response.ok) {
|
|
819
888
|
throw new Error(`HTTP error: ${response.status}`);
|
|
820
889
|
}
|
|
821
|
-
const
|
|
822
|
-
|
|
823
|
-
const pagination = data.pagination || {};
|
|
824
|
-
if (events.length === 0) {
|
|
890
|
+
const meetings = await response.json();
|
|
891
|
+
if (!Array.isArray(meetings) || meetings.length === 0) {
|
|
825
892
|
return {
|
|
826
893
|
content: [
|
|
827
894
|
{
|
|
828
895
|
type: "text",
|
|
829
|
-
text: "No
|
|
830
|
-
"1. macOS with Accessibility permissions granted\n" +
|
|
831
|
-
"2. UI Events enabled in screenpipe settings\n" +
|
|
832
|
-
"Try: broader time range or different event_type filter.",
|
|
896
|
+
text: "No meetings found. Make sure screenpipe is running in smart transcription mode.",
|
|
833
897
|
},
|
|
834
898
|
],
|
|
835
899
|
};
|
|
836
900
|
}
|
|
837
|
-
const
|
|
838
|
-
|
|
839
|
-
const
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
]
|
|
844
|
-
|
|
845
|
-
if (event.event_type === "text" && event.text_content) {
|
|
846
|
-
details = `Text: "${event.text_content}"`;
|
|
847
|
-
}
|
|
848
|
-
else if (event.event_type === "click") {
|
|
849
|
-
details = `Click at (${event.x || 0}, ${event.y || 0})`;
|
|
850
|
-
if (event.element?.label) {
|
|
851
|
-
details += ` on "${event.element.label}"`;
|
|
852
|
-
}
|
|
853
|
-
}
|
|
854
|
-
else if (event.event_type === "clipboard" && event.text_content) {
|
|
855
|
-
details = `Clipboard: "${event.text_content.substring(0, 100)}${event.text_content.length > 100 ? "..." : ""}"`;
|
|
856
|
-
}
|
|
857
|
-
else if (event.event_type === "app_switch" || event.event_type === "window_focus") {
|
|
858
|
-
details = `Switched to: ${event.app_name}${event.window_title ? ` - ${event.window_title}` : ""}`;
|
|
859
|
-
}
|
|
860
|
-
else if (event.event_type === "scroll") {
|
|
861
|
-
details = `Scroll: dx=${event.delta_x || 0}, dy=${event.delta_y || 0}`;
|
|
862
|
-
}
|
|
863
|
-
formattedEvents.push(`${parts.join(" ")}\n` +
|
|
864
|
-
`${event.timestamp || ""}\n` +
|
|
865
|
-
`${details}`);
|
|
866
|
-
}
|
|
867
|
-
const header = `UI Events: ${events.length}/${pagination.total || "?"}` +
|
|
868
|
-
(pagination.total > events.length ? ` (use offset=${(pagination.offset || 0) + events.length} for more)` : "");
|
|
901
|
+
const formatted = meetings.map((m) => {
|
|
902
|
+
const start = m.meeting_start;
|
|
903
|
+
const end = m.meeting_end || "ongoing";
|
|
904
|
+
const app = m.meeting_app;
|
|
905
|
+
const title = m.title ? ` — ${m.title}` : "";
|
|
906
|
+
const attendees = m.attendees ? `\nAttendees: ${m.attendees}` : "";
|
|
907
|
+
return `[${m.detection_source}] ${app}${title}\n ${start} → ${end}${attendees}`;
|
|
908
|
+
});
|
|
869
909
|
return {
|
|
870
910
|
content: [
|
|
871
911
|
{
|
|
872
912
|
type: "text",
|
|
873
|
-
text:
|
|
913
|
+
text: `Meetings: ${meetings.length}\n\n${formatted.join("\n---\n")}`,
|
|
874
914
|
},
|
|
875
915
|
],
|
|
876
916
|
};
|
|
877
917
|
}
|
|
878
|
-
case "
|
|
918
|
+
case "activity-summary": {
|
|
879
919
|
const params = new URLSearchParams();
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
920
|
+
for (const [key, value] of Object.entries(args)) {
|
|
921
|
+
if (value !== null && value !== undefined) {
|
|
922
|
+
params.append(key, String(value));
|
|
923
|
+
}
|
|
924
|
+
}
|
|
925
|
+
const response = await fetchAPI(`/activity-summary?${params.toString()}`);
|
|
885
926
|
if (!response.ok) {
|
|
886
927
|
throw new Error(`HTTP error: ${response.status}`);
|
|
887
928
|
}
|
|
888
|
-
const
|
|
889
|
-
|
|
929
|
+
const data = await response.json();
|
|
930
|
+
// Format apps
|
|
931
|
+
const appsLines = (data.apps || []).map((a) => ` ${a.name}: ${a.minutes} min (${a.frame_count} frames)`);
|
|
932
|
+
// Format audio
|
|
933
|
+
const speakerLines = (data.audio_summary?.speakers || []).map((s) => ` ${s.name}: ${s.segment_count} segments`);
|
|
934
|
+
// Format recent texts
|
|
935
|
+
const textLines = (data.recent_texts || []).map((t) => ` [${t.app_name}] ${t.text}`);
|
|
936
|
+
const summary = [
|
|
937
|
+
`Activity Summary (${data.time_range?.start} → ${data.time_range?.end})`,
|
|
938
|
+
`Total frames: ${data.total_frames}`,
|
|
939
|
+
"",
|
|
940
|
+
"Apps:",
|
|
941
|
+
...(appsLines.length ? appsLines : [" (none)"]),
|
|
942
|
+
"",
|
|
943
|
+
`Audio: ${data.audio_summary?.segment_count || 0} segments`,
|
|
944
|
+
...(speakerLines.length ? speakerLines : []),
|
|
945
|
+
"",
|
|
946
|
+
"Recent texts:",
|
|
947
|
+
...(textLines.length ? textLines.slice(0, 10) : [" (none)"]),
|
|
948
|
+
].join("\n");
|
|
949
|
+
return { content: [{ type: "text", text: summary }] };
|
|
950
|
+
}
|
|
951
|
+
case "search-elements": {
|
|
952
|
+
const params = new URLSearchParams();
|
|
953
|
+
for (const [key, value] of Object.entries(args)) {
|
|
954
|
+
if (value !== null && value !== undefined) {
|
|
955
|
+
params.append(key, String(value));
|
|
956
|
+
}
|
|
957
|
+
}
|
|
958
|
+
const response = await fetchAPI(`/elements?${params.toString()}`);
|
|
959
|
+
if (!response.ok) {
|
|
960
|
+
throw new Error(`HTTP error: ${response.status}`);
|
|
961
|
+
}
|
|
962
|
+
const data = await response.json();
|
|
963
|
+
const elements = data.data || [];
|
|
964
|
+
const pagination = data.pagination || {};
|
|
965
|
+
if (elements.length === 0) {
|
|
890
966
|
return {
|
|
891
967
|
content: [
|
|
892
968
|
{
|
|
893
969
|
type: "text",
|
|
894
|
-
text: "No
|
|
970
|
+
text: "No elements found. Try: broader search, different role/source, or wider time range.",
|
|
895
971
|
},
|
|
896
972
|
],
|
|
897
973
|
};
|
|
898
974
|
}
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
byApp[app] = { app, events: {}, total: 0 };
|
|
905
|
-
}
|
|
906
|
-
byApp[app].events[stat.event_type] = stat.count;
|
|
907
|
-
byApp[app].total += stat.count;
|
|
908
|
-
}
|
|
909
|
-
// Sort by total events
|
|
910
|
-
const sorted = Object.values(byApp).sort((a, b) => b.total - a.total);
|
|
911
|
-
const lines = sorted.map(({ app, events, total }) => {
|
|
912
|
-
const eventDetails = Object.entries(events)
|
|
913
|
-
.map(([type, count]) => `${type}: ${count}`)
|
|
914
|
-
.join(", ");
|
|
915
|
-
return `${app}: ${total} events (${eventDetails})`;
|
|
975
|
+
const formatted = elements.map((e) => {
|
|
976
|
+
const boundsStr = e.bounds
|
|
977
|
+
? ` [${e.bounds.left.toFixed(2)},${e.bounds.top.toFixed(2)} ${e.bounds.width.toFixed(2)}x${e.bounds.height.toFixed(2)}]`
|
|
978
|
+
: "";
|
|
979
|
+
return `[${e.source}] ${e.role} (frame:${e.frame_id}, depth:${e.depth})${boundsStr}\n ${e.text || "(no text)"}`;
|
|
916
980
|
});
|
|
981
|
+
const header = `Elements: ${elements.length}/${pagination.total || "?"}` +
|
|
982
|
+
(pagination.total > elements.length
|
|
983
|
+
? ` (use offset=${(pagination.offset || 0) + elements.length} for more)`
|
|
984
|
+
: "");
|
|
917
985
|
return {
|
|
918
|
-
content: [
|
|
919
|
-
{
|
|
920
|
-
type: "text",
|
|
921
|
-
text: `UI Event Statistics:\n\n${lines.join("\n")}`,
|
|
922
|
-
},
|
|
923
|
-
],
|
|
986
|
+
content: [{ type: "text", text: header + "\n\n" + formatted.join("\n---\n") }],
|
|
924
987
|
};
|
|
925
988
|
}
|
|
989
|
+
case "frame-context": {
|
|
990
|
+
const frameId = args.frame_id;
|
|
991
|
+
if (!frameId) {
|
|
992
|
+
return {
|
|
993
|
+
content: [{ type: "text", text: "Error: frame_id is required" }],
|
|
994
|
+
};
|
|
995
|
+
}
|
|
996
|
+
const response = await fetchAPI(`/frames/${frameId}/context`);
|
|
997
|
+
if (!response.ok) {
|
|
998
|
+
throw new Error(`HTTP error: ${response.status}`);
|
|
999
|
+
}
|
|
1000
|
+
const data = await response.json();
|
|
1001
|
+
const lines = [
|
|
1002
|
+
`Frame ${data.frame_id} (source: ${data.text_source})`,
|
|
1003
|
+
];
|
|
1004
|
+
if (data.urls?.length) {
|
|
1005
|
+
lines.push("", "URLs:", ...data.urls.map((u) => ` ${u}`));
|
|
1006
|
+
}
|
|
1007
|
+
if (data.nodes?.length) {
|
|
1008
|
+
lines.push("", `Nodes: ${data.nodes.length}`);
|
|
1009
|
+
for (const node of data.nodes.slice(0, 50)) {
|
|
1010
|
+
const indent = " ".repeat(Math.min(node.depth, 5));
|
|
1011
|
+
lines.push(`${indent}[${node.role}] ${node.text}`);
|
|
1012
|
+
}
|
|
1013
|
+
if (data.nodes.length > 50) {
|
|
1014
|
+
lines.push(` ... and ${data.nodes.length - 50} more nodes`);
|
|
1015
|
+
}
|
|
1016
|
+
}
|
|
1017
|
+
if (data.text) {
|
|
1018
|
+
// Truncate to avoid massive outputs
|
|
1019
|
+
const truncated = data.text.length > 2000 ? data.text.substring(0, 2000) + "..." : data.text;
|
|
1020
|
+
lines.push("", "Full text:", truncated);
|
|
1021
|
+
}
|
|
1022
|
+
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
1023
|
+
}
|
|
926
1024
|
default:
|
|
927
1025
|
throw new Error(`Unknown tool: ${name}`);
|
|
928
1026
|
}
|
package/manifest.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"manifest_version": "0.3",
|
|
3
3
|
"name": "screenpipe",
|
|
4
4
|
"display_name": "Screenpipe",
|
|
5
|
-
"version": "0.8.
|
|
5
|
+
"version": "0.8.3",
|
|
6
6
|
"description": "Search your screen recordings and audio transcriptions with AI",
|
|
7
7
|
"long_description": "Screenpipe is a 24/7 screen and audio recorder that lets you search everything you've seen or heard. This extension connects Claude to your local screenpipe instance, enabling AI-powered search through your digital memory.",
|
|
8
8
|
"author": {
|
|
@@ -33,6 +33,22 @@
|
|
|
33
33
|
{
|
|
34
34
|
"name": "export-video",
|
|
35
35
|
"description": "Export screen recordings as MP4 video for a specific time range"
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
"name": "list-meetings",
|
|
39
|
+
"description": "List detected meetings with duration, app, and attendees"
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
"name": "activity-summary",
|
|
43
|
+
"description": "Lightweight compressed activity overview for a time range (apps, recent texts, audio summary)"
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
"name": "search-elements",
|
|
47
|
+
"description": "Search structured UI elements (accessibility tree nodes and OCR text blocks)"
|
|
48
|
+
},
|
|
49
|
+
{
|
|
50
|
+
"name": "frame-context",
|
|
51
|
+
"description": "Get accessibility text, parsed tree nodes, and extracted URLs for a specific frame"
|
|
36
52
|
}
|
|
37
53
|
],
|
|
38
54
|
"compatibility": {
|