screenpipe-mcp 0.9.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/dist/index.js +452 -1329
  2. package/package.json +1 -1
  3. package/src/index.ts +514 -1413
package/dist/index.js CHANGED
@@ -44,19 +44,6 @@ const ws_1 = require("ws");
44
44
  const fs = __importStar(require("fs"));
45
45
  const path = __importStar(require("path"));
46
46
  const os = __importStar(require("os"));
47
- // Helper to get current date in ISO format
48
- function getCurrentDateInfo() {
49
- const now = new Date();
50
- return {
51
- isoDate: now.toISOString(),
52
- localDate: now.toLocaleDateString("en-US", {
53
- weekday: "long",
54
- year: "numeric",
55
- month: "long",
56
- day: "numeric",
57
- }),
58
- };
59
- }
60
47
  // Parse command line arguments
61
48
  const args = process.argv.slice(2);
62
49
  let port = 3030;
@@ -69,1199 +56,307 @@ const SCREENPIPE_API = `http://localhost:${port}`;
69
56
  // Initialize server
70
57
  const server = new index_js_1.Server({
71
58
  name: "screenpipe",
72
- version: "0.8.5",
59
+ version: "0.9.0",
73
60
  }, {
74
61
  capabilities: {
75
62
  tools: {},
76
- prompts: {},
77
63
  resources: {},
78
64
  },
79
65
  });
80
- // Tool definitions
81
- const BASE_TOOLS = [
66
+ // ---------------------------------------------------------------------------
67
+ // Tools
68
+ // ---------------------------------------------------------------------------
69
+ const TOOLS = [
82
70
  {
83
71
  name: "search-content",
84
- description: "Search screenpipe's recorded content: screen text (accessibility APIs, with OCR fallback), audio transcriptions, and UI elements. " +
72
+ description: "Search screen text, audio transcriptions, input events, and memories. " +
85
73
  "Returns timestamped results with app context. " +
86
- "Call with no parameters to get recent activity. " +
87
- "Use the 'screenpipe://context' resource for current time when building time-based queries.\n\n" +
88
- "WHEN TO USE WHICH content_type:\n" +
89
- "- For meetings/calls/conversations: content_type='audio', do NOT use q param (transcriptions are noisy, q filters too aggressively)\n" +
90
- "- For screen text/reading: content_type='all' or 'accessibility'\n" +
91
- "- For time spent/app usage questions: use activity-summary tool instead (this tool returns content, not time stats)\n\n" +
92
- "SEARCH STRATEGY: First search with ONLY time params (start_time/end_time) — no q, no app_name, no content_type. " +
93
- "This gives ground truth of what's recorded. Scan results to find correct app_name values, then narrow with filters using exact observed values. " +
94
- "App names are case-sensitive (e.g. 'Discord' vs 'Discord.exe'). " +
95
- "The q param searches captured text, NOT app names. NEVER report 'no data' after one filtered search — verify with unfiltered time-only search first.\n\n" +
96
- "DEEP LINKS: When referencing specific moments, create clickable links using IDs from search results:\n" +
97
- "- OCR results (PREFERRED): [10:30 AM — Chrome](screenpipe://frame/12345) — use content.frame_id from the result\n" +
98
- "- Audio results: [meeting at 3pm](screenpipe://timeline?timestamp=2024-01-15T15:00:00Z) — use exact timestamp from result\n" +
99
- "NEVER fabricate frame IDs or timestamps — only use values from actual search results.",
100
- annotations: {
101
- title: "Search Content",
102
- readOnlyHint: true,
103
- },
74
+ "IMPORTANT: prefer activity-summary for broad questions ('what was I doing?'). " +
75
+ "Use search-content only when you need specific text/content. " +
76
+ "Start with limit=5, increase only if needed. Results can be large — use max_content_length=500 to truncate.",
77
+ annotations: { title: "Search Content", readOnlyHint: true, openWorldHint: false, idempotentHint: true },
104
78
  inputSchema: {
105
79
  type: "object",
106
80
  properties: {
107
81
  q: {
108
82
  type: "string",
109
- description: "Search query (full-text search on captured text). Optional - omit to return all content in time range. IMPORTANT: Do NOT use q for audio/meeting searches — transcriptions are noisy and q filters too aggressively. Only use q when searching for specific text the user saw on screen.",
83
+ description: "Full-text search query. Omit to return all content in time range. Avoid for audio — transcriptions are noisy, q filters too aggressively.",
110
84
  },
111
85
  content_type: {
112
86
  type: "string",
113
- enum: ["all", "ocr", "audio", "input", "accessibility"],
114
- description: "Content type filter: 'audio' (transcriptions — use for meetings/calls/conversations), 'accessibility' (accessibility tree text, preferred for screen content), 'ocr' (screen text via OCR, legacy fallback), 'input' (clicks, keystrokes, clipboard, app switches), 'all'. Default: 'all'. For meeting/call queries, ALWAYS use 'audio'.",
87
+ enum: ["all", "ocr", "audio", "input", "accessibility", "memory"],
88
+ description: "Filter by content type. 'accessibility' is preferred for screen text (OS-native). 'ocr' is fallback for apps without accessibility support. Default: 'all'.",
115
89
  default: "all",
116
90
  },
117
- limit: {
118
- type: "integer",
119
- description: "Max results. Default: 10",
120
- default: 10,
121
- },
122
- offset: {
123
- type: "integer",
124
- description: "Skip N results for pagination. Default: 0",
125
- default: 0,
126
- },
91
+ limit: { type: "integer", description: "Max results (default 10, max 20). Start with 5 for exploration.", default: 10 },
92
+ offset: { type: "integer", description: "Pagination offset. Use when results say 'use offset=N for more'.", default: 0 },
127
93
  start_time: {
128
94
  type: "string",
129
- format: "date-time",
130
- description: "Start time: ISO 8601 UTC (e.g., 2024-01-15T10:00:00Z) or relative (e.g., '16h ago', '2d ago', 'now')",
95
+ description: "ISO 8601 UTC or relative (e.g. '2h ago', '1d ago'). Always provide to avoid scanning entire history.",
131
96
  },
132
97
  end_time: {
133
98
  type: "string",
134
- format: "date-time",
135
- description: "End time: ISO 8601 UTC (e.g., 2024-01-15T18:00:00Z) or relative (e.g., 'now', '1h ago')",
136
- },
137
- app_name: {
138
- type: "string",
139
- description: "Filter by app (e.g., 'Google Chrome', 'Slack', 'zoom.us')",
140
- },
141
- window_name: {
142
- type: "string",
143
- description: "Filter by window title",
144
- },
145
- min_length: {
146
- type: "integer",
147
- description: "Minimum content length in characters",
148
- },
149
- max_length: {
150
- type: "integer",
151
- description: "Maximum content length in characters",
99
+ description: "ISO 8601 UTC or relative (e.g. 'now'). Defaults to now.",
152
100
  },
101
+ app_name: { type: "string", description: "Filter by app name (e.g. 'Google Chrome', 'Slack', 'zoom.us'). Case-sensitive." },
102
+ window_name: { type: "string", description: "Filter by window title substring" },
103
+ min_length: { type: "integer", description: "Min content length in characters" },
104
+ max_length: { type: "integer", description: "Max content length in characters" },
153
105
  include_frames: {
154
106
  type: "boolean",
155
- description: "Include base64 screenshots (OCR only). Default: false",
107
+ description: "Include base64 screenshots (OCR only). Warning: large response.",
156
108
  default: false,
157
109
  },
158
- speaker_ids: {
159
- type: "string",
160
- description: "Comma-separated speaker IDs to filter audio results (e.g., '1,2,3')",
161
- },
162
- speaker_name: {
163
- type: "string",
164
- description: "Filter audio by speaker name (case-insensitive partial match)",
165
- },
110
+ speaker_ids: { type: "string", description: "Comma-separated speaker IDs to filter audio" },
111
+ speaker_name: { type: "string", description: "Filter audio by speaker name (case-insensitive partial match)" },
166
112
  max_content_length: {
167
113
  type: "integer",
168
- description: "Truncate each result's text/transcription to this many characters using middle-truncation (keeps first half + last half). Useful for limiting token usage with small-context models.",
114
+ description: "Truncate each result's text via middle-truncation. Use 200-500 to keep responses compact.",
169
115
  },
170
116
  },
171
117
  },
172
118
  },
173
119
  {
174
- name: "export-video",
175
- description: "Export a video of screen recordings for a specific time range. " +
176
- "Creates an MP4 video from the recorded frames between the start and end times.\n\n" +
177
- "IMPORTANT: Use ISO 8601 UTC timestamps (e.g., 2024-01-15T10:00:00Z) or relative times (e.g., '16h ago', 'now')\n\n" +
178
- "EXAMPLES:\n" +
179
- "- Last 30 minutes: Calculate timestamps from current time\n" +
180
- "- Specific meeting: Use the meeting's start and end times in UTC",
181
- annotations: {
182
- title: "Export Video",
183
- destructiveHint: true,
120
+ name: "list-meetings",
121
+ description: "List detected meetings (Zoom, Teams, Meet, etc.) with duration, app, and attendees. " +
122
+ "Only available when screenpipe runs in smart transcription mode.",
123
+ annotations: { title: "List Meetings", readOnlyHint: true, openWorldHint: false, idempotentHint: true },
124
+ inputSchema: {
125
+ type: "object",
126
+ properties: {
127
+ start_time: { type: "string", description: "ISO 8601 UTC or relative (e.g. '1d ago')" },
128
+ end_time: { type: "string", description: "ISO 8601 UTC or relative" },
129
+ limit: { type: "integer", description: "Max results (default 20)", default: 20 },
130
+ offset: { type: "integer", description: "Pagination offset", default: 0 },
131
+ },
184
132
  },
133
+ },
134
+ {
135
+ name: "activity-summary",
136
+ description: "Lightweight activity overview (~200-500 tokens): app usage with active minutes, audio speakers, recent texts. " +
137
+ "USE THIS FIRST for broad questions: 'what was I doing?', 'how long on X?', 'which apps?'. " +
138
+ "Only escalate to search-content if you need specific text content.",
139
+ annotations: { title: "Activity Summary", readOnlyHint: true, openWorldHint: false, idempotentHint: true },
185
140
  inputSchema: {
186
141
  type: "object",
187
142
  properties: {
188
- start_time: {
189
- type: "string",
190
- format: "date-time",
191
- description: "Start time: ISO 8601 UTC (e.g., '2024-01-15T10:00:00Z') or relative (e.g., '16h ago', 'now')",
192
- },
193
- end_time: {
194
- type: "string",
195
- format: "date-time",
196
- description: "End time: ISO 8601 UTC (e.g., '2024-01-15T10:30:00Z') or relative (e.g., 'now', '1h ago')",
197
- },
198
- fps: {
199
- type: "number",
200
- description: "Frames per second for the output video. Lower values (0.5-1.0) create smaller files, higher values (5-10) create smoother playback. Default: 1.0",
201
- default: 1.0,
202
- },
143
+ start_time: { type: "string", description: "ISO 8601 UTC or relative (e.g. '3h ago')" },
144
+ end_time: { type: "string", description: "ISO 8601 UTC or relative (e.g. 'now')" },
145
+ app_name: { type: "string", description: "Optional app name filter to focus on one app" },
203
146
  },
204
147
  required: ["start_time", "end_time"],
205
148
  },
206
149
  },
207
150
  {
208
- name: "list-meetings",
209
- description: "List detected meetings with duration, app, and attendees. " +
210
- "Returns meetings detected via app focus (Zoom, Meet, Teams) and audio. " +
211
- "Only available when screenpipe runs in smart transcription mode.",
212
- annotations: {
213
- title: "List Meetings",
214
- readOnlyHint: true,
215
- },
151
+ name: "search-elements",
152
+ description: "Search UI elements (buttons, links, text fields) from the accessibility tree. " +
153
+ "Lighter than search-content for targeted UI lookups. " +
154
+ "Use when you need to find specific UI controls or page structure, not general content.",
155
+ annotations: { title: "Search Elements", readOnlyHint: true, openWorldHint: false, idempotentHint: true },
216
156
  inputSchema: {
217
157
  type: "object",
218
158
  properties: {
219
- start_time: {
220
- type: "string",
221
- format: "date-time",
222
- description: "Start filter: ISO 8601 UTC (e.g., 2024-01-15T10:00:00Z) or relative (e.g., '16h ago', 'now')",
223
- },
224
- end_time: {
159
+ q: { type: "string", description: "Full-text search on element text" },
160
+ frame_id: { type: "integer", description: "Filter to specific frame ID from search results" },
161
+ source: {
225
162
  type: "string",
226
- format: "date-time",
227
- description: "End filter: ISO 8601 UTC (e.g., 2024-01-15T18:00:00Z) or relative (e.g., 'now', '1h ago')",
228
- },
229
- limit: {
230
- type: "integer",
231
- description: "Max results. Default: 20",
232
- default: 20,
233
- },
234
- offset: {
235
- type: "integer",
236
- description: "Skip N results for pagination. Default: 0",
237
- default: 0,
238
- },
163
+ enum: ["accessibility", "ocr"],
164
+ description: "Element source. 'accessibility' is preferred (OS-native tree). 'ocr' for apps without a11y.",
165
+ },
166
+ role: { type: "string", description: "Element role filter (e.g. 'AXButton', 'AXLink', 'AXTextField')" },
167
+ start_time: { type: "string", description: "ISO 8601 UTC or relative" },
168
+ end_time: { type: "string", description: "ISO 8601 UTC or relative" },
169
+ app_name: { type: "string", description: "Filter by app name" },
170
+ limit: { type: "integer", description: "Max results (default 50). Start with 10-20.", default: 50 },
171
+ offset: { type: "integer", description: "Pagination offset", default: 0 },
239
172
  },
240
173
  },
241
174
  },
242
175
  {
243
- name: "activity-summary",
244
- description: "Get a lightweight compressed activity overview for a time range (~200-500 tokens). " +
245
- "Returns app usage (name, frame count, active minutes, first/last seen), recent accessibility texts, and audio speaker summary. " +
246
- "Minutes are based on active session time (consecutive frames with gaps < 5min count as active). " +
247
- "first_seen/last_seen show the wall-clock span per app.\n\n" +
248
- "USE THIS TOOL (not search-content or raw SQL) for:\n" +
249
- "- 'how long did I spend on X?' → active_minutes per app\n" +
250
- "- 'which apps did I use today?' app list sorted by active_minutes\n" +
251
- "- 'what was I doing?' → broad overview before drilling deeper\n" +
252
- "- Any time-spent or app-usage question\n\n" +
253
- "WARNING: Do NOT estimate time from raw frame counts or SQL queries — those are inaccurate. " +
254
- "This endpoint calculates actual active session time correctly.",
255
- annotations: {
256
- title: "Activity Summary",
257
- readOnlyHint: true,
176
+ name: "frame-context",
177
+ description: "Get full accessibility text, parsed tree nodes, and URLs for a specific frame ID. " +
178
+ "Use after search-content to get detailed context for a specific moment.",
179
+ annotations: { title: "Frame Context", readOnlyHint: true, openWorldHint: false, idempotentHint: true },
180
+ inputSchema: {
181
+ type: "object",
182
+ properties: {
183
+ frame_id: { type: "integer", description: "Frame ID from search-content results (content.frame_id field)" },
184
+ },
185
+ required: ["frame_id"],
258
186
  },
187
+ },
188
+ {
189
+ name: "export-video",
190
+ description: "Export an MP4 video of screen recordings for a time range. " +
191
+ "Returns the file path. Can take a few minutes for long ranges.",
192
+ annotations: { title: "Export Video", readOnlyHint: false, destructiveHint: false, openWorldHint: false },
259
193
  inputSchema: {
260
194
  type: "object",
261
195
  properties: {
262
- start_time: {
263
- type: "string",
264
- format: "date-time",
265
- description: "Start of time range: ISO 8601 UTC (e.g., 2024-01-15T10:00:00Z) or relative (e.g., '16h ago', 'now')",
266
- },
267
- end_time: {
268
- type: "string",
269
- format: "date-time",
270
- description: "End of time range: ISO 8601 UTC (e.g., 2024-01-15T18:00:00Z) or relative (e.g., 'now', '1h ago')",
271
- },
272
- app_name: {
273
- type: "string",
274
- description: "Optional app name filter (e.g., 'Google Chrome', 'VS Code')",
275
- },
196
+ start_time: { type: "string", description: "ISO 8601 UTC or relative" },
197
+ end_time: { type: "string", description: "ISO 8601 UTC or relative" },
198
+ fps: { type: "number", description: "Output FPS (default 1.0). Higher = smoother but larger file.", default: 1.0 },
276
199
  },
277
200
  required: ["start_time", "end_time"],
278
201
  },
279
202
  },
280
203
  {
281
- name: "search-elements",
282
- description: "Search structured UI elements (accessibility tree nodes and OCR text blocks). " +
283
- "Returns ~100-500 bytes per element — much lighter than search-content for targeted lookups. " +
284
- "Each element has: id, frame_id, source (accessibility/ocr), role (AXButton, AXStaticText, AXLink, etc.), text, bounds, depth.\n\n" +
285
- "Use for: finding specific buttons, links, text fields, or UI components. " +
286
- "Prefer this over search-content when you need structural UI detail rather than full screen text.",
287
- annotations: {
288
- title: "Search Elements",
289
- readOnlyHint: true,
290
- },
204
+ name: "update-memory",
205
+ description: "Create, update, or delete a persistent memory (facts, preferences, decisions the user wants to remember). " +
206
+ "To retrieve memories, use search-content with content_type='memory'. " +
207
+ "To create: provide content + tags. To update: provide id + fields to change. To delete: provide id + delete=true.",
208
+ annotations: { title: "Update Memory", readOnlyHint: false, destructiveHint: false, openWorldHint: false, idempotentHint: true },
291
209
  inputSchema: {
292
210
  type: "object",
293
211
  properties: {
294
- q: {
295
- type: "string",
296
- description: "Full-text search query across element text. Optional.",
297
- },
298
- frame_id: {
299
- type: "integer",
300
- description: "Filter to elements from a specific frame",
301
- },
302
- source: {
303
- type: "string",
304
- enum: ["accessibility", "ocr"],
305
- description: "Filter by element source: 'accessibility' (structured tree) or 'ocr' (text blocks)",
306
- },
307
- role: {
308
- type: "string",
309
- description: "Filter by element role (e.g., 'AXButton', 'AXStaticText', 'AXLink', 'AXTextField', 'line')",
310
- },
311
- start_time: {
312
- type: "string",
313
- format: "date-time",
314
- description: "Start time: ISO 8601 UTC or relative (e.g., '16h ago', 'now')",
315
- },
316
- end_time: {
317
- type: "string",
318
- format: "date-time",
319
- description: "End time: ISO 8601 UTC or relative (e.g., 'now', '1h ago')",
320
- },
321
- app_name: {
322
- type: "string",
323
- description: "Filter by app name",
324
- },
325
- limit: {
326
- type: "integer",
327
- description: "Max results. Default: 50",
328
- default: 50,
329
- },
330
- offset: {
331
- type: "integer",
332
- description: "Skip N results for pagination. Default: 0",
333
- default: 0,
334
- },
212
+ id: { type: "integer", description: "Memory ID — omit to create new, provide to update/delete" },
213
+ content: { type: "string", description: "Memory text (required for creation)" },
214
+ tags: { type: "array", items: { type: "string" }, description: "Categorization tags (e.g. ['work', 'project-x'])" },
215
+ importance: { type: "number", description: "0.0 (trivial) to 1.0 (critical). Default 0.5." },
216
+ source_context: { type: "object", description: "Optional metadata linking to source (app, timestamp, etc.)" },
217
+ delete: { type: "boolean", description: "Set true to delete the memory identified by id" },
335
218
  },
336
219
  },
337
220
  },
338
221
  {
339
- name: "frame-context",
340
- description: "Get accessibility text, parsed tree nodes, and extracted URLs for a specific frame. " +
341
- "Falls back to OCR data for legacy frames without accessibility data. " +
342
- "Use after finding a frame_id from search-content or search-elements to get full structural detail and URLs.",
343
- annotations: {
344
- title: "Frame Context",
345
- readOnlyHint: true,
346
- },
222
+ name: "send-notification",
223
+ description: "Send a notification to the screenpipe desktop UI. " +
224
+ "Use to alert the user about findings, completed tasks, or actions needing attention.",
225
+ annotations: { title: "Send Notification", readOnlyHint: false, destructiveHint: false, openWorldHint: false },
347
226
  inputSchema: {
348
227
  type: "object",
349
228
  properties: {
350
- frame_id: {
351
- type: "integer",
352
- description: "The frame ID to get context for (from search results)",
229
+ title: { type: "string", description: "Notification title (short, descriptive)" },
230
+ body: { type: "string", description: "Notification body (markdown supported)" },
231
+ pipe_name: { type: "string", description: "Name of the pipe/tool sending this notification" },
232
+ timeout_secs: { type: "integer", description: "Auto-dismiss after N seconds (default 20). Use 0 for persistent.", default: 20 },
233
+ actions: {
234
+ type: "array",
235
+ description: "Up to 5 action buttons. Each needs id, label, type ('pipe'|'api'|'deeplink'|'dismiss').",
236
+ items: {
237
+ type: "object",
238
+ properties: {
239
+ id: { type: "string", description: "Unique action ID" },
240
+ label: { type: "string", description: "Button label" },
241
+ type: { type: "string", enum: ["pipe", "api", "deeplink", "dismiss"], description: "Action type" },
242
+ pipe: { type: "string", description: "Pipe name to run (type=pipe)" },
243
+ context: { type: "object", description: "Context passed to pipe (type=pipe)" },
244
+ open_in_chat: { type: "boolean", description: "Open pipe run in chat UI instead of background (type=pipe)" },
245
+ url: { type: "string", description: "URL for api/deeplink actions" },
246
+ },
247
+ required: ["id", "label", "type"],
248
+ },
353
249
  },
354
250
  },
355
- required: ["frame_id"],
251
+ required: ["title", "pipe_name"],
356
252
  },
357
253
  },
358
254
  ];
359
- // List tools handler
360
255
  server.setRequestHandler(types_js_1.ListToolsRequestSchema, async () => {
361
- return { tools: BASE_TOOLS };
256
+ return { tools: TOOLS };
362
257
  });
363
- // MCP Resources - provide dynamic context data
258
+ // ---------------------------------------------------------------------------
259
+ // Resources — dynamic context only (no duplicated reference docs)
260
+ // ---------------------------------------------------------------------------
364
261
  const RESOURCES = [
365
262
  {
366
263
  uri: "screenpipe://context",
367
264
  name: "Current Context",
368
- description: "Current date/time and pre-computed timestamps for common time ranges",
265
+ description: "Current date/time, timezone, and pre-computed timestamps for common time ranges",
369
266
  mimeType: "application/json",
370
267
  },
371
268
  {
372
269
  uri: "screenpipe://guide",
373
270
  name: "Usage Guide",
374
- description: "How to use screenpipe search effectively",
375
- mimeType: "text/markdown",
376
- },
377
- {
378
- uri: "ui://search",
379
- name: "Search Dashboard",
380
- description: "Interactive search UI for exploring screen recordings and audio transcriptions",
381
- mimeType: "text/html",
382
- },
383
- {
384
- uri: "screenpipe://pipe-creation-guide",
385
- name: "Pipe Creation Guide",
386
- description: "How to create screenpipe pipes (scheduled AI automations): format, YAML frontmatter, schedule syntax, API parameters, and example templates",
387
- mimeType: "text/markdown",
388
- },
389
- {
390
- uri: "screenpipe://api-reference",
391
- name: "REST API Reference",
392
- description: "Full screenpipe REST API reference: search, activity-summary, elements, frames, export, retranscribe, raw SQL, connections, speakers (60+ endpoints)",
393
- mimeType: "text/markdown",
394
- },
395
- {
396
- uri: "screenpipe://cli-reference",
397
- name: "CLI Reference",
398
- description: "Screenpipe CLI commands: pipe management (list, enable, run, install, delete) and connection management (Telegram, Slack, Discord, etc.)",
271
+ description: "How to use screenpipe tools effectively — search strategy, progressive disclosure, and common patterns",
399
272
  mimeType: "text/markdown",
400
273
  },
401
274
  ];
402
- // List resources handler
403
275
  server.setRequestHandler(types_js_1.ListResourcesRequestSchema, async () => {
404
276
  return { resources: RESOURCES };
405
277
  });
406
- // Read resource handler
407
278
  server.setRequestHandler(types_js_1.ReadResourceRequestSchema, async (request) => {
408
279
  const { uri } = request.params;
409
- const dateInfo = getCurrentDateInfo();
410
- const now = Date.now();
411
- switch (uri) {
412
- case "screenpipe://context":
413
- return {
414
- contents: [
415
- {
416
- uri,
417
- mimeType: "application/json",
418
- text: JSON.stringify({
419
- current_time: dateInfo.isoDate,
420
- current_date_local: dateInfo.localDate,
421
- timezone: Intl.DateTimeFormat().resolvedOptions().timeZone,
422
- timestamps: {
423
- now: dateInfo.isoDate,
424
- one_hour_ago: new Date(now - 60 * 60 * 1000).toISOString(),
425
- three_hours_ago: new Date(now - 3 * 60 * 60 * 1000).toISOString(),
426
- today_start: `${new Date().toISOString().split("T")[0]}T00:00:00Z`,
427
- yesterday_start: `${new Date(now - 24 * 60 * 60 * 1000).toISOString().split("T")[0]}T00:00:00Z`,
428
- one_week_ago: new Date(now - 7 * 24 * 60 * 60 * 1000).toISOString(),
429
- },
430
- common_apps: ["Google Chrome", "Safari", "Slack", "zoom.us", "Microsoft Teams", "Code", "Terminal"],
431
- }, null, 2),
432
- },
433
- ],
434
- };
435
- case "screenpipe://guide":
436
- return {
437
- contents: [
438
- {
439
- uri,
440
- mimeType: "text/markdown",
441
- text: `# Screenpipe Search Guide
442
-
443
- ## Data Modalities
444
-
445
- Screenpipe captures four types of data:
446
- 1. **Accessibility** - Screen text via accessibility APIs (primary, preferred for screen content)
447
- 2. **OCR** - Screen text from screenshots (legacy fallback for apps without accessibility support)
448
- 3. **Audio** - Transcribed speech from microphone/system audio
449
- 4. **Input** - Keyboard input, mouse clicks, app switches, clipboard (macOS)
450
-
451
- ## Quick Start
452
- - **Get recent activity**: Call search-content with no parameters
453
- - **Search screen text**: \`{"q": "search term", "content_type": "all"}\`
454
- - **Get keyboard input**: \`{"content_type": "input"}\`
455
- - **Get audio only**: \`{"content_type": "audio"}\`
456
-
457
- ## Common User Requests → Correct Tool Choice
458
- | User says | Use this tool | Key params |
459
- |-----------|--------------|------------|
460
- | "summarize my meeting/call" | search-content | content_type:"audio", NO q param, start_time |
461
- | "what did they/I say about X" | search-content | content_type:"audio", NO q param (scan results manually) |
462
- | "how long on X" / "which apps" / "time spent" | activity-summary | start_time, end_time |
463
- | "what was I doing" | activity-summary | start_time, end_time (then drill into search-content) |
464
- | "what was I reading/looking at" | search-content | content_type:"all", start_time |
465
-
466
- ## Behavior Rules
467
- - Act immediately on clear requests. NEVER ask "what time range?" or "which content type?" when the intent is obvious.
468
- - If search returns empty, silently retry with wider time range or fewer filters. Do NOT ask the user what to change.
469
- - For meetings: ALWAYS use content_type:"audio" and do NOT use the q param. Transcriptions are noisy — q filters too aggressively and misses relevant content.
470
-
471
- ## search-content
472
- | Parameter | Description | Default |
473
- |-----------|-------------|---------|
474
- | q | Search query | (none - returns all) |
475
- | content_type | all/ocr/audio/input/accessibility | all |
476
- | limit | Max results | 10 |
477
- | start_time | ISO 8601 UTC or relative (e.g. '16h ago') | (no filter) |
478
- | end_time | ISO 8601 UTC or relative (e.g. 'now') | (no filter) |
479
- | app_name | Filter by app | (no filter) |
480
- | include_frames | Include screenshots | false |
481
-
482
- ## Search Strategy (MANDATORY)
483
- 1. First search: ONLY use time params (start_time/end_time). No q, no app_name, no content_type. This gives ground truth of what's recorded.
484
- 2. Scan results to find correct app_name values and content patterns.
485
- 3. Only THEN narrow with filters using exact observed values. App names are case-sensitive and may differ from user input (e.g. "Discord" vs "Discord.exe").
486
- 4. The q param searches captured text (accessibility/OCR), NOT app names — an app can be visible without its name in the captured text.
487
- 5. NEVER report "no data found" after one filtered search. Verify with unfiltered time-only search first.
488
-
489
- ## Progressive Disclosure (Token-Efficient Strategy)
490
- 1. **Start with activity-summary** (~200 tokens) for broad questions ("what was I doing?")
491
- 2. **Narrow with search-content** (~500-1000 tokens) using filters from step 1
492
- 3. **Drill into search-elements** (~200 tokens each) for structural UI detail (buttons, links)
493
- 4. **Fetch frame-context** for URLs and accessibility tree of specific frames
494
- 5. **Screenshots** (include_frames=true) only when text isn't enough
495
-
496
- ## Chat History
497
- Previous screenpipe chat conversations are stored as individual JSON files in ~/.screenpipe/chats/{conversation-id}.json
498
- Each file contains: id, title, messages[], createdAt, updatedAt. You can read these files to reference or search previous conversations.
499
-
500
- ## Speaker Management
501
- screenpipe auto-identifies speakers in audio. API endpoints for managing them:
502
- - \`GET /speakers/unnamed?limit=10\` — list unnamed speakers
503
- - \`GET /speakers/search?name=John\` — search by name
504
- - \`POST /speakers/update\` with \`{"id": 5, "name": "John"}\` — rename a speaker
505
- - \`POST /speakers/merge\` with \`{"speaker_to_keep_id": 1, "speaker_to_merge_id": 2}\` — merge duplicates
506
- - \`GET /speakers/similar?speaker_id=5\` — find similar speakers for merging
507
- - \`POST /speakers/reassign\` — reassign audio chunk to different speaker
508
-
509
- ## Tips
510
- 1. Read screenpipe://context first to get current timestamps
511
- 2. Use activity-summary before search-content for broad overview questions
512
- 3. Use search-elements instead of search-content for targeted UI lookups (10x lighter)
513
- 4. Use content_type=input for "what did I type?" queries
514
- 5. Use content_type=accessibility for accessibility tree text
515
- 6. For large aggregations (e.g. "what apps did I use today?"), paginate with offset or suggest the user run raw SQL via \`curl -X POST http://localhost:3030/raw_sql\` for efficient GROUP BY queries
516
-
517
- ## Deep Links (Clickable References)
518
- When showing search results to users, create clickable links so they can jump to that exact moment.
519
-
520
- **ALWAYS prefer frame-based links for OCR results** (frame IDs are exact DB keys):
521
- - \`[10:30 AM — Chrome](screenpipe://frame/12345)\` — use \`content.frame_id\` from OCR results
522
-
523
- **Use timestamp links only for audio results** (which have no frame_id):
524
- - \`[meeting at 3pm](screenpipe://timeline?timestamp=2024-01-15T15:00:00Z)\` — use exact \`timestamp\` from audio results
525
-
526
- **NEVER fabricate frame IDs or timestamps.** Only use values copied from actual search results.`,
527
- },
528
- ],
529
- };
530
- case "ui://search": {
531
- // MCP App UI - Interactive search dashboard
532
- const uiHtmlPath = path.join(__dirname, "..", "ui", "search.html");
533
- let htmlContent;
534
- try {
535
- htmlContent = fs.readFileSync(uiHtmlPath, "utf-8");
536
- }
537
- catch {
538
- // Fallback: serve embedded minimal UI if file not found
539
- htmlContent = `<!DOCTYPE html>
540
- <html>
541
- <head>
542
- <style>
543
- body { font-family: system-ui; background: #0a0a0a; color: #fff; padding: 20px; }
544
- input { width: 100%; padding: 10px; margin-bottom: 10px; background: #1a1a1a; border: 1px solid #333; color: #fff; border-radius: 6px; }
545
- button { padding: 10px 20px; background: #fff; color: #000; border: none; border-radius: 6px; cursor: pointer; }
546
- #results { margin-top: 20px; }
547
- .result { background: #1a1a1a; padding: 12px; margin: 8px 0; border-radius: 8px; border: 1px solid #333; }
548
- </style>
549
- </head>
550
- <body>
551
- <h2>screenpipe search</h2>
552
- <input id="q" placeholder="search..." onkeydown="if(event.key==='Enter')search()"/>
553
- <button onclick="search()">search</button>
554
- <div id="results"></div>
555
- <script>
556
- function search() {
557
- window.parent.postMessage({jsonrpc:'2.0',method:'tools/call',params:{name:'search-content',arguments:{q:document.getElementById('q').value,limit:20}}},'*');
280
+ if (uri === "screenpipe://context") {
281
+ const now = new Date();
282
+ const ms = now.getTime();
283
+ return {
284
+ contents: [
285
+ {
286
+ uri,
287
+ mimeType: "application/json",
288
+ text: JSON.stringify({
289
+ current_time: now.toISOString(),
290
+ current_date_local: now.toLocaleDateString("en-US", {
291
+ weekday: "long",
292
+ year: "numeric",
293
+ month: "long",
294
+ day: "numeric",
295
+ }),
296
+ timezone: Intl.DateTimeFormat().resolvedOptions().timeZone,
297
+ timestamps: {
298
+ now: now.toISOString(),
299
+ one_hour_ago: new Date(ms - 60 * 60 * 1000).toISOString(),
300
+ three_hours_ago: new Date(ms - 3 * 60 * 60 * 1000).toISOString(),
301
+ today_start: `${now.toISOString().split("T")[0]}T00:00:00Z`,
302
+ yesterday_start: `${new Date(ms - 24 * 60 * 60 * 1000).toISOString().split("T")[0]}T00:00:00Z`,
303
+ one_week_ago: new Date(ms - 7 * 24 * 60 * 60 * 1000).toISOString(),
304
+ },
305
+ }, null, 2),
306
+ },
307
+ ],
308
+ };
558
309
  }
559
- window.addEventListener('message',e=>{
560
- if(e.data?.result||e.data?.method==='tool/result'){
561
- const r=e.data.result||e.data.params?.result;
562
- const d=r?.data||r||[];
563
- document.getElementById('results').innerHTML=d.map(x=>'<div class="result"><b>'+((x.type||'')+'</b> '+(x.content?.app_name||'')+': '+(x.content?.text||x.content?.transcription||'').substring(0,200))+'</div>').join('');
564
- }
565
- });
566
- </script>
567
- </body>
568
- </html>`;
569
- }
570
- return {
571
- contents: [
572
- {
573
- uri,
574
- mimeType: "text/html",
575
- text: htmlContent,
576
- },
577
- ],
578
- };
579
- }
580
- case "screenpipe://pipe-creation-guide":
581
- return {
582
- contents: [
583
- {
584
- uri,
585
- mimeType: "text/markdown",
586
- text: `# Screenpipe Pipe Creation Guide
587
-
588
- ## What is a pipe?
589
-
590
- A pipe is a scheduled AI agent defined as a single markdown file: \`~/.screenpipe/pipes/{name}/pipe.md\`
591
- Every N minutes, screenpipe runs a coding agent (like pi or claude-code) with the pipe's prompt.
592
- The agent can query your screen data, write files, call external APIs, send notifications, etc.
593
-
594
- ## pipe.md format
595
-
596
- The file starts with YAML frontmatter on the very first line (no blank lines before it), then the prompt body:
597
-
598
- \`\`\`markdown
599
- ---
600
- schedule: every 30m
601
- enabled: true
602
- ---
603
-
604
- Your prompt instructions here...
605
- \`\`\`
606
-
607
- ### Config fields
608
-
609
- | Field | Values | Description |
610
- |-------|--------|-------------|
611
- | \`schedule\` | \`every 30m\`, \`every 1h\`, \`every day at 9am\`, \`every monday at 9am\`, \`manual\`, or cron: \`*/30 * * * *\` | When to run |
612
- | \`enabled\` | \`true\` / \`false\` | Whether the pipe is active |
613
- | \`preset\` | AI preset name (e.g. \`Oai\`) | Which AI model to use |
614
- | \`history\` | \`true\` / \`false\` | Include previous output as context |
615
- | \`connections\` | list of connection IDs | Required integrations (e.g. \`obsidian\`, \`telegram\`) |
616
-
617
- ## Context header
618
-
619
- Before execution, screenpipe prepends a context header to the prompt with:
620
- - Time range (start/end timestamps based on the schedule interval)
621
- - Current date and user's timezone
622
- - Screenpipe API base URL (http://localhost:3030)
623
- - Output directory
624
-
625
- The AI agent uses this context to query the right time range. No template variables needed in the prompt.
626
-
627
- ## Screenpipe search API
628
-
629
- The agent queries screen data via the local REST API:
630
-
631
- \`\`\`
632
- curl "http://localhost:3030/search?limit=20&content_type=all&start_time=<ISO8601>&end_time=<ISO8601>"
633
- \`\`\`
634
-
635
- ### Query parameters
636
-
637
- | Parameter | Description |
638
- |-----------|-------------|
639
- | \`q\` | Text search query (optional — skip for audio, transcriptions are noisy) |
640
- | \`content_type\` | \`all\`, \`ocr\`, \`audio\`, \`input\`, \`accessibility\` (prefer \`all\` or \`accessibility\`) |
641
- | \`limit\` | Max results (default 20) |
642
- | \`offset\` | Pagination offset |
643
- | \`start_time\` / \`end_time\` | ISO 8601 timestamps or relative (\`1h ago\`, \`now\`) |
644
- | \`app_name\` | Filter by app (e.g. \`Google Chrome\`, \`Slack\`) |
645
- | \`window_name\` | Filter by window title |
646
- | \`browser_url\` | Filter by URL |
647
- | \`min_length\` / \`max_length\` | Filter by text length |
648
- | \`speaker_name\` | Filter audio by speaker |
649
-
650
- Other useful endpoints:
651
- - \`GET /activity-summary?start_time=...&end_time=...\` — lightweight overview (~200 tokens)
652
- - \`GET /elements?q=...&role=AXButton&start_time=...\` — UI elements
653
- - \`GET /connections/{id}\` — get integration credentials (telegram, slack, obsidian, etc.)
654
- - \`POST /raw_sql\` — run SQL queries (always include LIMIT)
655
-
656
- Full API reference: read the \`screenpipe://api-reference\` resource.
657
-
658
- ## Installing and running
659
-
660
- After creating the pipe.md file:
661
-
662
- \`\`\`bash
663
- bunx screenpipe@latest pipe install ~/.screenpipe/pipes/my-pipe
664
- bunx screenpipe@latest pipe enable my-pipe
665
- bunx screenpipe@latest pipe run my-pipe # test immediately
666
- \`\`\`
667
-
668
- ## Example pipes
669
-
670
- ### Daily recap (manual trigger)
671
- \`\`\`markdown
672
- ---
673
- schedule: manual
674
- enabled: true
675
- ---
676
-
677
- Analyze my screen and audio recordings from today (last 16 hours). Use limit=10 per search, max 5 searches total.
678
-
679
- ## Summary
680
- One sentence: what I mainly did today.
681
-
682
- ## Accomplishments
683
- - Top 3 things I finished, with timestamps
684
-
685
- ## Key Moments
686
- - Important things I saw, said, or heard
687
-
688
- ## Unfinished Work
689
- - What I should continue tomorrow
690
- \`\`\`
691
-
692
- ### Obsidian sync (every hour)
693
- \`\`\`markdown
694
- ---
695
- schedule: every 1h
696
- enabled: true
697
- connections:
698
- - obsidian
699
- ---
700
-
701
- Sync screenpipe activity to Obsidian vault as a daily note.
702
-
703
- 1. Get vault path from GET http://localhost:3030/connections/obsidian
704
- 2. Read existing daily note (merge into it)
705
- 3. Query search API in 30-minute chunks with min_length=50
706
- 4. Synthesize activities, extract action items, write note
707
- \`\`\`
708
-
709
- ### Slack standup (every weekday at 9am)
710
- \`\`\`markdown
711
- ---
712
- schedule: every weekday at 9am
713
- enabled: true
714
- connections:
715
- - slack
716
- ---
717
-
718
- Generate standup update from yesterday's activity and post to Slack.
719
-
720
- 1. Query activity-summary for yesterday
721
- 2. Search for key accomplishments and blockers
722
- 3. Format as: Done / Doing / Blocked
723
- 4. POST to Slack webhook from GET http://localhost:3030/connections/slack
724
- \`\`\`
725
-
726
- ## Optimization tips
727
-
728
- - Be specific about expected output format
729
- - Give step-by-step instructions
730
- - Add error handling: "if API returns empty, try content_type=accessibility instead of ocr"
731
- - Add validation: "before writing, verify you have at least 3 entries"
732
- - Specify exact file paths, API parameters, output structure
733
- - Keep search limit low (10-20) and use time ranges from the context header
734
- - Use \`min_length=50\` to skip noisy OCR fragments`,
735
- },
736
- ],
737
- };
738
- case "screenpipe://api-reference":
739
- return {
740
- contents: [
741
- {
742
- uri,
743
- mimeType: "text/markdown",
744
- text: `# Screenpipe REST API Reference
745
-
746
- Local REST API at \`http://localhost:3030\`. Full reference (60+ endpoints): https://docs.screenpi.pe/llms-full.txt
747
-
748
- ## Shell
749
-
750
- - **macOS/Linux** → \`bash\`, \`curl\`
751
- - **Windows** → \`powershell\`, \`curl.exe\` (not the alias)
752
-
753
- ## Context Window Protection
754
-
755
- API responses can be large. Always write curl output to a file first (\`curl ... -o /tmp/sp_result.json\`), check size (\`wc -c\`), and if over 5KB read only the first 50-100 lines. Extract what you need with \`jq\`. NEVER dump full large responses into context.
756
-
757
- ---
758
-
759
- ## 1. Search — \`GET /search\`
760
-
761
- \`\`\`bash
762
- curl "http://localhost:3030/search?q=QUERY&content_type=all&limit=10&start_time=1h%20ago"
763
- \`\`\`
764
-
765
- ### Parameters
766
-
767
- | Parameter | Type | Required | Description |
768
- |-----------|------|----------|-------------|
769
- | \`q\` | string | No | Keywords. Do NOT use for audio — transcriptions are noisy. |
770
- | \`content_type\` | string | No | \`all\` (default), \`ocr\`, \`audio\`, \`input\`, \`accessibility\` |
771
- | \`limit\` | integer | No | Max 1-20. Default: 10 |
772
- | \`offset\` | integer | No | Pagination. Default: 0 |
773
- | \`start_time\` | ISO 8601 or relative | **Yes** | \`2024-01-15T10:00:00Z\` or \`16h ago\`, \`2d ago\`, \`30m ago\` |
774
- | \`end_time\` | ISO 8601 or relative | No | Defaults to now. \`now\`, \`1h ago\` |
775
- | \`app_name\` | string | No | e.g. "Google Chrome", "Slack", "zoom.us" |
776
- | \`window_name\` | string | No | Window title substring |
777
- | \`speaker_name\` | string | No | Filter audio by speaker (case-insensitive partial) |
778
- | \`focused\` | boolean | No | Only focused windows |
779
-
780
- ### Critical Rules
781
-
782
- 1. **ALWAYS include \`start_time\`** — queries without time bounds WILL timeout
783
- 2. **Start with 1-2 hour ranges** — expand only if no results
784
- 3. **Use \`app_name\`** when user mentions a specific app
785
- 4. **"recent"** = 30 min. **"today"** = since midnight. **"yesterday"** = yesterday's range
786
-
787
- ### Response Format
788
-
789
- \`\`\`json
790
- {
791
- "data": [
792
- {"type": "OCR", "content": {"frame_id": 12345, "text": "...", "timestamp": "...", "app_name": "Chrome"}},
793
- {"type": "Audio", "content": {"chunk_id": 678, "transcription": "...", "timestamp": "...", "speaker": {"name": "John"}}},
794
- {"type": "UI", "content": {"id": 999, "text": "Clicked Submit", "timestamp": "...", "app_name": "Safari"}}
795
- ],
796
- "pagination": {"limit": 10, "offset": 0, "total": 42}
797
- }
798
- \`\`\`
799
-
800
- ---
801
-
802
- ## 2. Activity Summary — \`GET /activity-summary\`
803
-
804
- \`\`\`bash
805
- curl "http://localhost:3030/activity-summary?start_time=1h%20ago&end_time=now"
806
- \`\`\`
807
-
808
- Returns app usage with \`active_minutes\`, first/last seen, recent texts, audio summary. ~200-500 tokens. Best starting point.
809
-
810
- ---
811
-
812
- ## 3. Elements — \`GET /elements\`
813
-
814
- Lightweight FTS search across UI elements (~100-500 bytes each).
815
-
816
- \`\`\`bash
817
- curl "http://localhost:3030/elements?q=Submit&role=AXButton&start_time=1h%20ago&limit=10"
818
- \`\`\`
819
-
820
- Parameters: \`q\`, \`frame_id\`, \`source\` (\`accessibility\`|\`ocr\`), \`role\`, \`start_time\`, \`end_time\`, \`app_name\`, \`limit\`, \`offset\`.
821
-
822
- ### Frame Context — \`GET /frames/{id}/context\`
823
-
824
- Returns accessibility text, parsed nodes, and extracted URLs for a frame.
825
-
826
- Common roles: \`AXButton\`, \`AXStaticText\`, \`AXLink\`, \`AXTextField\`, \`AXTextArea\`, \`AXMenuItem\`, \`AXCheckBox\`
827
-
828
- ---
829
-
830
- ## 4. Frames — \`GET /frames/{frame_id}\`
831
-
832
- Returns raw PNG screenshot. Never fetch more than 2-3 per query.
833
-
834
- ---
835
-
836
- ## 5. Media Export — \`POST /frames/export\`
837
-
838
- \`\`\`bash
839
- curl -X POST http://localhost:3030/frames/export \\
840
- -H "Content-Type: application/json" \\
841
- -d '{"start_time": "5m ago", "end_time": "now", "fps": 1.0}'
842
- \`\`\`
843
-
844
- FPS guidelines: 5min→1.0, 30min→0.5, 1h→0.2, 2h+→0.1. Max 10,000 frames.
845
-
846
- ---
847
-
848
- ## 6. Retranscribe — \`POST /audio/retranscribe\`
849
-
850
- \`\`\`bash
851
- curl -X POST http://localhost:3030/audio/retranscribe \\
852
- -H "Content-Type: application/json" \\
853
- -d '{"start": "1h ago", "end": "now"}'
854
- \`\`\`
855
-
856
- Optional: \`engine\`, \`vocabulary\` (array of \`{"word": "...", "replacement": "..."}\`), \`prompt\` (topic context).
857
-
858
- ---
859
-
860
- ## 7. Raw SQL — \`POST /raw_sql\`
861
-
862
- \`\`\`bash
863
- curl -X POST http://localhost:3030/raw_sql \\
864
- -H "Content-Type: application/json" \\
865
- -d '{"query": "SELECT ... LIMIT 100"}'
866
- \`\`\`
867
-
868
- Every SELECT needs LIMIT. Always filter by time. Read-only.
869
-
870
- ### Schema
871
-
872
- | Table | Key Columns | Time Column |
873
- |-------|-------------|-------------|
874
- | \`frames\` | \`app_name\`, \`window_name\`, \`browser_url\`, \`focused\` | \`timestamp\` |
875
- | \`ocr_text\` | \`text\`, \`app_name\`, \`window_name\` | join via \`frame_id\` |
876
- | \`elements\` | \`source\`, \`role\`, \`text\` | join via \`frame_id\` |
877
- | \`audio_transcriptions\` | \`transcription\`, \`device\`, \`speaker_id\`, \`is_input_device\` | \`timestamp\` |
878
- | \`speakers\` | \`name\`, \`metadata\` | — |
879
- | \`ui_events\` | \`event_type\`, \`app_name\`, \`window_title\`, \`browser_url\` | \`timestamp\` |
880
- | \`accessibility\` | \`app_name\`, \`window_name\`, \`text_content\` | \`timestamp\` |
881
-
882
- ### Example Queries
883
-
884
- \`\`\`sql
885
- -- Most used apps (last 24h)
886
- SELECT app_name, COUNT(*) as frames FROM frames
887
- WHERE timestamp > datetime('now', '-24 hours') AND app_name IS NOT NULL
888
- GROUP BY app_name ORDER BY frames DESC LIMIT 20
889
-
890
- -- Speaker stats
891
- SELECT COALESCE(NULLIF(s.name, ''), 'Unknown') as speaker, COUNT(*) as segments
892
- FROM audio_transcriptions at LEFT JOIN speakers s ON at.speaker_id = s.id
893
- WHERE at.timestamp > datetime('now', '-24 hours')
894
- GROUP BY at.speaker_id ORDER BY segments DESC LIMIT 20
895
- \`\`\`
896
-
897
- ---
898
-
899
- ## 8. Connections — \`GET /connections\`
900
-
901
- \`\`\`bash
902
- curl http://localhost:3030/connections # List all
903
- curl http://localhost:3030/connections/telegram # Get credentials
904
- \`\`\`
905
-
906
- Services: Telegram (\`bot_token\` + \`chat_id\`), Slack (\`webhook_url\`), Discord (\`webhook_url\`), Todoist (\`api_token\`), Teams (\`webhook_url\`), Email (SMTP config).
907
-
908
- ---
909
-
910
- ## 9. Speakers
310
+ if (uri === "screenpipe://guide") {
311
+ return {
312
+ contents: [
313
+ {
314
+ uri,
315
+ mimeType: "text/markdown",
316
+ text: `# Screenpipe Usage Guide
911
317
 
912
- \`\`\`bash
913
- curl "http://localhost:3030/speakers/search?name=John"
914
- curl "http://localhost:3030/speakers/unnamed?limit=10"
915
- curl -X POST http://localhost:3030/speakers/update -H "Content-Type: application/json" -d '{"id": 5, "name": "John"}'
916
- curl -X POST http://localhost:3030/speakers/merge -H "Content-Type: application/json" -d '{"speaker_to_keep_id": 1, "speaker_to_merge_id": 2}'
917
- \`\`\`
318
+ ## Progressive Disclosure — start light, escalate only when needed
918
319
 
919
- ---
320
+ | Step | Tool | When to use |
321
+ |------|------|-------------|
322
+ | 1 | activity-summary | Broad questions: "what was I doing?", "which apps?", "how long on X?" |
323
+ | 2 | search-content | Need specific text, transcriptions, or content |
324
+ | 3 | search-elements | Need UI structure — buttons, links, form fields |
325
+ | 4 | frame-context | Need full detail for a specific moment (use frame_id from step 2) |
920
326
 
921
- ## 10. Other Endpoints
327
+ ## Search Strategy
922
328
 
923
- \`\`\`bash
924
- curl http://localhost:3030/health # Health check
925
- curl http://localhost:3030/audio/list # Audio devices
926
- curl http://localhost:3030/vision/list # Monitors
927
- \`\`\`
329
+ - **Always provide start_time** — without it, search scans the entire history
330
+ - **Start with limit=5** — increase only if you need more results
331
+ - **Use max_content_length=500** to keep responses compact
332
+ - **Don't use q for audio** — transcriptions are noisy, q filters too aggressively. Search audio by time range and speaker instead
333
+ - **app_name is case-sensitive** — use exact names: "Google Chrome" not "chrome"
334
+ - **content_type=accessibility is preferred** for screen text (OS-native). ocr is fallback for apps without accessibility support
928
335
 
929
- ## Pipes API
336
+ ## Common Patterns
930
337
 
931
- \`\`\`bash
932
- curl http://localhost:3030/pipes/list # List all pipes
933
- curl -X POST http://localhost:3030/pipes/enable -d '{"name":"..."}' # Enable
934
- curl -X POST http://localhost:3030/pipes/disable -d '{"name":"..."}' # Disable
935
- curl -X POST http://localhost:3030/pipes/run -d '{"name":"..."}' # Run once
936
- curl "http://localhost:3030/pipes/{name}/executions?limit=5" # Execution history
937
- \`\`\`
338
+ - "What was I doing for the last 2 hours?" → activity-summary with start_time='2h ago'
339
+ - "What did I discuss in my meeting?" → list-meetings to find it, then search-content with audio + that time range
340
+ - "Find when I was on Twitter" → search-content with app_name='Arc' (or the browser name), q='twitter'
341
+ - "Remember that I prefer X" → update-memory with content describing the preference
342
+ - "What do you remember about X?" → search-content with content_type='memory', q='X'
938
343
 
939
344
  ## Deep Links
940
345
 
941
- \`\`\`markdown
942
- [10:30 AM — Chrome](screenpipe://frame/12345) # OCR results (use frame_id)
943
- [meeting at 3pm](screenpipe://timeline?timestamp=ISO8601) # Audio results (use timestamp)
944
- \`\`\`
945
-
946
- Only use IDs/timestamps from actual search results. Never fabricate.`,
947
- },
948
- ],
949
- };
950
- case "screenpipe://cli-reference":
951
- return {
952
- contents: [
953
- {
954
- uri,
955
- mimeType: "text/markdown",
956
- text: `# Screenpipe CLI Reference
957
-
958
- Use \`bunx screenpipe@latest\` to run CLI commands (or \`npx screenpipe@latest\`). No separate install needed.
959
-
960
- ## Shell
961
-
962
- - **macOS/Linux** → \`bash\`
963
- - **Windows** → \`powershell\`
964
-
965
- ---
966
-
967
- ## Pipe Management
968
-
969
- Pipes are markdown-based AI automations. Each pipe lives at \`~/.screenpipe/pipes/<name>/pipe.md\`.
970
-
971
- ### Commands
972
-
973
- \`\`\`bash
974
- bunx screenpipe@latest pipe list # List all pipes (compact table)
975
- bunx screenpipe@latest pipe enable <name> # Enable a pipe
976
- bunx screenpipe@latest pipe disable <name> # Disable a pipe
977
- bunx screenpipe@latest pipe run <name> # Run once immediately (for testing)
978
- bunx screenpipe@latest pipe logs <name> # View execution logs
979
- bunx screenpipe@latest pipe install <url-or-path> # Install from GitHub or local path
980
- bunx screenpipe@latest pipe delete <name> # Delete a pipe
981
- bunx screenpipe@latest pipe models list # View AI model presets
982
- \`\`\`
983
-
984
- ### Creating a Pipe
985
-
986
- Create \`~/.screenpipe/pipes/<name>/pipe.md\` with YAML frontmatter + prompt:
987
-
988
- \`\`\`markdown
989
- ---
990
- schedule: every 30m
991
- enabled: true
992
- preset: Oai
993
- ---
994
-
995
- Your prompt instructions here. The AI agent executes this on schedule.
996
- \`\`\`
997
-
998
- **Schedule syntax**: \`every 30m\`, \`every 1h\`, \`every day at 9am\`, \`every monday at 9am\`, \`manual\`, or cron: \`*/30 * * * *\`
999
-
1000
- **Config fields**: \`schedule\`, \`enabled\` (bool), \`preset\` (AI preset name), \`history\` (bool — include previous output), \`connections\` (list of required integrations)
1001
-
1002
- After creating:
1003
- \`\`\`bash
1004
- bunx screenpipe@latest pipe install ~/.screenpipe/pipes/my-pipe
1005
- bunx screenpipe@latest pipe enable my-pipe
1006
- bunx screenpipe@latest pipe run my-pipe # test immediately
1007
- \`\`\`
1008
-
1009
- ### Editing Config
1010
-
1011
- Edit frontmatter in the pipe.md file directly, or via API:
1012
-
1013
- \`\`\`bash
1014
- curl -X POST http://localhost:3030/pipes/<name>/config \\
1015
- -H "Content-Type: application/json" \\
1016
- -d '{"config": {"schedule": "every 1h", "enabled": true}}'
1017
- \`\`\`
1018
-
1019
- ### Rules
1020
-
1021
- 1. Use \`pipe list\` (not \`--json\`) — table output is compact
1022
- 2. Never dump full pipe JSON — can be 15MB+
1023
- 3. Check logs first when debugging: \`pipe logs <name>\`
1024
- 4. Use \`pipe run <name>\` to test before waiting for schedule
1025
-
1026
- ---
1027
-
1028
- ## Connection Management
1029
-
1030
- Manage integrations (Telegram, Slack, Discord, Email, Todoist, Teams) from the CLI.
1031
-
1032
- ### Commands
1033
-
1034
- \`\`\`bash
1035
- bunx screenpipe@latest connection list # List all connections + status
1036
- bunx screenpipe@latest connection list --json # JSON output
1037
- bunx screenpipe@latest connection get <id> # Show saved credentials
1038
- bunx screenpipe@latest connection set <id> key=val # Save credentials
1039
- bunx screenpipe@latest connection test <id> # Test a connection
1040
- bunx screenpipe@latest connection remove <id> # Remove credentials
1041
- \`\`\`
1042
-
1043
- ### Examples
1044
-
1045
- \`\`\`bash
1046
- # Set up Telegram
1047
- bunx screenpipe@latest connection set telegram bot_token=123456:ABC-DEF chat_id=5776185278
1048
-
1049
- # Set up Slack webhook
1050
- bunx screenpipe@latest connection set slack webhook_url=https://hooks.slack.com/services/...
1051
-
1052
- # Verify it works
1053
- bunx screenpipe@latest connection test telegram
1054
- \`\`\`
1055
-
1056
- Connection IDs: \`telegram\`, \`slack\`, \`discord\`, \`email\`, \`todoist\`, \`teams\`, \`google-calendar\`, \`apple-intelligence\`, \`openclaw\`, \`obsidian\`
1057
-
1058
- Credentials are stored locally at \`~/.screenpipe/connections.json\`.`,
1059
- },
1060
- ],
1061
- };
1062
- default:
1063
- throw new Error(`Unknown resource: ${uri}`);
1064
- }
1065
- });
1066
- // MCP Prompts - static interaction templates
1067
- const PROMPTS = [
1068
- {
1069
- name: "search-recent",
1070
- description: "Search recent screen activity",
1071
- arguments: [
1072
- { name: "query", description: "Optional search term", required: false },
1073
- { name: "hours", description: "Hours to look back (default: 1)", required: false },
1074
- ],
1075
- },
1076
- {
1077
- name: "find-in-app",
1078
- description: "Find content from a specific application",
1079
- arguments: [
1080
- { name: "app", description: "App name (e.g., Chrome, Slack)", required: true },
1081
- { name: "query", description: "Optional search term", required: false },
1082
- ],
1083
- },
1084
- {
1085
- name: "meeting-notes",
1086
- description: "Get audio transcriptions from meetings",
1087
- arguments: [
1088
- { name: "hours", description: "Hours to look back (default: 3)", required: false },
1089
- ],
1090
- },
1091
- {
1092
- name: "create-pipe",
1093
- description: "Create a new screenpipe pipe (scheduled AI automation)",
1094
- arguments: [
1095
- { name: "description", description: "What the pipe should do", required: true },
1096
- { name: "schedule", description: "Schedule (e.g., 'every 30m', 'every day at 9am', 'manual')", required: false },
1097
- ],
1098
- },
1099
- ];
1100
- // List prompts handler
1101
- server.setRequestHandler(types_js_1.ListPromptsRequestSchema, async () => {
1102
- return { prompts: PROMPTS };
1103
- });
1104
- // Get prompt handler
1105
- server.setRequestHandler(types_js_1.GetPromptRequestSchema, async (request) => {
1106
- const { name, arguments: promptArgs } = request.params;
1107
- const dateInfo = getCurrentDateInfo();
1108
- const now = Date.now();
1109
- switch (name) {
1110
- case "search-recent": {
1111
- const query = promptArgs?.query || "";
1112
- const hours = parseInt(promptArgs?.hours || "1", 10);
1113
- const startTime = new Date(now - hours * 60 * 60 * 1000).toISOString();
1114
- return {
1115
- description: `Search recent activity (last ${hours} hour${hours > 1 ? "s" : ""})`,
1116
- messages: [
1117
- {
1118
- role: "user",
1119
- content: {
1120
- type: "text",
1121
- text: `Search screenpipe for recent activity.
1122
-
1123
- Current time: ${dateInfo.isoDate}
1124
-
1125
- Use search-content with:
1126
- ${query ? `- q: "${query}"` : "- No query filter (get all content)"}
1127
- - start_time: "${startTime}"
1128
- - limit: 50`,
1129
- },
1130
- },
1131
- ],
1132
- };
1133
- }
1134
- case "find-in-app": {
1135
- const app = promptArgs?.app || "Google Chrome";
1136
- const query = promptArgs?.query || "";
1137
- return {
1138
- description: `Find content from ${app}`,
1139
- messages: [
1140
- {
1141
- role: "user",
1142
- content: {
1143
- type: "text",
1144
- text: `Search screenpipe for content from ${app}.
1145
-
1146
- Current time: ${dateInfo.isoDate}
1147
-
1148
- Use search-content with:
1149
- - app_name: "${app}"
1150
- ${query ? `- q: "${query}"` : "- No query filter"}
1151
- - content_type: "all"
1152
- - limit: 50`,
1153
- },
1154
- },
1155
- ],
1156
- };
1157
- }
1158
- case "meeting-notes": {
1159
- const hours = parseInt(promptArgs?.hours || "3", 10);
1160
- const startTime = new Date(now - hours * 60 * 60 * 1000).toISOString();
1161
- return {
1162
- description: `Get meeting transcriptions (last ${hours} hours)`,
1163
- messages: [
1164
- {
1165
- role: "user",
1166
- content: {
1167
- type: "text",
1168
- text: `Get audio transcriptions from recent meetings.
1169
-
1170
- Current time: ${dateInfo.isoDate}
1171
-
1172
- Use search-content with:
1173
- - content_type: "audio"
1174
- - start_time: "${startTime}"
1175
- - limit: 100
1176
-
1177
- Common meeting apps: zoom.us, Microsoft Teams, Google Meet, Slack`,
1178
- },
1179
- },
1180
- ],
1181
- };
1182
- }
1183
- case "create-pipe": {
1184
- const description = promptArgs?.description || "a useful automation";
1185
- const schedule = promptArgs?.schedule || "every 30m";
1186
- return {
1187
- description: `Create a new screenpipe pipe: ${description}`,
1188
- messages: [
1189
- {
1190
- role: "user",
1191
- content: {
1192
- type: "text",
1193
- text: `Create a new screenpipe pipe based on this description: "${description}"
1194
- Schedule: ${schedule}
1195
-
1196
- ## How to create a pipe
1197
-
1198
- A pipe is a TypeScript file that runs on a schedule or manually. It uses the screenpipe API to access screen/audio data and can send notifications, call AI, etc.
1199
-
1200
- ### Pipe structure
1201
- \`\`\`typescript
1202
- const pipe = () => import("https://raw.githubusercontent.com/nichochar/screenpipe/refs/heads/main/pipes/pipe-modules/pipe-core/index.ts");
1203
-
1204
- async function main() {
1205
- const sp = await pipe();
1206
-
1207
- // Query recent screen/audio data
1208
- const results = await sp.queryScreenpipe({
1209
- q: "search term",
1210
- contentType: "all", // "ocr" | "audio" | "all" | "ui"
1211
- limit: 50,
1212
- startTime: new Date(Date.now() - 30 * 60 * 1000).toISOString(),
1213
- endTime: new Date().toISOString(),
1214
- });
1215
-
1216
- // Send notification
1217
- await sp.sendDesktopNotification({ title: "Title", body: "Body" });
1218
-
1219
- // Call AI (uses user's configured AI provider)
1220
- const response = await sp.generateText({
1221
- messages: [{ role: "user", content: "Analyze this data..." }],
1222
- });
1223
- }
1224
-
1225
- main();
1226
- \`\`\`
1227
-
1228
- ### Key APIs available in pipes
1229
- - \`queryScreenpipe(params)\` - Search screen text (OCR/UI), audio transcriptions
1230
- - \`sendDesktopNotification({ title, body })\` - System notifications
1231
- - \`generateText({ messages, model? })\` - AI text generation
1232
- - \`generateObject({ messages, schema, model? })\` - AI structured output
1233
- - \`loadPipeConfig()\` - Load pipe configuration
1234
- - \`fetch()\` - HTTP requests to external services
1235
-
1236
- ### pipe.json config
1237
- \`\`\`json
1238
- {
1239
- "cron": "${schedule === "manual" ? "" : schedule.replace("every ", "*/").replace("m", " * * * *").replace("h", " * * *")}",
1240
- "is_nextjs": false,
1241
- "fields": [
1242
- { "name": "setting_name", "type": "string", "default": "value", "description": "Setting description" }
1243
- ]
1244
- }
1245
- \`\`\`
1246
-
1247
- ### Important notes
1248
- - Use \`contentType: "ui"\` for accessibility/structured text, \`"ocr"\` for raw screen text
1249
- - Always handle empty results gracefully
1250
- - Use \`startTime\`/\`endTime\` to scope queries
1251
- - Pipes run in Bun runtime with full TypeScript support
1252
- - For scheduled pipes, keep execution fast (< 30s)
1253
-
1254
- Create the pipe with the necessary files (pipe.ts and pipe.json). Follow the patterns above exactly.`,
1255
- },
1256
- },
1257
- ],
1258
- };
1259
- }
1260
- default:
1261
- throw new Error(`Unknown prompt: ${name}`);
346
+ When referencing specific moments in results, create clickable links:
347
+ - Frame: [10:30 AM — Chrome](screenpipe://frame/{frame_id}) — use frame_id from search results
348
+ - Timeline: [meeting at 3pm](screenpipe://timeline?timestamp=2024-01-15T15:00:00Z) — use exact timestamp from results
349
+ Never fabricate IDs or timestamps — only use values from actual results.
350
+ `,
351
+ },
352
+ ],
353
+ };
1262
354
  }
355
+ throw new Error(`Unknown resource: ${uri}`);
1263
356
  });
1264
- // Helper function to make HTTP requests
357
+ // ---------------------------------------------------------------------------
358
+ // Helper
359
+ // ---------------------------------------------------------------------------
1265
360
  async function fetchAPI(endpoint, options = {}) {
1266
361
  const url = `${SCREENPIPE_API}${endpoint}`;
1267
362
  return fetch(url, {
@@ -1272,7 +367,9 @@ async function fetchAPI(endpoint, options = {}) {
1272
367
  },
1273
368
  });
1274
369
  }
1275
- // Call tool handler
370
+ // ---------------------------------------------------------------------------
371
+ // Tool handlers
372
+ // ---------------------------------------------------------------------------
1276
373
  server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1277
374
  const { name, arguments: args } = request.params;
1278
375
  if (!args) {
@@ -1288,92 +385,221 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1288
385
  params.append(key, String(value));
1289
386
  }
1290
387
  }
1291
- const response = await fetchAPI(`/search?${params.toString()}`);
1292
- if (!response.ok) {
388
+ const response = await fetchAPI(`/search?${params.toString()}`);
389
+ if (!response.ok)
390
+ throw new Error(`HTTP error: ${response.status}`);
391
+ const data = await response.json();
392
+ const results = data.data || [];
393
+ const pagination = data.pagination || {};
394
+ if (results.length === 0) {
395
+ return {
396
+ content: [
397
+ {
398
+ type: "text",
399
+ text: "No results found. Try: broader terms, different content_type, or wider time range.",
400
+ },
401
+ ],
402
+ };
403
+ }
404
+ const contentItems = [];
405
+ const formattedResults = [];
406
+ const images = [];
407
+ for (const result of results) {
408
+ const content = result.content;
409
+ if (!content)
410
+ continue;
411
+ if (result.type === "OCR") {
412
+ const tagsStr = content.tags?.length ? `\nTags: ${content.tags.join(", ")}` : "";
413
+ formattedResults.push(`[OCR] ${content.app_name || "?"} | ${content.window_name || "?"}\n` +
414
+ `${content.timestamp || ""}\n` +
415
+ `${content.text || ""}` +
416
+ tagsStr);
417
+ if (includeFrames && content.frame) {
418
+ images.push({
419
+ data: content.frame,
420
+ context: `${content.app_name} at ${content.timestamp}`,
421
+ });
422
+ }
423
+ }
424
+ else if (result.type === "Audio") {
425
+ const tagsStr = content.tags?.length ? `\nTags: ${content.tags.join(", ")}` : "";
426
+ formattedResults.push(`[Audio] ${content.device_name || "?"}\n` +
427
+ `${content.timestamp || ""}\n` +
428
+ `${content.transcription || ""}` +
429
+ tagsStr);
430
+ }
431
+ else if (result.type === "UI" || result.type === "Accessibility") {
432
+ formattedResults.push(`[Accessibility] ${content.app_name || "?"} | ${content.window_name || "?"}\n` +
433
+ `${content.timestamp || ""}\n` +
434
+ `${content.text || ""}`);
435
+ }
436
+ else if (result.type === "Memory") {
437
+ const tagsStr = content.tags?.length ? ` [${content.tags.join(", ")}]` : "";
438
+ const importance = content.importance != null ? ` (importance: ${content.importance})` : "";
439
+ formattedResults.push(`[Memory #${content.id}]${tagsStr}${importance}\n` +
440
+ `${content.created_at || ""}\n` +
441
+ `${content.content || ""}`);
442
+ }
443
+ }
444
+ const header = `Results: ${results.length}/${pagination.total || "?"}` +
445
+ (pagination.total > results.length
446
+ ? ` (use offset=${(pagination.offset || 0) + results.length} for more)`
447
+ : "");
448
+ contentItems.push({
449
+ type: "text",
450
+ text: header + "\n\n" + formattedResults.join("\n---\n"),
451
+ });
452
+ for (const img of images) {
453
+ contentItems.push({ type: "text", text: `\n📷 ${img.context}` });
454
+ contentItems.push({ type: "image", data: img.data, mimeType: "image/png" });
455
+ }
456
+ return { content: contentItems };
457
+ }
458
+ case "list-meetings": {
459
+ const params = new URLSearchParams();
460
+ for (const [key, value] of Object.entries(args)) {
461
+ if (value !== null && value !== undefined) {
462
+ params.append(key, String(value));
463
+ }
464
+ }
465
+ const response = await fetchAPI(`/meetings?${params.toString()}`);
466
+ if (!response.ok)
467
+ throw new Error(`HTTP error: ${response.status}`);
468
+ const meetings = await response.json();
469
+ if (!Array.isArray(meetings) || meetings.length === 0) {
470
+ return {
471
+ content: [{ type: "text", text: "No meetings found in the given time range." }],
472
+ };
473
+ }
474
+ const formatted = meetings.map((m) => {
475
+ const start = m.meeting_start;
476
+ const end = m.meeting_end || "ongoing";
477
+ const app = m.meeting_app;
478
+ const title = m.title ? ` — ${m.title}` : "";
479
+ const attendees = m.attendees ? `\nAttendees: ${m.attendees}` : "";
480
+ return `[${m.detection_source}] ${app}${title}\n ${start} → ${end}${attendees}`;
481
+ });
482
+ return {
483
+ content: [
484
+ { type: "text", text: `Meetings: ${meetings.length}\n\n${formatted.join("\n---\n")}` },
485
+ ],
486
+ };
487
+ }
488
+ case "activity-summary": {
489
+ const params = new URLSearchParams();
490
+ for (const [key, value] of Object.entries(args)) {
491
+ if (value !== null && value !== undefined) {
492
+ params.append(key, String(value));
493
+ }
494
+ }
495
+ const response = await fetchAPI(`/activity-summary?${params.toString()}`);
496
+ if (!response.ok)
1293
497
  throw new Error(`HTTP error: ${response.status}`);
498
+ const data = await response.json();
499
+ const appsLines = (data.apps || []).map((a) => {
500
+ const timeSpan = a.first_seen && a.last_seen
501
+ ? `, ${a.first_seen.slice(11, 16)}–${a.last_seen.slice(11, 16)} UTC`
502
+ : "";
503
+ return ` ${a.name}: ${a.minutes} min (${a.frame_count} frames${timeSpan})`;
504
+ });
505
+ const speakerLines = (data.audio_summary?.speakers || []).map((s) => ` ${s.name}: ${s.segment_count} segments`);
506
+ const textLines = (data.recent_texts || []).map((t) => ` [${t.app_name}] ${t.text}`);
507
+ const summary = [
508
+ `Activity Summary (${data.time_range?.start} → ${data.time_range?.end})`,
509
+ `Total frames: ${data.total_frames}`,
510
+ "",
511
+ "Apps:",
512
+ ...(appsLines.length ? appsLines : [" (none)"]),
513
+ "",
514
+ `Audio: ${data.audio_summary?.segment_count || 0} segments`,
515
+ ...(speakerLines.length ? speakerLines : []),
516
+ "",
517
+ "Recent texts:",
518
+ ...(textLines.length ? textLines.slice(0, 10) : [" (none)"]),
519
+ ].join("\n");
520
+ return { content: [{ type: "text", text: summary }] };
521
+ }
522
+ case "search-elements": {
523
+ const params = new URLSearchParams();
524
+ for (const [key, value] of Object.entries(args)) {
525
+ if (value !== null && value !== undefined) {
526
+ params.append(key, String(value));
527
+ }
1294
528
  }
529
+ const response = await fetchAPI(`/elements?${params.toString()}`);
530
+ if (!response.ok)
531
+ throw new Error(`HTTP error: ${response.status}`);
1295
532
  const data = await response.json();
1296
- const results = data.data || [];
533
+ const elements = data.data || [];
1297
534
  const pagination = data.pagination || {};
1298
- if (results.length === 0) {
535
+ if (elements.length === 0) {
1299
536
  return {
1300
537
  content: [
1301
538
  {
1302
539
  type: "text",
1303
- text: "No results found. Try: broader search terms, different content_type, or wider time range.",
540
+ text: "No elements found. Try: broader search, different role/source, or wider time range.",
1304
541
  },
1305
542
  ],
1306
543
  };
1307
544
  }
1308
- // Build content array with text and optional images
1309
- const contentItems = [];
1310
- const formattedResults = [];
1311
- const images = [];
1312
- for (const result of results) {
1313
- const content = result.content;
1314
- if (!content)
1315
- continue;
1316
- if (result.type === "OCR") {
1317
- const tagsStr = content.tags?.length ? `\nTags: ${content.tags.join(", ")}` : "";
1318
- formattedResults.push(`[OCR] ${content.app_name || "?"} | ${content.window_name || "?"}\n` +
1319
- `${content.timestamp || ""}\n` +
1320
- `${content.text || ""}` +
1321
- tagsStr);
1322
- if (includeFrames && content.frame) {
1323
- images.push({
1324
- data: content.frame,
1325
- context: `${content.app_name} at ${content.timestamp}`,
1326
- });
1327
- }
1328
- }
1329
- else if (result.type === "Audio") {
1330
- const tagsStr = content.tags?.length ? `\nTags: ${content.tags.join(", ")}` : "";
1331
- formattedResults.push(`[Audio] ${content.device_name || "?"}\n` +
1332
- `${content.timestamp || ""}\n` +
1333
- `${content.transcription || ""}` +
1334
- tagsStr);
545
+ const formatted = elements.map((e) => {
546
+ const boundsStr = e.bounds
547
+ ? ` [${e.bounds.left.toFixed(2)},${e.bounds.top.toFixed(2)} ${e.bounds.width.toFixed(2)}x${e.bounds.height.toFixed(2)}]`
548
+ : "";
549
+ return `[${e.source}] ${e.role} (frame:${e.frame_id}, depth:${e.depth})${boundsStr}\n ${e.text || "(no text)"}`;
550
+ });
551
+ const header = `Elements: ${elements.length}/${pagination.total || "?"}` +
552
+ (pagination.total > elements.length
553
+ ? ` (use offset=${(pagination.offset || 0) + elements.length} for more)`
554
+ : "");
555
+ return {
556
+ content: [{ type: "text", text: header + "\n\n" + formatted.join("\n---\n") }],
557
+ };
558
+ }
559
+ case "frame-context": {
560
+ const frameId = args.frame_id;
561
+ if (!frameId) {
562
+ return { content: [{ type: "text", text: "Error: frame_id is required" }] };
563
+ }
564
+ const response = await fetchAPI(`/frames/${frameId}/context`);
565
+ if (!response.ok)
566
+ throw new Error(`HTTP error: ${response.status}`);
567
+ const data = await response.json();
568
+ const lines = [`Frame ${data.frame_id} (source: ${data.text_source})`];
569
+ if (data.urls?.length) {
570
+ lines.push("", "URLs:", ...data.urls.map((u) => ` ${u}`));
571
+ }
572
+ if (data.nodes?.length) {
573
+ lines.push("", `Nodes: ${data.nodes.length}`);
574
+ for (const node of data.nodes.slice(0, 50)) {
575
+ const indent = " ".repeat(Math.min(node.depth, 5));
576
+ lines.push(`${indent}[${node.role}] ${node.text}`);
1335
577
  }
1336
- else if (result.type === "UI" || result.type === "Accessibility") {
1337
- formattedResults.push(`[Accessibility] ${content.app_name || "?"} | ${content.window_name || "?"}\n` +
1338
- `${content.timestamp || ""}\n` +
1339
- `${content.text || ""}`);
578
+ if (data.nodes.length > 50) {
579
+ lines.push(` ... and ${data.nodes.length - 50} more nodes`);
1340
580
  }
1341
581
  }
1342
- // Header with pagination info
1343
- const header = `Results: ${results.length}/${pagination.total || "?"}` +
1344
- (pagination.total > results.length ? ` (use offset=${(pagination.offset || 0) + results.length} for more)` : "");
1345
- contentItems.push({
1346
- type: "text",
1347
- text: header + "\n\n" + formattedResults.join("\n---\n"),
1348
- });
1349
- // Add images if requested
1350
- for (const img of images) {
1351
- contentItems.push({ type: "text", text: `\n📷 ${img.context}` });
1352
- contentItems.push({ type: "image", data: img.data, mimeType: "image/png" });
582
+ if (data.text) {
583
+ const truncated = data.text.length > 2000 ? data.text.substring(0, 2000) + "..." : data.text;
584
+ lines.push("", "Full text:", truncated);
1353
585
  }
1354
- return { content: contentItems };
586
+ return { content: [{ type: "text", text: lines.join("\n") }] };
1355
587
  }
1356
588
  case "export-video": {
1357
589
  const startTime = args.start_time;
1358
590
  const endTime = args.end_time;
1359
591
  const fps = args.fps || 1.0;
1360
- // Validate time inputs
1361
592
  if (!startTime || !endTime) {
1362
593
  return {
1363
- content: [
1364
- {
1365
- type: "text",
1366
- text: "Error: Both start_time and end_time are required in ISO 8601 format (e.g., '2024-01-15T10:00:00Z')",
1367
- },
1368
- ],
594
+ content: [{ type: "text", text: "Error: start_time and end_time are required" }],
1369
595
  };
1370
596
  }
1371
- // Step 1: Query the search API to get frame IDs for the time range
597
+ // Get frame IDs for the time range
1372
598
  const searchParams = new URLSearchParams({
1373
599
  content_type: "ocr",
1374
600
  start_time: startTime,
1375
601
  end_time: endTime,
1376
- limit: "10000", // Get all frames in range
602
+ limit: "10000",
1377
603
  });
1378
604
  const searchResponse = await fetchAPI(`/search?${searchParams.toString()}`);
1379
605
  if (!searchResponse.ok) {
@@ -1386,12 +612,11 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1386
612
  content: [
1387
613
  {
1388
614
  type: "text",
1389
- text: `No screen recordings found between ${startTime} and ${endTime}. Make sure screenpipe was recording during this time period.`,
615
+ text: `No screen recordings found between ${startTime} and ${endTime}.`,
1390
616
  },
1391
617
  ],
1392
618
  };
1393
619
  }
1394
- // Extract unique frame IDs from OCR results
1395
620
  const frameIds = [];
1396
621
  const seenIds = new Set();
1397
622
  for (const result of results) {
@@ -1405,18 +630,10 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1405
630
  }
1406
631
  if (frameIds.length === 0) {
1407
632
  return {
1408
- content: [
1409
- {
1410
- type: "text",
1411
- text: `Found ${results.length} results but no valid frame IDs. The recordings may be audio-only.`,
1412
- },
1413
- ],
633
+ content: [{ type: "text", text: "No valid frame IDs found (audio-only?)." }],
1414
634
  };
1415
635
  }
1416
- // Sort frame IDs
1417
636
  frameIds.sort((a, b) => a - b);
1418
- // Step 2: Connect to WebSocket and export video
1419
- // Send frame_ids in message body to avoid URL length limits
1420
637
  const wsUrl = `ws://localhost:${port}/frames/export?fps=${fps}`;
1421
638
  const exportResult = await new Promise((resolve) => {
1422
639
  const ws = new ws_1.WebSocket(wsUrl);
@@ -1427,9 +644,8 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1427
644
  ws.close();
1428
645
  resolve({ success: false, error: "Export timed out after 5 minutes" });
1429
646
  }
1430
- }, 5 * 60 * 1000); // 5 minute timeout
647
+ }, 5 * 60 * 1000);
1431
648
  ws.on("open", () => {
1432
- // Send frame_ids in message body to avoid URL length limits
1433
649
  ws.send(JSON.stringify({ frame_ids: frameIds }));
1434
650
  });
1435
651
  ws.on("error", (error) => {
@@ -1450,7 +666,6 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1450
666
  try {
1451
667
  const message = JSON.parse(data.toString());
1452
668
  if (message.status === "completed" && message.video_data) {
1453
- // Save video to temp file
1454
669
  const tempDir = os.tmpdir();
1455
670
  const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
1456
671
  const filename = `screenpipe_export_${timestamp}.mp4`;
@@ -1459,11 +674,7 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1459
674
  resolved = true;
1460
675
  clearTimeout(timeout);
1461
676
  ws.close();
1462
- resolve({
1463
- success: true,
1464
- filePath,
1465
- frameCount: frameIds.length,
1466
- });
677
+ resolve({ success: true, filePath, frameCount: frameIds.length });
1467
678
  }
1468
679
  else if (message.status === "error") {
1469
680
  resolved = true;
@@ -1471,9 +682,8 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1471
682
  ws.close();
1472
683
  resolve({ success: false, error: message.error || "Export failed" });
1473
684
  }
1474
- // Ignore "extracting" and "encoding" status updates
1475
685
  }
1476
- catch (parseError) {
686
+ catch {
1477
687
  // Ignore parse errors for progress messages
1478
688
  }
1479
689
  });
@@ -1483,176 +693,94 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1483
693
  content: [
1484
694
  {
1485
695
  type: "text",
1486
- text: `Successfully exported video!\n\n` +
1487
- `File: ${exportResult.filePath}\n` +
1488
- `Frames: ${exportResult.frameCount}\n` +
1489
- `Time range: ${startTime} to ${endTime}\n` +
1490
- `FPS: ${fps}`,
696
+ text: `Video exported: ${exportResult.filePath}\n` +
697
+ `Frames: ${exportResult.frameCount} | ${startTime} → ${endTime} | ${fps} fps`,
1491
698
  },
1492
699
  ],
1493
700
  };
1494
701
  }
1495
702
  else {
1496
703
  return {
1497
- content: [
1498
- {
1499
- type: "text",
1500
- text: `Failed to export video: ${exportResult.error}`,
1501
- },
1502
- ],
704
+ content: [{ type: "text", text: `Export failed: ${exportResult.error}` }],
1503
705
  };
1504
706
  }
1505
707
  }
1506
- case "list-meetings": {
1507
- const params = new URLSearchParams();
1508
- for (const [key, value] of Object.entries(args)) {
1509
- if (value !== null && value !== undefined) {
1510
- params.append(key, String(value));
1511
- }
708
+ case "update-memory": {
709
+ if (args.delete && args.id) {
710
+ const response = await fetchAPI(`/memories/${args.id}`, { method: "DELETE" });
711
+ if (!response.ok)
712
+ throw new Error(`HTTP error: ${response.status}`);
713
+ return { content: [{ type: "text", text: `Memory ${args.id} deleted.` }] };
1512
714
  }
1513
- const response = await fetchAPI(`/meetings?${params.toString()}`);
1514
- if (!response.ok) {
1515
- throw new Error(`HTTP error: ${response.status}`);
715
+ if (args.id) {
716
+ const body = {};
717
+ if (args.content !== undefined)
718
+ body.content = args.content;
719
+ if (args.tags !== undefined)
720
+ body.tags = args.tags;
721
+ if (args.importance !== undefined)
722
+ body.importance = args.importance;
723
+ if (args.source_context !== undefined)
724
+ body.source_context = args.source_context;
725
+ const response = await fetchAPI(`/memories/${args.id}`, {
726
+ method: "PUT",
727
+ body: JSON.stringify(body),
728
+ });
729
+ if (!response.ok)
730
+ throw new Error(`HTTP error: ${response.status}`);
731
+ const memory = await response.json();
732
+ return {
733
+ content: [{ type: "text", text: `Memory ${memory.id} updated: "${memory.content}"` }],
734
+ };
1516
735
  }
1517
- const meetings = await response.json();
1518
- if (!Array.isArray(meetings) || meetings.length === 0) {
736
+ if (!args.content) {
1519
737
  return {
1520
- content: [
1521
- {
1522
- type: "text",
1523
- text: "No meetings found. Make sure screenpipe is running in smart transcription mode.",
1524
- },
1525
- ],
738
+ content: [{ type: "text", text: "Error: 'content' is required to create a memory" }],
1526
739
  };
1527
740
  }
1528
- const formatted = meetings.map((m) => {
1529
- const start = m.meeting_start;
1530
- const end = m.meeting_end || "ongoing";
1531
- const app = m.meeting_app;
1532
- const title = m.title ? ` — ${m.title}` : "";
1533
- const attendees = m.attendees ? `\nAttendees: ${m.attendees}` : "";
1534
- return `[${m.detection_source}] ${app}${title}\n ${start} → ${end}${attendees}`;
741
+ const memoryBody = {
742
+ content: args.content,
743
+ source: "mcp",
744
+ tags: args.tags || [],
745
+ importance: args.importance ?? 0.5,
746
+ };
747
+ if (args.source_context)
748
+ memoryBody.source_context = args.source_context;
749
+ const memoryResponse = await fetchAPI("/memories", {
750
+ method: "POST",
751
+ body: JSON.stringify(memoryBody),
1535
752
  });
753
+ if (!memoryResponse.ok)
754
+ throw new Error(`HTTP error: ${memoryResponse.status}`);
755
+ const newMemory = await memoryResponse.json();
1536
756
  return {
1537
757
  content: [
1538
- {
1539
- type: "text",
1540
- text: `Meetings: ${meetings.length}\n\n${formatted.join("\n---\n")}`,
1541
- },
758
+ { type: "text", text: `Memory created (id: ${newMemory.id}): "${newMemory.content}"` },
1542
759
  ],
1543
760
  };
1544
761
  }
1545
- case "activity-summary": {
1546
- const params = new URLSearchParams();
1547
- for (const [key, value] of Object.entries(args)) {
1548
- if (value !== null && value !== undefined) {
1549
- params.append(key, String(value));
1550
- }
1551
- }
1552
- const response = await fetchAPI(`/activity-summary?${params.toString()}`);
1553
- if (!response.ok) {
1554
- throw new Error(`HTTP error: ${response.status}`);
1555
- }
1556
- const data = await response.json();
1557
- // Format apps
1558
- const appsLines = (data.apps || []).map((a) => {
1559
- const timeSpan = a.first_seen && a.last_seen
1560
- ? `, ${a.first_seen.slice(11, 16)}–${a.last_seen.slice(11, 16)} UTC`
1561
- : "";
1562
- return ` ${a.name}: ${a.minutes} min (${a.frame_count} frames${timeSpan})`;
1563
- });
1564
- // Format audio
1565
- const speakerLines = (data.audio_summary?.speakers || []).map((s) => ` ${s.name}: ${s.segment_count} segments`);
1566
- // Format recent texts
1567
- const textLines = (data.recent_texts || []).map((t) => ` [${t.app_name}] ${t.text}`);
1568
- const summary = [
1569
- `Activity Summary (${data.time_range?.start} → ${data.time_range?.end})`,
1570
- `Total frames: ${data.total_frames}`,
1571
- "",
1572
- "Apps:",
1573
- ...(appsLines.length ? appsLines : [" (none)"]),
1574
- "",
1575
- `Audio: ${data.audio_summary?.segment_count || 0} segments`,
1576
- ...(speakerLines.length ? speakerLines : []),
1577
- "",
1578
- "Recent texts:",
1579
- ...(textLines.length ? textLines.slice(0, 10) : [" (none)"]),
1580
- ].join("\n");
1581
- return { content: [{ type: "text", text: summary }] };
1582
- }
1583
- case "search-elements": {
1584
- const params = new URLSearchParams();
1585
- for (const [key, value] of Object.entries(args)) {
1586
- if (value !== null && value !== undefined) {
1587
- params.append(key, String(value));
1588
- }
1589
- }
1590
- const response = await fetchAPI(`/elements?${params.toString()}`);
1591
- if (!response.ok) {
1592
- throw new Error(`HTTP error: ${response.status}`);
1593
- }
1594
- const data = await response.json();
1595
- const elements = data.data || [];
1596
- const pagination = data.pagination || {};
1597
- if (elements.length === 0) {
1598
- return {
1599
- content: [
1600
- {
1601
- type: "text",
1602
- text: "No elements found. Try: broader search, different role/source, or wider time range.",
1603
- },
1604
- ],
1605
- };
1606
- }
1607
- const formatted = elements.map((e) => {
1608
- const boundsStr = e.bounds
1609
- ? ` [${e.bounds.left.toFixed(2)},${e.bounds.top.toFixed(2)} ${e.bounds.width.toFixed(2)}x${e.bounds.height.toFixed(2)}]`
1610
- : "";
1611
- return `[${e.source}] ${e.role} (frame:${e.frame_id}, depth:${e.depth})${boundsStr}\n ${e.text || "(no text)"}`;
762
+ case "send-notification": {
763
+ const notifBody = {
764
+ title: args.title,
765
+ body: args.body || "",
766
+ type: "pipe",
767
+ };
768
+ if (args.timeout_secs)
769
+ notifBody.timeout = Number(args.timeout_secs) * 1000;
770
+ if (args.actions)
771
+ notifBody.actions = args.actions;
772
+ const notifResponse = await fetch("http://localhost:11435/notify", {
773
+ method: "POST",
774
+ headers: { "Content-Type": "application/json" },
775
+ body: JSON.stringify(notifBody),
1612
776
  });
1613
- const header = `Elements: ${elements.length}/${pagination.total || "?"}` +
1614
- (pagination.total > elements.length
1615
- ? ` (use offset=${(pagination.offset || 0) + elements.length} for more)`
1616
- : "");
777
+ if (!notifResponse.ok)
778
+ throw new Error(`HTTP error: ${notifResponse.status}`);
779
+ const notifResult = await notifResponse.json();
1617
780
  return {
1618
- content: [{ type: "text", text: header + "\n\n" + formatted.join("\n---\n") }],
781
+ content: [{ type: "text", text: `Notification sent: ${notifResult.message}` }],
1619
782
  };
1620
783
  }
1621
- case "frame-context": {
1622
- const frameId = args.frame_id;
1623
- if (!frameId) {
1624
- return {
1625
- content: [{ type: "text", text: "Error: frame_id is required" }],
1626
- };
1627
- }
1628
- const response = await fetchAPI(`/frames/${frameId}/context`);
1629
- if (!response.ok) {
1630
- throw new Error(`HTTP error: ${response.status}`);
1631
- }
1632
- const data = await response.json();
1633
- const lines = [
1634
- `Frame ${data.frame_id} (source: ${data.text_source})`,
1635
- ];
1636
- if (data.urls?.length) {
1637
- lines.push("", "URLs:", ...data.urls.map((u) => ` ${u}`));
1638
- }
1639
- if (data.nodes?.length) {
1640
- lines.push("", `Nodes: ${data.nodes.length}`);
1641
- for (const node of data.nodes.slice(0, 50)) {
1642
- const indent = " ".repeat(Math.min(node.depth, 5));
1643
- lines.push(`${indent}[${node.role}] ${node.text}`);
1644
- }
1645
- if (data.nodes.length > 50) {
1646
- lines.push(` ... and ${data.nodes.length - 50} more nodes`);
1647
- }
1648
- }
1649
- if (data.text) {
1650
- // Truncate to avoid massive outputs
1651
- const truncated = data.text.length > 2000 ? data.text.substring(0, 2000) + "..." : data.text;
1652
- lines.push("", "Full text:", truncated);
1653
- }
1654
- return { content: [{ type: "text", text: lines.join("\n") }] };
1655
- }
1656
784
  default:
1657
785
  throw new Error(`Unknown tool: ${name}`);
1658
786
  }
@@ -1660,12 +788,7 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1660
788
  catch (error) {
1661
789
  const errorMessage = error instanceof Error ? error.message : "Unknown error";
1662
790
  return {
1663
- content: [
1664
- {
1665
- type: "text",
1666
- text: `Error executing ${name}: ${errorMessage}`,
1667
- },
1668
- ],
791
+ content: [{ type: "text", text: `Error executing ${name}: ${errorMessage}` }],
1669
792
  };
1670
793
  }
1671
794
  });