screenpipe-mcp 0.8.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. package/bun.lock +429 -0
  2. package/dist/index.js +365 -718
  3. package/package.json +8 -8
  4. package/src/index.ts +407 -781
package/dist/index.js CHANGED
@@ -44,19 +44,6 @@ const ws_1 = require("ws");
44
44
  const fs = __importStar(require("fs"));
45
45
  const path = __importStar(require("path"));
46
46
  const os = __importStar(require("os"));
47
- // Helper to get current date in ISO format
48
- function getCurrentDateInfo() {
49
- const now = new Date();
50
- return {
51
- isoDate: now.toISOString(),
52
- localDate: now.toLocaleDateString("en-US", {
53
- weekday: "long",
54
- year: "numeric",
55
- month: "long",
56
- day: "numeric",
57
- }),
58
- };
59
- }
60
47
  // Parse command line arguments
61
48
  const args = process.argv.slice(2);
62
49
  let port = 3030;
@@ -69,614 +56,246 @@ const SCREENPIPE_API = `http://localhost:${port}`;
69
56
  // Initialize server
70
57
  const server = new index_js_1.Server({
71
58
  name: "screenpipe",
72
- version: "0.8.5",
59
+ version: "0.9.0",
73
60
  }, {
74
61
  capabilities: {
75
62
  tools: {},
76
- prompts: {},
77
63
  resources: {},
78
64
  },
79
65
  });
80
- // Tool definitions
81
- const BASE_TOOLS = [
66
+ // ---------------------------------------------------------------------------
67
+ // Tools — minimal descriptions, no behavioral guidance (that belongs in resources)
68
+ // ---------------------------------------------------------------------------
69
+ const TOOLS = [
82
70
  {
83
71
  name: "search-content",
84
- description: "Search screenpipe's recorded content: screen text (accessibility APIs, with OCR fallback), audio transcriptions, and UI elements. " +
85
- "Returns timestamped results with app context. " +
86
- "Call with no parameters to get recent activity. " +
87
- "Use the 'screenpipe://context' resource for current time when building time-based queries.\n\n" +
88
- "WHEN TO USE WHICH content_type:\n" +
89
- "- For meetings/calls/conversations: content_type='audio', do NOT use q param (transcriptions are noisy, q filters too aggressively)\n" +
90
- "- For screen text/reading: content_type='all' or 'accessibility'\n" +
91
- "- For time spent/app usage questions: use activity-summary tool instead (this tool returns content, not time stats)\n\n" +
92
- "SEARCH STRATEGY: First search with ONLY time params (start_time/end_time) — no q, no app_name, no content_type. " +
93
- "This gives ground truth of what's recorded. Scan results to find correct app_name values, then narrow with filters using exact observed values. " +
94
- "App names are case-sensitive (e.g. 'Discord' vs 'Discord.exe'). " +
95
- "The q param searches captured text, NOT app names. NEVER report 'no data' after one filtered search — verify with unfiltered time-only search first.\n\n" +
96
- "DEEP LINKS: When referencing specific moments, create clickable links using IDs from search results:\n" +
97
- "- OCR results (PREFERRED): [10:30 AM — Chrome](screenpipe://frame/12345) — use content.frame_id from the result\n" +
98
- "- Audio results: [meeting at 3pm](screenpipe://timeline?timestamp=2024-01-15T15:00:00Z) — use exact timestamp from result\n" +
99
- "NEVER fabricate frame IDs or timestamps — only use values from actual search results.",
100
- annotations: {
101
- title: "Search Content",
102
- readOnlyHint: true,
103
- },
72
+ description: "Search screen text, audio transcriptions, input events, and memories. " +
73
+ "Returns timestamped results with app context. Call with no params for recent activity.",
74
+ annotations: { title: "Search Content", readOnlyHint: true },
104
75
  inputSchema: {
105
76
  type: "object",
106
77
  properties: {
107
78
  q: {
108
79
  type: "string",
109
- description: "Search query (full-text search on captured text). Optional - omit to return all content in time range. IMPORTANT: Do NOT use q for audio/meeting searches — transcriptions are noisy and q filters too aggressively. Only use q when searching for specific text the user saw on screen.",
80
+ description: "Full-text search query. Omit to return all content in time range.",
110
81
  },
111
82
  content_type: {
112
83
  type: "string",
113
- enum: ["all", "ocr", "audio", "input", "accessibility"],
114
- description: "Content type filter: 'audio' (transcriptions — use for meetings/calls/conversations), 'accessibility' (accessibility tree text, preferred for screen content), 'ocr' (screen text via OCR, legacy fallback), 'input' (clicks, keystrokes, clipboard, app switches), 'all'. Default: 'all'. For meeting/call queries, ALWAYS use 'audio'.",
84
+ enum: ["all", "ocr", "audio", "input", "accessibility", "memory"],
85
+ description: "Filter by content type. Default: 'all'.",
115
86
  default: "all",
116
87
  },
117
- limit: {
118
- type: "integer",
119
- description: "Max results. Default: 10",
120
- default: 10,
121
- },
122
- offset: {
123
- type: "integer",
124
- description: "Skip N results for pagination. Default: 0",
125
- default: 0,
126
- },
88
+ limit: { type: "integer", description: "Max results (default 10)", default: 10 },
89
+ offset: { type: "integer", description: "Pagination offset", default: 0 },
127
90
  start_time: {
128
91
  type: "string",
129
- format: "date-time",
130
- description: "Start time: ISO 8601 UTC (e.g., 2024-01-15T10:00:00Z) or relative (e.g., '16h ago', '2d ago', 'now')",
92
+ description: "ISO 8601 UTC or relative (e.g. '2h ago')",
131
93
  },
132
94
  end_time: {
133
95
  type: "string",
134
- format: "date-time",
135
- description: "End time: ISO 8601 UTC (e.g., 2024-01-15T18:00:00Z) or relative (e.g., 'now', '1h ago')",
136
- },
137
- app_name: {
138
- type: "string",
139
- description: "Filter by app (e.g., 'Google Chrome', 'Slack', 'zoom.us')",
140
- },
141
- window_name: {
142
- type: "string",
143
- description: "Filter by window title",
144
- },
145
- min_length: {
146
- type: "integer",
147
- description: "Minimum content length in characters",
148
- },
149
- max_length: {
150
- type: "integer",
151
- description: "Maximum content length in characters",
96
+ description: "ISO 8601 UTC or relative (e.g. 'now')",
152
97
  },
98
+ app_name: { type: "string", description: "Filter by app name" },
99
+ window_name: { type: "string", description: "Filter by window title" },
100
+ min_length: { type: "integer", description: "Min content length" },
101
+ max_length: { type: "integer", description: "Max content length" },
153
102
  include_frames: {
154
103
  type: "boolean",
155
- description: "Include base64 screenshots (OCR only). Default: false",
104
+ description: "Include base64 screenshots (OCR only)",
156
105
  default: false,
157
106
  },
158
- speaker_ids: {
159
- type: "string",
160
- description: "Comma-separated speaker IDs to filter audio results (e.g., '1,2,3')",
161
- },
162
- speaker_name: {
163
- type: "string",
164
- description: "Filter audio by speaker name (case-insensitive partial match)",
165
- },
107
+ speaker_ids: { type: "string", description: "Comma-separated speaker IDs" },
108
+ speaker_name: { type: "string", description: "Filter audio by speaker name" },
166
109
  max_content_length: {
167
110
  type: "integer",
168
- description: "Truncate each result's text/transcription to this many characters using middle-truncation (keeps first half + last half). Useful for limiting token usage with small-context models.",
111
+ description: "Truncate each result via middle-truncation",
169
112
  },
170
113
  },
171
114
  },
172
115
  },
173
116
  {
174
- name: "export-video",
175
- description: "Export a video of screen recordings for a specific time range. " +
176
- "Creates an MP4 video from the recorded frames between the start and end times.\n\n" +
177
- "IMPORTANT: Use ISO 8601 UTC timestamps (e.g., 2024-01-15T10:00:00Z) or relative times (e.g., '16h ago', 'now')\n\n" +
178
- "EXAMPLES:\n" +
179
- "- Last 30 minutes: Calculate timestamps from current time\n" +
180
- "- Specific meeting: Use the meeting's start and end times in UTC",
181
- annotations: {
182
- title: "Export Video",
183
- destructiveHint: true,
117
+ name: "list-meetings",
118
+ description: "List detected meetings (Zoom, Teams, Meet, etc.) with duration, app, and attendees.",
119
+ annotations: { title: "List Meetings", readOnlyHint: true },
120
+ inputSchema: {
121
+ type: "object",
122
+ properties: {
123
+ start_time: { type: "string", description: "ISO 8601 UTC or relative" },
124
+ end_time: { type: "string", description: "ISO 8601 UTC or relative" },
125
+ limit: { type: "integer", description: "Max results (default 20)", default: 20 },
126
+ offset: { type: "integer", description: "Pagination offset", default: 0 },
127
+ },
184
128
  },
129
+ },
130
+ {
131
+ name: "activity-summary",
132
+ description: "Lightweight activity overview (~200-500 tokens): app usage with active minutes, audio speakers, recent texts. " +
133
+ "Use for 'how long on X?', 'which apps?', 'what was I doing?' questions.",
134
+ annotations: { title: "Activity Summary", readOnlyHint: true },
185
135
  inputSchema: {
186
136
  type: "object",
187
137
  properties: {
188
- start_time: {
189
- type: "string",
190
- format: "date-time",
191
- description: "Start time: ISO 8601 UTC (e.g., '2024-01-15T10:00:00Z') or relative (e.g., '16h ago', 'now')",
192
- },
193
- end_time: {
194
- type: "string",
195
- format: "date-time",
196
- description: "End time: ISO 8601 UTC (e.g., '2024-01-15T10:30:00Z') or relative (e.g., 'now', '1h ago')",
197
- },
198
- fps: {
199
- type: "number",
200
- description: "Frames per second for the output video. Lower values (0.5-1.0) create smaller files, higher values (5-10) create smoother playback. Default: 1.0",
201
- default: 1.0,
202
- },
138
+ start_time: { type: "string", description: "ISO 8601 UTC or relative" },
139
+ end_time: { type: "string", description: "ISO 8601 UTC or relative" },
140
+ app_name: { type: "string", description: "Optional app name filter" },
203
141
  },
204
142
  required: ["start_time", "end_time"],
205
143
  },
206
144
  },
207
145
  {
208
- name: "list-meetings",
209
- description: "List detected meetings with duration, app, and attendees. " +
210
- "Returns meetings detected via app focus (Zoom, Meet, Teams) and audio. " +
211
- "Only available when screenpipe runs in smart transcription mode.",
212
- annotations: {
213
- title: "List Meetings",
214
- readOnlyHint: true,
215
- },
146
+ name: "search-elements",
147
+ description: "Search UI elements (buttons, links, text fields) from the accessibility tree. " +
148
+ "Lighter than search-content for targeted UI lookups.",
149
+ annotations: { title: "Search Elements", readOnlyHint: true },
216
150
  inputSchema: {
217
151
  type: "object",
218
152
  properties: {
219
- start_time: {
220
- type: "string",
221
- format: "date-time",
222
- description: "Start filter: ISO 8601 UTC (e.g., 2024-01-15T10:00:00Z) or relative (e.g., '16h ago', 'now')",
223
- },
224
- end_time: {
153
+ q: { type: "string", description: "Full-text search on element text" },
154
+ frame_id: { type: "integer", description: "Filter to specific frame" },
155
+ source: {
225
156
  type: "string",
226
- format: "date-time",
227
- description: "End filter: ISO 8601 UTC (e.g., 2024-01-15T18:00:00Z) or relative (e.g., 'now', '1h ago')",
228
- },
229
- limit: {
230
- type: "integer",
231
- description: "Max results. Default: 20",
232
- default: 20,
233
- },
234
- offset: {
235
- type: "integer",
236
- description: "Skip N results for pagination. Default: 0",
237
- default: 0,
238
- },
157
+ enum: ["accessibility", "ocr"],
158
+ description: "Element source filter",
159
+ },
160
+ role: { type: "string", description: "Element role (e.g. AXButton, AXLink)" },
161
+ start_time: { type: "string", description: "ISO 8601 UTC or relative" },
162
+ end_time: { type: "string", description: "ISO 8601 UTC or relative" },
163
+ app_name: { type: "string", description: "Filter by app name" },
164
+ limit: { type: "integer", description: "Max results (default 50)", default: 50 },
165
+ offset: { type: "integer", description: "Pagination offset", default: 0 },
239
166
  },
240
167
  },
241
168
  },
242
169
  {
243
- name: "activity-summary",
244
- description: "Get a lightweight compressed activity overview for a time range (~200-500 tokens). " +
245
- "Returns app usage (name, frame count, active minutes, first/last seen), recent accessibility texts, and audio speaker summary. " +
246
- "Minutes are based on active session time (consecutive frames with gaps < 5min count as active). " +
247
- "first_seen/last_seen show the wall-clock span per app.\n\n" +
248
- "USE THIS TOOL (not search-content or raw SQL) for:\n" +
249
- "- 'how long did I spend on X?' active_minutes per app\n" +
250
- "- 'which apps did I use today?' → app list sorted by active_minutes\n" +
251
- "- 'what was I doing?' → broad overview before drilling deeper\n" +
252
- "- Any time-spent or app-usage question\n\n" +
253
- "WARNING: Do NOT estimate time from raw frame counts or SQL queries — those are inaccurate. " +
254
- "This endpoint calculates actual active session time correctly.",
255
- annotations: {
256
- title: "Activity Summary",
257
- readOnlyHint: true,
170
+ name: "frame-context",
171
+ description: "Get accessibility text, parsed tree nodes, and URLs for a specific frame ID.",
172
+ annotations: { title: "Frame Context", readOnlyHint: true },
173
+ inputSchema: {
174
+ type: "object",
175
+ properties: {
176
+ frame_id: { type: "integer", description: "Frame ID from search results" },
177
+ },
178
+ required: ["frame_id"],
258
179
  },
180
+ },
181
+ {
182
+ name: "export-video",
183
+ description: "Export an MP4 video of screen recordings for a time range.",
184
+ annotations: { title: "Export Video", destructiveHint: true },
259
185
  inputSchema: {
260
186
  type: "object",
261
187
  properties: {
262
- start_time: {
263
- type: "string",
264
- format: "date-time",
265
- description: "Start of time range: ISO 8601 UTC (e.g., 2024-01-15T10:00:00Z) or relative (e.g., '16h ago', 'now')",
266
- },
267
- end_time: {
268
- type: "string",
269
- format: "date-time",
270
- description: "End of time range: ISO 8601 UTC (e.g., 2024-01-15T18:00:00Z) or relative (e.g., 'now', '1h ago')",
271
- },
272
- app_name: {
273
- type: "string",
274
- description: "Optional app name filter (e.g., 'Google Chrome', 'VS Code')",
275
- },
188
+ start_time: { type: "string", description: "ISO 8601 UTC or relative" },
189
+ end_time: { type: "string", description: "ISO 8601 UTC or relative" },
190
+ fps: { type: "number", description: "Output FPS (default 1.0)", default: 1.0 },
276
191
  },
277
192
  required: ["start_time", "end_time"],
278
193
  },
279
194
  },
280
195
  {
281
- name: "search-elements",
282
- description: "Search structured UI elements (accessibility tree nodes and OCR text blocks). " +
283
- "Returns ~100-500 bytes per element — much lighter than search-content for targeted lookups. " +
284
- "Each element has: id, frame_id, source (accessibility/ocr), role (AXButton, AXStaticText, AXLink, etc.), text, bounds, depth.\n\n" +
285
- "Use for: finding specific buttons, links, text fields, or UI components. " +
286
- "Prefer this over search-content when you need structural UI detail rather than full screen text.",
287
- annotations: {
288
- title: "Search Elements",
289
- readOnlyHint: true,
290
- },
196
+ name: "update-memory",
197
+ description: "Create, update, or delete a persistent memory (facts, preferences, decisions). " +
198
+ "Retrieve memories via search-content with content_type='memory'.",
199
+ annotations: { title: "Update Memory", destructiveHint: false },
291
200
  inputSchema: {
292
201
  type: "object",
293
202
  properties: {
294
- q: {
295
- type: "string",
296
- description: "Full-text search query across element text. Optional.",
297
- },
298
- frame_id: {
299
- type: "integer",
300
- description: "Filter to elements from a specific frame",
301
- },
302
- source: {
303
- type: "string",
304
- enum: ["accessibility", "ocr"],
305
- description: "Filter by element source: 'accessibility' (structured tree) or 'ocr' (text blocks)",
306
- },
307
- role: {
308
- type: "string",
309
- description: "Filter by element role (e.g., 'AXButton', 'AXStaticText', 'AXLink', 'AXTextField', 'line')",
310
- },
311
- start_time: {
312
- type: "string",
313
- format: "date-time",
314
- description: "Start time: ISO 8601 UTC or relative (e.g., '16h ago', 'now')",
315
- },
316
- end_time: {
317
- type: "string",
318
- format: "date-time",
319
- description: "End time: ISO 8601 UTC or relative (e.g., 'now', '1h ago')",
320
- },
321
- app_name: {
322
- type: "string",
323
- description: "Filter by app name",
324
- },
325
- limit: {
326
- type: "integer",
327
- description: "Max results. Default: 50",
328
- default: 50,
329
- },
330
- offset: {
331
- type: "integer",
332
- description: "Skip N results for pagination. Default: 0",
333
- default: 0,
334
- },
203
+ id: { type: "integer", description: "Memory ID (omit to create new)" },
204
+ content: { type: "string", description: "Memory text" },
205
+ tags: { type: "array", items: { type: "string" }, description: "Categorization tags" },
206
+ importance: { type: "number", description: "0.0-1.0 (default 0.5)" },
207
+ source_context: { type: "object", description: "Optional source data links" },
208
+ delete: { type: "boolean", description: "Delete the memory identified by id" },
335
209
  },
336
210
  },
337
211
  },
338
212
  {
339
- name: "frame-context",
340
- description: "Get accessibility text, parsed tree nodes, and extracted URLs for a specific frame. " +
341
- "Falls back to OCR data for legacy frames without accessibility data. " +
342
- "Use after finding a frame_id from search-content or search-elements to get full structural detail and URLs.",
343
- annotations: {
344
- title: "Frame Context",
345
- readOnlyHint: true,
346
- },
213
+ name: "send-notification",
214
+ description: "Send a notification to the screenpipe desktop UI with optional action buttons. " +
215
+ "Actions can re-run pipes with context, call API endpoints, or open deep links.",
216
+ annotations: { title: "Send Notification", destructiveHint: false },
347
217
  inputSchema: {
348
218
  type: "object",
349
219
  properties: {
350
- frame_id: {
351
- type: "integer",
352
- description: "The frame ID to get context for (from search results)",
220
+ title: { type: "string", description: "Notification title" },
221
+ body: { type: "string", description: "Notification body (markdown supported)" },
222
+ pipe_name: { type: "string", description: "Name of the pipe sending this notification" },
223
+ timeout_secs: { type: "integer", description: "Auto-dismiss seconds (default 20)", default: 20 },
224
+ actions: {
225
+ type: "array",
226
+ description: "Up to 5 action buttons",
227
+ items: {
228
+ type: "object",
229
+ properties: {
230
+ id: { type: "string", description: "Unique action ID" },
231
+ label: { type: "string", description: "Button label" },
232
+ type: { type: "string", enum: ["pipe", "api", "deeplink", "dismiss"], description: "Action type" },
233
+ pipe: { type: "string", description: "Pipe name to run (type=pipe)" },
234
+ context: { type: "object", description: "Context passed to pipe (type=pipe)" },
235
+ url: { type: "string", description: "URL for api/deeplink actions" },
236
+ },
237
+ required: ["id", "label", "type"],
238
+ },
353
239
  },
354
240
  },
355
- required: ["frame_id"],
241
+ required: ["title", "pipe_name"],
356
242
  },
357
243
  },
358
244
  ];
359
- // List tools handler
360
245
  server.setRequestHandler(types_js_1.ListToolsRequestSchema, async () => {
361
- return { tools: BASE_TOOLS };
246
+ return { tools: TOOLS };
362
247
  });
363
- // MCP Resources - provide dynamic context data
248
+ // ---------------------------------------------------------------------------
249
+ // Resources — dynamic context only (no duplicated reference docs)
250
+ // ---------------------------------------------------------------------------
364
251
  const RESOURCES = [
365
252
  {
366
253
  uri: "screenpipe://context",
367
254
  name: "Current Context",
368
- description: "Current date/time and pre-computed timestamps for common time ranges",
255
+ description: "Current date/time, timezone, and pre-computed timestamps for common time ranges",
369
256
  mimeType: "application/json",
370
257
  },
371
- {
372
- uri: "screenpipe://guide",
373
- name: "Usage Guide",
374
- description: "How to use screenpipe search effectively",
375
- mimeType: "text/markdown",
376
- },
377
- {
378
- uri: "ui://search",
379
- name: "Search Dashboard",
380
- description: "Interactive search UI for exploring screen recordings and audio transcriptions",
381
- mimeType: "text/html",
382
- },
383
258
  ];
384
- // List resources handler
385
259
  server.setRequestHandler(types_js_1.ListResourcesRequestSchema, async () => {
386
260
  return { resources: RESOURCES };
387
261
  });
388
- // Read resource handler
389
262
  server.setRequestHandler(types_js_1.ReadResourceRequestSchema, async (request) => {
390
263
  const { uri } = request.params;
391
- const dateInfo = getCurrentDateInfo();
392
- const now = Date.now();
393
- switch (uri) {
394
- case "screenpipe://context":
395
- return {
396
- contents: [
397
- {
398
- uri,
399
- mimeType: "application/json",
400
- text: JSON.stringify({
401
- current_time: dateInfo.isoDate,
402
- current_date_local: dateInfo.localDate,
403
- timezone: Intl.DateTimeFormat().resolvedOptions().timeZone,
404
- timestamps: {
405
- now: dateInfo.isoDate,
406
- one_hour_ago: new Date(now - 60 * 60 * 1000).toISOString(),
407
- three_hours_ago: new Date(now - 3 * 60 * 60 * 1000).toISOString(),
408
- today_start: `${new Date().toISOString().split("T")[0]}T00:00:00Z`,
409
- yesterday_start: `${new Date(now - 24 * 60 * 60 * 1000).toISOString().split("T")[0]}T00:00:00Z`,
410
- one_week_ago: new Date(now - 7 * 24 * 60 * 60 * 1000).toISOString(),
411
- },
412
- common_apps: ["Google Chrome", "Safari", "Slack", "zoom.us", "Microsoft Teams", "Code", "Terminal"],
413
- }, null, 2),
414
- },
415
- ],
416
- };
417
- case "screenpipe://guide":
418
- return {
419
- contents: [
420
- {
421
- uri,
422
- mimeType: "text/markdown",
423
- text: `# Screenpipe Search Guide
424
-
425
- ## Data Modalities
426
-
427
- Screenpipe captures four types of data:
428
- 1. **Accessibility** - Screen text via accessibility APIs (primary, preferred for screen content)
429
- 2. **OCR** - Screen text from screenshots (legacy fallback for apps without accessibility support)
430
- 3. **Audio** - Transcribed speech from microphone/system audio
431
- 4. **Input** - Keyboard input, mouse clicks, app switches, clipboard (macOS)
432
-
433
- ## Quick Start
434
- - **Get recent activity**: Call search-content with no parameters
435
- - **Search screen text**: \`{"q": "search term", "content_type": "all"}\`
436
- - **Get keyboard input**: \`{"content_type": "input"}\`
437
- - **Get audio only**: \`{"content_type": "audio"}\`
438
-
439
- ## Common User Requests → Correct Tool Choice
440
- | User says | Use this tool | Key params |
441
- |-----------|--------------|------------|
442
- | "summarize my meeting/call" | search-content | content_type:"audio", NO q param, start_time |
443
- | "what did they/I say about X" | search-content | content_type:"audio", NO q param (scan results manually) |
444
- | "how long on X" / "which apps" / "time spent" | activity-summary | start_time, end_time |
445
- | "what was I doing" | activity-summary | start_time, end_time (then drill into search-content) |
446
- | "what was I reading/looking at" | search-content | content_type:"all", start_time |
447
-
448
- ## Behavior Rules
449
- - Act immediately on clear requests. NEVER ask "what time range?" or "which content type?" when the intent is obvious.
450
- - If search returns empty, silently retry with wider time range or fewer filters. Do NOT ask the user what to change.
451
- - For meetings: ALWAYS use content_type:"audio" and do NOT use the q param. Transcriptions are noisy — q filters too aggressively and misses relevant content.
452
-
453
- ## search-content
454
- | Parameter | Description | Default |
455
- |-----------|-------------|---------|
456
- | q | Search query | (none - returns all) |
457
- | content_type | all/ocr/audio/input/accessibility | all |
458
- | limit | Max results | 10 |
459
- | start_time | ISO 8601 UTC or relative (e.g. '16h ago') | (no filter) |
460
- | end_time | ISO 8601 UTC or relative (e.g. 'now') | (no filter) |
461
- | app_name | Filter by app | (no filter) |
462
- | include_frames | Include screenshots | false |
463
-
464
- ## Search Strategy (MANDATORY)
465
- 1. First search: ONLY use time params (start_time/end_time). No q, no app_name, no content_type. This gives ground truth of what's recorded.
466
- 2. Scan results to find correct app_name values and content patterns.
467
- 3. Only THEN narrow with filters using exact observed values. App names are case-sensitive and may differ from user input (e.g. "Discord" vs "Discord.exe").
468
- 4. The q param searches captured text (accessibility/OCR), NOT app names — an app can be visible without its name in the captured text.
469
- 5. NEVER report "no data found" after one filtered search. Verify with unfiltered time-only search first.
470
-
471
- ## Progressive Disclosure (Token-Efficient Strategy)
472
- 1. **Start with activity-summary** (~200 tokens) for broad questions ("what was I doing?")
473
- 2. **Narrow with search-content** (~500-1000 tokens) using filters from step 1
474
- 3. **Drill into search-elements** (~200 tokens each) for structural UI detail (buttons, links)
475
- 4. **Fetch frame-context** for URLs and accessibility tree of specific frames
476
- 5. **Screenshots** (include_frames=true) only when text isn't enough
477
-
478
- ## Chat History
479
- Previous screenpipe chat conversations are stored as individual JSON files in ~/.screenpipe/chats/{conversation-id}.json
480
- Each file contains: id, title, messages[], createdAt, updatedAt. You can read these files to reference or search previous conversations.
481
-
482
- ## Speaker Management
483
- screenpipe auto-identifies speakers in audio. API endpoints for managing them:
484
- - \`GET /speakers/unnamed?limit=10\` — list unnamed speakers
485
- - \`GET /speakers/search?name=John\` — search by name
486
- - \`POST /speakers/update\` with \`{"id": 5, "name": "John"}\` — rename a speaker
487
- - \`POST /speakers/merge\` with \`{"speaker_to_keep_id": 1, "speaker_to_merge_id": 2}\` — merge duplicates
488
- - \`GET /speakers/similar?speaker_id=5\` — find similar speakers for merging
489
- - \`POST /speakers/reassign\` — reassign audio chunk to different speaker
490
-
491
- ## Tips
492
- 1. Read screenpipe://context first to get current timestamps
493
- 2. Use activity-summary before search-content for broad overview questions
494
- 3. Use search-elements instead of search-content for targeted UI lookups (10x lighter)
495
- 4. Use content_type=input for "what did I type?" queries
496
- 5. Use content_type=accessibility for accessibility tree text
497
- 6. For large aggregations (e.g. "what apps did I use today?"), paginate with offset or suggest the user run raw SQL via \`curl -X POST http://localhost:3030/raw_sql\` for efficient GROUP BY queries
498
-
499
- ## Deep Links (Clickable References)
500
- When showing search results to users, create clickable links so they can jump to that exact moment.
501
-
502
- **ALWAYS prefer frame-based links for OCR results** (frame IDs are exact DB keys):
503
- - \`[10:30 AM — Chrome](screenpipe://frame/12345)\` — use \`content.frame_id\` from OCR results
504
-
505
- **Use timestamp links only for audio results** (which have no frame_id):
506
- - \`[meeting at 3pm](screenpipe://timeline?timestamp=2024-01-15T15:00:00Z)\` — use exact \`timestamp\` from audio results
507
-
508
- **NEVER fabricate frame IDs or timestamps.** Only use values copied from actual search results.`,
509
- },
510
- ],
511
- };
512
- case "ui://search": {
513
- // MCP App UI - Interactive search dashboard
514
- const uiHtmlPath = path.join(__dirname, "..", "ui", "search.html");
515
- let htmlContent;
516
- try {
517
- htmlContent = fs.readFileSync(uiHtmlPath, "utf-8");
518
- }
519
- catch {
520
- // Fallback: serve embedded minimal UI if file not found
521
- htmlContent = `<!DOCTYPE html>
522
- <html>
523
- <head>
524
- <style>
525
- body { font-family: system-ui; background: #0a0a0a; color: #fff; padding: 20px; }
526
- input { width: 100%; padding: 10px; margin-bottom: 10px; background: #1a1a1a; border: 1px solid #333; color: #fff; border-radius: 6px; }
527
- button { padding: 10px 20px; background: #fff; color: #000; border: none; border-radius: 6px; cursor: pointer; }
528
- #results { margin-top: 20px; }
529
- .result { background: #1a1a1a; padding: 12px; margin: 8px 0; border-radius: 8px; border: 1px solid #333; }
530
- </style>
531
- </head>
532
- <body>
533
- <h2>screenpipe search</h2>
534
- <input id="q" placeholder="search..." onkeydown="if(event.key==='Enter')search()"/>
535
- <button onclick="search()">search</button>
536
- <div id="results"></div>
537
- <script>
538
- function search() {
539
- window.parent.postMessage({jsonrpc:'2.0',method:'tools/call',params:{name:'search-content',arguments:{q:document.getElementById('q').value,limit:20}}},'*');
540
- }
541
- window.addEventListener('message',e=>{
542
- if(e.data?.result||e.data?.method==='tool/result'){
543
- const r=e.data.result||e.data.params?.result;
544
- const d=r?.data||r||[];
545
- document.getElementById('results').innerHTML=d.map(x=>'<div class="result"><b>'+((x.type||'')+'</b> '+(x.content?.app_name||'')+': '+(x.content?.text||x.content?.transcription||'').substring(0,200))+'</div>').join('');
546
- }
547
- });
548
- </script>
549
- </body>
550
- </html>`;
551
- }
552
- return {
553
- contents: [
554
- {
555
- uri,
556
- mimeType: "text/html",
557
- text: htmlContent,
558
- },
559
- ],
560
- };
561
- }
562
- default:
563
- throw new Error(`Unknown resource: ${uri}`);
564
- }
565
- });
566
- // MCP Prompts - static interaction templates
567
- const PROMPTS = [
568
- {
569
- name: "search-recent",
570
- description: "Search recent screen activity",
571
- arguments: [
572
- { name: "query", description: "Optional search term", required: false },
573
- { name: "hours", description: "Hours to look back (default: 1)", required: false },
574
- ],
575
- },
576
- {
577
- name: "find-in-app",
578
- description: "Find content from a specific application",
579
- arguments: [
580
- { name: "app", description: "App name (e.g., Chrome, Slack)", required: true },
581
- { name: "query", description: "Optional search term", required: false },
582
- ],
583
- },
584
- {
585
- name: "meeting-notes",
586
- description: "Get audio transcriptions from meetings",
587
- arguments: [
588
- { name: "hours", description: "Hours to look back (default: 3)", required: false },
589
- ],
590
- },
591
- ];
592
- // List prompts handler
593
- server.setRequestHandler(types_js_1.ListPromptsRequestSchema, async () => {
594
- return { prompts: PROMPTS };
595
- });
596
- // Get prompt handler
597
- server.setRequestHandler(types_js_1.GetPromptRequestSchema, async (request) => {
598
- const { name, arguments: promptArgs } = request.params;
599
- const dateInfo = getCurrentDateInfo();
600
- const now = Date.now();
601
- switch (name) {
602
- case "search-recent": {
603
- const query = promptArgs?.query || "";
604
- const hours = parseInt(promptArgs?.hours || "1", 10);
605
- const startTime = new Date(now - hours * 60 * 60 * 1000).toISOString();
606
- return {
607
- description: `Search recent activity (last ${hours} hour${hours > 1 ? "s" : ""})`,
608
- messages: [
609
- {
610
- role: "user",
611
- content: {
612
- type: "text",
613
- text: `Search screenpipe for recent activity.
614
-
615
- Current time: ${dateInfo.isoDate}
616
-
617
- Use search-content with:
618
- ${query ? `- q: "${query}"` : "- No query filter (get all content)"}
619
- - start_time: "${startTime}"
620
- - limit: 50`,
621
- },
622
- },
623
- ],
624
- };
625
- }
626
- case "find-in-app": {
627
- const app = promptArgs?.app || "Google Chrome";
628
- const query = promptArgs?.query || "";
629
- return {
630
- description: `Find content from ${app}`,
631
- messages: [
632
- {
633
- role: "user",
634
- content: {
635
- type: "text",
636
- text: `Search screenpipe for content from ${app}.
637
-
638
- Current time: ${dateInfo.isoDate}
639
-
640
- Use search-content with:
641
- - app_name: "${app}"
642
- ${query ? `- q: "${query}"` : "- No query filter"}
643
- - content_type: "all"
644
- - limit: 50`,
645
- },
646
- },
647
- ],
648
- };
649
- }
650
- case "meeting-notes": {
651
- const hours = parseInt(promptArgs?.hours || "3", 10);
652
- const startTime = new Date(now - hours * 60 * 60 * 1000).toISOString();
653
- return {
654
- description: `Get meeting transcriptions (last ${hours} hours)`,
655
- messages: [
656
- {
657
- role: "user",
658
- content: {
659
- type: "text",
660
- text: `Get audio transcriptions from recent meetings.
661
-
662
- Current time: ${dateInfo.isoDate}
663
-
664
- Use search-content with:
665
- - content_type: "audio"
666
- - start_time: "${startTime}"
667
- - limit: 100
668
-
669
- Common meeting apps: zoom.us, Microsoft Teams, Google Meet, Slack`,
264
+ if (uri === "screenpipe://context") {
265
+ const now = new Date();
266
+ const ms = now.getTime();
267
+ return {
268
+ contents: [
269
+ {
270
+ uri,
271
+ mimeType: "application/json",
272
+ text: JSON.stringify({
273
+ current_time: now.toISOString(),
274
+ current_date_local: now.toLocaleDateString("en-US", {
275
+ weekday: "long",
276
+ year: "numeric",
277
+ month: "long",
278
+ day: "numeric",
279
+ }),
280
+ timezone: Intl.DateTimeFormat().resolvedOptions().timeZone,
281
+ timestamps: {
282
+ now: now.toISOString(),
283
+ one_hour_ago: new Date(ms - 60 * 60 * 1000).toISOString(),
284
+ three_hours_ago: new Date(ms - 3 * 60 * 60 * 1000).toISOString(),
285
+ today_start: `${now.toISOString().split("T")[0]}T00:00:00Z`,
286
+ yesterday_start: `${new Date(ms - 24 * 60 * 60 * 1000).toISOString().split("T")[0]}T00:00:00Z`,
287
+ one_week_ago: new Date(ms - 7 * 24 * 60 * 60 * 1000).toISOString(),
670
288
  },
671
- },
672
- ],
673
- };
674
- }
675
- default:
676
- throw new Error(`Unknown prompt: ${name}`);
289
+ }, null, 2),
290
+ },
291
+ ],
292
+ };
677
293
  }
294
+ throw new Error(`Unknown resource: ${uri}`);
678
295
  });
679
- // Helper function to make HTTP requests
296
+ // ---------------------------------------------------------------------------
297
+ // Helper
298
+ // ---------------------------------------------------------------------------
680
299
  async function fetchAPI(endpoint, options = {}) {
681
300
  const url = `${SCREENPIPE_API}${endpoint}`;
682
301
  return fetch(url, {
@@ -687,7 +306,9 @@ async function fetchAPI(endpoint, options = {}) {
687
306
  },
688
307
  });
689
308
  }
690
- // Call tool handler
309
+ // ---------------------------------------------------------------------------
310
+ // Tool handlers
311
+ // ---------------------------------------------------------------------------
691
312
  server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
692
313
  const { name, arguments: args } = request.params;
693
314
  if (!args) {
@@ -704,9 +325,8 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
704
325
  }
705
326
  }
706
327
  const response = await fetchAPI(`/search?${params.toString()}`);
707
- if (!response.ok) {
328
+ if (!response.ok)
708
329
  throw new Error(`HTTP error: ${response.status}`);
709
- }
710
330
  const data = await response.json();
711
331
  const results = data.data || [];
712
332
  const pagination = data.pagination || {};
@@ -715,12 +335,11 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
715
335
  content: [
716
336
  {
717
337
  type: "text",
718
- text: "No results found. Try: broader search terms, different content_type, or wider time range.",
338
+ text: "No results found. Try: broader terms, different content_type, or wider time range.",
719
339
  },
720
340
  ],
721
341
  };
722
342
  }
723
- // Build content array with text and optional images
724
343
  const contentItems = [];
725
344
  const formattedResults = [];
726
345
  const images = [];
@@ -753,42 +372,173 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
753
372
  `${content.timestamp || ""}\n` +
754
373
  `${content.text || ""}`);
755
374
  }
375
+ else if (result.type === "Memory") {
376
+ const tagsStr = content.tags?.length ? ` [${content.tags.join(", ")}]` : "";
377
+ const importance = content.importance != null ? ` (importance: ${content.importance})` : "";
378
+ formattedResults.push(`[Memory #${content.id}]${tagsStr}${importance}\n` +
379
+ `${content.created_at || ""}\n` +
380
+ `${content.content || ""}`);
381
+ }
756
382
  }
757
- // Header with pagination info
758
383
  const header = `Results: ${results.length}/${pagination.total || "?"}` +
759
- (pagination.total > results.length ? ` (use offset=${(pagination.offset || 0) + results.length} for more)` : "");
384
+ (pagination.total > results.length
385
+ ? ` (use offset=${(pagination.offset || 0) + results.length} for more)`
386
+ : "");
760
387
  contentItems.push({
761
388
  type: "text",
762
389
  text: header + "\n\n" + formattedResults.join("\n---\n"),
763
390
  });
764
- // Add images if requested
765
391
  for (const img of images) {
766
392
  contentItems.push({ type: "text", text: `\n📷 ${img.context}` });
767
393
  contentItems.push({ type: "image", data: img.data, mimeType: "image/png" });
768
394
  }
769
395
  return { content: contentItems };
770
396
  }
771
- case "export-video": {
772
- const startTime = args.start_time;
773
- const endTime = args.end_time;
774
- const fps = args.fps || 1.0;
775
- // Validate time inputs
776
- if (!startTime || !endTime) {
397
+ case "list-meetings": {
398
+ const params = new URLSearchParams();
399
+ for (const [key, value] of Object.entries(args)) {
400
+ if (value !== null && value !== undefined) {
401
+ params.append(key, String(value));
402
+ }
403
+ }
404
+ const response = await fetchAPI(`/meetings?${params.toString()}`);
405
+ if (!response.ok)
406
+ throw new Error(`HTTP error: ${response.status}`);
407
+ const meetings = await response.json();
408
+ if (!Array.isArray(meetings) || meetings.length === 0) {
409
+ return {
410
+ content: [{ type: "text", text: "No meetings found in the given time range." }],
411
+ };
412
+ }
413
+ const formatted = meetings.map((m) => {
414
+ const start = m.meeting_start;
415
+ const end = m.meeting_end || "ongoing";
416
+ const app = m.meeting_app;
417
+ const title = m.title ? ` — ${m.title}` : "";
418
+ const attendees = m.attendees ? `\nAttendees: ${m.attendees}` : "";
419
+ return `[${m.detection_source}] ${app}${title}\n ${start} → ${end}${attendees}`;
420
+ });
421
+ return {
422
+ content: [
423
+ { type: "text", text: `Meetings: ${meetings.length}\n\n${formatted.join("\n---\n")}` },
424
+ ],
425
+ };
426
+ }
427
+ case "activity-summary": {
428
+ const params = new URLSearchParams();
429
+ for (const [key, value] of Object.entries(args)) {
430
+ if (value !== null && value !== undefined) {
431
+ params.append(key, String(value));
432
+ }
433
+ }
434
+ const response = await fetchAPI(`/activity-summary?${params.toString()}`);
435
+ if (!response.ok)
436
+ throw new Error(`HTTP error: ${response.status}`);
437
+ const data = await response.json();
438
+ const appsLines = (data.apps || []).map((a) => {
439
+ const timeSpan = a.first_seen && a.last_seen
440
+ ? `, ${a.first_seen.slice(11, 16)}–${a.last_seen.slice(11, 16)} UTC`
441
+ : "";
442
+ return ` ${a.name}: ${a.minutes} min (${a.frame_count} frames${timeSpan})`;
443
+ });
444
+ const speakerLines = (data.audio_summary?.speakers || []).map((s) => ` ${s.name}: ${s.segment_count} segments`);
445
+ const textLines = (data.recent_texts || []).map((t) => ` [${t.app_name}] ${t.text}`);
446
+ const summary = [
447
+ `Activity Summary (${data.time_range?.start} → ${data.time_range?.end})`,
448
+ `Total frames: ${data.total_frames}`,
449
+ "",
450
+ "Apps:",
451
+ ...(appsLines.length ? appsLines : [" (none)"]),
452
+ "",
453
+ `Audio: ${data.audio_summary?.segment_count || 0} segments`,
454
+ ...(speakerLines.length ? speakerLines : []),
455
+ "",
456
+ "Recent texts:",
457
+ ...(textLines.length ? textLines.slice(0, 10) : [" (none)"]),
458
+ ].join("\n");
459
+ return { content: [{ type: "text", text: summary }] };
460
+ }
461
+ case "search-elements": {
462
+ const params = new URLSearchParams();
463
+ for (const [key, value] of Object.entries(args)) {
464
+ if (value !== null && value !== undefined) {
465
+ params.append(key, String(value));
466
+ }
467
+ }
468
+ const response = await fetchAPI(`/elements?${params.toString()}`);
469
+ if (!response.ok)
470
+ throw new Error(`HTTP error: ${response.status}`);
471
+ const data = await response.json();
472
+ const elements = data.data || [];
473
+ const pagination = data.pagination || {};
474
+ if (elements.length === 0) {
777
475
  return {
778
476
  content: [
779
477
  {
780
478
  type: "text",
781
- text: "Error: Both start_time and end_time are required in ISO 8601 format (e.g., '2024-01-15T10:00:00Z')",
479
+ text: "No elements found. Try: broader search, different role/source, or wider time range.",
782
480
  },
783
481
  ],
784
482
  };
785
483
  }
786
- // Step 1: Query the search API to get frame IDs for the time range
484
+ const formatted = elements.map((e) => {
485
+ const boundsStr = e.bounds
486
+ ? ` [${e.bounds.left.toFixed(2)},${e.bounds.top.toFixed(2)} ${e.bounds.width.toFixed(2)}x${e.bounds.height.toFixed(2)}]`
487
+ : "";
488
+ return `[${e.source}] ${e.role} (frame:${e.frame_id}, depth:${e.depth})${boundsStr}\n ${e.text || "(no text)"}`;
489
+ });
490
+ const header = `Elements: ${elements.length}/${pagination.total || "?"}` +
491
+ (pagination.total > elements.length
492
+ ? ` (use offset=${(pagination.offset || 0) + elements.length} for more)`
493
+ : "");
494
+ return {
495
+ content: [{ type: "text", text: header + "\n\n" + formatted.join("\n---\n") }],
496
+ };
497
+ }
498
+ case "frame-context": {
499
+ const frameId = args.frame_id;
500
+ if (!frameId) {
501
+ return { content: [{ type: "text", text: "Error: frame_id is required" }] };
502
+ }
503
+ const response = await fetchAPI(`/frames/${frameId}/context`);
504
+ if (!response.ok)
505
+ throw new Error(`HTTP error: ${response.status}`);
506
+ const data = await response.json();
507
+ const lines = [`Frame ${data.frame_id} (source: ${data.text_source})`];
508
+ if (data.urls?.length) {
509
+ lines.push("", "URLs:", ...data.urls.map((u) => ` ${u}`));
510
+ }
511
+ if (data.nodes?.length) {
512
+ lines.push("", `Nodes: ${data.nodes.length}`);
513
+ for (const node of data.nodes.slice(0, 50)) {
514
+ const indent = " ".repeat(Math.min(node.depth, 5));
515
+ lines.push(`${indent}[${node.role}] ${node.text}`);
516
+ }
517
+ if (data.nodes.length > 50) {
518
+ lines.push(` ... and ${data.nodes.length - 50} more nodes`);
519
+ }
520
+ }
521
+ if (data.text) {
522
+ const truncated = data.text.length > 2000 ? data.text.substring(0, 2000) + "..." : data.text;
523
+ lines.push("", "Full text:", truncated);
524
+ }
525
+ return { content: [{ type: "text", text: lines.join("\n") }] };
526
+ }
527
+ case "export-video": {
528
+ const startTime = args.start_time;
529
+ const endTime = args.end_time;
530
+ const fps = args.fps || 1.0;
531
+ if (!startTime || !endTime) {
532
+ return {
533
+ content: [{ type: "text", text: "Error: start_time and end_time are required" }],
534
+ };
535
+ }
536
+ // Get frame IDs for the time range
787
537
  const searchParams = new URLSearchParams({
788
538
  content_type: "ocr",
789
539
  start_time: startTime,
790
540
  end_time: endTime,
791
- limit: "10000", // Get all frames in range
541
+ limit: "10000",
792
542
  });
793
543
  const searchResponse = await fetchAPI(`/search?${searchParams.toString()}`);
794
544
  if (!searchResponse.ok) {
@@ -801,12 +551,11 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
801
551
  content: [
802
552
  {
803
553
  type: "text",
804
- text: `No screen recordings found between ${startTime} and ${endTime}. Make sure screenpipe was recording during this time period.`,
554
+ text: `No screen recordings found between ${startTime} and ${endTime}.`,
805
555
  },
806
556
  ],
807
557
  };
808
558
  }
809
- // Extract unique frame IDs from OCR results
810
559
  const frameIds = [];
811
560
  const seenIds = new Set();
812
561
  for (const result of results) {
@@ -820,18 +569,10 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
820
569
  }
821
570
  if (frameIds.length === 0) {
822
571
  return {
823
- content: [
824
- {
825
- type: "text",
826
- text: `Found ${results.length} results but no valid frame IDs. The recordings may be audio-only.`,
827
- },
828
- ],
572
+ content: [{ type: "text", text: "No valid frame IDs found (audio-only?)." }],
829
573
  };
830
574
  }
831
- // Sort frame IDs
832
575
  frameIds.sort((a, b) => a - b);
833
- // Step 2: Connect to WebSocket and export video
834
- // Send frame_ids in message body to avoid URL length limits
835
576
  const wsUrl = `ws://localhost:${port}/frames/export?fps=${fps}`;
836
577
  const exportResult = await new Promise((resolve) => {
837
578
  const ws = new ws_1.WebSocket(wsUrl);
@@ -842,9 +583,8 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
842
583
  ws.close();
843
584
  resolve({ success: false, error: "Export timed out after 5 minutes" });
844
585
  }
845
- }, 5 * 60 * 1000); // 5 minute timeout
586
+ }, 5 * 60 * 1000);
846
587
  ws.on("open", () => {
847
- // Send frame_ids in message body to avoid URL length limits
848
588
  ws.send(JSON.stringify({ frame_ids: frameIds }));
849
589
  });
850
590
  ws.on("error", (error) => {
@@ -865,7 +605,6 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
865
605
  try {
866
606
  const message = JSON.parse(data.toString());
867
607
  if (message.status === "completed" && message.video_data) {
868
- // Save video to temp file
869
608
  const tempDir = os.tmpdir();
870
609
  const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
871
610
  const filename = `screenpipe_export_${timestamp}.mp4`;
@@ -874,11 +613,7 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
874
613
  resolved = true;
875
614
  clearTimeout(timeout);
876
615
  ws.close();
877
- resolve({
878
- success: true,
879
- filePath,
880
- frameCount: frameIds.length,
881
- });
616
+ resolve({ success: true, filePath, frameCount: frameIds.length });
882
617
  }
883
618
  else if (message.status === "error") {
884
619
  resolved = true;
@@ -886,9 +621,8 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
886
621
  ws.close();
887
622
  resolve({ success: false, error: message.error || "Export failed" });
888
623
  }
889
- // Ignore "extracting" and "encoding" status updates
890
624
  }
891
- catch (parseError) {
625
+ catch {
892
626
  // Ignore parse errors for progress messages
893
627
  }
894
628
  });
@@ -898,176 +632,94 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
898
632
  content: [
899
633
  {
900
634
  type: "text",
901
- text: `Successfully exported video!\n\n` +
902
- `File: ${exportResult.filePath}\n` +
903
- `Frames: ${exportResult.frameCount}\n` +
904
- `Time range: ${startTime} to ${endTime}\n` +
905
- `FPS: ${fps}`,
635
+ text: `Video exported: ${exportResult.filePath}\n` +
636
+ `Frames: ${exportResult.frameCount} | ${startTime} → ${endTime} | ${fps} fps`,
906
637
  },
907
638
  ],
908
639
  };
909
640
  }
910
641
  else {
911
642
  return {
912
- content: [
913
- {
914
- type: "text",
915
- text: `Failed to export video: ${exportResult.error}`,
916
- },
917
- ],
643
+ content: [{ type: "text", text: `Export failed: ${exportResult.error}` }],
918
644
  };
919
645
  }
920
646
  }
921
- case "list-meetings": {
922
- const params = new URLSearchParams();
923
- for (const [key, value] of Object.entries(args)) {
924
- if (value !== null && value !== undefined) {
925
- params.append(key, String(value));
926
- }
647
+ case "update-memory": {
648
+ if (args.delete && args.id) {
649
+ const response = await fetchAPI(`/memories/${args.id}`, { method: "DELETE" });
650
+ if (!response.ok)
651
+ throw new Error(`HTTP error: ${response.status}`);
652
+ return { content: [{ type: "text", text: `Memory ${args.id} deleted.` }] };
927
653
  }
928
- const response = await fetchAPI(`/meetings?${params.toString()}`);
929
- if (!response.ok) {
930
- throw new Error(`HTTP error: ${response.status}`);
654
+ if (args.id) {
655
+ const body = {};
656
+ if (args.content !== undefined)
657
+ body.content = args.content;
658
+ if (args.tags !== undefined)
659
+ body.tags = args.tags;
660
+ if (args.importance !== undefined)
661
+ body.importance = args.importance;
662
+ if (args.source_context !== undefined)
663
+ body.source_context = args.source_context;
664
+ const response = await fetchAPI(`/memories/${args.id}`, {
665
+ method: "PUT",
666
+ body: JSON.stringify(body),
667
+ });
668
+ if (!response.ok)
669
+ throw new Error(`HTTP error: ${response.status}`);
670
+ const memory = await response.json();
671
+ return {
672
+ content: [{ type: "text", text: `Memory ${memory.id} updated: "${memory.content}"` }],
673
+ };
931
674
  }
932
- const meetings = await response.json();
933
- if (!Array.isArray(meetings) || meetings.length === 0) {
675
+ if (!args.content) {
934
676
  return {
935
- content: [
936
- {
937
- type: "text",
938
- text: "No meetings found. Make sure screenpipe is running in smart transcription mode.",
939
- },
940
- ],
677
+ content: [{ type: "text", text: "Error: 'content' is required to create a memory" }],
941
678
  };
942
679
  }
943
- const formatted = meetings.map((m) => {
944
- const start = m.meeting_start;
945
- const end = m.meeting_end || "ongoing";
946
- const app = m.meeting_app;
947
- const title = m.title ? ` — ${m.title}` : "";
948
- const attendees = m.attendees ? `\nAttendees: ${m.attendees}` : "";
949
- return `[${m.detection_source}] ${app}${title}\n ${start} → ${end}${attendees}`;
680
+ const memoryBody = {
681
+ content: args.content,
682
+ source: "mcp",
683
+ tags: args.tags || [],
684
+ importance: args.importance ?? 0.5,
685
+ };
686
+ if (args.source_context)
687
+ memoryBody.source_context = args.source_context;
688
+ const memoryResponse = await fetchAPI("/memories", {
689
+ method: "POST",
690
+ body: JSON.stringify(memoryBody),
950
691
  });
692
+ if (!memoryResponse.ok)
693
+ throw new Error(`HTTP error: ${memoryResponse.status}`);
694
+ const newMemory = await memoryResponse.json();
951
695
  return {
952
696
  content: [
953
- {
954
- type: "text",
955
- text: `Meetings: ${meetings.length}\n\n${formatted.join("\n---\n")}`,
956
- },
697
+ { type: "text", text: `Memory created (id: ${newMemory.id}): "${newMemory.content}"` },
957
698
  ],
958
699
  };
959
700
  }
960
- case "activity-summary": {
961
- const params = new URLSearchParams();
962
- for (const [key, value] of Object.entries(args)) {
963
- if (value !== null && value !== undefined) {
964
- params.append(key, String(value));
965
- }
966
- }
967
- const response = await fetchAPI(`/activity-summary?${params.toString()}`);
968
- if (!response.ok) {
969
- throw new Error(`HTTP error: ${response.status}`);
970
- }
971
- const data = await response.json();
972
- // Format apps
973
- const appsLines = (data.apps || []).map((a) => {
974
- const timeSpan = a.first_seen && a.last_seen
975
- ? `, ${a.first_seen.slice(11, 16)}–${a.last_seen.slice(11, 16)} UTC`
976
- : "";
977
- return ` ${a.name}: ${a.minutes} min (${a.frame_count} frames${timeSpan})`;
978
- });
979
- // Format audio
980
- const speakerLines = (data.audio_summary?.speakers || []).map((s) => ` ${s.name}: ${s.segment_count} segments`);
981
- // Format recent texts
982
- const textLines = (data.recent_texts || []).map((t) => ` [${t.app_name}] ${t.text}`);
983
- const summary = [
984
- `Activity Summary (${data.time_range?.start} → ${data.time_range?.end})`,
985
- `Total frames: ${data.total_frames}`,
986
- "",
987
- "Apps:",
988
- ...(appsLines.length ? appsLines : [" (none)"]),
989
- "",
990
- `Audio: ${data.audio_summary?.segment_count || 0} segments`,
991
- ...(speakerLines.length ? speakerLines : []),
992
- "",
993
- "Recent texts:",
994
- ...(textLines.length ? textLines.slice(0, 10) : [" (none)"]),
995
- ].join("\n");
996
- return { content: [{ type: "text", text: summary }] };
997
- }
998
- case "search-elements": {
999
- const params = new URLSearchParams();
1000
- for (const [key, value] of Object.entries(args)) {
1001
- if (value !== null && value !== undefined) {
1002
- params.append(key, String(value));
1003
- }
1004
- }
1005
- const response = await fetchAPI(`/elements?${params.toString()}`);
1006
- if (!response.ok) {
1007
- throw new Error(`HTTP error: ${response.status}`);
1008
- }
1009
- const data = await response.json();
1010
- const elements = data.data || [];
1011
- const pagination = data.pagination || {};
1012
- if (elements.length === 0) {
1013
- return {
1014
- content: [
1015
- {
1016
- type: "text",
1017
- text: "No elements found. Try: broader search, different role/source, or wider time range.",
1018
- },
1019
- ],
1020
- };
1021
- }
1022
- const formatted = elements.map((e) => {
1023
- const boundsStr = e.bounds
1024
- ? ` [${e.bounds.left.toFixed(2)},${e.bounds.top.toFixed(2)} ${e.bounds.width.toFixed(2)}x${e.bounds.height.toFixed(2)}]`
1025
- : "";
1026
- return `[${e.source}] ${e.role} (frame:${e.frame_id}, depth:${e.depth})${boundsStr}\n ${e.text || "(no text)"}`;
701
+ case "send-notification": {
702
+ const notifBody = {
703
+ title: args.title,
704
+ pipe_name: args.pipe_name,
705
+ };
706
+ if (args.body)
707
+ notifBody.body = args.body;
708
+ if (args.timeout_secs)
709
+ notifBody.timeout_secs = args.timeout_secs;
710
+ if (args.actions)
711
+ notifBody.actions = args.actions;
712
+ const notifResponse = await fetchAPI("/notify", {
713
+ method: "POST",
714
+ body: JSON.stringify(notifBody),
1027
715
  });
1028
- const header = `Elements: ${elements.length}/${pagination.total || "?"}` +
1029
- (pagination.total > elements.length
1030
- ? ` (use offset=${(pagination.offset || 0) + elements.length} for more)`
1031
- : "");
716
+ if (!notifResponse.ok)
717
+ throw new Error(`HTTP error: ${notifResponse.status}`);
718
+ const notifResult = await notifResponse.json();
1032
719
  return {
1033
- content: [{ type: "text", text: header + "\n\n" + formatted.join("\n---\n") }],
720
+ content: [{ type: "text", text: `Notification sent (id: ${notifResult.id})` }],
1034
721
  };
1035
722
  }
1036
- case "frame-context": {
1037
- const frameId = args.frame_id;
1038
- if (!frameId) {
1039
- return {
1040
- content: [{ type: "text", text: "Error: frame_id is required" }],
1041
- };
1042
- }
1043
- const response = await fetchAPI(`/frames/${frameId}/context`);
1044
- if (!response.ok) {
1045
- throw new Error(`HTTP error: ${response.status}`);
1046
- }
1047
- const data = await response.json();
1048
- const lines = [
1049
- `Frame ${data.frame_id} (source: ${data.text_source})`,
1050
- ];
1051
- if (data.urls?.length) {
1052
- lines.push("", "URLs:", ...data.urls.map((u) => ` ${u}`));
1053
- }
1054
- if (data.nodes?.length) {
1055
- lines.push("", `Nodes: ${data.nodes.length}`);
1056
- for (const node of data.nodes.slice(0, 50)) {
1057
- const indent = " ".repeat(Math.min(node.depth, 5));
1058
- lines.push(`${indent}[${node.role}] ${node.text}`);
1059
- }
1060
- if (data.nodes.length > 50) {
1061
- lines.push(` ... and ${data.nodes.length - 50} more nodes`);
1062
- }
1063
- }
1064
- if (data.text) {
1065
- // Truncate to avoid massive outputs
1066
- const truncated = data.text.length > 2000 ? data.text.substring(0, 2000) + "..." : data.text;
1067
- lines.push("", "Full text:", truncated);
1068
- }
1069
- return { content: [{ type: "text", text: lines.join("\n") }] };
1070
- }
1071
723
  default:
1072
724
  throw new Error(`Unknown tool: ${name}`);
1073
725
  }
@@ -1075,12 +727,7 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1075
727
  catch (error) {
1076
728
  const errorMessage = error instanceof Error ? error.message : "Unknown error";
1077
729
  return {
1078
- content: [
1079
- {
1080
- type: "text",
1081
- text: `Error executing ${name}: ${errorMessage}`,
1082
- },
1083
- ],
730
+ content: [{ type: "text", text: `Error executing ${name}: ${errorMessage}` }],
1084
731
  };
1085
732
  }
1086
733
  });