screenpipe-mcp 0.8.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. package/bun.lock +429 -0
  2. package/dist/index.js +365 -718
  3. package/package.json +8 -8
  4. package/src/index.ts +407 -781
package/src/index.ts CHANGED
@@ -8,8 +8,6 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"
8
8
  import {
9
9
  CallToolRequestSchema,
10
10
  ListToolsRequestSchema,
11
- ListPromptsRequestSchema,
12
- GetPromptRequestSchema,
13
11
  ListResourcesRequestSchema,
14
12
  ReadResourceRequestSchema,
15
13
  Tool,
@@ -19,20 +17,6 @@ import * as fs from "fs";
19
17
  import * as path from "path";
20
18
  import * as os from "os";
21
19
 
22
- // Helper to get current date in ISO format
23
- function getCurrentDateInfo(): { isoDate: string; localDate: string } {
24
- const now = new Date();
25
- return {
26
- isoDate: now.toISOString(),
27
- localDate: now.toLocaleDateString("en-US", {
28
- weekday: "long",
29
- year: "numeric",
30
- month: "long",
31
- day: "numeric",
32
- }),
33
- };
34
- }
35
-
36
20
  // Parse command line arguments
37
21
  const args = process.argv.slice(2);
38
22
  let port = 3030;
@@ -48,644 +32,266 @@ const SCREENPIPE_API = `http://localhost:${port}`;
48
32
  const server = new Server(
49
33
  {
50
34
  name: "screenpipe",
51
- version: "0.8.5",
35
+ version: "0.9.0",
52
36
  },
53
37
  {
54
38
  capabilities: {
55
39
  tools: {},
56
- prompts: {},
57
40
  resources: {},
58
41
  },
59
42
  }
60
43
  );
61
44
 
62
- // Tool definitions
63
- const BASE_TOOLS: Tool[] = [
45
+ // ---------------------------------------------------------------------------
46
+ // Tools — minimal descriptions, no behavioral guidance (that belongs in resources)
47
+ // ---------------------------------------------------------------------------
48
+ const TOOLS: Tool[] = [
64
49
  {
65
50
  name: "search-content",
66
51
  description:
67
- "Search screenpipe's recorded content: screen text (accessibility APIs, with OCR fallback), audio transcriptions, and UI elements. " +
68
- "Returns timestamped results with app context. " +
69
- "Call with no parameters to get recent activity. " +
70
- "Use the 'screenpipe://context' resource for current time when building time-based queries.\n\n" +
71
- "WHEN TO USE WHICH content_type:\n" +
72
- "- For meetings/calls/conversations: content_type='audio', do NOT use q param (transcriptions are noisy, q filters too aggressively)\n" +
73
- "- For screen text/reading: content_type='all' or 'accessibility'\n" +
74
- "- For time spent/app usage questions: use activity-summary tool instead (this tool returns content, not time stats)\n\n" +
75
- "SEARCH STRATEGY: First search with ONLY time params (start_time/end_time) — no q, no app_name, no content_type. " +
76
- "This gives ground truth of what's recorded. Scan results to find correct app_name values, then narrow with filters using exact observed values. " +
77
- "App names are case-sensitive (e.g. 'Discord' vs 'Discord.exe'). " +
78
- "The q param searches captured text, NOT app names. NEVER report 'no data' after one filtered search — verify with unfiltered time-only search first.\n\n" +
79
- "DEEP LINKS: When referencing specific moments, create clickable links using IDs from search results:\n" +
80
- "- OCR results (PREFERRED): [10:30 AM — Chrome](screenpipe://frame/12345) — use content.frame_id from the result\n" +
81
- "- Audio results: [meeting at 3pm](screenpipe://timeline?timestamp=2024-01-15T15:00:00Z) — use exact timestamp from result\n" +
82
- "NEVER fabricate frame IDs or timestamps — only use values from actual search results.",
83
- annotations: {
84
- title: "Search Content",
85
- readOnlyHint: true,
86
- },
52
+ "Search screen text, audio transcriptions, input events, and memories. " +
53
+ "Returns timestamped results with app context. Call with no params for recent activity.",
54
+ annotations: { title: "Search Content", readOnlyHint: true },
87
55
  inputSchema: {
88
56
  type: "object",
89
57
  properties: {
90
58
  q: {
91
59
  type: "string",
92
- description: "Search query (full-text search on captured text). Optional - omit to return all content in time range. IMPORTANT: Do NOT use q for audio/meeting searches — transcriptions are noisy and q filters too aggressively. Only use q when searching for specific text the user saw on screen.",
60
+ description: "Full-text search query. Omit to return all content in time range.",
93
61
  },
94
62
  content_type: {
95
63
  type: "string",
96
- enum: ["all", "ocr", "audio", "input", "accessibility"],
97
- description: "Content type filter: 'audio' (transcriptions — use for meetings/calls/conversations), 'accessibility' (accessibility tree text, preferred for screen content), 'ocr' (screen text via OCR, legacy fallback), 'input' (clicks, keystrokes, clipboard, app switches), 'all'. Default: 'all'. For meeting/call queries, ALWAYS use 'audio'.",
64
+ enum: ["all", "ocr", "audio", "input", "accessibility", "memory"],
65
+ description: "Filter by content type. Default: 'all'.",
98
66
  default: "all",
99
67
  },
100
- limit: {
101
- type: "integer",
102
- description: "Max results. Default: 10",
103
- default: 10,
104
- },
105
- offset: {
106
- type: "integer",
107
- description: "Skip N results for pagination. Default: 0",
108
- default: 0,
109
- },
68
+ limit: { type: "integer", description: "Max results (default 10)", default: 10 },
69
+ offset: { type: "integer", description: "Pagination offset", default: 0 },
110
70
  start_time: {
111
71
  type: "string",
112
- format: "date-time",
113
- description: "Start time: ISO 8601 UTC (e.g., 2024-01-15T10:00:00Z) or relative (e.g., '16h ago', '2d ago', 'now')",
72
+ description: "ISO 8601 UTC or relative (e.g. '2h ago')",
114
73
  },
115
74
  end_time: {
116
75
  type: "string",
117
- format: "date-time",
118
- description: "End time: ISO 8601 UTC (e.g., 2024-01-15T18:00:00Z) or relative (e.g., 'now', '1h ago')",
119
- },
120
- app_name: {
121
- type: "string",
122
- description: "Filter by app (e.g., 'Google Chrome', 'Slack', 'zoom.us')",
123
- },
124
- window_name: {
125
- type: "string",
126
- description: "Filter by window title",
127
- },
128
- min_length: {
129
- type: "integer",
130
- description: "Minimum content length in characters",
131
- },
132
- max_length: {
133
- type: "integer",
134
- description: "Maximum content length in characters",
76
+ description: "ISO 8601 UTC or relative (e.g. 'now')",
135
77
  },
78
+ app_name: { type: "string", description: "Filter by app name" },
79
+ window_name: { type: "string", description: "Filter by window title" },
80
+ min_length: { type: "integer", description: "Min content length" },
81
+ max_length: { type: "integer", description: "Max content length" },
136
82
  include_frames: {
137
83
  type: "boolean",
138
- description: "Include base64 screenshots (OCR only). Default: false",
84
+ description: "Include base64 screenshots (OCR only)",
139
85
  default: false,
140
86
  },
141
- speaker_ids: {
142
- type: "string",
143
- description: "Comma-separated speaker IDs to filter audio results (e.g., '1,2,3')",
144
- },
145
- speaker_name: {
146
- type: "string",
147
- description: "Filter audio by speaker name (case-insensitive partial match)",
148
- },
87
+ speaker_ids: { type: "string", description: "Comma-separated speaker IDs" },
88
+ speaker_name: { type: "string", description: "Filter audio by speaker name" },
149
89
  max_content_length: {
150
90
  type: "integer",
151
- description: "Truncate each result's text/transcription to this many characters using middle-truncation (keeps first half + last half). Useful for limiting token usage with small-context models.",
91
+ description: "Truncate each result via middle-truncation",
152
92
  },
153
93
  },
154
94
  },
155
95
  },
156
96
  {
157
- name: "export-video",
158
- description:
159
- "Export a video of screen recordings for a specific time range. " +
160
- "Creates an MP4 video from the recorded frames between the start and end times.\n\n" +
161
- "IMPORTANT: Use ISO 8601 UTC timestamps (e.g., 2024-01-15T10:00:00Z) or relative times (e.g., '16h ago', 'now')\n\n" +
162
- "EXAMPLES:\n" +
163
- "- Last 30 minutes: Calculate timestamps from current time\n" +
164
- "- Specific meeting: Use the meeting's start and end times in UTC",
165
- annotations: {
166
- title: "Export Video",
167
- destructiveHint: true,
97
+ name: "list-meetings",
98
+ description: "List detected meetings (Zoom, Teams, Meet, etc.) with duration, app, and attendees.",
99
+ annotations: { title: "List Meetings", readOnlyHint: true },
100
+ inputSchema: {
101
+ type: "object",
102
+ properties: {
103
+ start_time: { type: "string", description: "ISO 8601 UTC or relative" },
104
+ end_time: { type: "string", description: "ISO 8601 UTC or relative" },
105
+ limit: { type: "integer", description: "Max results (default 20)", default: 20 },
106
+ offset: { type: "integer", description: "Pagination offset", default: 0 },
107
+ },
168
108
  },
109
+ },
110
+ {
111
+ name: "activity-summary",
112
+ description:
113
+ "Lightweight activity overview (~200-500 tokens): app usage with active minutes, audio speakers, recent texts. " +
114
+ "Use for 'how long on X?', 'which apps?', 'what was I doing?' questions.",
115
+ annotations: { title: "Activity Summary", readOnlyHint: true },
169
116
  inputSchema: {
170
117
  type: "object",
171
118
  properties: {
172
- start_time: {
173
- type: "string",
174
- format: "date-time",
175
- description:
176
- "Start time: ISO 8601 UTC (e.g., '2024-01-15T10:00:00Z') or relative (e.g., '16h ago', 'now')",
177
- },
178
- end_time: {
179
- type: "string",
180
- format: "date-time",
181
- description:
182
- "End time: ISO 8601 UTC (e.g., '2024-01-15T10:30:00Z') or relative (e.g., 'now', '1h ago')",
183
- },
184
- fps: {
185
- type: "number",
186
- description:
187
- "Frames per second for the output video. Lower values (0.5-1.0) create smaller files, higher values (5-10) create smoother playback. Default: 1.0",
188
- default: 1.0,
189
- },
119
+ start_time: { type: "string", description: "ISO 8601 UTC or relative" },
120
+ end_time: { type: "string", description: "ISO 8601 UTC or relative" },
121
+ app_name: { type: "string", description: "Optional app name filter" },
190
122
  },
191
123
  required: ["start_time", "end_time"],
192
124
  },
193
125
  },
194
126
  {
195
- name: "list-meetings",
127
+ name: "search-elements",
196
128
  description:
197
- "List detected meetings with duration, app, and attendees. " +
198
- "Returns meetings detected via app focus (Zoom, Meet, Teams) and audio. " +
199
- "Only available when screenpipe runs in smart transcription mode.",
200
- annotations: {
201
- title: "List Meetings",
202
- readOnlyHint: true,
203
- },
129
+ "Search UI elements (buttons, links, text fields) from the accessibility tree. " +
130
+ "Lighter than search-content for targeted UI lookups.",
131
+ annotations: { title: "Search Elements", readOnlyHint: true },
204
132
  inputSchema: {
205
133
  type: "object",
206
134
  properties: {
207
- start_time: {
208
- type: "string",
209
- format: "date-time",
210
- description: "Start filter: ISO 8601 UTC (e.g., 2024-01-15T10:00:00Z) or relative (e.g., '16h ago', 'now')",
211
- },
212
- end_time: {
135
+ q: { type: "string", description: "Full-text search on element text" },
136
+ frame_id: { type: "integer", description: "Filter to specific frame" },
137
+ source: {
213
138
  type: "string",
214
- format: "date-time",
215
- description: "End filter: ISO 8601 UTC (e.g., 2024-01-15T18:00:00Z) or relative (e.g., 'now', '1h ago')",
216
- },
217
- limit: {
218
- type: "integer",
219
- description: "Max results. Default: 20",
220
- default: 20,
221
- },
222
- offset: {
223
- type: "integer",
224
- description: "Skip N results for pagination. Default: 0",
225
- default: 0,
226
- },
139
+ enum: ["accessibility", "ocr"],
140
+ description: "Element source filter",
141
+ },
142
+ role: { type: "string", description: "Element role (e.g. AXButton, AXLink)" },
143
+ start_time: { type: "string", description: "ISO 8601 UTC or relative" },
144
+ end_time: { type: "string", description: "ISO 8601 UTC or relative" },
145
+ app_name: { type: "string", description: "Filter by app name" },
146
+ limit: { type: "integer", description: "Max results (default 50)", default: 50 },
147
+ offset: { type: "integer", description: "Pagination offset", default: 0 },
227
148
  },
228
149
  },
229
150
  },
230
151
  {
231
- name: "activity-summary",
152
+ name: "frame-context",
232
153
  description:
233
- "Get a lightweight compressed activity overview for a time range (~200-500 tokens). " +
234
- "Returns app usage (name, frame count, active minutes, first/last seen), recent accessibility texts, and audio speaker summary. " +
235
- "Minutes are based on active session time (consecutive frames with gaps < 5min count as active). " +
236
- "first_seen/last_seen show the wall-clock span per app.\n\n" +
237
- "USE THIS TOOL (not search-content or raw SQL) for:\n" +
238
- "- 'how long did I spend on X?' active_minutes per app\n" +
239
- "- 'which apps did I use today?' → app list sorted by active_minutes\n" +
240
- "- 'what was I doing?' → broad overview before drilling deeper\n" +
241
- "- Any time-spent or app-usage question\n\n" +
242
- "WARNING: Do NOT estimate time from raw frame counts or SQL queries — those are inaccurate. " +
243
- "This endpoint calculates actual active session time correctly.",
244
- annotations: {
245
- title: "Activity Summary",
246
- readOnlyHint: true,
154
+ "Get accessibility text, parsed tree nodes, and URLs for a specific frame ID.",
155
+ annotations: { title: "Frame Context", readOnlyHint: true },
156
+ inputSchema: {
157
+ type: "object",
158
+ properties: {
159
+ frame_id: { type: "integer", description: "Frame ID from search results" },
160
+ },
161
+ required: ["frame_id"],
247
162
  },
163
+ },
164
+ {
165
+ name: "export-video",
166
+ description: "Export an MP4 video of screen recordings for a time range.",
167
+ annotations: { title: "Export Video", destructiveHint: true },
248
168
  inputSchema: {
249
169
  type: "object",
250
170
  properties: {
251
- start_time: {
252
- type: "string",
253
- format: "date-time",
254
- description: "Start of time range: ISO 8601 UTC (e.g., 2024-01-15T10:00:00Z) or relative (e.g., '16h ago', 'now')",
255
- },
256
- end_time: {
257
- type: "string",
258
- format: "date-time",
259
- description: "End of time range: ISO 8601 UTC (e.g., 2024-01-15T18:00:00Z) or relative (e.g., 'now', '1h ago')",
260
- },
261
- app_name: {
262
- type: "string",
263
- description: "Optional app name filter (e.g., 'Google Chrome', 'VS Code')",
264
- },
171
+ start_time: { type: "string", description: "ISO 8601 UTC or relative" },
172
+ end_time: { type: "string", description: "ISO 8601 UTC or relative" },
173
+ fps: { type: "number", description: "Output FPS (default 1.0)", default: 1.0 },
265
174
  },
266
175
  required: ["start_time", "end_time"],
267
176
  },
268
177
  },
269
178
  {
270
- name: "search-elements",
179
+ name: "update-memory",
271
180
  description:
272
- "Search structured UI elements (accessibility tree nodes and OCR text blocks). " +
273
- "Returns ~100-500 bytes per element — much lighter than search-content for targeted lookups. " +
274
- "Each element has: id, frame_id, source (accessibility/ocr), role (AXButton, AXStaticText, AXLink, etc.), text, bounds, depth.\n\n" +
275
- "Use for: finding specific buttons, links, text fields, or UI components. " +
276
- "Prefer this over search-content when you need structural UI detail rather than full screen text.",
277
- annotations: {
278
- title: "Search Elements",
279
- readOnlyHint: true,
280
- },
181
+ "Create, update, or delete a persistent memory (facts, preferences, decisions). " +
182
+ "Retrieve memories via search-content with content_type='memory'.",
183
+ annotations: { title: "Update Memory", destructiveHint: false },
281
184
  inputSchema: {
282
185
  type: "object",
283
186
  properties: {
284
- q: {
285
- type: "string",
286
- description: "Full-text search query across element text. Optional.",
287
- },
288
- frame_id: {
289
- type: "integer",
290
- description: "Filter to elements from a specific frame",
291
- },
292
- source: {
293
- type: "string",
294
- enum: ["accessibility", "ocr"],
295
- description: "Filter by element source: 'accessibility' (structured tree) or 'ocr' (text blocks)",
296
- },
297
- role: {
298
- type: "string",
299
- description: "Filter by element role (e.g., 'AXButton', 'AXStaticText', 'AXLink', 'AXTextField', 'line')",
300
- },
301
- start_time: {
302
- type: "string",
303
- format: "date-time",
304
- description: "Start time: ISO 8601 UTC or relative (e.g., '16h ago', 'now')",
305
- },
306
- end_time: {
307
- type: "string",
308
- format: "date-time",
309
- description: "End time: ISO 8601 UTC or relative (e.g., 'now', '1h ago')",
310
- },
311
- app_name: {
312
- type: "string",
313
- description: "Filter by app name",
314
- },
315
- limit: {
316
- type: "integer",
317
- description: "Max results. Default: 50",
318
- default: 50,
319
- },
320
- offset: {
321
- type: "integer",
322
- description: "Skip N results for pagination. Default: 0",
323
- default: 0,
324
- },
187
+ id: { type: "integer", description: "Memory ID (omit to create new)" },
188
+ content: { type: "string", description: "Memory text" },
189
+ tags: { type: "array", items: { type: "string" }, description: "Categorization tags" },
190
+ importance: { type: "number", description: "0.0-1.0 (default 0.5)" },
191
+ source_context: { type: "object", description: "Optional source data links" },
192
+ delete: { type: "boolean", description: "Delete the memory identified by id" },
325
193
  },
326
194
  },
327
195
  },
328
196
  {
329
- name: "frame-context",
197
+ name: "send-notification",
330
198
  description:
331
- "Get accessibility text, parsed tree nodes, and extracted URLs for a specific frame. " +
332
- "Falls back to OCR data for legacy frames without accessibility data. " +
333
- "Use after finding a frame_id from search-content or search-elements to get full structural detail and URLs.",
334
- annotations: {
335
- title: "Frame Context",
336
- readOnlyHint: true,
337
- },
199
+ "Send a notification to the screenpipe desktop UI with optional action buttons. " +
200
+ "Actions can re-run pipes with context, call API endpoints, or open deep links.",
201
+ annotations: { title: "Send Notification", destructiveHint: false },
338
202
  inputSchema: {
339
203
  type: "object",
340
204
  properties: {
341
- frame_id: {
342
- type: "integer",
343
- description: "The frame ID to get context for (from search results)",
205
+ title: { type: "string", description: "Notification title" },
206
+ body: { type: "string", description: "Notification body (markdown supported)" },
207
+ pipe_name: { type: "string", description: "Name of the pipe sending this notification" },
208
+ timeout_secs: { type: "integer", description: "Auto-dismiss seconds (default 20)", default: 20 },
209
+ actions: {
210
+ type: "array",
211
+ description: "Up to 5 action buttons",
212
+ items: {
213
+ type: "object",
214
+ properties: {
215
+ id: { type: "string", description: "Unique action ID" },
216
+ label: { type: "string", description: "Button label" },
217
+ type: { type: "string", enum: ["pipe", "api", "deeplink", "dismiss"], description: "Action type" },
218
+ pipe: { type: "string", description: "Pipe name to run (type=pipe)" },
219
+ context: { type: "object", description: "Context passed to pipe (type=pipe)" },
220
+ url: { type: "string", description: "URL for api/deeplink actions" },
221
+ },
222
+ required: ["id", "label", "type"],
223
+ },
344
224
  },
345
225
  },
346
- required: ["frame_id"],
226
+ required: ["title", "pipe_name"],
347
227
  },
348
228
  },
349
229
  ];
350
230
 
351
- // List tools handler
352
231
  server.setRequestHandler(ListToolsRequestSchema, async () => {
353
- return { tools: BASE_TOOLS };
232
+ return { tools: TOOLS };
354
233
  });
355
234
 
356
- // MCP Resources - provide dynamic context data
235
+ // ---------------------------------------------------------------------------
236
+ // Resources — dynamic context only (no duplicated reference docs)
237
+ // ---------------------------------------------------------------------------
357
238
  const RESOURCES = [
358
239
  {
359
240
  uri: "screenpipe://context",
360
241
  name: "Current Context",
361
- description: "Current date/time and pre-computed timestamps for common time ranges",
242
+ description: "Current date/time, timezone, and pre-computed timestamps for common time ranges",
362
243
  mimeType: "application/json",
363
244
  },
364
- {
365
- uri: "screenpipe://guide",
366
- name: "Usage Guide",
367
- description: "How to use screenpipe search effectively",
368
- mimeType: "text/markdown",
369
- },
370
- {
371
- uri: "ui://search",
372
- name: "Search Dashboard",
373
- description: "Interactive search UI for exploring screen recordings and audio transcriptions",
374
- mimeType: "text/html",
375
- },
376
245
  ];
377
246
 
378
- // List resources handler
379
247
  server.setRequestHandler(ListResourcesRequestSchema, async () => {
380
248
  return { resources: RESOURCES };
381
249
  });
382
250
 
383
- // Read resource handler
384
251
  server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
385
252
  const { uri } = request.params;
386
- const dateInfo = getCurrentDateInfo();
387
- const now = Date.now();
388
-
389
- switch (uri) {
390
- case "screenpipe://context":
391
- return {
392
- contents: [
393
- {
394
- uri,
395
- mimeType: "application/json",
396
- text: JSON.stringify({
397
- current_time: dateInfo.isoDate,
398
- current_date_local: dateInfo.localDate,
253
+
254
+ if (uri === "screenpipe://context") {
255
+ const now = new Date();
256
+ const ms = now.getTime();
257
+ return {
258
+ contents: [
259
+ {
260
+ uri,
261
+ mimeType: "application/json",
262
+ text: JSON.stringify(
263
+ {
264
+ current_time: now.toISOString(),
265
+ current_date_local: now.toLocaleDateString("en-US", {
266
+ weekday: "long",
267
+ year: "numeric",
268
+ month: "long",
269
+ day: "numeric",
270
+ }),
399
271
  timezone: Intl.DateTimeFormat().resolvedOptions().timeZone,
400
272
  timestamps: {
401
- now: dateInfo.isoDate,
402
- one_hour_ago: new Date(now - 60 * 60 * 1000).toISOString(),
403
- three_hours_ago: new Date(now - 3 * 60 * 60 * 1000).toISOString(),
404
- today_start: `${new Date().toISOString().split("T")[0]}T00:00:00Z`,
405
- yesterday_start: `${new Date(now - 24 * 60 * 60 * 1000).toISOString().split("T")[0]}T00:00:00Z`,
406
- one_week_ago: new Date(now - 7 * 24 * 60 * 60 * 1000).toISOString(),
273
+ now: now.toISOString(),
274
+ one_hour_ago: new Date(ms - 60 * 60 * 1000).toISOString(),
275
+ three_hours_ago: new Date(ms - 3 * 60 * 60 * 1000).toISOString(),
276
+ today_start: `${now.toISOString().split("T")[0]}T00:00:00Z`,
277
+ yesterday_start: `${new Date(ms - 24 * 60 * 60 * 1000).toISOString().split("T")[0]}T00:00:00Z`,
278
+ one_week_ago: new Date(ms - 7 * 24 * 60 * 60 * 1000).toISOString(),
407
279
  },
408
- common_apps: ["Google Chrome", "Safari", "Slack", "zoom.us", "Microsoft Teams", "Code", "Terminal"],
409
- }, null, 2),
410
- },
411
- ],
412
- };
413
-
414
- case "screenpipe://guide":
415
- return {
416
- contents: [
417
- {
418
- uri,
419
- mimeType: "text/markdown",
420
- text: `# Screenpipe Search Guide
421
-
422
- ## Data Modalities
423
-
424
- Screenpipe captures four types of data:
425
- 1. **Accessibility** - Screen text via accessibility APIs (primary, preferred for screen content)
426
- 2. **OCR** - Screen text from screenshots (legacy fallback for apps without accessibility support)
427
- 3. **Audio** - Transcribed speech from microphone/system audio
428
- 4. **Input** - Keyboard input, mouse clicks, app switches, clipboard (macOS)
429
-
430
- ## Quick Start
431
- - **Get recent activity**: Call search-content with no parameters
432
- - **Search screen text**: \`{"q": "search term", "content_type": "all"}\`
433
- - **Get keyboard input**: \`{"content_type": "input"}\`
434
- - **Get audio only**: \`{"content_type": "audio"}\`
435
-
436
- ## Common User Requests → Correct Tool Choice
437
- | User says | Use this tool | Key params |
438
- |-----------|--------------|------------|
439
- | "summarize my meeting/call" | search-content | content_type:"audio", NO q param, start_time |
440
- | "what did they/I say about X" | search-content | content_type:"audio", NO q param (scan results manually) |
441
- | "how long on X" / "which apps" / "time spent" | activity-summary | start_time, end_time |
442
- | "what was I doing" | activity-summary | start_time, end_time (then drill into search-content) |
443
- | "what was I reading/looking at" | search-content | content_type:"all", start_time |
444
-
445
- ## Behavior Rules
446
- - Act immediately on clear requests. NEVER ask "what time range?" or "which content type?" when the intent is obvious.
447
- - If search returns empty, silently retry with wider time range or fewer filters. Do NOT ask the user what to change.
448
- - For meetings: ALWAYS use content_type:"audio" and do NOT use the q param. Transcriptions are noisy — q filters too aggressively and misses relevant content.
449
-
450
- ## search-content
451
- | Parameter | Description | Default |
452
- |-----------|-------------|---------|
453
- | q | Search query | (none - returns all) |
454
- | content_type | all/ocr/audio/input/accessibility | all |
455
- | limit | Max results | 10 |
456
- | start_time | ISO 8601 UTC or relative (e.g. '16h ago') | (no filter) |
457
- | end_time | ISO 8601 UTC or relative (e.g. 'now') | (no filter) |
458
- | app_name | Filter by app | (no filter) |
459
- | include_frames | Include screenshots | false |
460
-
461
- ## Search Strategy (MANDATORY)
462
- 1. First search: ONLY use time params (start_time/end_time). No q, no app_name, no content_type. This gives ground truth of what's recorded.
463
- 2. Scan results to find correct app_name values and content patterns.
464
- 3. Only THEN narrow with filters using exact observed values. App names are case-sensitive and may differ from user input (e.g. "Discord" vs "Discord.exe").
465
- 4. The q param searches captured text (accessibility/OCR), NOT app names — an app can be visible without its name in the captured text.
466
- 5. NEVER report "no data found" after one filtered search. Verify with unfiltered time-only search first.
467
-
468
- ## Progressive Disclosure (Token-Efficient Strategy)
469
- 1. **Start with activity-summary** (~200 tokens) for broad questions ("what was I doing?")
470
- 2. **Narrow with search-content** (~500-1000 tokens) using filters from step 1
471
- 3. **Drill into search-elements** (~200 tokens each) for structural UI detail (buttons, links)
472
- 4. **Fetch frame-context** for URLs and accessibility tree of specific frames
473
- 5. **Screenshots** (include_frames=true) only when text isn't enough
474
-
475
- ## Chat History
476
- Previous screenpipe chat conversations are stored as individual JSON files in ~/.screenpipe/chats/{conversation-id}.json
477
- Each file contains: id, title, messages[], createdAt, updatedAt. You can read these files to reference or search previous conversations.
478
-
479
- ## Speaker Management
480
- screenpipe auto-identifies speakers in audio. API endpoints for managing them:
481
- - \`GET /speakers/unnamed?limit=10\` — list unnamed speakers
482
- - \`GET /speakers/search?name=John\` — search by name
483
- - \`POST /speakers/update\` with \`{"id": 5, "name": "John"}\` — rename a speaker
484
- - \`POST /speakers/merge\` with \`{"speaker_to_keep_id": 1, "speaker_to_merge_id": 2}\` — merge duplicates
485
- - \`GET /speakers/similar?speaker_id=5\` — find similar speakers for merging
486
- - \`POST /speakers/reassign\` — reassign audio chunk to different speaker
487
-
488
- ## Tips
489
- 1. Read screenpipe://context first to get current timestamps
490
- 2. Use activity-summary before search-content for broad overview questions
491
- 3. Use search-elements instead of search-content for targeted UI lookups (10x lighter)
492
- 4. Use content_type=input for "what did I type?" queries
493
- 5. Use content_type=accessibility for accessibility tree text
494
- 6. For large aggregations (e.g. "what apps did I use today?"), paginate with offset or suggest the user run raw SQL via \`curl -X POST http://localhost:3030/raw_sql\` for efficient GROUP BY queries
495
-
496
- ## Deep Links (Clickable References)
497
- When showing search results to users, create clickable links so they can jump to that exact moment.
498
-
499
- **ALWAYS prefer frame-based links for OCR results** (frame IDs are exact DB keys):
500
- - \`[10:30 AM — Chrome](screenpipe://frame/12345)\` — use \`content.frame_id\` from OCR results
501
-
502
- **Use timestamp links only for audio results** (which have no frame_id):
503
- - \`[meeting at 3pm](screenpipe://timeline?timestamp=2024-01-15T15:00:00Z)\` — use exact \`timestamp\` from audio results
504
-
505
- **NEVER fabricate frame IDs or timestamps.** Only use values copied from actual search results.`,
506
- },
507
- ],
508
- };
509
-
510
- case "ui://search": {
511
- // MCP App UI - Interactive search dashboard
512
- const uiHtmlPath = path.join(__dirname, "..", "ui", "search.html");
513
- let htmlContent: string;
514
- try {
515
- htmlContent = fs.readFileSync(uiHtmlPath, "utf-8");
516
- } catch {
517
- // Fallback: serve embedded minimal UI if file not found
518
- htmlContent = `<!DOCTYPE html>
519
- <html>
520
- <head>
521
- <style>
522
- body { font-family: system-ui; background: #0a0a0a; color: #fff; padding: 20px; }
523
- input { width: 100%; padding: 10px; margin-bottom: 10px; background: #1a1a1a; border: 1px solid #333; color: #fff; border-radius: 6px; }
524
- button { padding: 10px 20px; background: #fff; color: #000; border: none; border-radius: 6px; cursor: pointer; }
525
- #results { margin-top: 20px; }
526
- .result { background: #1a1a1a; padding: 12px; margin: 8px 0; border-radius: 8px; border: 1px solid #333; }
527
- </style>
528
- </head>
529
- <body>
530
- <h2>screenpipe search</h2>
531
- <input id="q" placeholder="search..." onkeydown="if(event.key==='Enter')search()"/>
532
- <button onclick="search()">search</button>
533
- <div id="results"></div>
534
- <script>
535
- function search() {
536
- window.parent.postMessage({jsonrpc:'2.0',method:'tools/call',params:{name:'search-content',arguments:{q:document.getElementById('q').value,limit:20}}},'*');
537
- }
538
- window.addEventListener('message',e=>{
539
- if(e.data?.result||e.data?.method==='tool/result'){
540
- const r=e.data.result||e.data.params?.result;
541
- const d=r?.data||r||[];
542
- document.getElementById('results').innerHTML=d.map(x=>'<div class="result"><b>'+((x.type||'')+'</b> '+(x.content?.app_name||'')+': '+(x.content?.text||x.content?.transcription||'').substring(0,200))+'</div>').join('');
543
- }
544
- });
545
- </script>
546
- </body>
547
- </html>`;
548
- }
549
- return {
550
- contents: [
551
- {
552
- uri,
553
- mimeType: "text/html",
554
- text: htmlContent,
555
- },
556
- ],
557
- };
558
- }
559
-
560
- default:
561
- throw new Error(`Unknown resource: ${uri}`);
562
- }
563
- });
564
-
565
- // MCP Prompts - static interaction templates
566
- const PROMPTS = [
567
- {
568
- name: "search-recent",
569
- description: "Search recent screen activity",
570
- arguments: [
571
- { name: "query", description: "Optional search term", required: false },
572
- { name: "hours", description: "Hours to look back (default: 1)", required: false },
573
- ],
574
- },
575
- {
576
- name: "find-in-app",
577
- description: "Find content from a specific application",
578
- arguments: [
579
- { name: "app", description: "App name (e.g., Chrome, Slack)", required: true },
580
- { name: "query", description: "Optional search term", required: false },
581
- ],
582
- },
583
- {
584
- name: "meeting-notes",
585
- description: "Get audio transcriptions from meetings",
586
- arguments: [
587
- { name: "hours", description: "Hours to look back (default: 3)", required: false },
588
- ],
589
- },
590
- ];
591
-
592
- // List prompts handler
593
- server.setRequestHandler(ListPromptsRequestSchema, async () => {
594
- return { prompts: PROMPTS };
595
- });
596
-
597
- // Get prompt handler
598
- server.setRequestHandler(GetPromptRequestSchema, async (request) => {
599
- const { name, arguments: promptArgs } = request.params;
600
- const dateInfo = getCurrentDateInfo();
601
- const now = Date.now();
602
-
603
- switch (name) {
604
- case "search-recent": {
605
- const query = promptArgs?.query || "";
606
- const hours = parseInt(promptArgs?.hours || "1", 10);
607
- const startTime = new Date(now - hours * 60 * 60 * 1000).toISOString();
608
-
609
- return {
610
- description: `Search recent activity (last ${hours} hour${hours > 1 ? "s" : ""})`,
611
- messages: [
612
- {
613
- role: "user" as const,
614
- content: {
615
- type: "text" as const,
616
- text: `Search screenpipe for recent activity.
617
-
618
- Current time: ${dateInfo.isoDate}
619
-
620
- Use search-content with:
621
- ${query ? `- q: "${query}"` : "- No query filter (get all content)"}
622
- - start_time: "${startTime}"
623
- - limit: 50`,
624
- },
625
- },
626
- ],
627
- };
628
- }
629
-
630
- case "find-in-app": {
631
- const app = promptArgs?.app || "Google Chrome";
632
- const query = promptArgs?.query || "";
633
-
634
- return {
635
- description: `Find content from ${app}`,
636
- messages: [
637
- {
638
- role: "user" as const,
639
- content: {
640
- type: "text" as const,
641
- text: `Search screenpipe for content from ${app}.
642
-
643
- Current time: ${dateInfo.isoDate}
644
-
645
- Use search-content with:
646
- - app_name: "${app}"
647
- ${query ? `- q: "${query}"` : "- No query filter"}
648
- - content_type: "all"
649
- - limit: 50`,
650
280
  },
651
- },
652
- ],
653
- };
654
- }
655
-
656
- case "meeting-notes": {
657
- const hours = parseInt(promptArgs?.hours || "3", 10);
658
- const startTime = new Date(now - hours * 60 * 60 * 1000).toISOString();
659
-
660
- return {
661
- description: `Get meeting transcriptions (last ${hours} hours)`,
662
- messages: [
663
- {
664
- role: "user" as const,
665
- content: {
666
- type: "text" as const,
667
- text: `Get audio transcriptions from recent meetings.
668
-
669
- Current time: ${dateInfo.isoDate}
670
-
671
- Use search-content with:
672
- - content_type: "audio"
673
- - start_time: "${startTime}"
674
- - limit: 100
675
-
676
- Common meeting apps: zoom.us, Microsoft Teams, Google Meet, Slack`,
677
- },
678
- },
679
- ],
680
- };
681
- }
682
-
683
- default:
684
- throw new Error(`Unknown prompt: ${name}`);
281
+ null,
282
+ 2
283
+ ),
284
+ },
285
+ ],
286
+ };
685
287
  }
288
+
289
+ throw new Error(`Unknown resource: ${uri}`);
686
290
  });
687
291
 
688
- // Helper function to make HTTP requests
292
+ // ---------------------------------------------------------------------------
293
+ // Helper
294
+ // ---------------------------------------------------------------------------
689
295
  async function fetchAPI(
690
296
  endpoint: string,
691
297
  options: RequestInit = {}
@@ -700,7 +306,9 @@ async function fetchAPI(
700
306
  });
701
307
  }
702
308
 
703
- // Call tool handler
309
+ // ---------------------------------------------------------------------------
310
+ // Tool handlers
311
+ // ---------------------------------------------------------------------------
704
312
  server.setRequestHandler(CallToolRequestSchema, async (request) => {
705
313
  const { name, arguments: args } = request.params;
706
314
 
@@ -720,9 +328,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
720
328
  }
721
329
 
722
330
  const response = await fetchAPI(`/search?${params.toString()}`);
723
- if (!response.ok) {
724
- throw new Error(`HTTP error: ${response.status}`);
725
- }
331
+ if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
726
332
 
727
333
  const data = await response.json();
728
334
  const results = data.data || [];
@@ -733,13 +339,12 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
733
339
  content: [
734
340
  {
735
341
  type: "text",
736
- text: "No results found. Try: broader search terms, different content_type, or wider time range.",
342
+ text: "No results found. Try: broader terms, different content_type, or wider time range.",
737
343
  },
738
344
  ],
739
345
  };
740
346
  }
741
347
 
742
- // Build content array with text and optional images
743
348
  const contentItems: Array<
744
349
  | { type: "text"; text: string }
745
350
  | { type: "image"; data: string; mimeType: string }
@@ -756,9 +361,9 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
756
361
  const tagsStr = content.tags?.length ? `\nTags: ${content.tags.join(", ")}` : "";
757
362
  formattedResults.push(
758
363
  `[OCR] ${content.app_name || "?"} | ${content.window_name || "?"}\n` +
759
- `${content.timestamp || ""}\n` +
760
- `${content.text || ""}` +
761
- tagsStr
364
+ `${content.timestamp || ""}\n` +
365
+ `${content.text || ""}` +
366
+ tagsStr
762
367
  );
763
368
  if (includeFrames && content.frame) {
764
369
  images.push({
@@ -770,29 +375,39 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
770
375
  const tagsStr = content.tags?.length ? `\nTags: ${content.tags.join(", ")}` : "";
771
376
  formattedResults.push(
772
377
  `[Audio] ${content.device_name || "?"}\n` +
773
- `${content.timestamp || ""}\n` +
774
- `${content.transcription || ""}` +
775
- tagsStr
378
+ `${content.timestamp || ""}\n` +
379
+ `${content.transcription || ""}` +
380
+ tagsStr
776
381
  );
777
382
  } else if (result.type === "UI" || result.type === "Accessibility") {
778
383
  formattedResults.push(
779
384
  `[Accessibility] ${content.app_name || "?"} | ${content.window_name || "?"}\n` +
780
- `${content.timestamp || ""}\n` +
781
- `${content.text || ""}`
385
+ `${content.timestamp || ""}\n` +
386
+ `${content.text || ""}`
387
+ );
388
+ } else if (result.type === "Memory") {
389
+ const tagsStr = content.tags?.length ? ` [${content.tags.join(", ")}]` : "";
390
+ const importance =
391
+ content.importance != null ? ` (importance: ${content.importance})` : "";
392
+ formattedResults.push(
393
+ `[Memory #${content.id}]${tagsStr}${importance}\n` +
394
+ `${content.created_at || ""}\n` +
395
+ `${content.content || ""}`
782
396
  );
783
397
  }
784
398
  }
785
399
 
786
- // Header with pagination info
787
- const header = `Results: ${results.length}/${pagination.total || "?"}` +
788
- (pagination.total > results.length ? ` (use offset=${(pagination.offset || 0) + results.length} for more)` : "");
400
+ const header =
401
+ `Results: ${results.length}/${pagination.total || "?"}` +
402
+ (pagination.total > results.length
403
+ ? ` (use offset=${(pagination.offset || 0) + results.length} for more)`
404
+ : "");
789
405
 
790
406
  contentItems.push({
791
407
  type: "text",
792
408
  text: header + "\n\n" + formattedResults.join("\n---\n"),
793
409
  });
794
410
 
795
- // Add images if requested
796
411
  for (const img of images) {
797
412
  contentItems.push({ type: "text", text: `\n📷 ${img.context}` });
798
413
  contentItems.push({ type: "image", data: img.data, mimeType: "image/png" });
@@ -801,178 +416,6 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
801
416
  return { content: contentItems };
802
417
  }
803
418
 
804
- case "export-video": {
805
- const startTime = args.start_time as string;
806
- const endTime = args.end_time as string;
807
- const fps = (args.fps as number) || 1.0;
808
-
809
- // Validate time inputs
810
- if (!startTime || !endTime) {
811
- return {
812
- content: [
813
- {
814
- type: "text",
815
- text: "Error: Both start_time and end_time are required in ISO 8601 format (e.g., '2024-01-15T10:00:00Z')",
816
- },
817
- ],
818
- };
819
- }
820
-
821
- // Step 1: Query the search API to get frame IDs for the time range
822
- const searchParams = new URLSearchParams({
823
- content_type: "ocr",
824
- start_time: startTime,
825
- end_time: endTime,
826
- limit: "10000", // Get all frames in range
827
- });
828
-
829
- const searchResponse = await fetchAPI(`/search?${searchParams.toString()}`);
830
- if (!searchResponse.ok) {
831
- throw new Error(`Failed to search for frames: HTTP ${searchResponse.status}`);
832
- }
833
-
834
- const searchData = await searchResponse.json();
835
- const results = searchData.data || [];
836
-
837
- if (results.length === 0) {
838
- return {
839
- content: [
840
- {
841
- type: "text",
842
- text: `No screen recordings found between ${startTime} and ${endTime}. Make sure screenpipe was recording during this time period.`,
843
- },
844
- ],
845
- };
846
- }
847
-
848
- // Extract unique frame IDs from OCR results
849
- const frameIds: number[] = [];
850
- const seenIds = new Set<number>();
851
- for (const result of results) {
852
- if (result.type === "OCR" && result.content?.frame_id) {
853
- const frameId = result.content.frame_id;
854
- if (!seenIds.has(frameId)) {
855
- seenIds.add(frameId);
856
- frameIds.push(frameId);
857
- }
858
- }
859
- }
860
-
861
- if (frameIds.length === 0) {
862
- return {
863
- content: [
864
- {
865
- type: "text",
866
- text: `Found ${results.length} results but no valid frame IDs. The recordings may be audio-only.`,
867
- },
868
- ],
869
- };
870
- }
871
-
872
- // Sort frame IDs
873
- frameIds.sort((a, b) => a - b);
874
-
875
- // Step 2: Connect to WebSocket and export video
876
- // Send frame_ids in message body to avoid URL length limits
877
- const wsUrl = `ws://localhost:${port}/frames/export?fps=${fps}`;
878
-
879
- const exportResult = await new Promise<{
880
- success: boolean;
881
- filePath?: string;
882
- error?: string;
883
- frameCount?: number;
884
- }>((resolve) => {
885
- const ws = new WebSocket(wsUrl);
886
- let resolved = false;
887
-
888
- const timeout = setTimeout(() => {
889
- if (!resolved) {
890
- resolved = true;
891
- ws.close();
892
- resolve({ success: false, error: "Export timed out after 5 minutes" });
893
- }
894
- }, 5 * 60 * 1000); // 5 minute timeout
895
-
896
- ws.on("open", () => {
897
- // Send frame_ids in message body to avoid URL length limits
898
- ws.send(JSON.stringify({ frame_ids: frameIds }));
899
- });
900
-
901
- ws.on("error", (error) => {
902
- if (!resolved) {
903
- resolved = true;
904
- clearTimeout(timeout);
905
- resolve({ success: false, error: `WebSocket error: ${error.message}` });
906
- }
907
- });
908
-
909
- ws.on("close", () => {
910
- if (!resolved) {
911
- resolved = true;
912
- clearTimeout(timeout);
913
- resolve({ success: false, error: "Connection closed unexpectedly" });
914
- }
915
- });
916
-
917
- ws.on("message", (data) => {
918
- try {
919
- const message = JSON.parse(data.toString());
920
-
921
- if (message.status === "completed" && message.video_data) {
922
- // Save video to temp file
923
- const tempDir = os.tmpdir();
924
- const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
925
- const filename = `screenpipe_export_${timestamp}.mp4`;
926
- const filePath = path.join(tempDir, filename);
927
-
928
- fs.writeFileSync(filePath, Buffer.from(message.video_data));
929
-
930
- resolved = true;
931
- clearTimeout(timeout);
932
- ws.close();
933
- resolve({
934
- success: true,
935
- filePath,
936
- frameCount: frameIds.length,
937
- });
938
- } else if (message.status === "error") {
939
- resolved = true;
940
- clearTimeout(timeout);
941
- ws.close();
942
- resolve({ success: false, error: message.error || "Export failed" });
943
- }
944
- // Ignore "extracting" and "encoding" status updates
945
- } catch (parseError) {
946
- // Ignore parse errors for progress messages
947
- }
948
- });
949
- });
950
-
951
- if (exportResult.success && exportResult.filePath) {
952
- return {
953
- content: [
954
- {
955
- type: "text",
956
- text: `Successfully exported video!\n\n` +
957
- `File: ${exportResult.filePath}\n` +
958
- `Frames: ${exportResult.frameCount}\n` +
959
- `Time range: ${startTime} to ${endTime}\n` +
960
- `FPS: ${fps}`,
961
- },
962
- ],
963
- };
964
- } else {
965
- return {
966
- content: [
967
- {
968
- type: "text",
969
- text: `Failed to export video: ${exportResult.error}`,
970
- },
971
- ],
972
- };
973
- }
974
- }
975
-
976
419
  case "list-meetings": {
977
420
  const params = new URLSearchParams();
978
421
  for (const [key, value] of Object.entries(args)) {
@@ -982,20 +425,13 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
982
425
  }
983
426
 
984
427
  const response = await fetchAPI(`/meetings?${params.toString()}`);
985
- if (!response.ok) {
986
- throw new Error(`HTTP error: ${response.status}`);
987
- }
428
+ if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
988
429
 
989
430
  const meetings = await response.json();
990
431
 
991
432
  if (!Array.isArray(meetings) || meetings.length === 0) {
992
433
  return {
993
- content: [
994
- {
995
- type: "text",
996
- text: "No meetings found. Make sure screenpipe is running in smart transcription mode.",
997
- },
998
- ],
434
+ content: [{ type: "text", text: "No meetings found in the given time range." }],
999
435
  };
1000
436
  }
1001
437
 
@@ -1010,10 +446,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1010
446
 
1011
447
  return {
1012
448
  content: [
1013
- {
1014
- type: "text",
1015
- text: `Meetings: ${meetings.length}\n\n${formatted.join("\n---\n")}`,
1016
- },
449
+ { type: "text", text: `Meetings: ${meetings.length}\n\n${formatted.join("\n---\n")}` },
1017
450
  ],
1018
451
  };
1019
452
  }
@@ -1027,29 +460,31 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1027
460
  }
1028
461
 
1029
462
  const response = await fetchAPI(`/activity-summary?${params.toString()}`);
1030
- if (!response.ok) {
1031
- throw new Error(`HTTP error: ${response.status}`);
1032
- }
463
+ if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
1033
464
 
1034
465
  const data = await response.json();
1035
466
 
1036
- // Format apps
1037
467
  const appsLines = (data.apps || []).map(
1038
- (a: { name: string; frame_count: number; minutes: number; first_seen?: string; last_seen?: string }) => {
1039
- const timeSpan = a.first_seen && a.last_seen
1040
- ? `, ${a.first_seen.slice(11, 16)}–${a.last_seen.slice(11, 16)} UTC`
1041
- : "";
468
+ (a: {
469
+ name: string;
470
+ frame_count: number;
471
+ minutes: number;
472
+ first_seen?: string;
473
+ last_seen?: string;
474
+ }) => {
475
+ const timeSpan =
476
+ a.first_seen && a.last_seen
477
+ ? `, ${a.first_seen.slice(11, 16)}–${a.last_seen.slice(11, 16)} UTC`
478
+ : "";
1042
479
  return ` ${a.name}: ${a.minutes} min (${a.frame_count} frames${timeSpan})`;
1043
480
  }
1044
481
  );
1045
482
 
1046
- // Format audio
1047
483
  const speakerLines = (data.audio_summary?.speakers || []).map(
1048
484
  (s: { name: string; segment_count: number }) =>
1049
485
  ` ${s.name}: ${s.segment_count} segments`
1050
486
  );
1051
487
 
1052
- // Format recent texts
1053
488
  const textLines = (data.recent_texts || []).map(
1054
489
  (t: { text: string; app_name: string; timestamp: string }) =>
1055
490
  ` [${t.app_name}] ${t.text}`
@@ -1081,9 +516,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1081
516
  }
1082
517
 
1083
518
  const response = await fetchAPI(`/elements?${params.toString()}`);
1084
- if (!response.ok) {
1085
- throw new Error(`HTTP error: ${response.status}`);
1086
- }
519
+ if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
1087
520
 
1088
521
  const data = await response.json();
1089
522
  const elements = data.data || [];
@@ -1131,21 +564,14 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1131
564
  case "frame-context": {
1132
565
  const frameId = args.frame_id as number;
1133
566
  if (!frameId) {
1134
- return {
1135
- content: [{ type: "text", text: "Error: frame_id is required" }],
1136
- };
567
+ return { content: [{ type: "text", text: "Error: frame_id is required" }] };
1137
568
  }
1138
569
 
1139
570
  const response = await fetchAPI(`/frames/${frameId}/context`);
1140
- if (!response.ok) {
1141
- throw new Error(`HTTP error: ${response.status}`);
1142
- }
571
+ if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
1143
572
 
1144
573
  const data = await response.json();
1145
-
1146
- const lines = [
1147
- `Frame ${data.frame_id} (source: ${data.text_source})`,
1148
- ];
574
+ const lines = [`Frame ${data.frame_id} (source: ${data.text_source})`];
1149
575
 
1150
576
  if (data.urls?.length) {
1151
577
  lines.push("", "URLs:", ...data.urls.map((u: string) => ` ${u}`));
@@ -1163,27 +589,227 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1163
589
  }
1164
590
 
1165
591
  if (data.text) {
1166
- // Truncate to avoid massive outputs
1167
- const truncated = data.text.length > 2000 ? data.text.substring(0, 2000) + "..." : data.text;
592
+ const truncated =
593
+ data.text.length > 2000 ? data.text.substring(0, 2000) + "..." : data.text;
1168
594
  lines.push("", "Full text:", truncated);
1169
595
  }
1170
596
 
1171
597
  return { content: [{ type: "text", text: lines.join("\n") }] };
1172
598
  }
1173
599
 
600
+ case "export-video": {
601
+ const startTime = args.start_time as string;
602
+ const endTime = args.end_time as string;
603
+ const fps = (args.fps as number) || 1.0;
604
+
605
+ if (!startTime || !endTime) {
606
+ return {
607
+ content: [{ type: "text", text: "Error: start_time and end_time are required" }],
608
+ };
609
+ }
610
+
611
+ // Get frame IDs for the time range
612
+ const searchParams = new URLSearchParams({
613
+ content_type: "ocr",
614
+ start_time: startTime,
615
+ end_time: endTime,
616
+ limit: "10000",
617
+ });
618
+
619
+ const searchResponse = await fetchAPI(`/search?${searchParams.toString()}`);
620
+ if (!searchResponse.ok) {
621
+ throw new Error(`Failed to search for frames: HTTP ${searchResponse.status}`);
622
+ }
623
+
624
+ const searchData = await searchResponse.json();
625
+ const results = searchData.data || [];
626
+
627
+ if (results.length === 0) {
628
+ return {
629
+ content: [
630
+ {
631
+ type: "text",
632
+ text: `No screen recordings found between ${startTime} and ${endTime}.`,
633
+ },
634
+ ],
635
+ };
636
+ }
637
+
638
+ const frameIds: number[] = [];
639
+ const seenIds = new Set<number>();
640
+ for (const result of results) {
641
+ if (result.type === "OCR" && result.content?.frame_id) {
642
+ const frameId = result.content.frame_id;
643
+ if (!seenIds.has(frameId)) {
644
+ seenIds.add(frameId);
645
+ frameIds.push(frameId);
646
+ }
647
+ }
648
+ }
649
+
650
+ if (frameIds.length === 0) {
651
+ return {
652
+ content: [{ type: "text", text: "No valid frame IDs found (audio-only?)." }],
653
+ };
654
+ }
655
+
656
+ frameIds.sort((a, b) => a - b);
657
+
658
+ const wsUrl = `ws://localhost:${port}/frames/export?fps=${fps}`;
659
+
660
+ const exportResult = await new Promise<{
661
+ success: boolean;
662
+ filePath?: string;
663
+ error?: string;
664
+ frameCount?: number;
665
+ }>((resolve) => {
666
+ const ws = new WebSocket(wsUrl);
667
+ let resolved = false;
668
+
669
+ const timeout = setTimeout(() => {
670
+ if (!resolved) {
671
+ resolved = true;
672
+ ws.close();
673
+ resolve({ success: false, error: "Export timed out after 5 minutes" });
674
+ }
675
+ }, 5 * 60 * 1000);
676
+
677
+ ws.on("open", () => {
678
+ ws.send(JSON.stringify({ frame_ids: frameIds }));
679
+ });
680
+
681
+ ws.on("error", (error) => {
682
+ if (!resolved) {
683
+ resolved = true;
684
+ clearTimeout(timeout);
685
+ resolve({ success: false, error: `WebSocket error: ${error.message}` });
686
+ }
687
+ });
688
+
689
+ ws.on("close", () => {
690
+ if (!resolved) {
691
+ resolved = true;
692
+ clearTimeout(timeout);
693
+ resolve({ success: false, error: "Connection closed unexpectedly" });
694
+ }
695
+ });
696
+
697
+ ws.on("message", (data) => {
698
+ try {
699
+ const message = JSON.parse(data.toString());
700
+ if (message.status === "completed" && message.video_data) {
701
+ const tempDir = os.tmpdir();
702
+ const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
703
+ const filename = `screenpipe_export_${timestamp}.mp4`;
704
+ const filePath = path.join(tempDir, filename);
705
+ fs.writeFileSync(filePath, Buffer.from(message.video_data));
706
+ resolved = true;
707
+ clearTimeout(timeout);
708
+ ws.close();
709
+ resolve({ success: true, filePath, frameCount: frameIds.length });
710
+ } else if (message.status === "error") {
711
+ resolved = true;
712
+ clearTimeout(timeout);
713
+ ws.close();
714
+ resolve({ success: false, error: message.error || "Export failed" });
715
+ }
716
+ } catch {
717
+ // Ignore parse errors for progress messages
718
+ }
719
+ });
720
+ });
721
+
722
+ if (exportResult.success && exportResult.filePath) {
723
+ return {
724
+ content: [
725
+ {
726
+ type: "text",
727
+ text:
728
+ `Video exported: ${exportResult.filePath}\n` +
729
+ `Frames: ${exportResult.frameCount} | ${startTime} → ${endTime} | ${fps} fps`,
730
+ },
731
+ ],
732
+ };
733
+ } else {
734
+ return {
735
+ content: [{ type: "text", text: `Export failed: ${exportResult.error}` }],
736
+ };
737
+ }
738
+ }
739
+
740
+ case "update-memory": {
741
+ if (args.delete && args.id) {
742
+ const response = await fetchAPI(`/memories/${args.id}`, { method: "DELETE" });
743
+ if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
744
+ return { content: [{ type: "text", text: `Memory ${args.id} deleted.` }] };
745
+ }
746
+ if (args.id) {
747
+ const body: Record<string, unknown> = {};
748
+ if (args.content !== undefined) body.content = args.content;
749
+ if (args.tags !== undefined) body.tags = args.tags;
750
+ if (args.importance !== undefined) body.importance = args.importance;
751
+ if (args.source_context !== undefined) body.source_context = args.source_context;
752
+ const response = await fetchAPI(`/memories/${args.id}`, {
753
+ method: "PUT",
754
+ body: JSON.stringify(body),
755
+ });
756
+ if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
757
+ const memory = await response.json();
758
+ return {
759
+ content: [{ type: "text", text: `Memory ${memory.id} updated: "${memory.content}"` }],
760
+ };
761
+ }
762
+ if (!args.content) {
763
+ return {
764
+ content: [{ type: "text", text: "Error: 'content' is required to create a memory" }],
765
+ };
766
+ }
767
+ const memoryBody: Record<string, unknown> = {
768
+ content: args.content,
769
+ source: "mcp",
770
+ tags: args.tags || [],
771
+ importance: args.importance ?? 0.5,
772
+ };
773
+ if (args.source_context) memoryBody.source_context = args.source_context;
774
+ const memoryResponse = await fetchAPI("/memories", {
775
+ method: "POST",
776
+ body: JSON.stringify(memoryBody),
777
+ });
778
+ if (!memoryResponse.ok) throw new Error(`HTTP error: ${memoryResponse.status}`);
779
+ const newMemory = await memoryResponse.json();
780
+ return {
781
+ content: [
782
+ { type: "text", text: `Memory created (id: ${newMemory.id}): "${newMemory.content}"` },
783
+ ],
784
+ };
785
+ }
786
+
787
+ case "send-notification": {
788
+ const notifBody: Record<string, unknown> = {
789
+ title: args.title,
790
+ pipe_name: args.pipe_name,
791
+ };
792
+ if (args.body) notifBody.body = args.body;
793
+ if (args.timeout_secs) notifBody.timeout_secs = args.timeout_secs;
794
+ if (args.actions) notifBody.actions = args.actions;
795
+ const notifResponse = await fetchAPI("/notify", {
796
+ method: "POST",
797
+ body: JSON.stringify(notifBody),
798
+ });
799
+ if (!notifResponse.ok) throw new Error(`HTTP error: ${notifResponse.status}`);
800
+ const notifResult = await notifResponse.json();
801
+ return {
802
+ content: [{ type: "text", text: `Notification sent (id: ${notifResult.id})` }],
803
+ };
804
+ }
805
+
1174
806
  default:
1175
807
  throw new Error(`Unknown tool: ${name}`);
1176
808
  }
1177
809
  } catch (error) {
1178
- const errorMessage =
1179
- error instanceof Error ? error.message : "Unknown error";
810
+ const errorMessage = error instanceof Error ? error.message : "Unknown error";
1180
811
  return {
1181
- content: [
1182
- {
1183
- type: "text",
1184
- text: `Error executing ${name}: ${errorMessage}`,
1185
- },
1186
- ],
812
+ content: [{ type: "text", text: `Error executing ${name}: ${errorMessage}` }],
1187
813
  };
1188
814
  }
1189
815
  });