screenpipe-mcp 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/dist/index.js +120 -59
  2. package/package.json +1 -1
  3. package/src/index.ts +123 -58
package/dist/index.js CHANGED
@@ -64,63 +64,67 @@ const server = new index_js_1.Server({
64
64
  },
65
65
  });
66
66
  // ---------------------------------------------------------------------------
67
- // Tools — minimal descriptions, no behavioral guidance (that belongs in resources)
67
+ // Tools
68
68
  // ---------------------------------------------------------------------------
69
69
  const TOOLS = [
70
70
  {
71
71
  name: "search-content",
72
72
  description: "Search screen text, audio transcriptions, input events, and memories. " +
73
- "Returns timestamped results with app context. Call with no params for recent activity.",
74
- annotations: { title: "Search Content", readOnlyHint: true },
73
+ "Returns timestamped results with app context. " +
74
+ "IMPORTANT: prefer activity-summary for broad questions ('what was I doing?'). " +
75
+ "Use search-content only when you need specific text/content. " +
76
+ "Start with limit=5, increase only if needed. Results can be large — use max_content_length=500 to truncate.",
77
+ annotations: { title: "Search Content", readOnlyHint: true, openWorldHint: false, idempotentHint: true },
75
78
  inputSchema: {
76
79
  type: "object",
77
80
  properties: {
78
81
  q: {
79
82
  type: "string",
80
- description: "Full-text search query. Omit to return all content in time range.",
83
+ description: "Full-text search query. Omit to return all content in time range. Avoid for audio — transcriptions are noisy, q filters too aggressively.",
81
84
  },
82
85
  content_type: {
83
86
  type: "string",
84
87
  enum: ["all", "ocr", "audio", "input", "accessibility", "memory"],
85
- description: "Filter by content type. Default: 'all'.",
88
+ description: "Filter by content type. 'accessibility' is preferred for screen text (OS-native). 'ocr' is fallback for apps without accessibility support. Default: 'all'.",
86
89
  default: "all",
87
90
  },
88
- limit: { type: "integer", description: "Max results (default 10)", default: 10 },
89
- offset: { type: "integer", description: "Pagination offset", default: 0 },
91
+ limit: { type: "integer", description: "Max results (default 10, max 20). Start with 5 for exploration.", default: 10 },
92
+ offset: { type: "integer", description: "Pagination offset. Use when results say 'use offset=N for more'.", default: 0 },
90
93
  start_time: {
91
94
  type: "string",
92
- description: "ISO 8601 UTC or relative (e.g. '2h ago')",
95
+ description: "ISO 8601 UTC or relative (e.g. '2h ago', '1d ago'). Always provide to avoid scanning entire history.",
93
96
  },
94
97
  end_time: {
95
98
  type: "string",
96
- description: "ISO 8601 UTC or relative (e.g. 'now')",
99
+ description: "ISO 8601 UTC or relative (e.g. 'now'). Defaults to now.",
97
100
  },
98
- app_name: { type: "string", description: "Filter by app name" },
99
- window_name: { type: "string", description: "Filter by window title" },
100
- min_length: { type: "integer", description: "Min content length" },
101
- max_length: { type: "integer", description: "Max content length" },
101
+ app_name: { type: "string", description: "Filter by app name (e.g. 'Google Chrome', 'Slack', 'zoom.us'). Case-sensitive." },
102
+ window_name: { type: "string", description: "Filter by window title substring" },
103
+ min_length: { type: "integer", description: "Min content length in characters" },
104
+ max_length: { type: "integer", description: "Max content length in characters" },
102
105
  include_frames: {
103
106
  type: "boolean",
104
- description: "Include base64 screenshots (OCR only)",
107
+ description: "Include base64 screenshots (OCR only). Warning: large response.",
105
108
  default: false,
106
109
  },
107
- speaker_ids: { type: "string", description: "Comma-separated speaker IDs" },
108
- speaker_name: { type: "string", description: "Filter audio by speaker name" },
110
+ speaker_ids: { type: "string", description: "Comma-separated speaker IDs to filter audio" },
111
+ speaker_name: { type: "string", description: "Filter audio by speaker name (case-insensitive partial match)" },
109
112
  max_content_length: {
110
113
  type: "integer",
111
- description: "Truncate each result via middle-truncation",
114
+ description: "Truncate each result's text via middle-truncation. Use 200-500 to keep responses compact.",
112
115
  },
113
116
  },
114
117
  },
115
118
  },
116
119
  {
117
120
  name: "list-meetings",
118
- description: "List detected meetings (Zoom, Teams, Meet, etc.) with duration, app, and attendees.",
119
- annotations: { title: "List Meetings", readOnlyHint: true },
121
+ description: "List detected meetings (Zoom, Teams, Meet, etc.) with duration, app, and attendees. " +
122
+ "Only available when screenpipe runs in smart transcription mode.",
123
+ annotations: { title: "List Meetings", readOnlyHint: true, openWorldHint: false, idempotentHint: true },
120
124
  inputSchema: {
121
125
  type: "object",
122
126
  properties: {
123
- start_time: { type: "string", description: "ISO 8601 UTC or relative" },
127
+ start_time: { type: "string", description: "ISO 8601 UTC or relative (e.g. '1d ago')" },
124
128
  end_time: { type: "string", description: "ISO 8601 UTC or relative" },
125
129
  limit: { type: "integer", description: "Max results (default 20)", default: 20 },
126
130
  offset: { type: "integer", description: "Pagination offset", default: 0 },
@@ -130,14 +134,15 @@ const TOOLS = [
130
134
  {
131
135
  name: "activity-summary",
132
136
  description: "Lightweight activity overview (~200-500 tokens): app usage with active minutes, audio speakers, recent texts. " +
133
- "Use for 'how long on X?', 'which apps?', 'what was I doing?' questions.",
134
- annotations: { title: "Activity Summary", readOnlyHint: true },
137
+ "USE THIS FIRST for broad questions: 'what was I doing?', 'how long on X?', 'which apps?'. " +
138
+ "Only escalate to search-content if you need specific text content.",
139
+ annotations: { title: "Activity Summary", readOnlyHint: true, openWorldHint: false, idempotentHint: true },
135
140
  inputSchema: {
136
141
  type: "object",
137
142
  properties: {
138
- start_time: { type: "string", description: "ISO 8601 UTC or relative" },
139
- end_time: { type: "string", description: "ISO 8601 UTC or relative" },
140
- app_name: { type: "string", description: "Optional app name filter" },
143
+ start_time: { type: "string", description: "ISO 8601 UTC or relative (e.g. '3h ago')" },
144
+ end_time: { type: "string", description: "ISO 8601 UTC or relative (e.g. 'now')" },
145
+ app_name: { type: "string", description: "Optional app name filter to focus on one app" },
141
146
  },
142
147
  required: ["start_time", "end_time"],
143
148
  },
@@ -145,85 +150,89 @@ const TOOLS = [
145
150
  {
146
151
  name: "search-elements",
147
152
  description: "Search UI elements (buttons, links, text fields) from the accessibility tree. " +
148
- "Lighter than search-content for targeted UI lookups.",
149
- annotations: { title: "Search Elements", readOnlyHint: true },
153
+ "Lighter than search-content for targeted UI lookups. " +
154
+ "Use when you need to find specific UI controls or page structure, not general content.",
155
+ annotations: { title: "Search Elements", readOnlyHint: true, openWorldHint: false, idempotentHint: true },
150
156
  inputSchema: {
151
157
  type: "object",
152
158
  properties: {
153
159
  q: { type: "string", description: "Full-text search on element text" },
154
- frame_id: { type: "integer", description: "Filter to specific frame" },
160
+ frame_id: { type: "integer", description: "Filter to specific frame ID from search results" },
155
161
  source: {
156
162
  type: "string",
157
163
  enum: ["accessibility", "ocr"],
158
- description: "Element source filter",
164
+ description: "Element source. 'accessibility' is preferred (OS-native tree). 'ocr' for apps without a11y.",
159
165
  },
160
- role: { type: "string", description: "Element role (e.g. AXButton, AXLink)" },
166
+ role: { type: "string", description: "Element role filter (e.g. 'AXButton', 'AXLink', 'AXTextField')" },
161
167
  start_time: { type: "string", description: "ISO 8601 UTC or relative" },
162
168
  end_time: { type: "string", description: "ISO 8601 UTC or relative" },
163
169
  app_name: { type: "string", description: "Filter by app name" },
164
- limit: { type: "integer", description: "Max results (default 50)", default: 50 },
170
+ limit: { type: "integer", description: "Max results (default 50). Start with 10-20.", default: 50 },
165
171
  offset: { type: "integer", description: "Pagination offset", default: 0 },
166
172
  },
167
173
  },
168
174
  },
169
175
  {
170
176
  name: "frame-context",
171
- description: "Get accessibility text, parsed tree nodes, and URLs for a specific frame ID.",
172
- annotations: { title: "Frame Context", readOnlyHint: true },
177
+ description: "Get full accessibility text, parsed tree nodes, and URLs for a specific frame ID. " +
178
+ "Use after search-content to get detailed context for a specific moment.",
179
+ annotations: { title: "Frame Context", readOnlyHint: true, openWorldHint: false, idempotentHint: true },
173
180
  inputSchema: {
174
181
  type: "object",
175
182
  properties: {
176
- frame_id: { type: "integer", description: "Frame ID from search results" },
183
+ frame_id: { type: "integer", description: "Frame ID from search-content results (content.frame_id field)" },
177
184
  },
178
185
  required: ["frame_id"],
179
186
  },
180
187
  },
181
188
  {
182
189
  name: "export-video",
183
- description: "Export an MP4 video of screen recordings for a time range.",
184
- annotations: { title: "Export Video", destructiveHint: true },
190
+ description: "Export an MP4 video of screen recordings for a time range. " +
191
+ "Returns the file path. Can take a few minutes for long ranges.",
192
+ annotations: { title: "Export Video", readOnlyHint: false, destructiveHint: false, openWorldHint: false },
185
193
  inputSchema: {
186
194
  type: "object",
187
195
  properties: {
188
196
  start_time: { type: "string", description: "ISO 8601 UTC or relative" },
189
197
  end_time: { type: "string", description: "ISO 8601 UTC or relative" },
190
- fps: { type: "number", description: "Output FPS (default 1.0)", default: 1.0 },
198
+ fps: { type: "number", description: "Output FPS (default 1.0). Higher = smoother but larger file.", default: 1.0 },
191
199
  },
192
200
  required: ["start_time", "end_time"],
193
201
  },
194
202
  },
195
203
  {
196
204
  name: "update-memory",
197
- description: "Create, update, or delete a persistent memory (facts, preferences, decisions). " +
198
- "Retrieve memories via search-content with content_type='memory'.",
199
- annotations: { title: "Update Memory", destructiveHint: false },
205
+ description: "Create, update, or delete a persistent memory (facts, preferences, decisions the user wants to remember). " +
206
+ "To retrieve memories, use search-content with content_type='memory'. " +
207
+ "To create: provide content + tags. To update: provide id + fields to change. To delete: provide id + delete=true.",
208
+ annotations: { title: "Update Memory", readOnlyHint: false, destructiveHint: false, openWorldHint: false, idempotentHint: true },
200
209
  inputSchema: {
201
210
  type: "object",
202
211
  properties: {
203
- id: { type: "integer", description: "Memory ID (omit to create new)" },
204
- content: { type: "string", description: "Memory text" },
205
- tags: { type: "array", items: { type: "string" }, description: "Categorization tags" },
206
- importance: { type: "number", description: "0.0-1.0 (default 0.5)" },
207
- source_context: { type: "object", description: "Optional source data links" },
208
- delete: { type: "boolean", description: "Delete the memory identified by id" },
212
+ id: { type: "integer", description: "Memory ID — omit to create new, provide to update/delete" },
213
+ content: { type: "string", description: "Memory text (required for creation)" },
214
+ tags: { type: "array", items: { type: "string" }, description: "Categorization tags (e.g. ['work', 'project-x'])" },
215
+ importance: { type: "number", description: "0.0 (trivial) to 1.0 (critical). Default 0.5." },
216
+ source_context: { type: "object", description: "Optional metadata linking to source (app, timestamp, etc.)" },
217
+ delete: { type: "boolean", description: "Set true to delete the memory identified by id" },
209
218
  },
210
219
  },
211
220
  },
212
221
  {
213
222
  name: "send-notification",
214
- description: "Send a notification to the screenpipe desktop UI with optional action buttons. " +
215
- "Actions can re-run pipes with context, call API endpoints, or open deep links.",
216
- annotations: { title: "Send Notification", destructiveHint: false },
223
+ description: "Send a notification to the screenpipe desktop UI. " +
224
+ "Use to alert the user about findings, completed tasks, or actions needing attention.",
225
+ annotations: { title: "Send Notification", readOnlyHint: false, destructiveHint: false, openWorldHint: false },
217
226
  inputSchema: {
218
227
  type: "object",
219
228
  properties: {
220
- title: { type: "string", description: "Notification title" },
229
+ title: { type: "string", description: "Notification title (short, descriptive)" },
221
230
  body: { type: "string", description: "Notification body (markdown supported)" },
222
- pipe_name: { type: "string", description: "Name of the pipe sending this notification" },
223
- timeout_secs: { type: "integer", description: "Auto-dismiss seconds (default 20)", default: 20 },
231
+ pipe_name: { type: "string", description: "Name of the pipe/tool sending this notification" },
232
+ timeout_secs: { type: "integer", description: "Auto-dismiss after N seconds (default 20). Use 0 for persistent.", default: 20 },
224
233
  actions: {
225
234
  type: "array",
226
- description: "Up to 5 action buttons",
235
+ description: "Up to 5 action buttons. Each needs id, label, type ('pipe'|'api'|'deeplink'|'dismiss').",
227
236
  items: {
228
237
  type: "object",
229
238
  properties: {
@@ -232,6 +241,7 @@ const TOOLS = [
232
241
  type: { type: "string", enum: ["pipe", "api", "deeplink", "dismiss"], description: "Action type" },
233
242
  pipe: { type: "string", description: "Pipe name to run (type=pipe)" },
234
243
  context: { type: "object", description: "Context passed to pipe (type=pipe)" },
244
+ open_in_chat: { type: "boolean", description: "Open pipe run in chat UI instead of background (type=pipe)" },
235
245
  url: { type: "string", description: "URL for api/deeplink actions" },
236
246
  },
237
247
  required: ["id", "label", "type"],
@@ -255,6 +265,12 @@ const RESOURCES = [
255
265
  description: "Current date/time, timezone, and pre-computed timestamps for common time ranges",
256
266
  mimeType: "application/json",
257
267
  },
268
+ {
269
+ uri: "screenpipe://guide",
270
+ name: "Usage Guide",
271
+ description: "How to use screenpipe tools effectively — search strategy, progressive disclosure, and common patterns",
272
+ mimeType: "text/markdown",
273
+ },
258
274
  ];
259
275
  server.setRequestHandler(types_js_1.ListResourcesRequestSchema, async () => {
260
276
  return { resources: RESOURCES };
@@ -291,6 +307,51 @@ server.setRequestHandler(types_js_1.ReadResourceRequestSchema, async (request) =
291
307
  ],
292
308
  };
293
309
  }
310
+ if (uri === "screenpipe://guide") {
311
+ return {
312
+ contents: [
313
+ {
314
+ uri,
315
+ mimeType: "text/markdown",
316
+ text: `# Screenpipe Usage Guide
317
+
318
+ ## Progressive Disclosure — start light, escalate only when needed
319
+
320
+ | Step | Tool | When to use |
321
+ |------|------|-------------|
322
+ | 1 | activity-summary | Broad questions: "what was I doing?", "which apps?", "how long on X?" |
323
+ | 2 | search-content | Need specific text, transcriptions, or content |
324
+ | 3 | search-elements | Need UI structure — buttons, links, form fields |
325
+ | 4 | frame-context | Need full detail for a specific moment (use frame_id from step 2) |
326
+
327
+ ## Search Strategy
328
+
329
+ - **Always provide start_time** — without it, search scans the entire history
330
+ - **Start with limit=5** — increase only if you need more results
331
+ - **Use max_content_length=500** to keep responses compact
332
+ - **Don't use q for audio** — transcriptions are noisy, q filters too aggressively. Search audio by time range and speaker instead
333
+ - **app_name is case-sensitive** — use exact names: "Google Chrome" not "chrome"
334
+ - **content_type=accessibility is preferred** for screen text (OS-native). ocr is fallback for apps without accessibility support
335
+
336
+ ## Common Patterns
337
+
338
+ - "What was I doing for the last 2 hours?" → activity-summary with start_time='2h ago'
339
+ - "What did I discuss in my meeting?" → list-meetings to find it, then search-content with audio + that time range
340
+ - "Find when I was on Twitter" → search-content with app_name='Arc' (or the browser name), q='twitter'
341
+ - "Remember that I prefer X" → update-memory with content describing the preference
342
+ - "What do you remember about X?" → search-content with content_type='memory', q='X'
343
+
344
+ ## Deep Links
345
+
346
+ When referencing specific moments in results, create clickable links:
347
+ - Frame: [10:30 AM — Chrome](screenpipe://frame/{frame_id}) — use frame_id from search results
348
+ - Timeline: [meeting at 3pm](screenpipe://timeline?timestamp=2024-01-15T15:00:00Z) — use exact timestamp from results
349
+ Never fabricate IDs or timestamps — only use values from actual results.
350
+ `,
351
+ },
352
+ ],
353
+ };
354
+ }
294
355
  throw new Error(`Unknown resource: ${uri}`);
295
356
  });
296
357
  // ---------------------------------------------------------------------------
@@ -701,23 +762,23 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
701
762
  case "send-notification": {
702
763
  const notifBody = {
703
764
  title: args.title,
704
- pipe_name: args.pipe_name,
765
+ body: args.body || "",
766
+ type: "pipe",
705
767
  };
706
- if (args.body)
707
- notifBody.body = args.body;
708
768
  if (args.timeout_secs)
709
- notifBody.timeout_secs = args.timeout_secs;
769
+ notifBody.timeout = Number(args.timeout_secs) * 1000;
710
770
  if (args.actions)
711
771
  notifBody.actions = args.actions;
712
- const notifResponse = await fetchAPI("/notify", {
772
+ const notifResponse = await fetch("http://localhost:11435/notify", {
713
773
  method: "POST",
774
+ headers: { "Content-Type": "application/json" },
714
775
  body: JSON.stringify(notifBody),
715
776
  });
716
777
  if (!notifResponse.ok)
717
778
  throw new Error(`HTTP error: ${notifResponse.status}`);
718
779
  const notifResult = await notifResponse.json();
719
780
  return {
720
- content: [{ type: "text", text: `Notification sent (id: ${notifResult.id})` }],
781
+ content: [{ type: "text", text: `Notification sent: ${notifResult.message}` }],
721
782
  };
722
783
  }
723
784
  default:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "screenpipe-mcp",
3
- "version": "0.10.0",
3
+ "version": "0.11.0",
4
4
  "description": "MCP server for screenpipe - search your screen recordings and audio transcriptions",
5
5
  "main": "dist/index.js",
6
6
  "bin": {
package/src/index.ts CHANGED
@@ -43,64 +43,69 @@ const server = new Server(
43
43
  );
44
44
 
45
45
  // ---------------------------------------------------------------------------
46
- // Tools — minimal descriptions, no behavioral guidance (that belongs in resources)
46
+ // Tools
47
47
  // ---------------------------------------------------------------------------
48
48
  const TOOLS: Tool[] = [
49
49
  {
50
50
  name: "search-content",
51
51
  description:
52
52
  "Search screen text, audio transcriptions, input events, and memories. " +
53
- "Returns timestamped results with app context. Call with no params for recent activity.",
54
- annotations: { title: "Search Content", readOnlyHint: true },
53
+ "Returns timestamped results with app context. " +
54
+ "IMPORTANT: prefer activity-summary for broad questions ('what was I doing?'). " +
55
+ "Use search-content only when you need specific text/content. " +
56
+ "Start with limit=5, increase only if needed. Results can be large — use max_content_length=500 to truncate.",
57
+ annotations: { title: "Search Content", readOnlyHint: true, openWorldHint: false, idempotentHint: true },
55
58
  inputSchema: {
56
59
  type: "object",
57
60
  properties: {
58
61
  q: {
59
62
  type: "string",
60
- description: "Full-text search query. Omit to return all content in time range.",
63
+ description: "Full-text search query. Omit to return all content in time range. Avoid for audio — transcriptions are noisy, q filters too aggressively.",
61
64
  },
62
65
  content_type: {
63
66
  type: "string",
64
67
  enum: ["all", "ocr", "audio", "input", "accessibility", "memory"],
65
- description: "Filter by content type. Default: 'all'.",
68
+ description: "Filter by content type. 'accessibility' is preferred for screen text (OS-native). 'ocr' is fallback for apps without accessibility support. Default: 'all'.",
66
69
  default: "all",
67
70
  },
68
- limit: { type: "integer", description: "Max results (default 10)", default: 10 },
69
- offset: { type: "integer", description: "Pagination offset", default: 0 },
71
+ limit: { type: "integer", description: "Max results (default 10, max 20). Start with 5 for exploration.", default: 10 },
72
+ offset: { type: "integer", description: "Pagination offset. Use when results say 'use offset=N for more'.", default: 0 },
70
73
  start_time: {
71
74
  type: "string",
72
- description: "ISO 8601 UTC or relative (e.g. '2h ago')",
75
+ description: "ISO 8601 UTC or relative (e.g. '2h ago', '1d ago'). Always provide to avoid scanning entire history.",
73
76
  },
74
77
  end_time: {
75
78
  type: "string",
76
- description: "ISO 8601 UTC or relative (e.g. 'now')",
79
+ description: "ISO 8601 UTC or relative (e.g. 'now'). Defaults to now.",
77
80
  },
78
- app_name: { type: "string", description: "Filter by app name" },
79
- window_name: { type: "string", description: "Filter by window title" },
80
- min_length: { type: "integer", description: "Min content length" },
81
- max_length: { type: "integer", description: "Max content length" },
81
+ app_name: { type: "string", description: "Filter by app name (e.g. 'Google Chrome', 'Slack', 'zoom.us'). Case-sensitive." },
82
+ window_name: { type: "string", description: "Filter by window title substring" },
83
+ min_length: { type: "integer", description: "Min content length in characters" },
84
+ max_length: { type: "integer", description: "Max content length in characters" },
82
85
  include_frames: {
83
86
  type: "boolean",
84
- description: "Include base64 screenshots (OCR only)",
87
+ description: "Include base64 screenshots (OCR only). Warning: large response.",
85
88
  default: false,
86
89
  },
87
- speaker_ids: { type: "string", description: "Comma-separated speaker IDs" },
88
- speaker_name: { type: "string", description: "Filter audio by speaker name" },
90
+ speaker_ids: { type: "string", description: "Comma-separated speaker IDs to filter audio" },
91
+ speaker_name: { type: "string", description: "Filter audio by speaker name (case-insensitive partial match)" },
89
92
  max_content_length: {
90
93
  type: "integer",
91
- description: "Truncate each result via middle-truncation",
94
+ description: "Truncate each result's text via middle-truncation. Use 200-500 to keep responses compact.",
92
95
  },
93
96
  },
94
97
  },
95
98
  },
96
99
  {
97
100
  name: "list-meetings",
98
- description: "List detected meetings (Zoom, Teams, Meet, etc.) with duration, app, and attendees.",
99
- annotations: { title: "List Meetings", readOnlyHint: true },
101
+ description:
102
+ "List detected meetings (Zoom, Teams, Meet, etc.) with duration, app, and attendees. " +
103
+ "Only available when screenpipe runs in smart transcription mode.",
104
+ annotations: { title: "List Meetings", readOnlyHint: true, openWorldHint: false, idempotentHint: true },
100
105
  inputSchema: {
101
106
  type: "object",
102
107
  properties: {
103
- start_time: { type: "string", description: "ISO 8601 UTC or relative" },
108
+ start_time: { type: "string", description: "ISO 8601 UTC or relative (e.g. '1d ago')" },
104
109
  end_time: { type: "string", description: "ISO 8601 UTC or relative" },
105
110
  limit: { type: "integer", description: "Max results (default 20)", default: 20 },
106
111
  offset: { type: "integer", description: "Pagination offset", default: 0 },
@@ -111,14 +116,15 @@ const TOOLS: Tool[] = [
111
116
  name: "activity-summary",
112
117
  description:
113
118
  "Lightweight activity overview (~200-500 tokens): app usage with active minutes, audio speakers, recent texts. " +
114
- "Use for 'how long on X?', 'which apps?', 'what was I doing?' questions.",
115
- annotations: { title: "Activity Summary", readOnlyHint: true },
119
+ "USE THIS FIRST for broad questions: 'what was I doing?', 'how long on X?', 'which apps?'. " +
120
+ "Only escalate to search-content if you need specific text content.",
121
+ annotations: { title: "Activity Summary", readOnlyHint: true, openWorldHint: false, idempotentHint: true },
116
122
  inputSchema: {
117
123
  type: "object",
118
124
  properties: {
119
- start_time: { type: "string", description: "ISO 8601 UTC or relative" },
120
- end_time: { type: "string", description: "ISO 8601 UTC or relative" },
121
- app_name: { type: "string", description: "Optional app name filter" },
125
+ start_time: { type: "string", description: "ISO 8601 UTC or relative (e.g. '3h ago')" },
126
+ end_time: { type: "string", description: "ISO 8601 UTC or relative (e.g. 'now')" },
127
+ app_name: { type: "string", description: "Optional app name filter to focus on one app" },
122
128
  },
123
129
  required: ["start_time", "end_time"],
124
130
  },
@@ -127,23 +133,24 @@ const TOOLS: Tool[] = [
127
133
  name: "search-elements",
128
134
  description:
129
135
  "Search UI elements (buttons, links, text fields) from the accessibility tree. " +
130
- "Lighter than search-content for targeted UI lookups.",
131
- annotations: { title: "Search Elements", readOnlyHint: true },
136
+ "Lighter than search-content for targeted UI lookups. " +
137
+ "Use when you need to find specific UI controls or page structure, not general content.",
138
+ annotations: { title: "Search Elements", readOnlyHint: true, openWorldHint: false, idempotentHint: true },
132
139
  inputSchema: {
133
140
  type: "object",
134
141
  properties: {
135
142
  q: { type: "string", description: "Full-text search on element text" },
136
- frame_id: { type: "integer", description: "Filter to specific frame" },
143
+ frame_id: { type: "integer", description: "Filter to specific frame ID from search results" },
137
144
  source: {
138
145
  type: "string",
139
146
  enum: ["accessibility", "ocr"],
140
- description: "Element source filter",
147
+ description: "Element source. 'accessibility' is preferred (OS-native tree). 'ocr' for apps without a11y.",
141
148
  },
142
- role: { type: "string", description: "Element role (e.g. AXButton, AXLink)" },
149
+ role: { type: "string", description: "Element role filter (e.g. 'AXButton', 'AXLink', 'AXTextField')" },
143
150
  start_time: { type: "string", description: "ISO 8601 UTC or relative" },
144
151
  end_time: { type: "string", description: "ISO 8601 UTC or relative" },
145
152
  app_name: { type: "string", description: "Filter by app name" },
146
- limit: { type: "integer", description: "Max results (default 50)", default: 50 },
153
+ limit: { type: "integer", description: "Max results (default 50). Start with 10-20.", default: 50 },
147
154
  offset: { type: "integer", description: "Pagination offset", default: 0 },
148
155
  },
149
156
  },
@@ -151,26 +158,29 @@ const TOOLS: Tool[] = [
151
158
  {
152
159
  name: "frame-context",
153
160
  description:
154
- "Get accessibility text, parsed tree nodes, and URLs for a specific frame ID.",
155
- annotations: { title: "Frame Context", readOnlyHint: true },
161
+ "Get full accessibility text, parsed tree nodes, and URLs for a specific frame ID. " +
162
+ "Use after search-content to get detailed context for a specific moment.",
163
+ annotations: { title: "Frame Context", readOnlyHint: true, openWorldHint: false, idempotentHint: true },
156
164
  inputSchema: {
157
165
  type: "object",
158
166
  properties: {
159
- frame_id: { type: "integer", description: "Frame ID from search results" },
167
+ frame_id: { type: "integer", description: "Frame ID from search-content results (content.frame_id field)" },
160
168
  },
161
169
  required: ["frame_id"],
162
170
  },
163
171
  },
164
172
  {
165
173
  name: "export-video",
166
- description: "Export an MP4 video of screen recordings for a time range.",
167
- annotations: { title: "Export Video", destructiveHint: true },
174
+ description:
175
+ "Export an MP4 video of screen recordings for a time range. " +
176
+ "Returns the file path. Can take a few minutes for long ranges.",
177
+ annotations: { title: "Export Video", readOnlyHint: false, destructiveHint: false, openWorldHint: false },
168
178
  inputSchema: {
169
179
  type: "object",
170
180
  properties: {
171
181
  start_time: { type: "string", description: "ISO 8601 UTC or relative" },
172
182
  end_time: { type: "string", description: "ISO 8601 UTC or relative" },
173
- fps: { type: "number", description: "Output FPS (default 1.0)", default: 1.0 },
183
+ fps: { type: "number", description: "Output FPS (default 1.0). Higher = smoother but larger file.", default: 1.0 },
174
184
  },
175
185
  required: ["start_time", "end_time"],
176
186
  },
@@ -178,37 +188,38 @@ const TOOLS: Tool[] = [
178
188
  {
179
189
  name: "update-memory",
180
190
  description:
181
- "Create, update, or delete a persistent memory (facts, preferences, decisions). " +
182
- "Retrieve memories via search-content with content_type='memory'.",
183
- annotations: { title: "Update Memory", destructiveHint: false },
191
+ "Create, update, or delete a persistent memory (facts, preferences, decisions the user wants to remember). " +
192
+ "To retrieve memories, use search-content with content_type='memory'. " +
193
+ "To create: provide content + tags. To update: provide id + fields to change. To delete: provide id + delete=true.",
194
+ annotations: { title: "Update Memory", readOnlyHint: false, destructiveHint: false, openWorldHint: false, idempotentHint: true },
184
195
  inputSchema: {
185
196
  type: "object",
186
197
  properties: {
187
- id: { type: "integer", description: "Memory ID (omit to create new)" },
188
- content: { type: "string", description: "Memory text" },
189
- tags: { type: "array", items: { type: "string" }, description: "Categorization tags" },
190
- importance: { type: "number", description: "0.0-1.0 (default 0.5)" },
191
- source_context: { type: "object", description: "Optional source data links" },
192
- delete: { type: "boolean", description: "Delete the memory identified by id" },
198
+ id: { type: "integer", description: "Memory ID omit to create new, provide to update/delete" },
199
+ content: { type: "string", description: "Memory text (required for creation)" },
200
+ tags: { type: "array", items: { type: "string" }, description: "Categorization tags (e.g. ['work', 'project-x'])" },
201
+ importance: { type: "number", description: "0.0 (trivial) to 1.0 (critical). Default 0.5." },
202
+ source_context: { type: "object", description: "Optional metadata linking to source (app, timestamp, etc.)" },
203
+ delete: { type: "boolean", description: "Set true to delete the memory identified by id" },
193
204
  },
194
205
  },
195
206
  },
196
207
  {
197
208
  name: "send-notification",
198
209
  description:
199
- "Send a notification to the screenpipe desktop UI with optional action buttons. " +
200
- "Actions can re-run pipes with context, call API endpoints, or open deep links.",
201
- annotations: { title: "Send Notification", destructiveHint: false },
210
+ "Send a notification to the screenpipe desktop UI. " +
211
+ "Use to alert the user about findings, completed tasks, or actions needing attention.",
212
+ annotations: { title: "Send Notification", readOnlyHint: false, destructiveHint: false, openWorldHint: false },
202
213
  inputSchema: {
203
214
  type: "object",
204
215
  properties: {
205
- title: { type: "string", description: "Notification title" },
216
+ title: { type: "string", description: "Notification title (short, descriptive)" },
206
217
  body: { type: "string", description: "Notification body (markdown supported)" },
207
- pipe_name: { type: "string", description: "Name of the pipe sending this notification" },
208
- timeout_secs: { type: "integer", description: "Auto-dismiss seconds (default 20)", default: 20 },
218
+ pipe_name: { type: "string", description: "Name of the pipe/tool sending this notification" },
219
+ timeout_secs: { type: "integer", description: "Auto-dismiss after N seconds (default 20). Use 0 for persistent.", default: 20 },
209
220
  actions: {
210
221
  type: "array",
211
- description: "Up to 5 action buttons",
222
+ description: "Up to 5 action buttons. Each needs id, label, type ('pipe'|'api'|'deeplink'|'dismiss').",
212
223
  items: {
213
224
  type: "object",
214
225
  properties: {
@@ -217,6 +228,7 @@ const TOOLS: Tool[] = [
217
228
  type: { type: "string", enum: ["pipe", "api", "deeplink", "dismiss"], description: "Action type" },
218
229
  pipe: { type: "string", description: "Pipe name to run (type=pipe)" },
219
230
  context: { type: "object", description: "Context passed to pipe (type=pipe)" },
231
+ open_in_chat: { type: "boolean", description: "Open pipe run in chat UI instead of background (type=pipe)" },
220
232
  url: { type: "string", description: "URL for api/deeplink actions" },
221
233
  },
222
234
  required: ["id", "label", "type"],
@@ -242,6 +254,12 @@ const RESOURCES = [
242
254
  description: "Current date/time, timezone, and pre-computed timestamps for common time ranges",
243
255
  mimeType: "application/json",
244
256
  },
257
+ {
258
+ uri: "screenpipe://guide",
259
+ name: "Usage Guide",
260
+ description: "How to use screenpipe tools effectively — search strategy, progressive disclosure, and common patterns",
261
+ mimeType: "text/markdown",
262
+ },
245
263
  ];
246
264
 
247
265
  server.setRequestHandler(ListResourcesRequestSchema, async () => {
@@ -286,6 +304,52 @@ server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
286
304
  };
287
305
  }
288
306
 
307
+ if (uri === "screenpipe://guide") {
308
+ return {
309
+ contents: [
310
+ {
311
+ uri,
312
+ mimeType: "text/markdown",
313
+ text: `# Screenpipe Usage Guide
314
+
315
+ ## Progressive Disclosure — start light, escalate only when needed
316
+
317
+ | Step | Tool | When to use |
318
+ |------|------|-------------|
319
+ | 1 | activity-summary | Broad questions: "what was I doing?", "which apps?", "how long on X?" |
320
+ | 2 | search-content | Need specific text, transcriptions, or content |
321
+ | 3 | search-elements | Need UI structure — buttons, links, form fields |
322
+ | 4 | frame-context | Need full detail for a specific moment (use frame_id from step 2) |
323
+
324
+ ## Search Strategy
325
+
326
+ - **Always provide start_time** — without it, search scans the entire history
327
+ - **Start with limit=5** — increase only if you need more results
328
+ - **Use max_content_length=500** to keep responses compact
329
+ - **Don't use q for audio** — transcriptions are noisy, q filters too aggressively. Search audio by time range and speaker instead
330
+ - **app_name is case-sensitive** — use exact names: "Google Chrome" not "chrome"
331
+ - **content_type=accessibility is preferred** for screen text (OS-native). ocr is fallback for apps without accessibility support
332
+
333
+ ## Common Patterns
334
+
335
+ - "What was I doing for the last 2 hours?" → activity-summary with start_time='2h ago'
336
+ - "What did I discuss in my meeting?" → list-meetings to find it, then search-content with audio + that time range
337
+ - "Find when I was on Twitter" → search-content with app_name='Arc' (or the browser name), q='twitter'
338
+ - "Remember that I prefer X" → update-memory with content describing the preference
339
+ - "What do you remember about X?" → search-content with content_type='memory', q='X'
340
+
341
+ ## Deep Links
342
+
343
+ When referencing specific moments in results, create clickable links:
344
+ - Frame: [10:30 AM — Chrome](screenpipe://frame/{frame_id}) — use frame_id from search results
345
+ - Timeline: [meeting at 3pm](screenpipe://timeline?timestamp=2024-01-15T15:00:00Z) — use exact timestamp from results
346
+ Never fabricate IDs or timestamps — only use values from actual results.
347
+ `,
348
+ },
349
+ ],
350
+ };
351
+ }
352
+
289
353
  throw new Error(`Unknown resource: ${uri}`);
290
354
  });
291
355
 
@@ -787,19 +851,20 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
787
851
  case "send-notification": {
788
852
  const notifBody: Record<string, unknown> = {
789
853
  title: args.title,
790
- pipe_name: args.pipe_name,
854
+ body: args.body || "",
855
+ type: "pipe",
791
856
  };
792
- if (args.body) notifBody.body = args.body;
793
- if (args.timeout_secs) notifBody.timeout_secs = args.timeout_secs;
857
+ if (args.timeout_secs) notifBody.timeout = Number(args.timeout_secs) * 1000;
794
858
  if (args.actions) notifBody.actions = args.actions;
795
- const notifResponse = await fetchAPI("/notify", {
859
+ const notifResponse = await fetch("http://localhost:11435/notify", {
796
860
  method: "POST",
861
+ headers: { "Content-Type": "application/json" },
797
862
  body: JSON.stringify(notifBody),
798
863
  });
799
864
  if (!notifResponse.ok) throw new Error(`HTTP error: ${notifResponse.status}`);
800
865
  const notifResult = await notifResponse.json();
801
866
  return {
802
- content: [{ type: "text", text: `Notification sent (id: ${notifResult.id})` }],
867
+ content: [{ type: "text", text: `Notification sent: ${notifResult.message}` }],
803
868
  };
804
869
  }
805
870