screenpipe-mcp 0.13.0 → 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -86,89 +86,86 @@ async function fetchAPI(endpoint, options = {}) {
86
86
  },
87
87
  });
88
88
  }
89
- // Create MCP server
90
- const server = new index_js_1.Server({
91
- name: "screenpipe-http",
92
- version: "0.8.2",
93
- }, {
94
- capabilities: {
95
- tools: {},
96
- },
97
- });
98
- // List tools handler
99
- server.setRequestHandler(types_js_1.ListToolsRequestSchema, async () => {
100
- return { tools: TOOLS };
101
- });
102
- // Call tool handler
103
- server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
104
- const { name, arguments: args } = request.params;
105
- if (!args) {
106
- throw new Error("Missing arguments");
107
- }
108
- if (name === "search_content") {
109
- const params = new URLSearchParams();
110
- for (const [key, value] of Object.entries(args)) {
111
- if (value !== null && value !== undefined) {
112
- params.append(key, String(value));
113
- }
114
- }
115
- const response = await fetchAPI(`/search?${params.toString()}`);
116
- if (!response.ok) {
117
- throw new Error(`HTTP error: ${response.status}`);
118
- }
119
- const data = await response.json();
120
- const results = data.data || [];
121
- const pagination = data.pagination || {};
122
- if (results.length === 0) {
123
- return {
124
- content: [
125
- {
126
- type: "text",
127
- text: "No results found. Try: broader search terms, different content_type, or wider time range.",
128
- },
129
- ],
130
- };
131
- }
132
- const formattedResults = [];
133
- for (const result of results) {
134
- const content = result.content;
135
- if (!content)
136
- continue;
137
- if (result.type === "OCR") {
138
- formattedResults.push(`[OCR] ${content.app_name || "?"} | ${content.window_name || "?"}\n` +
139
- `${content.timestamp || ""}\n` +
140
- `${content.text || ""}`);
141
- }
142
- else if (result.type === "Audio") {
143
- formattedResults.push(`[Audio] ${content.device_name || "?"}\n` +
144
- `${content.timestamp || ""}\n` +
145
- `${content.transcription || ""}`);
146
- }
147
- else if (result.type === "UI" || result.type === "Accessibility") {
148
- formattedResults.push(`[Accessibility] ${content.app_name || "?"} | ${content.window_name || "?"}\n` +
149
- `${content.timestamp || ""}\n` +
150
- `${content.text || ""}`);
151
- }
89
+ // Tool handler for search_content
90
+ async function handleSearchContent(args) {
91
+ const params = new URLSearchParams();
92
+ for (const [key, value] of Object.entries(args)) {
93
+ if (value !== null && value !== undefined) {
94
+ params.append(key, String(value));
152
95
  }
153
- const header = `Results: ${results.length}/${pagination.total || "?"}` +
154
- (pagination.total > results.length ? ` (use offset=${(pagination.offset || 0) + results.length} for more)` : "");
96
+ }
97
+ const response = await fetchAPI(`/search?${params.toString()}`);
98
+ if (!response.ok) {
99
+ throw new Error(`HTTP error: ${response.status}`);
100
+ }
101
+ const data = await response.json();
102
+ const results = data.data || [];
103
+ const pagination = data.pagination || {};
104
+ if (results.length === 0) {
155
105
  return {
156
106
  content: [
157
107
  {
158
108
  type: "text",
159
- text: header + "\n\n" + formattedResults.join("\n---\n"),
109
+ text: "No results found. Try: broader search terms, different content_type, or wider time range.",
160
110
  },
161
111
  ],
162
112
  };
163
113
  }
164
- throw new Error(`Unknown tool: ${name}`);
165
- });
166
- // Create HTTP server with MCP transport
167
- const transports = new Map();
114
+ const formattedResults = [];
115
+ for (const result of results) {
116
+ const content = result.content;
117
+ if (!content)
118
+ continue;
119
+ if (result.type === "OCR") {
120
+ formattedResults.push(`[OCR] ${content.app_name || "?"} | ${content.window_name || "?"}\n` +
121
+ `${content.timestamp || ""}\n` +
122
+ `${content.text || ""}`);
123
+ }
124
+ else if (result.type === "Audio") {
125
+ formattedResults.push(`[Audio] ${content.device_name || "?"}\n` +
126
+ `${content.timestamp || ""}\n` +
127
+ `${content.transcription || ""}`);
128
+ }
129
+ else if (result.type === "UI" || result.type === "Accessibility") {
130
+ formattedResults.push(`[Accessibility] ${content.app_name || "?"} | ${content.window_name || "?"}\n` +
131
+ `${content.timestamp || ""}\n` +
132
+ `${content.text || ""}`);
133
+ }
134
+ }
135
+ const header = `Results: ${results.length}/${pagination.total || "?"}` +
136
+ (pagination.total > results.length ? ` (use offset=${(pagination.offset || 0) + results.length} for more)` : "");
137
+ return {
138
+ content: [
139
+ {
140
+ type: "text",
141
+ text: header + "\n\n" + formattedResults.join("\n---\n"),
142
+ },
143
+ ],
144
+ };
145
+ }
146
+ // Create a fresh MCP Server instance with handlers registered.
147
+ // Each HTTP session gets its own Server — the MCP SDK requires a 1:1
148
+ // mapping between Server and transport (reusing a Server across
149
+ // transports throws "Already connected to a transport").
150
+ function createMcpServer() {
151
+ const s = new index_js_1.Server({ name: "screenpipe-http", version: "0.14.0" }, { capabilities: { tools: {} } });
152
+ s.setRequestHandler(types_js_1.ListToolsRequestSchema, async () => ({ tools: TOOLS }));
153
+ s.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
154
+ const { name, arguments: args } = request.params;
155
+ if (!args)
156
+ throw new Error("Missing arguments");
157
+ if (name === "search_content")
158
+ return handleSearchContent(args);
159
+ throw new Error(`Unknown tool: ${name}`);
160
+ });
161
+ return s;
162
+ }
163
+ // Per-session state: each session gets its own Server + transport pair.
164
+ const sessions = new Map();
168
165
  const httpServer = (0, http_1.createServer)(async (req, res) => {
169
166
  // CORS headers
170
167
  res.setHeader("Access-Control-Allow-Origin", "*");
171
- res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
168
+ res.setHeader("Access-Control-Allow-Methods", "GET, POST, DELETE, OPTIONS");
172
169
  res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization, mcp-session-id");
173
170
  if (req.method === "OPTIONS") {
174
171
  res.writeHead(204);
@@ -178,23 +175,25 @@ const httpServer = (0, http_1.createServer)(async (req, res) => {
178
175
  // Health check
179
176
  if (req.url === "/health") {
180
177
  res.writeHead(200, { "Content-Type": "application/json" });
181
- res.end(JSON.stringify({ status: "ok" }));
178
+ res.end(JSON.stringify({ status: "ok", sessions: sessions.size }));
182
179
  return;
183
180
  }
184
181
  // MCP endpoint
185
182
  if (req.url === "/mcp" || req.url?.startsWith("/mcp?")) {
186
183
  const sessionId = req.headers["mcp-session-id"];
187
- let transport = sessionId ? transports.get(sessionId) : undefined;
188
- if (!transport) {
189
- transport = new streamableHttp_js_1.StreamableHTTPServerTransport({
184
+ let session = sessionId ? sessions.get(sessionId) : undefined;
185
+ if (!session) {
186
+ const server = createMcpServer();
187
+ const transport = new streamableHttp_js_1.StreamableHTTPServerTransport({
190
188
  sessionIdGenerator: () => crypto.randomUUID(),
191
189
  });
192
190
  await server.connect(transport);
193
191
  if (transport.sessionId) {
194
- transports.set(transport.sessionId, transport);
192
+ sessions.set(transport.sessionId, { server, transport });
195
193
  }
194
+ session = { server, transport };
196
195
  }
197
- await transport.handleRequest(req, res);
196
+ await session.transport.handleRequest(req, res);
198
197
  return;
199
198
  }
200
199
  res.writeHead(404, { "Content-Type": "application/json" });
package/dist/index.js CHANGED
@@ -136,9 +136,10 @@ const TOOLS = [
136
136
  },
137
137
  {
138
138
  name: "activity-summary",
139
- description: "Lightweight activity overview (~200-500 tokens): app usage with active minutes, audio speakers, recent texts. " +
139
+ description: "Rich activity overview: app usage, window/tab titles with URLs and time spent, key text per context, audio transcriptions. " +
140
140
  "USE THIS FIRST for broad questions: 'what was I doing?', 'how long on X?', 'which apps?'. " +
141
- "Only escalate to search-content if you need specific text content.",
141
+ "The 'windows' field shows exactly what the user worked on (e.g. 'Debug crash issue — 20 min', 'Stripe pricing page — 5 min'). " +
142
+ "Usually sufficient without further searches.",
142
143
  annotations: { title: "Activity Summary", readOnlyHint: true, openWorldHint: false, idempotentHint: true },
143
144
  inputSchema: {
144
145
  type: "object",
@@ -661,8 +662,19 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
661
662
  : "";
662
663
  return ` ${a.name}: ${a.minutes} min (${a.frame_count} frames${timeSpan})`;
663
664
  });
665
+ // Window/tab activity — what pages/documents were open
666
+ const windowLines = (data.windows || []).map((w) => {
667
+ const url = w.browser_url ? ` (${w.browser_url})` : "";
668
+ return ` [${w.app_name}] ${w.window_name}${url} — ${w.minutes} min`;
669
+ });
664
670
  const speakerLines = (data.audio_summary?.speakers || []).map((s) => ` ${s.name}: ${s.segment_count} segments`);
665
- const textLines = (data.recent_texts || []).map((t) => ` [${t.app_name}] ${t.text}`);
671
+ // Actual audio transcriptions (not just counts)
672
+ const transcriptLines = (data.audio_summary?.top_transcriptions || []).map((t) => ` [${t.speaker}, ${t.timestamp.slice(11, 19)}] ${t.transcription}`);
673
+ // Key text content sampled across the time range
674
+ const textLines = (data.key_texts || data.recent_texts || []).map((t) => {
675
+ const win = t.window_name ? ` | ${t.window_name}` : "";
676
+ return ` [${t.app_name}${win}, ${t.timestamp.slice(11, 19)}] ${t.text}`;
677
+ });
666
678
  const summary = [
667
679
  `Activity Summary (${data.time_range?.start} → ${data.time_range?.end})`,
668
680
  `Total frames: ${data.total_frames}`,
@@ -670,11 +682,15 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
670
682
  "Apps:",
671
683
  ...(appsLines.length ? appsLines : [" (none)"]),
672
684
  "",
685
+ "Windows & Tabs:",
686
+ ...(windowLines.length ? windowLines.slice(0, 20) : [" (none)"]),
687
+ "",
673
688
  `Audio: ${data.audio_summary?.segment_count || 0} segments`,
674
689
  ...(speakerLines.length ? speakerLines : []),
690
+ ...(transcriptLines.length ? ["", "Audio transcriptions:", ...transcriptLines.slice(0, 15)] : []),
675
691
  "",
676
- "Recent texts:",
677
- ...(textLines.length ? textLines.slice(0, 10) : [" (none)"]),
692
+ "Key content (sampled across time range):",
693
+ ...(textLines.length ? textLines.slice(0, 20) : [" (none)"]),
678
694
  ].join("\n");
679
695
  return { content: [{ type: "text", text: summary }] };
680
696
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "screenpipe-mcp",
3
- "version": "0.13.0",
3
+ "version": "0.14.1",
4
4
  "description": "MCP server for screenpipe - search your screen recordings and audio transcriptions",
5
5
  "main": "dist/index.js",
6
6
  "bin": {
@@ -96,109 +96,103 @@ async function fetchAPI(endpoint: string, options: RequestInit = {}): Promise<Re
96
96
  });
97
97
  }
98
98
 
99
- // Create MCP server
100
- const server = new Server(
101
- {
102
- name: "screenpipe-http",
103
- version: "0.8.2",
104
- },
105
- {
106
- capabilities: {
107
- tools: {},
108
- },
99
+ // Tool handler for search_content
100
+ async function handleSearchContent(args: Record<string, unknown>) {
101
+ const params = new URLSearchParams();
102
+ for (const [key, value] of Object.entries(args)) {
103
+ if (value !== null && value !== undefined) {
104
+ params.append(key, String(value));
105
+ }
109
106
  }
110
- );
111
-
112
- // List tools handler
113
- server.setRequestHandler(ListToolsRequestSchema, async () => {
114
- return { tools: TOOLS };
115
- });
116
-
117
- // Call tool handler
118
- server.setRequestHandler(CallToolRequestSchema, async (request) => {
119
- const { name, arguments: args } = request.params;
120
107
 
121
- if (!args) {
122
- throw new Error("Missing arguments");
108
+ const response = await fetchAPI(`/search?${params.toString()}`);
109
+ if (!response.ok) {
110
+ throw new Error(`HTTP error: ${response.status}`);
123
111
  }
124
112
 
125
- if (name === "search_content") {
126
- const params = new URLSearchParams();
127
- for (const [key, value] of Object.entries(args)) {
128
- if (value !== null && value !== undefined) {
129
- params.append(key, String(value));
130
- }
131
- }
132
-
133
- const response = await fetchAPI(`/search?${params.toString()}`);
134
- if (!response.ok) {
135
- throw new Error(`HTTP error: ${response.status}`);
136
- }
137
-
138
- const data = await response.json();
139
- const results = data.data || [];
140
- const pagination = data.pagination || {};
141
-
142
- if (results.length === 0) {
143
- return {
144
- content: [
145
- {
146
- type: "text",
147
- text: "No results found. Try: broader search terms, different content_type, or wider time range.",
148
- },
149
- ],
150
- };
151
- }
152
-
153
- const formattedResults: string[] = [];
154
- for (const result of results) {
155
- const content = result.content;
156
- if (!content) continue;
157
-
158
- if (result.type === "OCR") {
159
- formattedResults.push(
160
- `[OCR] ${content.app_name || "?"} | ${content.window_name || "?"}\n` +
161
- `${content.timestamp || ""}\n` +
162
- `${content.text || ""}`
163
- );
164
- } else if (result.type === "Audio") {
165
- formattedResults.push(
166
- `[Audio] ${content.device_name || "?"}\n` +
167
- `${content.timestamp || ""}\n` +
168
- `${content.transcription || ""}`
169
- );
170
- } else if (result.type === "UI" || result.type === "Accessibility") {
171
- formattedResults.push(
172
- `[Accessibility] ${content.app_name || "?"} | ${content.window_name || "?"}\n` +
173
- `${content.timestamp || ""}\n` +
174
- `${content.text || ""}`
175
- );
176
- }
177
- }
178
-
179
- const header = `Results: ${results.length}/${pagination.total || "?"}` +
180
- (pagination.total > results.length ? ` (use offset=${(pagination.offset || 0) + results.length} for more)` : "");
113
+ const data = await response.json();
114
+ const results = data.data || [];
115
+ const pagination = data.pagination || {};
181
116
 
117
+ if (results.length === 0) {
182
118
  return {
183
119
  content: [
184
120
  {
185
121
  type: "text",
186
- text: header + "\n\n" + formattedResults.join("\n---\n"),
122
+ text: "No results found. Try: broader search terms, different content_type, or wider time range.",
187
123
  },
188
124
  ],
189
125
  };
190
126
  }
191
127
 
192
- throw new Error(`Unknown tool: ${name}`);
193
- });
128
+ const formattedResults: string[] = [];
129
+ for (const result of results) {
130
+ const content = result.content;
131
+ if (!content) continue;
132
+
133
+ if (result.type === "OCR") {
134
+ formattedResults.push(
135
+ `[OCR] ${content.app_name || "?"} | ${content.window_name || "?"}\n` +
136
+ `${content.timestamp || ""}\n` +
137
+ `${content.text || ""}`
138
+ );
139
+ } else if (result.type === "Audio") {
140
+ formattedResults.push(
141
+ `[Audio] ${content.device_name || "?"}\n` +
142
+ `${content.timestamp || ""}\n` +
143
+ `${content.transcription || ""}`
144
+ );
145
+ } else if (result.type === "UI" || result.type === "Accessibility") {
146
+ formattedResults.push(
147
+ `[Accessibility] ${content.app_name || "?"} | ${content.window_name || "?"}\n` +
148
+ `${content.timestamp || ""}\n` +
149
+ `${content.text || ""}`
150
+ );
151
+ }
152
+ }
153
+
154
+ const header = `Results: ${results.length}/${pagination.total || "?"}` +
155
+ (pagination.total > results.length ? ` (use offset=${(pagination.offset || 0) + results.length} for more)` : "");
156
+
157
+ return {
158
+ content: [
159
+ {
160
+ type: "text",
161
+ text: header + "\n\n" + formattedResults.join("\n---\n"),
162
+ },
163
+ ],
164
+ };
165
+ }
166
+
167
+ // Create a fresh MCP Server instance with handlers registered.
168
+ // Each HTTP session gets its own Server — the MCP SDK requires a 1:1
169
+ // mapping between Server and transport (reusing a Server across
170
+ // transports throws "Already connected to a transport").
171
+ function createMcpServer(): Server {
172
+ const s = new Server(
173
+ { name: "screenpipe-http", version: "0.14.0" },
174
+ { capabilities: { tools: {} } }
175
+ );
176
+
177
+ s.setRequestHandler(ListToolsRequestSchema, async () => ({ tools: TOOLS }));
178
+
179
+ s.setRequestHandler(CallToolRequestSchema, async (request) => {
180
+ const { name, arguments: args } = request.params;
181
+ if (!args) throw new Error("Missing arguments");
182
+ if (name === "search_content") return handleSearchContent(args);
183
+ throw new Error(`Unknown tool: ${name}`);
184
+ });
185
+
186
+ return s;
187
+ }
194
188
 
195
- // Create HTTP server with MCP transport
196
- const transports = new Map<string, StreamableHTTPServerTransport>();
189
+ // Per-session state: each session gets its own Server + transport pair.
190
+ const sessions = new Map<string, { server: Server; transport: StreamableHTTPServerTransport }>();
197
191
 
198
192
  const httpServer = createServer(async (req, res) => {
199
193
  // CORS headers
200
194
  res.setHeader("Access-Control-Allow-Origin", "*");
201
- res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
195
+ res.setHeader("Access-Control-Allow-Methods", "GET, POST, DELETE, OPTIONS");
202
196
  res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization, mcp-session-id");
203
197
 
204
198
  if (req.method === "OPTIONS") {
@@ -210,7 +204,7 @@ const httpServer = createServer(async (req, res) => {
210
204
  // Health check
211
205
  if (req.url === "/health") {
212
206
  res.writeHead(200, { "Content-Type": "application/json" });
213
- res.end(JSON.stringify({ status: "ok" }));
207
+ res.end(JSON.stringify({ status: "ok", sessions: sessions.size }));
214
208
  return;
215
209
  }
216
210
 
@@ -218,21 +212,23 @@ const httpServer = createServer(async (req, res) => {
218
212
  if (req.url === "/mcp" || req.url?.startsWith("/mcp?")) {
219
213
  const sessionId = req.headers["mcp-session-id"] as string | undefined;
220
214
 
221
- let transport = sessionId ? transports.get(sessionId) : undefined;
215
+ let session = sessionId ? sessions.get(sessionId) : undefined;
222
216
 
223
- if (!transport) {
224
- transport = new StreamableHTTPServerTransport({
217
+ if (!session) {
218
+ const server = createMcpServer();
219
+ const transport = new StreamableHTTPServerTransport({
225
220
  sessionIdGenerator: () => crypto.randomUUID(),
226
221
  });
227
222
 
228
223
  await server.connect(transport);
229
224
 
230
225
  if (transport.sessionId) {
231
- transports.set(transport.sessionId, transport);
226
+ sessions.set(transport.sessionId, { server, transport });
232
227
  }
228
+ session = { server, transport };
233
229
  }
234
230
 
235
- await transport.handleRequest(req, res);
231
+ await session.transport.handleRequest(req, res);
236
232
  return;
237
233
  }
238
234
 
package/src/index.ts CHANGED
@@ -119,9 +119,10 @@ const TOOLS: Tool[] = [
119
119
  {
120
120
  name: "activity-summary",
121
121
  description:
122
- "Lightweight activity overview (~200-500 tokens): app usage with active minutes, audio speakers, recent texts. " +
122
+ "Rich activity overview: app usage, window/tab titles with URLs and time spent, key text per context, audio transcriptions. " +
123
123
  "USE THIS FIRST for broad questions: 'what was I doing?', 'how long on X?', 'which apps?'. " +
124
- "Only escalate to search-content if you need specific text content.",
124
+ "The 'windows' field shows exactly what the user worked on (e.g. 'Debug crash issue — 20 min', 'Stripe pricing page — 5 min'). " +
125
+ "Usually sufficient without further searches.",
125
126
  annotations: { title: "Activity Summary", readOnlyHint: true, openWorldHint: false, idempotentHint: true },
126
127
  inputSchema: {
127
128
  type: "object",
@@ -709,14 +710,37 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
709
710
  }
710
711
  );
711
712
 
713
+ // Window/tab activity — what pages/documents were open
714
+ const windowLines = (data.windows || []).map(
715
+ (w: {
716
+ app_name: string;
717
+ window_name: string;
718
+ browser_url: string;
719
+ minutes: number;
720
+ frame_count: number;
721
+ }) => {
722
+ const url = w.browser_url ? ` (${w.browser_url})` : "";
723
+ return ` [${w.app_name}] ${w.window_name}${url} — ${w.minutes} min`;
724
+ }
725
+ );
726
+
712
727
  const speakerLines = (data.audio_summary?.speakers || []).map(
713
728
  (s: { name: string; segment_count: number }) =>
714
729
  ` ${s.name}: ${s.segment_count} segments`
715
730
  );
716
731
 
717
- const textLines = (data.recent_texts || []).map(
718
- (t: { text: string; app_name: string; timestamp: string }) =>
719
- ` [${t.app_name}] ${t.text}`
732
+ // Actual audio transcriptions (not just counts)
733
+ const transcriptLines = (data.audio_summary?.top_transcriptions || []).map(
734
+ (t: { transcription: string; speaker: string; device: string; timestamp: string }) =>
735
+ ` [${t.speaker}, ${t.timestamp.slice(11, 19)}] ${t.transcription}`
736
+ );
737
+
738
+ // Key text content sampled across the time range
739
+ const textLines = (data.key_texts || data.recent_texts || []).map(
740
+ (t: { text: string; app_name: string; window_name?: string; timestamp: string }) => {
741
+ const win = t.window_name ? ` | ${t.window_name}` : "";
742
+ return ` [${t.app_name}${win}, ${t.timestamp.slice(11, 19)}] ${t.text}`;
743
+ }
720
744
  );
721
745
 
722
746
  const summary = [
@@ -726,11 +750,15 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
726
750
  "Apps:",
727
751
  ...(appsLines.length ? appsLines : [" (none)"]),
728
752
  "",
753
+ "Windows & Tabs:",
754
+ ...(windowLines.length ? windowLines.slice(0, 20) : [" (none)"]),
755
+ "",
729
756
  `Audio: ${data.audio_summary?.segment_count || 0} segments`,
730
757
  ...(speakerLines.length ? speakerLines : []),
758
+ ...(transcriptLines.length ? ["", "Audio transcriptions:", ...transcriptLines.slice(0, 15)] : []),
731
759
  "",
732
- "Recent texts:",
733
- ...(textLines.length ? textLines.slice(0, 10) : [" (none)"]),
760
+ "Key content (sampled across time range):",
761
+ ...(textLines.length ? textLines.slice(0, 20) : [" (none)"]),
734
762
  ].join("\n");
735
763
 
736
764
  return { content: [{ type: "text", text: summary }] };