screenpipe-mcp 0.18.1 → 0.18.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/dist/index.js +499 -194
  2. package/package.json +1 -1
  3. package/src/index.ts +509 -178
package/src/index.ts CHANGED
@@ -31,16 +31,20 @@ const SCREENPIPE_API = `http://localhost:${port}`;
31
31
  // Discover the local API key, in priority order:
32
32
  //
33
33
  // 1. env vars set by the launcher (Claude Desktop config, terminal, etc.)
34
- // 2. direct sqlite3 read of ~/.screenpipe/db.sqlite (plaintext entries only
35
- // encrypted ones need keychain, handled by 3+)
36
- // 3. bundled `bun` shipped with the desktop app → `bun x screenpipe@latest auth token`
37
- // this is the kill-shot for Claude-Desktop-via-MCP: Claude strips PATH so
38
- // `npx` and `sqlite3` lookups fail, but the desktop app's bundled bun is
39
- // at a deterministic path. We invoke it with an absolute path, which
40
- // then runs the screenpipe CLI's `auth token` command — which goes
41
- // through `find_api_auth_key` (handles the encrypted-secret-store case).
42
- // 4. node-adjacent npx (legacy fallback for users without the desktop app)
43
- // 5. PATH-based npx (very last resort)
34
+ // 2. CLI via bundled `bun` from screenpipe.app at a deterministic absolute
35
+ // path. Runs `bun x screenpipe@latest auth token` → goes through the
36
+ // Rust CLI's `find_api_auth_key` resolver, which handles the encrypted
37
+ // keychain-backed secret store. This is the canonical path: same
38
+ // contract as `screenpipe auth token` in a terminal, no PATH needed.
39
+ // 3. CLI via node-adjacent npx for dev environments that have node but
40
+ // not the desktop app.
41
+ // 4. CLI via PATH-based npx — last CLI fallback.
42
+ // 5. Direct sqlite3 read of ~/.screenpipe/db.sqlite plaintext entries
43
+ // only (encrypted entries need the keychain, which only the CLI can
44
+ // reach). Kept as a final last-resort for users who have screenpipe
45
+ // *data* but no working CLI install (rare). Demoted below the CLI
46
+ // paths because it reimplements logic that lives in `auth_key.rs` and
47
+ // can silently drift on storage-format changes.
44
48
  //
45
49
  // If all 5 miss we log a loud stderr warning so it surfaces in the host's
46
50
  // MCP log instead of the user just seeing 403s with no explanation.
@@ -57,58 +61,14 @@ function discoverApiKey(): string {
57
61
  // eslint-disable-next-line @typescript-eslint/no-var-requires
58
62
  const { execFileSync, execSync } = require("child_process");
59
63
 
60
- // Common absolute paths for `sqlite3`. Claude Desktop's MCP launcher
61
- // strips PATH so the bare command name `sqlite3` would fail spawn
62
- // even though `/usr/bin/sqlite3` is always present on macOS. Try the
63
- // bare name first (cheap; works on dev machines with a normal shell)
64
- // then walk known absolute paths.
65
- const sqliteCandidates: string[] =
66
- process.platform === "win32"
67
- ? ["sqlite3.exe", "C\\:Windows\\System32\\sqlite3.exe"]
68
- : process.platform === "darwin"
69
- ? ["sqlite3", "/usr/bin/sqlite3", "/opt/homebrew/bin/sqlite3", "/usr/local/bin/sqlite3"]
70
- : ["sqlite3", "/usr/bin/sqlite3", "/usr/local/bin/sqlite3"];
71
-
72
- // 2. Direct sqlite3 read of the secret store. Only succeeds for
73
- // plaintext entries (nonce all zeros). Encrypted entries fall
74
- // through to the CLI path which can decrypt via keychain.
75
- try {
76
- const dbPath = path.join(os.homedir(), ".screenpipe", "db.sqlite");
77
- if (fs.existsSync(dbPath)) {
78
- let row: string | null = null;
79
- for (const candidate of sqliteCandidates) {
80
- try {
81
- row = execFileSync(
82
- candidate,
83
- [dbPath, "SELECT hex(nonce), value FROM secrets WHERE key = 'api_auth_key';"],
84
- { timeout: 5000, encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] },
85
- ).trim();
86
- break;
87
- } catch {
88
- // try next candidate
89
- }
90
- }
91
- if (row) {
92
- const sepIdx = row.indexOf("|");
93
- const nonceHex = sepIdx >= 0 ? row.substring(0, sepIdx) : "";
94
- const value = sepIdx >= 0 ? row.substring(sepIdx + 1) : row;
95
- const isPlaintext = !nonceHex || /^0+$/.test(nonceHex);
96
- if (isPlaintext && value) {
97
- const decoded = Buffer.from(value, "base64").toString("utf-8");
98
- if (decoded && decoded.startsWith("sp-")) return decoded;
99
- if (value.startsWith("sp-")) return value;
100
- }
101
- // Non-zero nonce = encrypted — fall through to bun/npx which decrypt via keychain.
102
- }
103
- }
104
- } catch {}
105
-
106
- // 3. Bundled `bun` shipped with the desktop app. The Tauri externalBin
107
- // config (apps/screenpipe-app-tauri/src-tauri/tauri.prod.conf.json)
108
- // places it next to the main app executable; on each OS the install
109
- // path is deterministic so we don't need PATH or current_exe — both
110
- // of which Claude Desktop's MCP launcher rolls back.
111
64
  const home = os.homedir();
65
+
66
+ // 2. CLI via bundled `bun` shipped with the desktop app. The Tauri
67
+ // externalBin config places `bun` next to the main app exe at a
68
+ // deterministic install path on each OS, so we don't need PATH —
69
+ // which Claude Desktop's MCP launcher strips. The CLI's `auth
70
+ // token` goes through `find_api_auth_key` and decrypts via
71
+ // keychain when needed.
112
72
  const bunCandidates: string[] =
113
73
  process.platform === "darwin"
114
74
  ? [
@@ -146,9 +106,8 @@ function discoverApiKey(): string {
146
106
  }
147
107
  }
148
108
 
149
- // 4. npx adjacent to the running node works in dev environments
150
- // where the user installed @screenpipe/mcp via npx without the
151
- // desktop app.
109
+ // 3. CLI via npx adjacent to the running node. Works for dev
110
+ // environments without the desktop app.
152
111
  try {
153
112
  const npxName = process.platform === "win32" ? "npx.cmd" : "npx";
154
113
  const npxPath = path.join(path.dirname(process.execPath), npxName);
@@ -162,8 +121,8 @@ function discoverApiKey(): string {
162
121
  }
163
122
  } catch {}
164
123
 
165
- // 5. PATH-based npx last-ditch. Will fail under Claude Desktop's
166
- // sanitized env; useful only on raw shells.
124
+ // 4. CLI via PATH-based npx. Last CLI try; works on raw shells with
125
+ // npx on PATH.
167
126
  try {
168
127
  const token = execSync("npx screenpipe@latest auth token", {
169
128
  timeout: 30000,
@@ -173,6 +132,48 @@ function discoverApiKey(): string {
173
132
  if (token && token.startsWith("sp-")) return token;
174
133
  } catch {}
175
134
 
135
+ // 5. Direct sqlite3 read of the secret store (last-resort). Plaintext
136
+ // entries only — encrypted ones live behind the keychain, which the
137
+ // CLI paths above already cover. Used when the user has screenpipe
138
+ // data on disk but no working CLI install.
139
+ const sqliteCandidates: string[] =
140
+ process.platform === "win32"
141
+ ? ["sqlite3.exe", "C:\\Windows\\System32\\sqlite3.exe"]
142
+ : process.platform === "darwin"
143
+ ? ["sqlite3", "/usr/bin/sqlite3", "/opt/homebrew/bin/sqlite3", "/usr/local/bin/sqlite3"]
144
+ : ["sqlite3", "/usr/bin/sqlite3", "/usr/local/bin/sqlite3"];
145
+ try {
146
+ const dbPath = path.join(home, ".screenpipe", "db.sqlite");
147
+ if (fs.existsSync(dbPath)) {
148
+ let row: string | null = null;
149
+ for (const candidate of sqliteCandidates) {
150
+ try {
151
+ row = execFileSync(
152
+ candidate,
153
+ [dbPath, "SELECT hex(nonce), value FROM secrets WHERE key = 'api_auth_key';"],
154
+ { timeout: 5000, encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] },
155
+ ).trim();
156
+ break;
157
+ } catch {
158
+ // try next candidate
159
+ }
160
+ }
161
+ if (row) {
162
+ const sepIdx = row.indexOf("|");
163
+ const nonceHex = sepIdx >= 0 ? row.substring(0, sepIdx) : "";
164
+ const value = sepIdx >= 0 ? row.substring(sepIdx + 1) : row;
165
+ const isPlaintext = !nonceHex || /^0+$/.test(nonceHex);
166
+ if (isPlaintext && value) {
167
+ const decoded = Buffer.from(value, "base64").toString("utf-8");
168
+ if (decoded && decoded.startsWith("sp-")) return decoded;
169
+ if (value.startsWith("sp-")) return value;
170
+ }
171
+ // Encrypted — only the CLI paths above can decrypt this; we
172
+ // already tried them.
173
+ }
174
+ }
175
+ } catch {}
176
+
176
177
  // All five paths missed. Log loudly to stderr so the host's MCP
177
178
  // panel surfaces this instead of the user seeing cryptic 403s from
178
179
  // the screenpipe server on every tool call.
@@ -180,9 +181,9 @@ function discoverApiKey(): string {
180
181
  [
181
182
  "[screenpipe-mcp] could not discover SCREENPIPE_LOCAL_API_KEY from any source.",
182
183
  " - env vars (SCREENPIPE_LOCAL_API_KEY / SCREENPIPE_API_KEY) not set",
183
- " - direct sqlite3 read of ~/.screenpipe/db.sqlite failed",
184
184
  " - bundled `bun` from screenpipe.app not found at any known install path",
185
185
  " - npx fallback unavailable",
186
+ " - direct sqlite3 read of ~/.screenpipe/db.sqlite failed",
186
187
  "Fix: set SCREENPIPE_LOCAL_API_KEY in your MCP launcher's env block,",
187
188
  "or install the screenpipe desktop app (https://screenpi.pe).",
188
189
  "",
@@ -193,6 +194,49 @@ function discoverApiKey(): string {
193
194
 
194
195
  const API_KEY = discoverApiKey();
195
196
 
197
+ // Enterprise team token — when present, this MCP additionally registers
198
+ // `team-*` tools that query the org-wide telemetry control plane
199
+ // (https://screenpi.pe/api/enterprise/v1/*) instead of just the local
200
+ // recordings. Same audience: an enterprise admin running screenpipe-mcp
201
+ // inside Claude Desktop / Cursor / Windsurf wants to ask "what did MY
202
+ // machine do" AND "what did MY TEAM do" without juggling two MCPs.
203
+ //
204
+ // Resolution order matches discoverApiKey() in spirit:
205
+ // 1. SCREENPIPE_ENTERPRISE_TOKEN env var (Claude config, terminal)
206
+ // 2. team_api_token field in ~/.screenpipe/enterprise.json (written by
207
+ // the desktop app's Settings → Privacy → Admin Team API Token)
208
+ //
209
+ // Token format is `sk_ent_…`. Empty / missing → team tools are not
210
+ // registered; non-admin users of screenpipe-mcp see exactly what they
211
+ // see today.
212
+ function discoverTeamToken(): string {
213
+ const envTok = process.env.SCREENPIPE_ENTERPRISE_TOKEN;
214
+ if (envTok && envTok.startsWith("sk_ent_")) return envTok;
215
+ try {
216
+ const entPath = path.join(os.homedir(), ".screenpipe", "enterprise.json");
217
+ if (fs.existsSync(entPath)) {
218
+ const raw = fs.readFileSync(entPath, "utf-8");
219
+ const parsed = JSON.parse(raw);
220
+ const tok = typeof parsed?.team_api_token === "string" ? parsed.team_api_token : "";
221
+ if (tok && tok.startsWith("sk_ent_")) return tok;
222
+ }
223
+ } catch {}
224
+ return "";
225
+ }
226
+
227
+ const TEAM_TOKEN = discoverTeamToken();
228
+ const TEAM_API = "https://screenpi.pe/api/enterprise/v1";
229
+
230
+ async function fetchTeam(p: string, init: RequestInit = {}): Promise<Response> {
231
+ return fetch(`${TEAM_API}${p}`, {
232
+ ...init,
233
+ headers: {
234
+ Authorization: `Bearer ${TEAM_TOKEN}`,
235
+ ...(init.headers || {}),
236
+ },
237
+ });
238
+ }
239
+
196
240
  // Read version from package.json (single source of truth)
197
241
  // eslint-disable-next-line @typescript-eslint/no-var-requires
198
242
  const PKG_VERSION: string = require("../package.json").version;
@@ -218,11 +262,11 @@ const TOOLS: Tool[] = [
218
262
  {
219
263
  name: "search-content",
220
264
  description:
221
- "Search screen text, audio transcriptions, input events, and memories. " +
222
- "Returns timestamped results with app context. " +
223
- "IMPORTANT: prefer activity-summary for broad questions ('what was I doing?'). " +
224
- "Use search-content only when you need specific text/content. " +
225
- "Start with limit=5, increase only if needed. Results can be large — use max_content_length=500 to truncate.",
265
+ "Search screen text, audio transcriptions, input events, and memories. Returns timestamped results with app context. " +
266
+ "USE WHEN: you need the actual text/content of a moment — quotes, OCR snippets, transcript lines — or want to filter by speaker/window. " +
267
+ "DO NOT USE for: broad questions like 'what was I doing?' (use activity-summary, it pre-summarizes apps + windows + transcripts). " +
268
+ "Also DO NOT USE for: targeted UI controls (use search-elements). " +
269
+ "Start with limit=5, increase only if needed. Per-result text is auto-truncated to 1000 chars; pass max_content_length=0 to opt out, or a custom integer to override.",
226
270
  annotations: { title: "Search Content", readOnlyHint: true, openWorldHint: false, idempotentHint: true },
227
271
  inputSchema: {
228
272
  type: "object",
@@ -234,14 +278,15 @@ const TOOLS: Tool[] = [
234
278
  content_type: {
235
279
  type: "string",
236
280
  enum: ["all", "ocr", "audio", "input", "accessibility", "memory"],
237
- description: "Filter by content type. 'accessibility' is preferred for screen text (OS-native). 'ocr' is fallback for apps without accessibility support. Default: 'all'.",
281
+ description:
282
+ "Filter by content type. NOTE on screen text: 'ocr' is a legacy label — it returns ALL screen-text rows, which are accessibility-derived for most apps (the result tag [Screen·a11y] vs [Screen·ocr] tells you which). Use 'ocr' for screen text (covers both paths), 'audio' for transcriptions, 'input' for keyboard/mouse events, 'memory' for stored facts. Default: 'all'.",
238
283
  default: "all",
239
284
  },
240
285
  limit: { type: "integer", description: "Max results (default 10, max 20). Start with 5 for exploration.", default: 10 },
241
286
  offset: { type: "integer", description: "Pagination offset. Use when results say 'use offset=N for more'.", default: 0 },
242
287
  start_time: {
243
288
  type: "string",
244
- description: "ISO 8601 UTC or relative (e.g. '2h ago', '1d ago'). Always provide to avoid scanning entire history.",
289
+ description: "Accepted: ISO 8601 ('2024-01-15T10:00:00Z'), 'Nh ago' / 'Nd ago' / 'Nw ago', 'now', 'yesterday', 'today', or bare 'YYYY-MM-DD'. Always provide to avoid scanning entire history.",
245
290
  },
246
291
  end_time: {
247
292
  type: "string",
@@ -285,9 +330,9 @@ const TOOLS: Tool[] = [
285
330
  name: "activity-summary",
286
331
  description:
287
332
  "Rich activity overview: app usage, window/tab titles with URLs and time spent, key text per context, audio transcriptions. " +
288
- "USE THIS FIRST for broad questions: 'what was I doing?', 'how long on X?', 'which apps?'. " +
289
- "The 'windows' field shows exactly what the user worked on (e.g. 'Debug crash issue — 20 min', 'Stripe pricing page — 5 min'). " +
290
- "Usually sufficient without further searches.",
333
+ "USE WHEN: any broad question about what the user did — 'what was I doing?', 'how long on X?', 'which apps?', 'recap my morning'. " +
334
+ "This is almost always the right first call for time-range questions — usually sufficient without follow-up searches. " +
335
+ "DO NOT USE for: finding a specific keyword (use keyword-search) or a specific UI control (use search-elements).",
291
336
  annotations: { title: "Activity Summary", readOnlyHint: true, openWorldHint: false, idempotentHint: true },
292
337
  inputSchema: {
293
338
  type: "object",
@@ -302,9 +347,9 @@ const TOOLS: Tool[] = [
302
347
  {
303
348
  name: "search-elements",
304
349
  description:
305
- "Search UI elements (buttons, links, text fields) from the accessibility tree. " +
306
- "Lighter than search-content for targeted UI lookups. " +
307
- "Use when you need to find specific UI controls or page structure, not general content.",
350
+ "Search UI elements (buttons, links, text fields) from the accessibility tree, filterable by role. " +
351
+ "USE WHEN: you want a specific UI control or page-structure question — 'find every Submit button I saw', 'list the links in that page'. " +
352
+ "DO NOT USE for: general text/content (use search-content) or fast keyword lookup (use keyword-search).",
308
353
  annotations: { title: "Search Elements", readOnlyHint: true, openWorldHint: false, idempotentHint: true },
309
354
  inputSchema: {
310
355
  type: "object",
@@ -552,19 +597,21 @@ const TOOLS: Tool[] = [
552
597
  {
553
598
  name: "keyword-search",
554
599
  description:
555
- "Fast keyword search using FTS index. Faster than search-content for exact keyword matching. " +
556
- "Returns frame IDs and matched text.",
600
+ "Fast FTS5 keyword search across OCR + audio combined. Returns matches with frame_id, app, timestamp, and text positions. " +
601
+ "USE WHEN: you have a specific keyword/phrase and want the fastest hit-list (e.g. 'find every screen where I typed \"stripe\"'). " +
602
+ "DO NOT USE for: structured filters by content_type / speaker / window — this endpoint ignores those (use search-content instead). " +
603
+ "DO NOT USE for: broad questions like 'what was I doing' (use activity-summary).",
557
604
  annotations: { title: "Keyword Search", readOnlyHint: true, openWorldHint: false, idempotentHint: true },
558
605
  inputSchema: {
559
606
  type: "object",
560
607
  properties: {
561
- q: { type: "string", description: "Keyword search query" },
562
- content_type: { type: "string", enum: ["ocr", "audio", "all"], description: "Content type filter", default: "all" },
563
- start_time: { type: "string", description: "ISO 8601 UTC or relative" },
564
- end_time: { type: "string", description: "ISO 8601 UTC or relative" },
565
- app_name: { type: "string", description: "Filter by app name" },
608
+ q: { type: "string", description: "Keyword query (FTS5 syntax: quoted phrases, AND/OR, prefix*)" },
609
+ start_time: { type: "string", description: "ISO 8601 UTC, 'Nh ago' / 'Nd ago' / 'Nw ago', 'now', 'yesterday', 'today', or 'YYYY-MM-DD'" },
610
+ end_time: { type: "string", description: "Same formats as start_time" },
611
+ app_name: { type: "string", description: "Filter by exact app name (case-sensitive, e.g. 'Google Chrome')" },
566
612
  limit: { type: "integer", description: "Max results (default 20)", default: 20 },
567
613
  offset: { type: "integer", description: "Pagination offset", default: 0 },
614
+ fuzzy_match: { type: "boolean", description: "Enable typo-tolerant matching", default: false },
568
615
  },
569
616
  required: ["q"],
570
617
  },
@@ -597,8 +644,74 @@ const TOOLS: Tool[] = [
597
644
  },
598
645
  ];
599
646
 
647
+ // ---------------------------------------------------------------------------
648
+ // Enterprise team tools — registered only when a team API token is present.
649
+ // Same endpoint surface as the desktop `screenpipe-team` pi-agent skill:
650
+ // proxy GETs to https://screenpi.pe/api/enterprise/v1/* with Bearer auth.
651
+ //
652
+ // Naming convention: every team tool is `team-*` so it's obvious at a glance
653
+ // which scope (just-me vs the-whole-org) any given call is hitting.
654
+ // ---------------------------------------------------------------------------
655
+ const TEAM_TOOLS: Tool[] = [
656
+ {
657
+ name: "team-search",
658
+ description:
659
+ "Substring-search across the ENTIRE ORG's telemetry (every enrolled " +
660
+ "device). Use when the question is about the team or another teammate " +
661
+ "(\"what did engineering work on yesterday\", \"did alice touch the auth code\"). " +
662
+ "For your own machine only, use search-content. " +
663
+ "Auth: enterprise admin token (sk_ent_…). " +
664
+ "Defaults: since=now-24h, limit=50. Returns matched records with device + timestamp.",
665
+ annotations: { title: "Team Search", readOnlyHint: true, openWorldHint: true, idempotentHint: true },
666
+ inputSchema: {
667
+ type: "object",
668
+ properties: {
669
+ q: { type: "string", description: "Substring to match (case-insensitive). Empty = all records in window." },
670
+ device_id: { type: "string", description: "Restrict to one device. Get the ID from team-devices." },
671
+ app_name: { type: "string", description: "Restrict to records whose app_name equals this (case-insensitive)." },
672
+ since: { type: "string", description: "ISO 8601 lower bound. Default = now - 24h." },
673
+ until: { type: "string", description: "ISO 8601 upper bound. Default = now." },
674
+ since_hours_ago: { type: "integer", description: "Convenience: equivalent to since=now-N*h." },
675
+ limit: { type: "integer", description: "Max records (default 50, max 200).", default: 50 },
676
+ },
677
+ },
678
+ },
679
+ {
680
+ name: "team-devices",
681
+ description:
682
+ "List all devices enrolled under this org's license — hostname, OS, " +
683
+ "app version, last-seen timestamp. Use to discover device IDs to pass " +
684
+ "to team-search or team-records, or to spot stale machines.",
685
+ annotations: { title: "Team Devices", readOnlyHint: true, openWorldHint: true, idempotentHint: true },
686
+ inputSchema: { type: "object", properties: {} },
687
+ },
688
+ {
689
+ name: "team-records",
690
+ description:
691
+ "Chronological raw dump of the org's telemetry for a time window. " +
692
+ "Returns oldest → newest (vs team-search which is recency-ranked). " +
693
+ "Use for ETL or \"walk me through X from Y to Z\" — NOT for question-answering, use team-search for that. " +
694
+ "Auth: enterprise admin token.",
695
+ annotations: { title: "Team Records", readOnlyHint: true, openWorldHint: true, idempotentHint: true },
696
+ inputSchema: {
697
+ type: "object",
698
+ properties: {
699
+ device_id: { type: "string", description: "Restrict to one device (optional)." },
700
+ kind: { type: "string", enum: ["frame", "audio", "all"], description: "Record kind filter. Default: all.", default: "all" },
701
+ since: { type: "string", description: "ISO 8601 lower bound." },
702
+ until: { type: "string", description: "ISO 8601 upper bound." },
703
+ since_hours_ago: { type: "integer", description: "Convenience: equivalent to since=now-N*h." },
704
+ limit: { type: "integer", description: "Max records (default 50, max 200).", default: 50 },
705
+ },
706
+ },
707
+ },
708
+ ];
709
+
600
710
  server.setRequestHandler(ListToolsRequestSchema, async () => {
601
- return { tools: TOOLS };
711
+ // Team tools only surface when an enterprise token was discovered at boot.
712
+ // No token = consumer / non-admin user; their MCP looks identical to today.
713
+ const tools = TEAM_TOKEN ? [...TOOLS, ...TEAM_TOOLS] : TOOLS;
714
+ return { tools };
602
715
  });
603
716
 
604
717
  // ---------------------------------------------------------------------------
@@ -685,7 +798,7 @@ server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
685
798
  - **Use max_content_length=500** to keep responses compact
686
799
  - **Don't use q for audio** — transcriptions are noisy, q filters too aggressively. Search audio by time range and speaker instead
687
800
  - **app_name is case-sensitive** — use exact names: "Google Chrome" not "chrome"
688
- - **content_type=accessibility is preferred** for screen text (OS-native). ocr is fallback for apps without accessibility support
801
+ - **Screen text is mostly accessibility-derived, not OCR.** Screenpipe walks the OS accessibility tree first; OCR is only a fallback (terminals, canvas-rendered apps, games). \`content_type=ocr\` returns both paths — the result label \`[Screen·a11y]\` vs \`[Screen·ocr]\` tells you which produced the row. Don't pre-filter to a11y/ocr unless you specifically need one or the other
689
802
 
690
803
  ## Common Patterns
691
804
 
@@ -711,21 +824,154 @@ Never fabricate IDs or timestamps — only use values from actual results.
711
824
  });
712
825
 
713
826
  // ---------------------------------------------------------------------------
714
- // Helper
827
+ // Helpers
715
828
  // ---------------------------------------------------------------------------
829
+
830
+ // Thrown by fetchAPI / callAPI when the backend is unreachable. Caught in the
831
+ // tool dispatcher to surface an actionable hint ("backend not running")
832
+ // instead of the opaque "fetch failed" the model used to see.
833
+ class BackendDownError extends Error {
834
+ constructor(public readonly cause: unknown) {
835
+ super(
836
+ `screenpipe backend not running on ${SCREENPIPE_API}. ` +
837
+ `Start it with \`screenpipe\` in a terminal, or open the screenpipe desktop app.`,
838
+ );
839
+ this.name = "BackendDownError";
840
+ }
841
+ }
842
+
843
+ // Thrown when the backend returns a non-2xx. Carries the server's response
844
+ // body so the dispatcher can include it in the user-visible error message.
845
+ class BackendHttpError extends Error {
846
+ constructor(
847
+ public readonly status: number,
848
+ public readonly bodyText: string,
849
+ endpoint: string,
850
+ ) {
851
+ let hint = "";
852
+ if (status === 401 || status === 403) {
853
+ hint =
854
+ " — API key not accepted. Set SCREENPIPE_LOCAL_API_KEY in your MCP " +
855
+ "launcher env, or install the screenpipe desktop app so the MCP can " +
856
+ "discover the key automatically.";
857
+ } else if (status === 404) {
858
+ hint =
859
+ " — endpoint not found. The backend may be on a different version than this MCP.";
860
+ } else if (status === 400) {
861
+ hint = " — bad request. Check argument names and types against the tool schema.";
862
+ } else if (status >= 500) {
863
+ hint = " — backend error. Check screenpipe logs.";
864
+ }
865
+ const trimmed = bodyText.trim().slice(0, 300);
866
+ const bodyPart = trimmed ? ` body: ${trimmed}` : "";
867
+ super(`HTTP ${status} from ${endpoint}${hint}${bodyPart}`);
868
+ this.name = "BackendHttpError";
869
+ }
870
+ }
871
+
716
872
  async function fetchAPI(
717
873
  endpoint: string,
718
874
  options: RequestInit = {}
719
875
  ): Promise<Response> {
720
876
  const url = `${SCREENPIPE_API}${endpoint}`;
721
- return fetch(url, {
722
- ...options,
723
- headers: {
724
- "Content-Type": "application/json",
725
- ...(API_KEY ? { Authorization: `Bearer ${API_KEY}` } : {}),
726
- ...options.headers,
727
- },
728
- });
877
+ try {
878
+ return await fetch(url, {
879
+ ...options,
880
+ headers: {
881
+ "Content-Type": "application/json",
882
+ ...(API_KEY ? { Authorization: `Bearer ${API_KEY}` } : {}),
883
+ ...options.headers,
884
+ },
885
+ });
886
+ } catch (e) {
887
+ throw new BackendDownError(e);
888
+ }
889
+ }
890
+
891
+ // Wrap a fetchAPI call: throw BackendHttpError on non-2xx with body included.
892
+ // Use from handlers instead of `if (!response.ok) throw new Error(...)`.
893
+ async function callAPI(endpoint: string, options: RequestInit = {}): Promise<Response> {
894
+ const response = await fetchAPI(endpoint, options);
895
+ if (!response.ok) {
896
+ let body = "";
897
+ try {
898
+ body = await response.text();
899
+ } catch {
900
+ // body may not be readable; that's fine
901
+ }
902
+ throw new BackendHttpError(response.status, body, endpoint);
903
+ }
904
+ return response;
905
+ }
906
+
907
+ // Server's deserialize_flexible_datetime accepts ISO 8601 + "Nh ago" / "Nd ago"
908
+ // / "Nw ago" / "now". Models also try "yesterday", "today", and bare dates
909
+ // ("2026-05-17") — normalize those here so the request doesn't 400.
910
+ function normalizeTime(input: string | undefined): string | undefined {
911
+ if (!input) return input;
912
+ const s = input.trim();
913
+ if (!s) return input;
914
+ const lower = s.toLowerCase();
915
+ if (lower === "yesterday") return "1d ago";
916
+ if (lower === "today") {
917
+ return `${new Date().toISOString().split("T")[0]}T00:00:00Z`;
918
+ }
919
+ if (lower === "tomorrow") {
920
+ const t = new Date();
921
+ t.setUTCDate(t.getUTCDate() + 1);
922
+ return `${t.toISOString().split("T")[0]}T00:00:00Z`;
923
+ }
924
+ // Bare YYYY-MM-DD → start of day UTC
925
+ if (/^\d{4}-\d{2}-\d{2}$/.test(s)) return `${s}T00:00:00Z`;
926
+ return s;
927
+ }
928
+
929
+ // Apply normalizeTime to start_time/end_time fields in an args object.
930
+ // Returns a new object — does not mutate the input.
931
+ function normalizeTimeFields(
932
+ args: Record<string, unknown>,
933
+ ): Record<string, unknown> {
934
+ const out = { ...args };
935
+ for (const k of ["start_time", "end_time"] as const) {
936
+ if (typeof out[k] === "string") {
937
+ out[k] = normalizeTime(out[k] as string);
938
+ }
939
+ }
940
+ return out;
941
+ }
942
+
943
+ // Middle-truncate long strings: keep head + tail, mark the gap with how much
944
+ // was cut. Used to cap OCR/transcription text in search-content responses
945
+ // so a single call doesn't blow past Claude Code's per-tool output limit
946
+ // (one logged call returned 131k chars from a limit:10 search).
947
+ function truncateMiddle(text: string | null | undefined, max: number): string {
948
+ if (!text) return text ?? "";
949
+ if (max <= 0 || text.length <= max) return text;
950
+ const halfLeft = Math.floor(max / 2);
951
+ const halfRight = max - halfLeft;
952
+ const cut = text.length - max;
953
+ return (
954
+ text.slice(0, halfLeft) +
955
+ `…[${cut} chars truncated — pass max_content_length=0 for full text]…` +
956
+ text.slice(text.length - halfRight)
957
+ );
958
+ }
959
+
960
+ // Default per-result text cap for search-content when the caller didn't
961
+ // specify one. Tuned to keep limit=10 responses well under tool-output limits
962
+ // while still giving the model enough text to reason over.
963
+ const DEFAULT_SEARCH_CONTENT_TRUNCATE = 1000;
964
+
965
+ // Format the screen-text tag for a result. The server's `text_source` is
966
+ // "accessibility" (OS-native tree, primary path) or "ocr" (fallback for
967
+ // terminals, canvas, weak a11y). Older rows have no text_source, so we
968
+ // fall back to a bare `[Screen]`. The result type is historically called
969
+ // OCR in the engine but most captures are accessibility-derived — surface
970
+ // the actual source so the model picks filters correctly.
971
+ function screenTag(textSource: unknown): string {
972
+ if (textSource === "accessibility") return "[Screen·a11y]";
973
+ if (textSource === "ocr") return "[Screen·ocr]";
974
+ return "[Screen]";
729
975
  }
730
976
 
731
977
  // ---------------------------------------------------------------------------
@@ -742,16 +988,24 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
742
988
  switch (name) {
743
989
  case "search-content": {
744
990
  const includeFrames = args.include_frames === true;
991
+ const normalized = normalizeTimeFields(args);
992
+ // Default text cap if the caller didn't pass max_content_length.
993
+ // Keeps single calls under Claude Code's per-tool output limit.
994
+ const userCap = normalized.max_content_length;
995
+ const effectiveCap =
996
+ typeof userCap === "number"
997
+ ? userCap
998
+ : userCap === undefined
999
+ ? DEFAULT_SEARCH_CONTENT_TRUNCATE
1000
+ : Number(userCap);
745
1001
  const params = new URLSearchParams();
746
- for (const [key, value] of Object.entries(args)) {
1002
+ for (const [key, value] of Object.entries(normalized)) {
747
1003
  if (value !== null && value !== undefined) {
748
1004
  params.append(key, String(value));
749
1005
  }
750
1006
  }
751
1007
 
752
- const response = await fetchAPI(`/search?${params.toString()}`);
753
- if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
754
-
1008
+ const response = await callAPI(`/search?${params.toString()}`);
755
1009
  const data = await response.json();
756
1010
  const results = data.data || [];
757
1011
  const pagination = data.pagination || {};
@@ -781,10 +1035,14 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
781
1035
 
782
1036
  if (result.type === "OCR") {
783
1037
  const tagsStr = content.tags?.length ? `\nTags: ${content.tags.join(", ")}` : "";
1038
+ // result.type is "OCR" by historical naming, but content.text_source
1039
+ // tells us if the text actually came from the accessibility tree
1040
+ // (primary path) or OCR (fallback). Use it to label honestly.
1041
+ const tag = screenTag(content.text_source);
784
1042
  formattedResults.push(
785
- `[OCR] ${content.app_name || "?"} | ${content.window_name || "?"}\n` +
1043
+ `${tag} ${content.app_name || "?"} | ${content.window_name || "?"}\n` +
786
1044
  `${content.timestamp || ""}\n` +
787
- `${content.text || ""}` +
1045
+ `${truncateMiddle(content.text || "", effectiveCap)}` +
788
1046
  tagsStr
789
1047
  );
790
1048
  if (includeFrames && content.frame) {
@@ -798,14 +1056,14 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
798
1056
  formattedResults.push(
799
1057
  `[Audio] ${content.device_name || "?"}\n` +
800
1058
  `${content.timestamp || ""}\n` +
801
- `${content.transcription || ""}` +
1059
+ `${truncateMiddle(content.transcription || "", effectiveCap)}` +
802
1060
  tagsStr
803
1061
  );
804
1062
  } else if (result.type === "UI" || result.type === "Accessibility") {
805
1063
  formattedResults.push(
806
1064
  `[Accessibility] ${content.app_name || "?"} | ${content.window_name || "?"}\n` +
807
1065
  `${content.timestamp || ""}\n` +
808
- `${content.text || ""}`
1066
+ `${truncateMiddle(content.text || "", effectiveCap)}`
809
1067
  );
810
1068
  } else if (result.type === "Memory") {
811
1069
  const tagsStr = content.tags?.length ? ` [${content.tags.join(", ")}]` : "";
@@ -814,7 +1072,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
814
1072
  formattedResults.push(
815
1073
  `[Memory #${content.id}]${tagsStr}${importance}\n` +
816
1074
  `${content.created_at || ""}\n` +
817
- `${content.content || ""}`
1075
+ `${truncateMiddle(content.content || "", effectiveCap)}`
818
1076
  );
819
1077
  }
820
1078
  }
@@ -839,15 +1097,15 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
839
1097
  }
840
1098
 
841
1099
  case "list-meetings": {
1100
+ const normalized = normalizeTimeFields(args);
842
1101
  const params = new URLSearchParams();
843
- for (const [key, value] of Object.entries(args)) {
1102
+ for (const [key, value] of Object.entries(normalized)) {
844
1103
  if (value !== null && value !== undefined) {
845
1104
  params.append(key, String(value));
846
1105
  }
847
1106
  }
848
1107
 
849
- const response = await fetchAPI(`/meetings?${params.toString()}`);
850
- if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
1108
+ const response = await callAPI(`/meetings?${params.toString()}`);
851
1109
 
852
1110
  const meetings = await response.json();
853
1111
 
@@ -874,15 +1132,15 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
874
1132
  }
875
1133
 
876
1134
  case "activity-summary": {
1135
+ const normalized = normalizeTimeFields(args);
877
1136
  const params = new URLSearchParams();
878
- for (const [key, value] of Object.entries(args)) {
1137
+ for (const [key, value] of Object.entries(normalized)) {
879
1138
  if (value !== null && value !== undefined) {
880
1139
  params.append(key, String(value));
881
1140
  }
882
1141
  }
883
1142
 
884
- const response = await fetchAPI(`/activity-summary?${params.toString()}`);
885
- if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
1143
+ const response = await callAPI(`/activity-summary?${params.toString()}`);
886
1144
 
887
1145
  const data = await response.json();
888
1146
 
@@ -957,15 +1215,15 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
957
1215
  }
958
1216
 
959
1217
  case "search-elements": {
1218
+ const normalized = normalizeTimeFields(args);
960
1219
  const params = new URLSearchParams();
961
- for (const [key, value] of Object.entries(args)) {
1220
+ for (const [key, value] of Object.entries(normalized)) {
962
1221
  if (value !== null && value !== undefined) {
963
1222
  params.append(key, String(value));
964
1223
  }
965
1224
  }
966
1225
 
967
- const response = await fetchAPI(`/elements?${params.toString()}`);
968
- if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
1226
+ const response = await callAPI(`/elements?${params.toString()}`);
969
1227
 
970
1228
  const data = await response.json();
971
1229
  const elements = data.data || [];
@@ -1016,8 +1274,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1016
1274
  return { content: [{ type: "text", text: "Error: frame_id is required" }] };
1017
1275
  }
1018
1276
 
1019
- const response = await fetchAPI(`/frames/${frameId}/context`);
1020
- if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
1277
+ const response = await callAPI(`/frames/${frameId}/context`);
1021
1278
 
1022
1279
  const data = await response.json();
1023
1280
  const lines = [`Frame ${data.frame_id} (source: ${data.text_source})`];
@@ -1047,8 +1304,8 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1047
1304
  }
1048
1305
 
1049
1306
  case "export-video": {
1050
- const startTime = args.start_time as string;
1051
- const endTime = args.end_time as string;
1307
+ const startTime = normalizeTime(args.start_time as string);
1308
+ const endTime = normalizeTime(args.end_time as string);
1052
1309
  const fps = (args.fps as number) || 1.0;
1053
1310
 
1054
1311
  if (!startTime || !endTime) {
@@ -1065,11 +1322,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1065
1322
  limit: "10000",
1066
1323
  });
1067
1324
 
1068
- const searchResponse = await fetchAPI(`/search?${searchParams.toString()}`);
1069
- if (!searchResponse.ok) {
1070
- throw new Error(`Failed to search for frames: HTTP ${searchResponse.status}`);
1071
- }
1072
-
1325
+ const searchResponse = await callAPI(`/search?${searchParams.toString()}`);
1073
1326
  const searchData = await searchResponse.json();
1074
1327
  const results = searchData.data || [];
1075
1328
 
@@ -1188,9 +1441,8 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1188
1441
 
1189
1442
  case "update-memory": {
1190
1443
  if (args.delete && args.id) {
1191
- const response = await fetchAPI(`/memories/${args.id}`, { method: "DELETE" });
1192
- if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
1193
- return { content: [{ type: "text", text: `Memory ${args.id} deleted.` }] };
1444
+ const response = await callAPI(`/memories/${args.id}`, { method: "DELETE" });
1445
+ return { content: [{ type: "text", text: `Memory ${args.id} deleted.` }] };
1194
1446
  }
1195
1447
  if (args.id) {
1196
1448
  const body: Record<string, unknown> = {};
@@ -1198,12 +1450,11 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1198
1450
  if (args.tags !== undefined) body.tags = args.tags;
1199
1451
  if (args.importance !== undefined) body.importance = args.importance;
1200
1452
  if (args.source_context !== undefined) body.source_context = args.source_context;
1201
- const response = await fetchAPI(`/memories/${args.id}`, {
1453
+ const response = await callAPI(`/memories/${args.id}`, {
1202
1454
  method: "PUT",
1203
1455
  body: JSON.stringify(body),
1204
1456
  });
1205
- if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
1206
- const memory = await response.json();
1457
+ const memory = await response.json();
1207
1458
  return {
1208
1459
  content: [{ type: "text", text: `Memory ${memory.id} updated: "${memory.content}"` }],
1209
1460
  };
@@ -1220,11 +1471,10 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1220
1471
  importance: args.importance ?? 0.5,
1221
1472
  };
1222
1473
  if (args.source_context) memoryBody.source_context = args.source_context;
1223
- const memoryResponse = await fetchAPI("/memories", {
1474
+ const memoryResponse = await callAPI("/memories", {
1224
1475
  method: "POST",
1225
1476
  body: JSON.stringify(memoryBody),
1226
1477
  });
1227
- if (!memoryResponse.ok) throw new Error(`HTTP error: ${memoryResponse.status}`);
1228
1478
  const newMemory = await memoryResponse.json();
1229
1479
  return {
1230
1480
  content: [
@@ -1241,12 +1491,26 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1241
1491
  };
1242
1492
  if (args.timeout_secs) notifBody.timeout = Number(args.timeout_secs) * 1000;
1243
1493
  if (args.actions) notifBody.actions = args.actions;
1244
- const notifResponse = await fetch("http://localhost:11435/notify", {
1245
- method: "POST",
1246
- headers: { "Content-Type": "application/json" },
1247
- body: JSON.stringify(notifBody),
1248
- });
1249
- if (!notifResponse.ok) throw new Error(`HTTP error: ${notifResponse.status}`);
1494
+ // send-notification hits the desktop notify daemon on a separate port
1495
+ // (11435), not the screenpipe API. Keep direct fetch with friendlier
1496
+ // error so the model sees an actionable message if the daemon's down.
1497
+ let notifResponse: Response;
1498
+ try {
1499
+ notifResponse = await fetch("http://localhost:11435/notify", {
1500
+ method: "POST",
1501
+ headers: { "Content-Type": "application/json" },
1502
+ body: JSON.stringify(notifBody),
1503
+ });
1504
+ } catch (e) {
1505
+ throw new Error(
1506
+ "notification daemon not reachable on localhost:11435 — is the screenpipe desktop app running?",
1507
+ );
1508
+ }
1509
+ if (!notifResponse.ok) {
1510
+ let body = "";
1511
+ try { body = await notifResponse.text(); } catch {}
1512
+ throw new Error(`notify daemon HTTP ${notifResponse.status}${body ? `: ${body.slice(0, 200)}` : ""}`);
1513
+ }
1250
1514
  const notifResult = await notifResponse.json();
1251
1515
  return {
1252
1516
  content: [{ type: "text", text: `Notification sent: ${notifResult.message}` }],
@@ -1254,8 +1518,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1254
1518
  }
1255
1519
 
1256
1520
  case "health-check": {
1257
- const response = await fetchAPI("/health");
1258
- if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
1521
+ const response = await callAPI("/health");
1259
1522
  const data = await response.json();
1260
1523
  return {
1261
1524
  content: [{ type: "text", text: JSON.stringify(data, null, 2) }],
@@ -1263,8 +1526,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1263
1526
  }
1264
1527
 
1265
1528
  case "list-audio-devices": {
1266
- const response = await fetchAPI("/audio/list");
1267
- if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
1529
+ const response = await callAPI("/audio/list");
1268
1530
  const devices = await response.json();
1269
1531
  if (!Array.isArray(devices) || devices.length === 0) {
1270
1532
  return { content: [{ type: "text", text: "No audio devices found." }] };
@@ -1279,8 +1541,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1279
1541
  }
1280
1542
 
1281
1543
  case "list-monitors": {
1282
- const response = await fetchAPI("/vision/list");
1283
- if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
1544
+ const response = await callAPI("/vision/list");
1284
1545
  const monitors = await response.json();
1285
1546
  if (!Array.isArray(monitors) || monitors.length === 0) {
1286
1547
  return { content: [{ type: "text", text: "No monitors found." }] };
@@ -1301,11 +1562,10 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1301
1562
  if (!contentType || !id || !tags) {
1302
1563
  return { content: [{ type: "text", text: "Error: content_type, id, and tags are required" }] };
1303
1564
  }
1304
- const response = await fetchAPI(`/tags/${contentType}/${id}`, {
1565
+ const response = await callAPI(`/tags/${contentType}/${id}`, {
1305
1566
  method: "POST",
1306
1567
  body: JSON.stringify({ tags }),
1307
1568
  });
1308
- if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
1309
1569
  return {
1310
1570
  content: [{ type: "text", text: `Tags added to ${contentType}/${id}: ${tags.join(", ")}` }],
1311
1571
  };
@@ -1316,8 +1576,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1316
1576
  if (!nameQuery) {
1317
1577
  return { content: [{ type: "text", text: "Error: name is required" }] };
1318
1578
  }
1319
- const response = await fetchAPI(`/speakers/search?name=${encodeURIComponent(nameQuery)}`);
1320
- if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
1579
+ const response = await callAPI(`/speakers/search?name=${encodeURIComponent(nameQuery)}`);
1321
1580
  const speakers = await response.json();
1322
1581
  if (!Array.isArray(speakers) || speakers.length === 0) {
1323
1582
  return { content: [{ type: "text", text: "No speakers found." }] };
@@ -1334,8 +1593,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1334
1593
  case "list-unnamed-speakers": {
1335
1594
  const limit = (args.limit as number) || 10;
1336
1595
  const offset = (args.offset as number) || 0;
1337
- const response = await fetchAPI(`/speakers/unnamed?limit=${limit}&offset=${offset}`);
1338
- if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
1596
+ const response = await callAPI(`/speakers/unnamed?limit=${limit}&offset=${offset}`);
1339
1597
  const speakers = await response.json();
1340
1598
  if (!Array.isArray(speakers) || speakers.length === 0) {
1341
1599
  return { content: [{ type: "text", text: "No unnamed speakers found." }] };
@@ -1356,11 +1614,10 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1356
1614
  const body: Record<string, unknown> = { id: speakerId };
1357
1615
  if (args.name !== undefined) body.name = args.name;
1358
1616
  if (args.metadata !== undefined) body.metadata = args.metadata;
1359
- const response = await fetchAPI("/speakers/update", {
1617
+ const response = await callAPI("/speakers/update", {
1360
1618
  method: "POST",
1361
1619
  body: JSON.stringify(body),
1362
1620
  });
1363
- if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
1364
1621
  return {
1365
1622
  content: [{ type: "text", text: `Speaker ${speakerId} updated.` }],
1366
1623
  };
@@ -1372,11 +1629,10 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1372
1629
  if (!keepId || !mergeId) {
1373
1630
  return { content: [{ type: "text", text: "Error: speaker_to_keep_id and speaker_to_merge_id are required" }] };
1374
1631
  }
1375
- const response = await fetchAPI("/speakers/merge", {
1632
+ const response = await callAPI("/speakers/merge", {
1376
1633
  method: "POST",
1377
1634
  body: JSON.stringify({ speaker_to_keep_id: keepId, speaker_to_merge_id: mergeId }),
1378
1635
  });
1379
- if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
1380
1636
  return {
1381
1637
  content: [{ type: "text", text: `Merged speaker ${mergeId} into ${keepId}.` }],
1382
1638
  };
@@ -1387,11 +1643,10 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1387
1643
  if (args.app) body.app = args.app;
1388
1644
  if (args.title) body.title = args.title;
1389
1645
  if (args.attendees) body.attendees = args.attendees;
1390
- const response = await fetchAPI("/meetings/start", {
1646
+ const response = await callAPI("/meetings/start", {
1391
1647
  method: "POST",
1392
1648
  body: JSON.stringify(body),
1393
1649
  });
1394
- if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
1395
1650
  const meeting = await response.json();
1396
1651
  return {
1397
1652
  content: [{ type: "text", text: `Meeting started (id: ${meeting.id || "ok"}).` }],
@@ -1399,8 +1654,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1399
1654
  }
1400
1655
 
1401
1656
  case "stop-meeting": {
1402
- const response = await fetchAPI("/meetings/stop", { method: "POST" });
1403
- if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
1657
+ const response = await callAPI("/meetings/stop", { method: "POST" });
1404
1658
  return {
1405
1659
  content: [{ type: "text", text: "Meeting stopped." }],
1406
1660
  };
@@ -1411,8 +1665,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1411
1665
  if (!meetingId) {
1412
1666
  return { content: [{ type: "text", text: "Error: id is required" }] };
1413
1667
  }
1414
- const response = await fetchAPI(`/meetings/${meetingId}`);
1415
- if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
1668
+ const response = await callAPI(`/meetings/${meetingId}`);
1416
1669
  const meeting = await response.json();
1417
1670
  return {
1418
1671
  content: [{ type: "text", text: JSON.stringify(meeting, null, 2) }],
@@ -1439,12 +1692,11 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1439
1692
  ],
1440
1693
  };
1441
1694
  }
1442
- const response = await fetchAPI(`/meetings/${meetingId}`, {
1695
+ const response = await callAPI(`/meetings/${meetingId}`, {
1443
1696
  method: "PATCH",
1444
1697
  headers: { "Content-Type": "application/json" },
1445
1698
  body: JSON.stringify(body),
1446
1699
  });
1447
- if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
1448
1700
  const updated = await response.json();
1449
1701
  return {
1450
1702
  content: [{ type: "text", text: JSON.stringify(updated, null, 2) }],
@@ -1452,22 +1704,52 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1452
1704
  }
1453
1705
 
1454
1706
  case "keyword-search": {
1455
- const params = new URLSearchParams();
1456
- for (const [key, value] of Object.entries(args)) {
1457
- if (value !== null && value !== undefined) {
1458
- params.append(key, String(value));
1459
- }
1707
+ // Translate model-facing arg names to what the engine actually
1708
+ // accepts (KeywordSearchRequest in routes/search.rs):
1709
+ // q -> query (mandatory; the field is literally named `query`)
1710
+ // app_name -> app_names (comma-separated; serde splits it)
1711
+ // content_type: dropped — the keyword endpoint doesn't filter by type.
1712
+ // It searches OCR + audio together via the FTS index.
1713
+ // Without these mappings every keyword-search request returns HTTP 400
1714
+ // (in logs, 25/25 calls failed before this fix).
1715
+ const queryStr = (args.query as string) ?? (args.q as string);
1716
+ if (!queryStr) {
1717
+ return {
1718
+ content: [{ type: "text", text: "Error: 'q' (search query) is required" }],
1719
+ };
1460
1720
  }
1461
- const response = await fetchAPI(`/search/keyword?${params.toString()}`);
1462
- if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
1721
+ const normalized = normalizeTimeFields(args);
1722
+ const params = new URLSearchParams();
1723
+ params.append("query", queryStr);
1724
+ if (normalized.start_time) params.append("start_time", String(normalized.start_time));
1725
+ if (normalized.end_time) params.append("end_time", String(normalized.end_time));
1726
+ if (normalized.limit !== undefined) params.append("limit", String(normalized.limit));
1727
+ if (normalized.offset !== undefined) params.append("offset", String(normalized.offset));
1728
+ if (normalized.app_name) params.append("app_names", String(normalized.app_name));
1729
+ if (normalized.app_names) params.append("app_names", String(normalized.app_names));
1730
+ if (args.fuzzy_match !== undefined) params.append("fuzzy_match", String(args.fuzzy_match));
1731
+ const response = await callAPI(`/search/keyword?${params.toString()}`);
1463
1732
  const data = await response.json();
1464
- const results = data.data || [];
1733
+ // /search/keyword returns a bare array (Vec<KeywordSearchMatch> from
1734
+ // routes/search.rs), not the {data, pagination} shape /search uses.
1735
+ // The old `data.data || []` always lost results.
1736
+ const results: Array<Record<string, unknown>> = Array.isArray(data)
1737
+ ? data
1738
+ : (data.data ?? []);
1465
1739
  if (results.length === 0) {
1466
1740
  return { content: [{ type: "text", text: "No keyword search results found." }] };
1467
1741
  }
1468
- const formatted = results.map((r: Record<string, unknown>) => {
1469
- const content = r.content as Record<string, unknown> | undefined;
1470
- return `[${r.type}] ${content?.app_name || "?"} | ${content?.timestamp || ""}\n${content?.text || content?.transcription || ""}`;
1742
+ const formatted = results.map((r) => {
1743
+ // Flat shape from search_with_text_positions: { app_name, frame_id,
1744
+ // timestamp, text, text_source, ... }. Truncate to keep responses
1745
+ // under tool-output limits. text_source is "accessibility" (primary)
1746
+ // or "ocr" (fallback) — show it so the model knows which path hit.
1747
+ const text = (r.text as string) || (r.transcription as string) || "";
1748
+ const tag = screenTag(r.text_source);
1749
+ return (
1750
+ `${tag} [frame:${r.frame_id ?? "?"}] ${r.app_name ?? "?"} | ${r.timestamp ?? ""}\n` +
1751
+ truncateMiddle(text, DEFAULT_SEARCH_CONTENT_TRUNCATE)
1752
+ );
1471
1753
  });
1472
1754
  return {
1473
1755
  content: [{ type: "text", text: `Results: ${results.length}\n\n${formatted.join("\n---\n")}` }],
@@ -1479,8 +1761,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1479
1761
  if (!frameId) {
1480
1762
  return { content: [{ type: "text", text: "Error: frame_id is required" }] };
1481
1763
  }
1482
- const response = await fetchAPI(`/frames/${frameId}/elements`);
1483
- if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
1764
+ const response = await callAPI(`/frames/${frameId}/elements`);
1484
1765
  const elements = await response.json();
1485
1766
  if (!Array.isArray(elements) || elements.length === 0) {
1486
1767
  return { content: [{ type: "text", text: `No elements found for frame ${frameId}.` }] };
@@ -1507,19 +1788,69 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1507
1788
  else {
1508
1789
  return { content: [{ type: "text", text: `Error: unknown action '${action}'` }] };
1509
1790
  }
1510
- const response = await fetchAPI(endpoint, { method: "POST" });
1511
- if (!response.ok) throw new Error(`HTTP error: ${response.status}`);
1791
+ await callAPI(endpoint, { method: "POST" });
1512
1792
  return {
1513
1793
  content: [{ type: "text", text: `Recording action '${action}' executed.` }],
1514
1794
  };
1515
1795
  }
1516
1796
 
1797
+ // ---------------------------------------------------------------------
1798
+ // Enterprise team tools — only callable when TEAM_TOKEN is set at boot.
1799
+ // If we got this far without one, the tool wasn't in the listed set the
1800
+ // host saw, but a misbehaving client could still try to call it. Fail
1801
+ // loudly so the host surfaces the misconfiguration.
1802
+ // ---------------------------------------------------------------------
1803
+ case "team-search":
1804
+ case "team-devices":
1805
+ case "team-records": {
1806
+ if (!TEAM_TOKEN) {
1807
+ return {
1808
+ content: [
1809
+ {
1810
+ type: "text",
1811
+ text:
1812
+ `team-* tools require an enterprise admin token. Set ` +
1813
+ `SCREENPIPE_ENTERPRISE_TOKEN in your MCP env, or mint one ` +
1814
+ `at https://screenpi.pe/enterprise → API Tokens and paste ` +
1815
+ `it into Settings → Privacy → Admin Team API Token in the ` +
1816
+ `screenpipe desktop app.`,
1817
+ },
1818
+ ],
1819
+ };
1820
+ }
1821
+ // Map MCP tool name → /api/enterprise/v1 path
1822
+ const subpath =
1823
+ name === "team-search" ? "/search"
1824
+ : name === "team-devices" ? "/devices"
1825
+ : "/records";
1826
+ // Forward every primitive arg as a query param. The server validates;
1827
+ // unknown params are ignored, so we don't need to gatekeep here.
1828
+ const params = new URLSearchParams();
1829
+ for (const [k, v] of Object.entries(args)) {
1830
+ if (v !== null && v !== undefined && v !== "") {
1831
+ params.append(k, String(v));
1832
+ }
1833
+ }
1834
+ const query = params.toString();
1835
+ const response = await fetchTeam(`${subpath}${query ? `?${query}` : ""}`);
1836
+ const body = await response.text();
1837
+ if (!response.ok) {
1838
+ throw new Error(
1839
+ `${name} failed: HTTP ${response.status} ${response.statusText} — ${body.slice(0, 300)}`
1840
+ );
1841
+ }
1842
+ return { content: [{ type: "text", text: body }] };
1843
+ }
1844
+
1517
1845
  default:
1518
1846
  throw new Error(`Unknown tool: ${name}`);
1519
1847
  }
1520
1848
  } catch (error) {
1521
1849
  const errorMessage = error instanceof Error ? error.message : "Unknown error";
1850
+ // isError flags the result as a failure so the model retries with a
1851
+ // different approach instead of treating the error text as data.
1522
1852
  return {
1853
+ isError: true,
1523
1854
  content: [{ type: "text", text: `Error executing ${name}: ${errorMessage}` }],
1524
1855
  };
1525
1856
  }