screenpipe-mcp 0.18.1 → 0.18.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/dist/index.js +499 -194
  2. package/package.json +1 -1
  3. package/src/index.ts +509 -178
package/dist/index.js CHANGED
@@ -56,16 +56,20 @@ const SCREENPIPE_API = `http://localhost:${port}`;
56
56
  // Discover the local API key, in priority order:
57
57
  //
58
58
  // 1. env vars set by the launcher (Claude Desktop config, terminal, etc.)
59
- // 2. direct sqlite3 read of ~/.screenpipe/db.sqlite (plaintext entries only
60
- // encrypted ones need keychain, handled by 3+)
61
- // 3. bundled `bun` shipped with the desktop app → `bun x screenpipe@latest auth token`
62
- // this is the kill-shot for Claude-Desktop-via-MCP: Claude strips PATH so
63
- // `npx` and `sqlite3` lookups fail, but the desktop app's bundled bun is
64
- // at a deterministic path. We invoke it with an absolute path, which
65
- // then runs the screenpipe CLI's `auth token` command — which goes
66
- // through `find_api_auth_key` (handles the encrypted-secret-store case).
67
- // 4. node-adjacent npx (legacy fallback for users without the desktop app)
68
- // 5. PATH-based npx (very last resort)
59
+ // 2. CLI via bundled `bun` from screenpipe.app at a deterministic absolute
60
+ // path. Runs `bun x screenpipe@latest auth token` → goes through the
61
+ // Rust CLI's `find_api_auth_key` resolver, which handles the encrypted
62
+ // keychain-backed secret store. This is the canonical path: same
63
+ // contract as `screenpipe auth token` in a terminal, no PATH needed.
64
+ // 3. CLI via node-adjacent npx for dev environments that have node but
65
+ // not the desktop app.
66
+ // 4. CLI via PATH-based npx — last CLI fallback.
67
+ // 5. Direct sqlite3 read of ~/.screenpipe/db.sqlite plaintext entries
68
+ // only (encrypted entries need the keychain, which only the CLI can
69
+ // reach). Kept as a final last-resort for users who have screenpipe
70
+ // *data* but no working CLI install (rare). Demoted below the CLI
71
+ // paths because it reimplements logic that lives in `auth_key.rs` and
72
+ // can silently drift on storage-format changes.
69
73
  //
70
74
  // If all 5 miss we log a loud stderr warning so it surfaces in the host's
71
75
  // MCP log instead of the user just seeing 403s with no explanation.
@@ -81,55 +85,13 @@ function discoverApiKey() {
81
85
  const fs = require("fs");
82
86
  // eslint-disable-next-line @typescript-eslint/no-var-requires
83
87
  const { execFileSync, execSync } = require("child_process");
84
- // Common absolute paths for `sqlite3`. Claude Desktop's MCP launcher
85
- // strips PATH so the bare command name `sqlite3` would fail spawn
86
- // even though `/usr/bin/sqlite3` is always present on macOS. Try the
87
- // bare name first (cheap; works on dev machines with a normal shell)
88
- // then walk known absolute paths.
89
- const sqliteCandidates = process.platform === "win32"
90
- ? ["sqlite3.exe", "C\\:Windows\\System32\\sqlite3.exe"]
91
- : process.platform === "darwin"
92
- ? ["sqlite3", "/usr/bin/sqlite3", "/opt/homebrew/bin/sqlite3", "/usr/local/bin/sqlite3"]
93
- : ["sqlite3", "/usr/bin/sqlite3", "/usr/local/bin/sqlite3"];
94
- // 2. Direct sqlite3 read of the secret store. Only succeeds for
95
- // plaintext entries (nonce all zeros). Encrypted entries fall
96
- // through to the CLI path which can decrypt via keychain.
97
- try {
98
- const dbPath = path.join(os.homedir(), ".screenpipe", "db.sqlite");
99
- if (fs.existsSync(dbPath)) {
100
- let row = null;
101
- for (const candidate of sqliteCandidates) {
102
- try {
103
- row = execFileSync(candidate, [dbPath, "SELECT hex(nonce), value FROM secrets WHERE key = 'api_auth_key';"], { timeout: 5000, encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] }).trim();
104
- break;
105
- }
106
- catch {
107
- // try next candidate
108
- }
109
- }
110
- if (row) {
111
- const sepIdx = row.indexOf("|");
112
- const nonceHex = sepIdx >= 0 ? row.substring(0, sepIdx) : "";
113
- const value = sepIdx >= 0 ? row.substring(sepIdx + 1) : row;
114
- const isPlaintext = !nonceHex || /^0+$/.test(nonceHex);
115
- if (isPlaintext && value) {
116
- const decoded = Buffer.from(value, "base64").toString("utf-8");
117
- if (decoded && decoded.startsWith("sp-"))
118
- return decoded;
119
- if (value.startsWith("sp-"))
120
- return value;
121
- }
122
- // Non-zero nonce = encrypted — fall through to bun/npx which decrypt via keychain.
123
- }
124
- }
125
- }
126
- catch { }
127
- // 3. Bundled `bun` shipped with the desktop app. The Tauri externalBin
128
- // config (apps/screenpipe-app-tauri/src-tauri/tauri.prod.conf.json)
129
- // places it next to the main app executable; on each OS the install
130
- // path is deterministic so we don't need PATH or current_exe — both
131
- // of which Claude Desktop's MCP launcher rolls back.
132
88
  const home = os.homedir();
89
+ // 2. CLI via bundled `bun` shipped with the desktop app. The Tauri
90
+ // externalBin config places `bun` next to the main app exe at a
91
+ // deterministic install path on each OS, so we don't need PATH —
92
+ // which Claude Desktop's MCP launcher strips. The CLI's `auth
93
+ // token` goes through `find_api_auth_key` and decrypts via
94
+ // keychain when needed.
133
95
  const bunCandidates = process.platform === "darwin"
134
96
  ? [
135
97
  // Standard system-wide install
@@ -168,9 +130,8 @@ function discoverApiKey() {
168
130
  // try next candidate
169
131
  }
170
132
  }
171
- // 4. npx adjacent to the running node works in dev environments
172
- // where the user installed @screenpipe/mcp via npx without the
173
- // desktop app.
133
+ // 3. CLI via npx adjacent to the running node. Works for dev
134
+ // environments without the desktop app.
174
135
  try {
175
136
  const npxName = process.platform === "win32" ? "npx.cmd" : "npx";
176
137
  const npxPath = path.join(path.dirname(process.execPath), npxName);
@@ -185,8 +146,8 @@ function discoverApiKey() {
185
146
  }
186
147
  }
187
148
  catch { }
188
- // 5. PATH-based npx last-ditch. Will fail under Claude Desktop's
189
- // sanitized env; useful only on raw shells.
149
+ // 4. CLI via PATH-based npx. Last CLI try; works on raw shells with
150
+ // npx on PATH.
190
151
  try {
191
152
  const token = execSync("npx screenpipe@latest auth token", {
192
153
  timeout: 30000,
@@ -197,15 +158,55 @@ function discoverApiKey() {
197
158
  return token;
198
159
  }
199
160
  catch { }
161
+ // 5. Direct sqlite3 read of the secret store (last-resort). Plaintext
162
+ // entries only — encrypted ones live behind the keychain, which the
163
+ // CLI paths above already cover. Used when the user has screenpipe
164
+ // data on disk but no working CLI install.
165
+ const sqliteCandidates = process.platform === "win32"
166
+ ? ["sqlite3.exe", "C:\\Windows\\System32\\sqlite3.exe"]
167
+ : process.platform === "darwin"
168
+ ? ["sqlite3", "/usr/bin/sqlite3", "/opt/homebrew/bin/sqlite3", "/usr/local/bin/sqlite3"]
169
+ : ["sqlite3", "/usr/bin/sqlite3", "/usr/local/bin/sqlite3"];
170
+ try {
171
+ const dbPath = path.join(home, ".screenpipe", "db.sqlite");
172
+ if (fs.existsSync(dbPath)) {
173
+ let row = null;
174
+ for (const candidate of sqliteCandidates) {
175
+ try {
176
+ row = execFileSync(candidate, [dbPath, "SELECT hex(nonce), value FROM secrets WHERE key = 'api_auth_key';"], { timeout: 5000, encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] }).trim();
177
+ break;
178
+ }
179
+ catch {
180
+ // try next candidate
181
+ }
182
+ }
183
+ if (row) {
184
+ const sepIdx = row.indexOf("|");
185
+ const nonceHex = sepIdx >= 0 ? row.substring(0, sepIdx) : "";
186
+ const value = sepIdx >= 0 ? row.substring(sepIdx + 1) : row;
187
+ const isPlaintext = !nonceHex || /^0+$/.test(nonceHex);
188
+ if (isPlaintext && value) {
189
+ const decoded = Buffer.from(value, "base64").toString("utf-8");
190
+ if (decoded && decoded.startsWith("sp-"))
191
+ return decoded;
192
+ if (value.startsWith("sp-"))
193
+ return value;
194
+ }
195
+ // Encrypted — only the CLI paths above can decrypt this; we
196
+ // already tried them.
197
+ }
198
+ }
199
+ }
200
+ catch { }
200
201
  // All five paths missed. Log loudly to stderr so the host's MCP
201
202
  // panel surfaces this instead of the user seeing cryptic 403s from
202
203
  // the screenpipe server on every tool call.
203
204
  process.stderr.write([
204
205
  "[screenpipe-mcp] could not discover SCREENPIPE_LOCAL_API_KEY from any source.",
205
206
  " - env vars (SCREENPIPE_LOCAL_API_KEY / SCREENPIPE_API_KEY) not set",
206
- " - direct sqlite3 read of ~/.screenpipe/db.sqlite failed",
207
207
  " - bundled `bun` from screenpipe.app not found at any known install path",
208
208
  " - npx fallback unavailable",
209
+ " - direct sqlite3 read of ~/.screenpipe/db.sqlite failed",
209
210
  "Fix: set SCREENPIPE_LOCAL_API_KEY in your MCP launcher's env block,",
210
211
  "or install the screenpipe desktop app (https://screenpi.pe).",
211
212
  "",
@@ -213,6 +214,49 @@ function discoverApiKey() {
213
214
  return "";
214
215
  }
215
216
  const API_KEY = discoverApiKey();
217
+ // Enterprise team token — when present, this MCP additionally registers
218
+ // `team-*` tools that query the org-wide telemetry control plane
219
+ // (https://screenpi.pe/api/enterprise/v1/*) instead of just the local
220
+ // recordings. Same audience: an enterprise admin running screenpipe-mcp
221
+ // inside Claude Desktop / Cursor / Windsurf wants to ask "what did MY
222
+ // machine do" AND "what did MY TEAM do" without juggling two MCPs.
223
+ //
224
+ // Resolution order matches discoverApiKey() in spirit:
225
+ // 1. SCREENPIPE_ENTERPRISE_TOKEN env var (Claude config, terminal)
226
+ // 2. team_api_token field in ~/.screenpipe/enterprise.json (written by
227
+ // the desktop app's Settings → Privacy → Admin Team API Token)
228
+ //
229
+ // Token format is `sk_ent_…`. Empty / missing → team tools are not
230
+ // registered; non-admin users of screenpipe-mcp see exactly what they
231
+ // see today.
232
/**
 * Resolve the enterprise team API token.
 *
 * Sources are tried in order:
 *   1. the SCREENPIPE_ENTERPRISE_TOKEN environment variable
 *   2. the `team_api_token` field of ~/.screenpipe/enterprise.json
 *
 * A candidate is accepted only if, after trimming surrounding whitespace, it
 * starts with `sk_ent_`.
 *
 * @returns {string} the trimmed token, or "" when no valid token was found.
 */
function discoverTeamToken() {
    const TOKEN_PREFIX = "sk_ent_";
    // Trim before validating: the value is interpolated verbatim into an
    // Authorization header, so a stray newline or space picked up from a
    // shell export or a hand-edited JSON file must not survive.
    const sanitize = (candidate) => {
        if (typeof candidate !== "string")
            return "";
        const trimmed = candidate.trim();
        return trimmed.startsWith(TOKEN_PREFIX) ? trimmed : "";
    };
    const fromEnv = sanitize(process.env.SCREENPIPE_ENTERPRISE_TOKEN);
    if (fromEnv)
        return fromEnv;
    try {
        const entPath = path.join(os.homedir(), ".screenpipe", "enterprise.json");
        // No existsSync pre-check: a missing file raises here and lands in
        // the catch below, same as an unreadable or malformed one.
        const parsed = JSON.parse(fs.readFileSync(entPath, "utf-8"));
        return sanitize(parsed?.team_api_token);
    }
    catch {
        // Best-effort lookup: a missing, unreadable or non-JSON file simply
        // means "no team token".
    }
    return "";
}
249
+ const TEAM_TOKEN = discoverTeamToken();
250
+ const TEAM_API = "https://screenpi.pe/api/enterprise/v1";
251
/**
 * Issue a request against the enterprise team API.
 *
 * @param {string} p - path appended to TEAM_API.
 * @param {object} [init] - fetch options; caller-supplied headers are merged
 *   over the default Authorization header and win on conflict.
 * @returns {Promise<Response>} the raw fetch response.
 */
async function fetchTeam(p, init = {}) {
    const target = `${TEAM_API}${p}`;
    // Default bearer header first so an explicit caller header overrides it.
    const mergedHeaders = {
        Authorization: `Bearer ${TEAM_TOKEN}`,
        ...(init.headers || {}),
    };
    const requestInit = { ...init, headers: mergedHeaders };
    return fetch(target, requestInit);
}
216
260
  // Read version from package.json (single source of truth)
217
261
  // eslint-disable-next-line @typescript-eslint/no-var-requires
218
262
  const PKG_VERSION = require("../package.json").version;
@@ -232,11 +276,11 @@ const server = new index_js_1.Server({
232
276
  const TOOLS = [
233
277
  {
234
278
  name: "search-content",
235
- description: "Search screen text, audio transcriptions, input events, and memories. " +
236
- "Returns timestamped results with app context. " +
237
- "IMPORTANT: prefer activity-summary for broad questions ('what was I doing?'). " +
238
- "Use search-content only when you need specific text/content. " +
239
- "Start with limit=5, increase only if needed. Results can be large use max_content_length=500 to truncate.",
279
+ description: "Search screen text, audio transcriptions, input events, and memories. Returns timestamped results with app context. " +
280
+ "USE WHEN: you need the actual text/content of a moment — quotes, OCR snippets, transcript lines — or want to filter by speaker/window. " +
281
+ "DO NOT USE for: broad questions like 'what was I doing?' (use activity-summary, it pre-summarizes apps + windows + transcripts). " +
282
+ "Also DO NOT USE for: targeted UI controls (use search-elements). " +
283
+ "Start with limit=5, increase only if needed. Per-result text is auto-truncated to 1000 chars; pass max_content_length=0 to opt out, or a custom integer to override.",
240
284
  annotations: { title: "Search Content", readOnlyHint: true, openWorldHint: false, idempotentHint: true },
241
285
  inputSchema: {
242
286
  type: "object",
@@ -248,14 +292,14 @@ const TOOLS = [
248
292
  content_type: {
249
293
  type: "string",
250
294
  enum: ["all", "ocr", "audio", "input", "accessibility", "memory"],
251
- description: "Filter by content type. 'accessibility' is preferred for screen text (OS-native). 'ocr' is fallback for apps without accessibility support. Default: 'all'.",
295
+ description: "Filter by content type. NOTE on screen text: 'ocr' is a legacy label — it returns ALL screen-text rows, which are accessibility-derived for most apps (the result tag [Screen·a11y] vs [Screen·ocr] tells you which). Use 'ocr' for screen text (covers both paths), 'audio' for transcriptions, 'input' for keyboard/mouse events, 'memory' for stored facts. Default: 'all'.",
252
296
  default: "all",
253
297
  },
254
298
  limit: { type: "integer", description: "Max results (default 10, max 20). Start with 5 for exploration.", default: 10 },
255
299
  offset: { type: "integer", description: "Pagination offset. Use when results say 'use offset=N for more'.", default: 0 },
256
300
  start_time: {
257
301
  type: "string",
258
- description: "ISO 8601 UTC or relative (e.g. '2h ago', '1d ago'). Always provide to avoid scanning entire history.",
302
+ description: "Accepted: ISO 8601 ('2024-01-15T10:00:00Z'), 'Nh ago' / 'Nd ago' / 'Nw ago', 'now', 'yesterday', 'today', or bare 'YYYY-MM-DD'. Always provide to avoid scanning entire history.",
259
303
  },
260
304
  end_time: {
261
305
  type: "string",
@@ -297,9 +341,9 @@ const TOOLS = [
297
341
  {
298
342
  name: "activity-summary",
299
343
  description: "Rich activity overview: app usage, window/tab titles with URLs and time spent, key text per context, audio transcriptions. " +
300
- "USE THIS FIRST for broad questions: 'what was I doing?', 'how long on X?', 'which apps?'. " +
301
- "The 'windows' field shows exactly what the user worked on (e.g. 'Debug crash issue 20 min', 'Stripe pricing page — 5 min'). " +
302
- "Usually sufficient without further searches.",
344
+ "USE WHEN: any broad question about what the user did — 'what was I doing?', 'how long on X?', 'which apps?', 'recap my morning'. " +
345
+ "This is almost always the right first call for time-range questionsusually sufficient without follow-up searches. " +
346
+ "DO NOT USE for: finding a specific keyword (use keyword-search) or a specific UI control (use search-elements).",
303
347
  annotations: { title: "Activity Summary", readOnlyHint: true, openWorldHint: false, idempotentHint: true },
304
348
  inputSchema: {
305
349
  type: "object",
@@ -313,9 +357,9 @@ const TOOLS = [
313
357
  },
314
358
  {
315
359
  name: "search-elements",
316
- description: "Search UI elements (buttons, links, text fields) from the accessibility tree. " +
317
- "Lighter than search-content for targeted UI lookups. " +
318
- "Use when you need to find specific UI controls or page structure, not general content.",
360
+ description: "Search UI elements (buttons, links, text fields) from the accessibility tree, filterable by role. " +
361
+ "USE WHEN: you want a specific UI control or page-structure question — 'find every Submit button I saw', 'list the links in that page'. " +
362
+ "DO NOT USE for: general text/content (use search-content) or fast keyword lookup (use keyword-search).",
319
363
  annotations: { title: "Search Elements", readOnlyHint: true, openWorldHint: false, idempotentHint: true },
320
364
  inputSchema: {
321
365
  type: "object",
@@ -554,19 +598,21 @@ const TOOLS = [
554
598
  },
555
599
  {
556
600
  name: "keyword-search",
557
- description: "Fast keyword search using FTS index. Faster than search-content for exact keyword matching. " +
558
- "Returns frame IDs and matched text.",
601
+ description: "Fast FTS5 keyword search across OCR + audio combined. Returns matches with frame_id, app, timestamp, and text positions. " +
602
+ "USE WHEN: you have a specific keyword/phrase and want the fastest hit-list (e.g. 'find every screen where I typed \"stripe\"'). " +
603
+ "DO NOT USE for: structured filters by content_type / speaker / window — this endpoint ignores those (use search-content instead). " +
604
+ "DO NOT USE for: broad questions like 'what was I doing' (use activity-summary).",
559
605
  annotations: { title: "Keyword Search", readOnlyHint: true, openWorldHint: false, idempotentHint: true },
560
606
  inputSchema: {
561
607
  type: "object",
562
608
  properties: {
563
- q: { type: "string", description: "Keyword search query" },
564
- content_type: { type: "string", enum: ["ocr", "audio", "all"], description: "Content type filter", default: "all" },
565
- start_time: { type: "string", description: "ISO 8601 UTC or relative" },
566
- end_time: { type: "string", description: "ISO 8601 UTC or relative" },
567
- app_name: { type: "string", description: "Filter by app name" },
609
+ q: { type: "string", description: "Keyword query (FTS5 syntax: quoted phrases, AND/OR, prefix*)" },
610
+ start_time: { type: "string", description: "ISO 8601 UTC, 'Nh ago' / 'Nd ago' / 'Nw ago', 'now', 'yesterday', 'today', or 'YYYY-MM-DD'" },
611
+ end_time: { type: "string", description: "Same formats as start_time" },
612
+ app_name: { type: "string", description: "Filter by exact app name (case-sensitive, e.g. 'Google Chrome')" },
568
613
  limit: { type: "integer", description: "Max results (default 20)", default: 20 },
569
614
  offset: { type: "integer", description: "Pagination offset", default: 0 },
615
+ fuzzy_match: { type: "boolean", description: "Enable typo-tolerant matching", default: false },
570
616
  },
571
617
  required: ["q"],
572
618
  },
@@ -596,8 +642,70 @@ const TOOLS = [
596
642
  },
597
643
  },
598
644
  ];
645
+ // ---------------------------------------------------------------------------
646
+ // Enterprise team tools — registered only when a team API token is present.
647
+ // Same endpoint surface as the desktop `screenpipe-team` pi-agent skill:
648
+ // proxy GETs to https://screenpi.pe/api/enterprise/v1/* with Bearer auth.
649
+ //
650
+ // Naming convention: every team tool is `team-*` so it's obvious at a glance
651
+ // which scope (just-me vs the-whole-org) any given call is hitting.
652
+ // ---------------------------------------------------------------------------
653
+ const TEAM_TOOLS = [
654
+ {
655
+ name: "team-search",
656
+ description: "Substring-search across the ENTIRE ORG's telemetry (every enrolled " +
657
+ "device). Use when the question is about the team or another teammate " +
658
+ "(\"what did engineering work on yesterday\", \"did alice touch the auth code\"). " +
659
+ "For your own machine only, use search-content. " +
660
+ "Auth: enterprise admin token (sk_ent_…). " +
661
+ "Defaults: since=now-24h, limit=50. Returns matched records with device + timestamp.",
662
+ annotations: { title: "Team Search", readOnlyHint: true, openWorldHint: true, idempotentHint: true },
663
+ inputSchema: {
664
+ type: "object",
665
+ properties: {
666
+ q: { type: "string", description: "Substring to match (case-insensitive). Empty = all records in window." },
667
+ device_id: { type: "string", description: "Restrict to one device. Get the ID from team-devices." },
668
+ app_name: { type: "string", description: "Restrict to records whose app_name equals this (case-insensitive)." },
669
+ since: { type: "string", description: "ISO 8601 lower bound. Default = now - 24h." },
670
+ until: { type: "string", description: "ISO 8601 upper bound. Default = now." },
671
+ since_hours_ago: { type: "integer", description: "Convenience: equivalent to since=now-N*h." },
672
+ limit: { type: "integer", description: "Max records (default 50, max 200).", default: 50 },
673
+ },
674
+ },
675
+ },
676
+ {
677
+ name: "team-devices",
678
+ description: "List all devices enrolled under this org's license — hostname, OS, " +
679
+ "app version, last-seen timestamp. Use to discover device IDs to pass " +
680
+ "to team-search or team-records, or to spot stale machines.",
681
+ annotations: { title: "Team Devices", readOnlyHint: true, openWorldHint: true, idempotentHint: true },
682
+ inputSchema: { type: "object", properties: {} },
683
+ },
684
+ {
685
+ name: "team-records",
686
+ description: "Chronological raw dump of the org's telemetry for a time window. " +
687
+ "Returns oldest → newest (vs team-search which is recency-ranked). " +
688
+ "Use for ETL or \"walk me through X from Y to Z\" — NOT for question-answering, use team-search for that. " +
689
+ "Auth: enterprise admin token.",
690
+ annotations: { title: "Team Records", readOnlyHint: true, openWorldHint: true, idempotentHint: true },
691
+ inputSchema: {
692
+ type: "object",
693
+ properties: {
694
+ device_id: { type: "string", description: "Restrict to one device (optional)." },
695
+ kind: { type: "string", enum: ["frame", "audio", "all"], description: "Record kind filter. Default: all.", default: "all" },
696
+ since: { type: "string", description: "ISO 8601 lower bound." },
697
+ until: { type: "string", description: "ISO 8601 upper bound." },
698
+ since_hours_ago: { type: "integer", description: "Convenience: equivalent to since=now-N*h." },
699
+ limit: { type: "integer", description: "Max records (default 50, max 200).", default: 50 },
700
+ },
701
+ },
702
+ },
703
+ ];
599
704
  server.setRequestHandler(types_js_1.ListToolsRequestSchema, async () => {
600
- return { tools: TOOLS };
705
+ // Team tools only surface when an enterprise token was discovered at boot.
706
+ // No token = consumer / non-admin user; their MCP looks identical to today.
707
+ const tools = TEAM_TOKEN ? [...TOOLS, ...TEAM_TOOLS] : TOOLS;
708
+ return { tools };
601
709
  });
602
710
  // ---------------------------------------------------------------------------
603
711
  // Resources — dynamic context only (no duplicated reference docs)
@@ -675,7 +783,7 @@ server.setRequestHandler(types_js_1.ReadResourceRequestSchema, async (request) =
675
783
  - **Use max_content_length=500** to keep responses compact
676
784
  - **Don't use q for audio** — transcriptions are noisy, q filters too aggressively. Search audio by time range and speaker instead
677
785
  - **app_name is case-sensitive** — use exact names: "Google Chrome" not "chrome"
678
- - **content_type=accessibility is preferred** for screen text (OS-native). ocr is fallback for apps without accessibility support
786
+ - **Screen text is mostly accessibility-derived, not OCR.** Screenpipe walks the OS accessibility tree first; OCR is only a fallback (terminals, canvas-rendered apps, games). \`content_type=ocr\` returns both paths the result label \`[Screen·a11y]\` vs \`[Screen·ocr]\` tells you which produced the row. Don't pre-filter to a11y/ocr unless you specifically need one or the other
679
787
 
680
788
  ## Common Patterns
681
789
 
@@ -699,18 +807,151 @@ Never fabricate IDs or timestamps — only use values from actual results.
699
807
  throw new Error(`Unknown resource: ${uri}`);
700
808
  });
701
809
  // ---------------------------------------------------------------------------
702
- // Helper
810
+ // Helpers
703
811
  // ---------------------------------------------------------------------------
812
+ // Thrown by fetchAPI / callAPI when the backend is unreachable. Caught in the
813
+ // tool dispatcher to surface an actionable hint ("backend not running")
814
+ // instead of the opaque "fetch failed" the model used to see.
815
/**
 * Error raised when the request to the screenpipe backend could not be made
 * at all. The triggering error is kept on `cause`.
 */
class BackendDownError extends Error {
    cause;
    /**
     * @param {unknown} cause - the underlying error raised by the failed request.
     */
    constructor(cause) {
        const message = [
            `screenpipe backend not running on ${SCREENPIPE_API}. `,
            "Start it with `screenpipe` in a terminal, or open the screenpipe desktop app.",
        ].join("");
        super(message);
        this.name = "BackendDownError";
        this.cause = cause;
    }
}
824
+ // Thrown when the backend returns a non-2xx. Carries the server's response
825
+ // body so the dispatcher can include it in the user-visible error message.
826
/**
 * Error raised for a non-2xx backend response.
 *
 * The message has the shape `HTTP <status> from <endpoint><hint><body>`:
 * `hint` is a status-specific suggestion and `body` is the first 300
 * characters of the trimmed response body (omitted when empty).
 */
class BackendHttpError extends Error {
    status;
    bodyText;
    /**
     * @param {number} status - HTTP status code of the response.
     * @param {string} bodyText - response body; nullish or non-string values
     *   are coerced to a string so that building the error can never itself
     *   throw and hide the HTTP failure being reported.
     * @param {string} endpoint - endpoint that was requested.
     */
    constructor(status, bodyText, endpoint) {
        const body = typeof bodyText === "string" ? bodyText : String(bodyText ?? "");
        let hint = "";
        if (status === 401 || status === 403) {
            hint =
                " — API key not accepted. Set SCREENPIPE_LOCAL_API_KEY in your MCP " +
                    "launcher env, or install the screenpipe desktop app so the MCP can " +
                    "discover the key automatically.";
        }
        else if (status === 404) {
            hint =
                " — endpoint not found. The backend may be on a different version than this MCP.";
        }
        else if (status === 400) {
            hint = " — bad request. Check argument names and types against the tool schema.";
        }
        else if (status >= 500) {
            hint = " — backend error. Check screenpipe logs.";
        }
        // Cap the excerpt so a large error page cannot flood the message.
        const excerpt = body.trim().slice(0, 300);
        const bodyPart = excerpt ? ` body: ${excerpt}` : "";
        super(`HTTP ${status} from ${endpoint}${hint}${bodyPart}`);
        this.status = status;
        this.bodyText = body;
        this.name = "BackendHttpError";
    }
}
704
855
  async function fetchAPI(endpoint, options = {}) {
705
856
  const url = `${SCREENPIPE_API}${endpoint}`;
706
- return fetch(url, {
707
- ...options,
708
- headers: {
709
- "Content-Type": "application/json",
710
- ...(API_KEY ? { Authorization: `Bearer ${API_KEY}` } : {}),
711
- ...options.headers,
712
- },
713
- });
857
+ try {
858
+ return await fetch(url, {
859
+ ...options,
860
+ headers: {
861
+ "Content-Type": "application/json",
862
+ ...(API_KEY ? { Authorization: `Bearer ${API_KEY}` } : {}),
863
+ ...options.headers,
864
+ },
865
+ });
866
+ }
867
+ catch (e) {
868
+ throw new BackendDownError(e);
869
+ }
870
+ }
871
+ // Wrap a fetchAPI call: throw BackendHttpError on non-2xx with body included.
872
+ // Use from handlers instead of `if (!response.ok) throw new Error(...)`.
873
/**
 * fetchAPI plus status checking: resolves with the response on 2xx and
 * throws otherwise, carrying the response body in the error.
 *
 * @param {string} endpoint - path passed through to fetchAPI.
 * @param {object} [options] - fetch options passed through to fetchAPI.
 * @returns {Promise<Response>} the successful response.
 * @throws {BackendHttpError} when `response.ok` is false.
 */
async function callAPI(endpoint, options = {}) {
    const response = await fetchAPI(endpoint, options);
    if (response.ok) {
        return response;
    }
    let errorBody = "";
    try {
        errorBody = await response.text();
    }
    catch {
        // An unreadable body is not fatal; report the status without it.
    }
    throw new BackendHttpError(response.status, errorBody, endpoint);
}
887
+ // Server's deserialize_flexible_datetime accepts ISO 8601 + "Nh ago" / "Nd ago"
888
+ // / "Nw ago" / "now". Models also try "yesterday", "today", and bare dates
889
+ // ("2026-05-17") — normalize those here so the request doesn't 400.
890
/**
 * Normalize a user/model-supplied time expression.
 *
 * Rewrites:
 *   - "yesterday" / "today" / "tomorrow" (any case) → 00:00:00Z of that UTC
 *     calendar day
 *   - bare "YYYY-MM-DD" → "YYYY-MM-DDT00:00:00Z"
 * Any other non-blank string is returned trimmed and otherwise untouched.
 * Empty, whitespace-only and non-string inputs are returned unchanged.
 *
 * "yesterday" is resolved as a calendar day, like "today" and "tomorrow",
 * rather than as a rolling 24-hour offset, so that a
 * start_time="yesterday" / end_time="today" pair spans the whole previous
 * UTC day.
 *
 * @param {*} input - time expression, normally a string.
 * @returns {*} the normalized string, or `input` itself when nothing applies.
 */
function normalizeTime(input) {
    // Non-strings have no .trim(); hand them back instead of throwing.
    if (typeof input !== "string")
        return input;
    const s = input.trim();
    if (!s)
        return input;
    // Start of the UTC day `dayOffset` days away from the current one.
    const utcDayStart = (dayOffset) => {
        const d = new Date();
        d.setUTCDate(d.getUTCDate() + dayOffset);
        return `${d.toISOString().split("T")[0]}T00:00:00Z`;
    };
    switch (s.toLowerCase()) {
        case "yesterday":
            return utcDayStart(-1);
        case "today":
            return utcDayStart(0);
        case "tomorrow":
            return utcDayStart(1);
    }
    // Bare YYYY-MM-DD → start of day UTC
    if (/^\d{4}-\d{2}-\d{2}$/.test(s))
        return `${s}T00:00:00Z`;
    return s;
}
912
+ // Apply normalizeTime to start_time/end_time fields in an args object.
913
+ // Returns a new object — does not mutate the input.
914
/**
 * Return a shallow copy of `args` whose string-valued `start_time` and
 * `end_time` entries have been passed through normalizeTime. The input
 * object is left untouched; every other key is copied as-is.
 *
 * @param {object} args - tool arguments.
 * @returns {object} a new object with normalized time fields.
 */
function normalizeTimeFields(args) {
    const copy = { ...args };
    ["start_time", "end_time"]
        .filter((field) => typeof copy[field] === "string")
        .forEach((field) => {
            copy[field] = normalizeTime(copy[field]);
        });
    return copy;
}
923
+ // Middle-truncate long strings: keep head + tail, mark the gap with how much
924
+ // was cut. Used to cap OCR/transcription text in search-content responses
925
+ // so a single call doesn't blow past Claude Code's per-tool output limit
926
+ // (one logged call returned 131k chars from a limit:10 search).
927
/**
 * Shorten `text` by cutting out its middle, keeping the head and the tail
 * and inserting a marker that states how many characters were removed.
 *
 * @param {string|null|undefined} text - text to shorten; nullish → "".
 * @param {number} max - number of original characters to keep. Fractions are
 *   floored. A value that is zero, negative, NaN or otherwise non-finite
 *   disables truncation.
 * @returns {string} `text` unchanged when it fits (or truncation is
 *   disabled), otherwise head + marker + tail. The marker is added on top of
 *   the `max` kept characters.
 */
function truncateMiddle(text, max) {
    if (!text)
        return text ?? "";
    // Coerce once. Without this a NaN cap slips past both comparisons below
    // and slice(NaN) returns the entire string behind a "[NaN chars …]" marker.
    const cap = Math.floor(Number(max));
    if (!Number.isFinite(cap) || cap <= 0 || text.length <= cap)
        return text;
    const headLen = Math.floor(cap / 2);
    const tailLen = cap - headLen;
    const cut = text.length - cap;
    return (text.slice(0, headLen) +
        `…[${cut} chars truncated — pass max_content_length=0 for full text]…` +
        text.slice(text.length - tailLen));
}
939
+ // Default per-result text cap for search-content when the caller didn't
940
+ // specify one. Tuned to keep limit=10 responses well under tool-output limits
941
+ // while still giving the model enough text to reason over.
942
+ const DEFAULT_SEARCH_CONTENT_TRUNCATE = 1000;
943
+ // Format the screen-text tag for a result. The server's `text_source` is
944
+ // "accessibility" (OS-native tree, primary path) or "ocr" (fallback for
945
+ // terminals, canvas, weak a11y). Older rows have no text_source, so we
946
+ // fall back to a bare `[Screen]`. The result type is historically called
947
+ // OCR in the engine but most captures are accessibility-derived — surface
948
+ // the actual source so the model picks filters correctly.
949
/**
 * Map a result's `text_source` value to the label prefixed to screen-text
 * results.
 *
 * @param {*} textSource - "accessibility", "ocr", or anything else.
 * @returns {string} "[Screen·a11y]", "[Screen·ocr]", or "[Screen]" for any
 *   other value (including a missing one).
 */
function screenTag(textSource) {
    switch (textSource) {
        case "accessibility":
            return "[Screen·a11y]";
        case "ocr":
            return "[Screen·ocr]";
        default:
            return "[Screen]";
    }
}
715
956
  // ---------------------------------------------------------------------------
716
957
  // Tool handlers
@@ -724,15 +965,22 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
724
965
  switch (name) {
725
966
  case "search-content": {
726
967
  const includeFrames = args.include_frames === true;
968
+ const normalized = normalizeTimeFields(args);
969
+ // Default text cap if the caller didn't pass max_content_length.
970
+ // Keeps single calls under Claude Code's per-tool output limit.
971
+ const userCap = normalized.max_content_length;
972
+ const effectiveCap = typeof userCap === "number"
973
+ ? userCap
974
+ : userCap === undefined
975
+ ? DEFAULT_SEARCH_CONTENT_TRUNCATE
976
+ : Number(userCap);
727
977
  const params = new URLSearchParams();
728
- for (const [key, value] of Object.entries(args)) {
978
+ for (const [key, value] of Object.entries(normalized)) {
729
979
  if (value !== null && value !== undefined) {
730
980
  params.append(key, String(value));
731
981
  }
732
982
  }
733
- const response = await fetchAPI(`/search?${params.toString()}`);
734
- if (!response.ok)
735
- throw new Error(`HTTP error: ${response.status}`);
983
+ const response = await callAPI(`/search?${params.toString()}`);
736
984
  const data = await response.json();
737
985
  const results = data.data || [];
738
986
  const pagination = data.pagination || {};
@@ -755,9 +1003,13 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
755
1003
  continue;
756
1004
  if (result.type === "OCR") {
757
1005
  const tagsStr = content.tags?.length ? `\nTags: ${content.tags.join(", ")}` : "";
758
- formattedResults.push(`[OCR] ${content.app_name || "?"} | ${content.window_name || "?"}\n` +
1006
+ // result.type is "OCR" by historical naming, but content.text_source
1007
+ // tells us if the text actually came from the accessibility tree
1008
+ // (primary path) or OCR (fallback). Use it to label honestly.
1009
+ const tag = screenTag(content.text_source);
1010
+ formattedResults.push(`${tag} ${content.app_name || "?"} | ${content.window_name || "?"}\n` +
759
1011
  `${content.timestamp || ""}\n` +
760
- `${content.text || ""}` +
1012
+ `${truncateMiddle(content.text || "", effectiveCap)}` +
761
1013
  tagsStr);
762
1014
  if (includeFrames && content.frame) {
763
1015
  images.push({
@@ -770,20 +1022,20 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
770
1022
  const tagsStr = content.tags?.length ? `\nTags: ${content.tags.join(", ")}` : "";
771
1023
  formattedResults.push(`[Audio] ${content.device_name || "?"}\n` +
772
1024
  `${content.timestamp || ""}\n` +
773
- `${content.transcription || ""}` +
1025
+ `${truncateMiddle(content.transcription || "", effectiveCap)}` +
774
1026
  tagsStr);
775
1027
  }
776
1028
  else if (result.type === "UI" || result.type === "Accessibility") {
777
1029
  formattedResults.push(`[Accessibility] ${content.app_name || "?"} | ${content.window_name || "?"}\n` +
778
1030
  `${content.timestamp || ""}\n` +
779
- `${content.text || ""}`);
1031
+ `${truncateMiddle(content.text || "", effectiveCap)}`);
780
1032
  }
781
1033
  else if (result.type === "Memory") {
782
1034
  const tagsStr = content.tags?.length ? ` [${content.tags.join(", ")}]` : "";
783
1035
  const importance = content.importance != null ? ` (importance: ${content.importance})` : "";
784
1036
  formattedResults.push(`[Memory #${content.id}]${tagsStr}${importance}\n` +
785
1037
  `${content.created_at || ""}\n` +
786
- `${content.content || ""}`);
1038
+ `${truncateMiddle(content.content || "", effectiveCap)}`);
787
1039
  }
788
1040
  }
789
1041
  const header = `Results: ${results.length}/${pagination.total || "?"}` +
@@ -801,15 +1053,14 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
801
1053
  return { content: contentItems };
802
1054
  }
803
1055
  case "list-meetings": {
1056
+ const normalized = normalizeTimeFields(args);
804
1057
  const params = new URLSearchParams();
805
- for (const [key, value] of Object.entries(args)) {
1058
+ for (const [key, value] of Object.entries(normalized)) {
806
1059
  if (value !== null && value !== undefined) {
807
1060
  params.append(key, String(value));
808
1061
  }
809
1062
  }
810
- const response = await fetchAPI(`/meetings?${params.toString()}`);
811
- if (!response.ok)
812
- throw new Error(`HTTP error: ${response.status}`);
1063
+ const response = await callAPI(`/meetings?${params.toString()}`);
813
1064
  const meetings = await response.json();
814
1065
  if (!Array.isArray(meetings) || meetings.length === 0) {
815
1066
  return {
@@ -831,15 +1082,14 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
831
1082
  };
832
1083
  }
833
1084
  case "activity-summary": {
1085
+ const normalized = normalizeTimeFields(args);
834
1086
  const params = new URLSearchParams();
835
- for (const [key, value] of Object.entries(args)) {
1087
+ for (const [key, value] of Object.entries(normalized)) {
836
1088
  if (value !== null && value !== undefined) {
837
1089
  params.append(key, String(value));
838
1090
  }
839
1091
  }
840
- const response = await fetchAPI(`/activity-summary?${params.toString()}`);
841
- if (!response.ok)
842
- throw new Error(`HTTP error: ${response.status}`);
1092
+ const response = await callAPI(`/activity-summary?${params.toString()}`);
843
1093
  const data = await response.json();
844
1094
  const appsLines = (data.apps || []).map((a) => {
845
1095
  const timeSpan = a.first_seen && a.last_seen
@@ -880,15 +1130,14 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
880
1130
  return { content: [{ type: "text", text: summary }] };
881
1131
  }
882
1132
  case "search-elements": {
1133
+ const normalized = normalizeTimeFields(args);
883
1134
  const params = new URLSearchParams();
884
- for (const [key, value] of Object.entries(args)) {
1135
+ for (const [key, value] of Object.entries(normalized)) {
885
1136
  if (value !== null && value !== undefined) {
886
1137
  params.append(key, String(value));
887
1138
  }
888
1139
  }
889
- const response = await fetchAPI(`/elements?${params.toString()}`);
890
- if (!response.ok)
891
- throw new Error(`HTTP error: ${response.status}`);
1140
+ const response = await callAPI(`/elements?${params.toString()}`);
892
1141
  const data = await response.json();
893
1142
  const elements = data.data || [];
894
1143
  const pagination = data.pagination || {};
@@ -921,9 +1170,7 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
921
1170
  if (!frameId) {
922
1171
  return { content: [{ type: "text", text: "Error: frame_id is required" }] };
923
1172
  }
924
- const response = await fetchAPI(`/frames/${frameId}/context`);
925
- if (!response.ok)
926
- throw new Error(`HTTP error: ${response.status}`);
1173
+ const response = await callAPI(`/frames/${frameId}/context`);
927
1174
  const data = await response.json();
928
1175
  const lines = [`Frame ${data.frame_id} (source: ${data.text_source})`];
929
1176
  if (data.urls?.length) {
@@ -946,8 +1193,8 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
946
1193
  return { content: [{ type: "text", text: lines.join("\n") }] };
947
1194
  }
948
1195
  case "export-video": {
949
- const startTime = args.start_time;
950
- const endTime = args.end_time;
1196
+ const startTime = normalizeTime(args.start_time);
1197
+ const endTime = normalizeTime(args.end_time);
951
1198
  const fps = args.fps || 1.0;
952
1199
  if (!startTime || !endTime) {
953
1200
  return {
@@ -961,10 +1208,7 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
961
1208
  end_time: endTime,
962
1209
  limit: "10000",
963
1210
  });
964
- const searchResponse = await fetchAPI(`/search?${searchParams.toString()}`);
965
- if (!searchResponse.ok) {
966
- throw new Error(`Failed to search for frames: HTTP ${searchResponse.status}`);
967
- }
1211
+ const searchResponse = await callAPI(`/search?${searchParams.toString()}`);
968
1212
  const searchData = await searchResponse.json();
969
1213
  const results = searchData.data || [];
970
1214
  if (results.length === 0) {
@@ -1067,9 +1311,7 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1067
1311
  }
1068
1312
  case "update-memory": {
1069
1313
  if (args.delete && args.id) {
1070
- const response = await fetchAPI(`/memories/${args.id}`, { method: "DELETE" });
1071
- if (!response.ok)
1072
- throw new Error(`HTTP error: ${response.status}`);
1314
+ const response = await callAPI(`/memories/${args.id}`, { method: "DELETE" });
1073
1315
  return { content: [{ type: "text", text: `Memory ${args.id} deleted.` }] };
1074
1316
  }
1075
1317
  if (args.id) {
@@ -1082,12 +1324,10 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1082
1324
  body.importance = args.importance;
1083
1325
  if (args.source_context !== undefined)
1084
1326
  body.source_context = args.source_context;
1085
- const response = await fetchAPI(`/memories/${args.id}`, {
1327
+ const response = await callAPI(`/memories/${args.id}`, {
1086
1328
  method: "PUT",
1087
1329
  body: JSON.stringify(body),
1088
1330
  });
1089
- if (!response.ok)
1090
- throw new Error(`HTTP error: ${response.status}`);
1091
1331
  const memory = await response.json();
1092
1332
  return {
1093
1333
  content: [{ type: "text", text: `Memory ${memory.id} updated: "${memory.content}"` }],
@@ -1106,12 +1346,10 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1106
1346
  };
1107
1347
  if (args.source_context)
1108
1348
  memoryBody.source_context = args.source_context;
1109
- const memoryResponse = await fetchAPI("/memories", {
1349
+ const memoryResponse = await callAPI("/memories", {
1110
1350
  method: "POST",
1111
1351
  body: JSON.stringify(memoryBody),
1112
1352
  });
1113
- if (!memoryResponse.ok)
1114
- throw new Error(`HTTP error: ${memoryResponse.status}`);
1115
1353
  const newMemory = await memoryResponse.json();
1116
1354
  return {
1117
1355
  content: [
@@ -1129,31 +1367,42 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1129
1367
  notifBody.timeout = Number(args.timeout_secs) * 1000;
1130
1368
  if (args.actions)
1131
1369
  notifBody.actions = args.actions;
1132
- const notifResponse = await fetch("http://localhost:11435/notify", {
1133
- method: "POST",
1134
- headers: { "Content-Type": "application/json" },
1135
- body: JSON.stringify(notifBody),
1136
- });
1137
- if (!notifResponse.ok)
1138
- throw new Error(`HTTP error: ${notifResponse.status}`);
1370
+ // send-notification hits the desktop notify daemon on a separate port
1371
+ // (11435), not the screenpipe API. Keep direct fetch with friendlier
1372
+ // error so the model sees an actionable message if the daemon's down.
1373
+ let notifResponse;
1374
+ try {
1375
+ notifResponse = await fetch("http://localhost:11435/notify", {
1376
+ method: "POST",
1377
+ headers: { "Content-Type": "application/json" },
1378
+ body: JSON.stringify(notifBody),
1379
+ });
1380
+ }
1381
+ catch (e) {
1382
+ throw new Error("notification daemon not reachable on localhost:11435 — is the screenpipe desktop app running?");
1383
+ }
1384
+ if (!notifResponse.ok) {
1385
+ let body = "";
1386
+ try {
1387
+ body = await notifResponse.text();
1388
+ }
1389
+ catch { }
1390
+ throw new Error(`notify daemon HTTP ${notifResponse.status}${body ? `: ${body.slice(0, 200)}` : ""}`);
1391
+ }
1139
1392
  const notifResult = await notifResponse.json();
1140
1393
  return {
1141
1394
  content: [{ type: "text", text: `Notification sent: ${notifResult.message}` }],
1142
1395
  };
1143
1396
  }
1144
1397
  case "health-check": {
1145
- const response = await fetchAPI("/health");
1146
- if (!response.ok)
1147
- throw new Error(`HTTP error: ${response.status}`);
1398
+ const response = await callAPI("/health");
1148
1399
  const data = await response.json();
1149
1400
  return {
1150
1401
  content: [{ type: "text", text: JSON.stringify(data, null, 2) }],
1151
1402
  };
1152
1403
  }
1153
1404
  case "list-audio-devices": {
1154
- const response = await fetchAPI("/audio/list");
1155
- if (!response.ok)
1156
- throw new Error(`HTTP error: ${response.status}`);
1405
+ const response = await callAPI("/audio/list");
1157
1406
  const devices = await response.json();
1158
1407
  if (!Array.isArray(devices) || devices.length === 0) {
1159
1408
  return { content: [{ type: "text", text: "No audio devices found." }] };
@@ -1164,9 +1413,7 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1164
1413
  };
1165
1414
  }
1166
1415
  case "list-monitors": {
1167
- const response = await fetchAPI("/vision/list");
1168
- if (!response.ok)
1169
- throw new Error(`HTTP error: ${response.status}`);
1416
+ const response = await callAPI("/vision/list");
1170
1417
  const monitors = await response.json();
1171
1418
  if (!Array.isArray(monitors) || monitors.length === 0) {
1172
1419
  return { content: [{ type: "text", text: "No monitors found." }] };
@@ -1183,12 +1430,10 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1183
1430
  if (!contentType || !id || !tags) {
1184
1431
  return { content: [{ type: "text", text: "Error: content_type, id, and tags are required" }] };
1185
1432
  }
1186
- const response = await fetchAPI(`/tags/${contentType}/${id}`, {
1433
+ const response = await callAPI(`/tags/${contentType}/${id}`, {
1187
1434
  method: "POST",
1188
1435
  body: JSON.stringify({ tags }),
1189
1436
  });
1190
- if (!response.ok)
1191
- throw new Error(`HTTP error: ${response.status}`);
1192
1437
  return {
1193
1438
  content: [{ type: "text", text: `Tags added to ${contentType}/${id}: ${tags.join(", ")}` }],
1194
1439
  };
@@ -1198,9 +1443,7 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1198
1443
  if (!nameQuery) {
1199
1444
  return { content: [{ type: "text", text: "Error: name is required" }] };
1200
1445
  }
1201
- const response = await fetchAPI(`/speakers/search?name=${encodeURIComponent(nameQuery)}`);
1202
- if (!response.ok)
1203
- throw new Error(`HTTP error: ${response.status}`);
1446
+ const response = await callAPI(`/speakers/search?name=${encodeURIComponent(nameQuery)}`);
1204
1447
  const speakers = await response.json();
1205
1448
  if (!Array.isArray(speakers) || speakers.length === 0) {
1206
1449
  return { content: [{ type: "text", text: "No speakers found." }] };
@@ -1213,9 +1456,7 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1213
1456
  case "list-unnamed-speakers": {
1214
1457
  const limit = args.limit || 10;
1215
1458
  const offset = args.offset || 0;
1216
- const response = await fetchAPI(`/speakers/unnamed?limit=${limit}&offset=${offset}`);
1217
- if (!response.ok)
1218
- throw new Error(`HTTP error: ${response.status}`);
1459
+ const response = await callAPI(`/speakers/unnamed?limit=${limit}&offset=${offset}`);
1219
1460
  const speakers = await response.json();
1220
1461
  if (!Array.isArray(speakers) || speakers.length === 0) {
1221
1462
  return { content: [{ type: "text", text: "No unnamed speakers found." }] };
@@ -1235,12 +1476,10 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1235
1476
  body.name = args.name;
1236
1477
  if (args.metadata !== undefined)
1237
1478
  body.metadata = args.metadata;
1238
- const response = await fetchAPI("/speakers/update", {
1479
+ const response = await callAPI("/speakers/update", {
1239
1480
  method: "POST",
1240
1481
  body: JSON.stringify(body),
1241
1482
  });
1242
- if (!response.ok)
1243
- throw new Error(`HTTP error: ${response.status}`);
1244
1483
  return {
1245
1484
  content: [{ type: "text", text: `Speaker ${speakerId} updated.` }],
1246
1485
  };
@@ -1251,12 +1490,10 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1251
1490
  if (!keepId || !mergeId) {
1252
1491
  return { content: [{ type: "text", text: "Error: speaker_to_keep_id and speaker_to_merge_id are required" }] };
1253
1492
  }
1254
- const response = await fetchAPI("/speakers/merge", {
1493
+ const response = await callAPI("/speakers/merge", {
1255
1494
  method: "POST",
1256
1495
  body: JSON.stringify({ speaker_to_keep_id: keepId, speaker_to_merge_id: mergeId }),
1257
1496
  });
1258
- if (!response.ok)
1259
- throw new Error(`HTTP error: ${response.status}`);
1260
1497
  return {
1261
1498
  content: [{ type: "text", text: `Merged speaker ${mergeId} into ${keepId}.` }],
1262
1499
  };
@@ -1269,21 +1506,17 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1269
1506
  body.title = args.title;
1270
1507
  if (args.attendees)
1271
1508
  body.attendees = args.attendees;
1272
- const response = await fetchAPI("/meetings/start", {
1509
+ const response = await callAPI("/meetings/start", {
1273
1510
  method: "POST",
1274
1511
  body: JSON.stringify(body),
1275
1512
  });
1276
- if (!response.ok)
1277
- throw new Error(`HTTP error: ${response.status}`);
1278
1513
  const meeting = await response.json();
1279
1514
  return {
1280
1515
  content: [{ type: "text", text: `Meeting started (id: ${meeting.id || "ok"}).` }],
1281
1516
  };
1282
1517
  }
1283
1518
  case "stop-meeting": {
1284
- const response = await fetchAPI("/meetings/stop", { method: "POST" });
1285
- if (!response.ok)
1286
- throw new Error(`HTTP error: ${response.status}`);
1519
+ const response = await callAPI("/meetings/stop", { method: "POST" });
1287
1520
  return {
1288
1521
  content: [{ type: "text", text: "Meeting stopped." }],
1289
1522
  };
@@ -1293,9 +1526,7 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1293
1526
  if (!meetingId) {
1294
1527
  return { content: [{ type: "text", text: "Error: id is required" }] };
1295
1528
  }
1296
- const response = await fetchAPI(`/meetings/${meetingId}`);
1297
- if (!response.ok)
1298
- throw new Error(`HTTP error: ${response.status}`);
1529
+ const response = await callAPI(`/meetings/${meetingId}`);
1299
1530
  const meeting = await response.json();
1300
1531
  return {
1301
1532
  content: [{ type: "text", text: JSON.stringify(meeting, null, 2) }],
@@ -1322,36 +1553,68 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1322
1553
  ],
1323
1554
  };
1324
1555
  }
1325
- const response = await fetchAPI(`/meetings/${meetingId}`, {
1556
+ const response = await callAPI(`/meetings/${meetingId}`, {
1326
1557
  method: "PATCH",
1327
1558
  headers: { "Content-Type": "application/json" },
1328
1559
  body: JSON.stringify(body),
1329
1560
  });
1330
- if (!response.ok)
1331
- throw new Error(`HTTP error: ${response.status}`);
1332
1561
  const updated = await response.json();
1333
1562
  return {
1334
1563
  content: [{ type: "text", text: JSON.stringify(updated, null, 2) }],
1335
1564
  };
1336
1565
  }
1337
1566
  case "keyword-search": {
1338
- const params = new URLSearchParams();
1339
- for (const [key, value] of Object.entries(args)) {
1340
- if (value !== null && value !== undefined) {
1341
- params.append(key, String(value));
1342
- }
1567
+ // Translate model-facing arg names to what the engine actually
1568
+ // accepts (KeywordSearchRequest in routes/search.rs):
1569
+ // q -> query (mandatory; the field is literally named `query`)
1570
+ // app_name -> app_names (comma-separated; serde splits it)
1571
+ // content_type: dropped — the keyword endpoint doesn't filter by type.
1572
+ // It searches OCR + audio together via the FTS index.
1573
+ // Without these mappings every keyword-search request 400s (and used
1574
+ // to: in logs, 25/25 calls failed before this fix).
1575
+ const queryStr = args.query ?? args.q;
1576
+ if (!queryStr) {
1577
+ return {
1578
+ content: [{ type: "text", text: "Error: 'q' (search query) is required" }],
1579
+ };
1343
1580
  }
1344
- const response = await fetchAPI(`/search/keyword?${params.toString()}`);
1345
- if (!response.ok)
1346
- throw new Error(`HTTP error: ${response.status}`);
1581
+ const normalized = normalizeTimeFields(args);
1582
+ const params = new URLSearchParams();
1583
+ params.append("query", queryStr);
1584
+ if (normalized.start_time)
1585
+ params.append("start_time", String(normalized.start_time));
1586
+ if (normalized.end_time)
1587
+ params.append("end_time", String(normalized.end_time));
1588
+ if (normalized.limit !== undefined)
1589
+ params.append("limit", String(normalized.limit));
1590
+ if (normalized.offset !== undefined)
1591
+ params.append("offset", String(normalized.offset));
1592
+ if (normalized.app_name)
1593
+ params.append("app_names", String(normalized.app_name));
1594
+ if (normalized.app_names)
1595
+ params.append("app_names", String(normalized.app_names));
1596
+ if (args.fuzzy_match !== undefined)
1597
+ params.append("fuzzy_match", String(args.fuzzy_match));
1598
+ const response = await callAPI(`/search/keyword?${params.toString()}`);
1347
1599
  const data = await response.json();
1348
- const results = data.data || [];
1600
+ // /search/keyword returns a bare array (Vec<KeywordSearchMatch> from
1601
+ // routes/search.rs), not the {data, pagination} shape /search uses.
1602
+ // The old `data.data || []` always lost results.
1603
+ const results = Array.isArray(data)
1604
+ ? data
1605
+ : (data.data ?? []);
1349
1606
  if (results.length === 0) {
1350
1607
  return { content: [{ type: "text", text: "No keyword search results found." }] };
1351
1608
  }
1352
1609
  const formatted = results.map((r) => {
1353
- const content = r.content;
1354
- return `[${r.type}] ${content?.app_name || "?"} | ${content?.timestamp || ""}\n${content?.text || content?.transcription || ""}`;
1610
+ // Flat shape from search_with_text_positions: { app_name, frame_id,
1611
+ // timestamp, text, text_source, ... }. Truncate to keep responses
1612
+ // under tool-output limits. text_source is "accessibility" (primary)
1613
+ // or "ocr" (fallback) — show it so the model knows which path hit.
1614
+ const text = r.text || r.transcription || "";
1615
+ const tag = screenTag(r.text_source);
1616
+ return (`${tag} [frame:${r.frame_id ?? "?"}] ${r.app_name ?? "?"} | ${r.timestamp ?? ""}\n` +
1617
+ truncateMiddle(text, DEFAULT_SEARCH_CONTENT_TRUNCATE));
1355
1618
  });
1356
1619
  return {
1357
1620
  content: [{ type: "text", text: `Results: ${results.length}\n\n${formatted.join("\n---\n")}` }],
@@ -1362,9 +1625,7 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1362
1625
  if (!frameId) {
1363
1626
  return { content: [{ type: "text", text: "Error: frame_id is required" }] };
1364
1627
  }
1365
- const response = await fetchAPI(`/frames/${frameId}/elements`);
1366
- if (!response.ok)
1367
- throw new Error(`HTTP error: ${response.status}`);
1628
+ const response = await callAPI(`/frames/${frameId}/elements`);
1368
1629
  const elements = await response.json();
1369
1630
  if (!Array.isArray(elements) || elements.length === 0) {
1370
1631
  return { content: [{ type: "text", text: `No elements found for frame ${frameId}.` }] };
@@ -1390,20 +1651,64 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
1390
1651
  else {
1391
1652
  return { content: [{ type: "text", text: `Error: unknown action '${action}'` }] };
1392
1653
  }
1393
- const response = await fetchAPI(endpoint, { method: "POST" });
1394
- if (!response.ok)
1395
- throw new Error(`HTTP error: ${response.status}`);
1654
+ await callAPI(endpoint, { method: "POST" });
1396
1655
  return {
1397
1656
  content: [{ type: "text", text: `Recording action '${action}' executed.` }],
1398
1657
  };
1399
1658
  }
1659
+ // ---------------------------------------------------------------------
1660
+ // Enterprise team tools — only callable when TEAM_TOKEN is set at boot.
1661
+ // If we got this far without one, the tool wasn't in the listed set the
1662
+ // host saw, but a misbehaving client could still try to call it. Fail
1663
+ // loudly so the host surfaces the misconfiguration.
1664
+ // ---------------------------------------------------------------------
1665
+ case "team-search":
1666
+ case "team-devices":
1667
+ case "team-records": {
1668
+ if (!TEAM_TOKEN) {
1669
+ return {
1670
+ content: [
1671
+ {
1672
+ type: "text",
1673
+ text: `team-* tools require an enterprise admin token. Set ` +
1674
+ `SCREENPIPE_ENTERPRISE_TOKEN in your MCP env, or mint one ` +
1675
+ `at https://screenpi.pe/enterprise → API Tokens and paste ` +
1676
+ `it into Settings → Privacy → Admin Team API Token in the ` +
1677
+ `screenpipe desktop app.`,
1678
+ },
1679
+ ],
1680
+ };
1681
+ }
1682
+ // Map MCP tool name → /api/enterprise/v1 path
1683
+ const subpath = name === "team-search" ? "/search"
1684
+ : name === "team-devices" ? "/devices"
1685
+ : "/records";
1686
+ // Forward every primitive arg as a query param. The server validates;
1687
+ // unknown params are ignored, so we don't need to gatekeep here.
1688
+ const params = new URLSearchParams();
1689
+ for (const [k, v] of Object.entries(args)) {
1690
+ if (v !== null && v !== undefined && v !== "") {
1691
+ params.append(k, String(v));
1692
+ }
1693
+ }
1694
+ const query = params.toString();
1695
+ const response = await fetchTeam(`${subpath}${query ? `?${query}` : ""}`);
1696
+ const body = await response.text();
1697
+ if (!response.ok) {
1698
+ throw new Error(`${name} failed: HTTP ${response.status} ${response.statusText} — ${body.slice(0, 300)}`);
1699
+ }
1700
+ return { content: [{ type: "text", text: body }] };
1701
+ }
1400
1702
  default:
1401
1703
  throw new Error(`Unknown tool: ${name}`);
1402
1704
  }
1403
1705
  }
1404
1706
  catch (error) {
1405
1707
  const errorMessage = error instanceof Error ? error.message : "Unknown error";
1708
+ // isError flags the result as a failure so the model retries with a
1709
+ // different approach instead of treating the error text as data.
1406
1710
  return {
1711
+ isError: true,
1407
1712
  content: [{ type: "text", text: `Error executing ${name}: ${errorMessage}` }],
1408
1713
  };
1409
1714
  }