screenpipe-mcp 0.18.9 → 0.18.11

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -12,7 +12,6 @@ import {
12
12
  ReadResourceRequestSchema,
13
13
  Tool,
14
14
  } from "@modelcontextprotocol/sdk/types.js";
15
- import { WebSocket } from "ws";
16
15
  import * as fs from "fs";
17
16
  import * as path from "path";
18
17
  import * as os from "os";
@@ -303,6 +302,11 @@ const TOOLS: Tool[] = [
303
302
  },
304
303
  speaker_ids: { type: "string", description: "Comma-separated speaker IDs to filter audio" },
305
304
  speaker_name: { type: "string", description: "Filter audio by speaker name (case-insensitive partial match)" },
305
+ tags: {
306
+ type: "string",
307
+ description:
308
+ "Comma-separated tags; returns only items carrying ALL of them (e.g. 'person:ada,project:atlas'). Works for screen + audio (content_type 'ocr'/'audio'/'all', tags written by add-tags) AND memories (content_type 'memory', tags written by update-memory). Same tag string links across all three, so two items sharing a tag are connected. Use namespaced tags (person:, project:, topic:) to link people/projects/topics. content_type 'input' and 'accessibility' have no tags and return nothing when this is set.",
309
+ },
306
310
  max_content_length: {
307
311
  type: "integer",
308
312
  description: "Truncate each result's text via middle-truncation. Use 200-500 to keep responses compact.",
@@ -389,15 +393,22 @@ const TOOLS: Tool[] = [
389
393
  {
390
394
  name: "export-video",
391
395
  description:
392
- "Export an MP4 video of screen recordings for a time range. " +
393
- "Returns the file path. Can take a few minutes for long ranges.",
396
+ "Export an MP4 of screen recordings for a time range, with synced microphone audio. " +
397
+ "Frames are placed at their real timestamps, so the clip's duration matches the " +
398
+ "wall-clock span you requested (not a sped-up timelapse). Returns the file path. " +
399
+ "Can take a few minutes for long ranges.",
394
400
  annotations: { title: "Export Video", readOnlyHint: false, destructiveHint: false, openWorldHint: false },
395
401
  inputSchema: {
396
402
  type: "object",
397
403
  properties: {
398
- start_time: { type: "string", description: "ISO 8601 UTC or relative" },
399
- end_time: { type: "string", description: "ISO 8601 UTC or relative" },
400
- fps: { type: "number", description: "Output FPS (default 1.0). Higher = smoother but larger file.", default: 1.0 },
404
+ start_time: { type: "string", description: 'ISO 8601 UTC or relative (e.g. "5m ago", "now")' },
405
+ end_time: { type: "string", description: 'ISO 8601 UTC or relative (e.g. "5m ago", "now")' },
406
+ output_path: {
407
+ type: "string",
408
+ description:
409
+ "Optional absolute path for the MP4 (e.g. ~/Downloads/clip.mp4). " +
410
+ "Defaults to the screenpipe data dir's exports/ folder.",
411
+ },
401
412
  },
402
413
  required: ["start_time", "end_time"],
403
414
  },
@@ -414,7 +425,7 @@ const TOOLS: Tool[] = [
414
425
  properties: {
415
426
  id: { type: "integer", description: "Memory ID — omit to create new, provide to update/delete" },
416
427
  content: { type: "string", description: "Memory text (required for creation)" },
417
- tags: { type: "array", items: { type: "string" }, description: "Categorization tags (e.g. ['work', 'project-x'])" },
428
+ tags: { type: "array", items: { type: "string" }, description: "Tags. Prefer namespaced (person:ada, project:atlas, topic:pricing) so this memory links to the same people/projects you tag on frames/audio. Retrieve with search-content content_type='memory' tags='person:ada'." },
418
429
  importance: { type: "number", description: "0.0 (trivial) to 1.0 (critical). Default 0.5." },
419
430
  source_context: { type: "object", description: "Optional metadata linking to source (app, timestamp, etc.)" },
420
431
  delete: { type: "boolean", description: "Set true to delete the memory identified by id" },
@@ -477,14 +488,18 @@ const TOOLS: Tool[] = [
477
488
  {
478
489
  name: "add-tags",
479
490
  description:
480
- "Add tags to a content item (vision frame or audio chunk) for organization and retrieval.",
491
+ "Tag a screen frame (vision) or audio chunk (audio) so it can be retrieved later. " +
492
+ "Tags are a shared linking layer: use namespaced tags (person:ada, project:atlas, topic:pricing) to connect a capture to a person, project, or topic. " +
493
+ "The SAME tag string also works on memories (via update-memory), so tagging a frame and a memory with person:ada links them. " +
494
+ "Retrieve later with search-content tags='person:ada' (add content_type+start_time/end_time to scope to a timeframe). " +
495
+ "Note: frames are pruned by retention, so for durable links prefer tagging a memory; tag frames/audio for shorter-term recall.",
481
496
  annotations: { title: "Add Tags", readOnlyHint: false, destructiveHint: false, openWorldHint: false },
482
497
  inputSchema: {
483
498
  type: "object",
484
499
  properties: {
485
- content_type: { type: "string", enum: ["vision", "audio"], description: "Type of content to tag" },
486
- id: { type: "integer", description: "Content item ID" },
487
- tags: { type: "array", items: { type: "string" }, description: "Tags to add" },
500
+ content_type: { type: "string", enum: ["vision", "audio"], description: "vision = screen frame, audio = audio chunk. Get the id from search-content results (frame_id / chunk_id)." },
501
+ id: { type: "integer", description: "Content item ID (OCR result frame_id, or audio result chunk_id)" },
502
+ tags: { type: "array", items: { type: "string" }, description: "Tags to add. Prefer namespaced: person:<name>, project:<name>, topic:<name>." },
488
503
  },
489
504
  required: ["content_type", "id", "tags"],
490
505
  },
@@ -1333,7 +1348,6 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1333
1348
  case "export-video": {
1334
1349
  const startTime = normalizeTime(args.start_time as string);
1335
1350
  const endTime = normalizeTime(args.end_time as string);
1336
- const fps = (args.fps as number) || 1.0;
1337
1351
 
1338
1352
  if (!startTime || !endTime) {
1339
1353
  return {
@@ -1341,128 +1355,51 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1341
1355
  };
1342
1356
  }
1343
1357
 
1344
- // Get frame IDs for the time range
1345
- const searchParams = new URLSearchParams({
1346
- content_type: "ocr",
1347
- start_time: startTime,
1348
- end_time: endTime,
1349
- limit: "10000",
1350
- });
1351
-
1352
- const searchResponse = await callAPI(`/search?${searchParams.toString()}`);
1353
- const searchData = await searchResponse.json();
1354
- const results = searchData.data || [];
1355
-
1356
- if (results.length === 0) {
1358
+ // A real-time MP4 with synced microphone audio, rendered server-side by the
1359
+ // engine export core (the `screenpipe export` CLI's HTTP twin). MCP runs on the
1360
+ // same host as the backend, so the returned path is a local file. Frames sit at
1361
+ // their real timestamps, so the clip duration matches the wall-clock span.
1362
+ try {
1363
+ const body: Record<string, unknown> = { start: startTime, end: endTime };
1364
+ if (typeof args.output_path === "string" && args.output_path.trim()) {
1365
+ body.output_path = args.output_path;
1366
+ }
1367
+ const response = await callAPI("/export", {
1368
+ method: "POST",
1369
+ body: JSON.stringify(body),
1370
+ });
1371
+ const data = (await response.json()) as {
1372
+ output_path: string;
1373
+ frame_count: number;
1374
+ audio_chunk_count: number;
1375
+ duration_secs: number;
1376
+ file_size_bytes: number;
1377
+ };
1378
+ const sizeMb = data.file_size_bytes
1379
+ ? (data.file_size_bytes / (1024 * 1024)).toFixed(1)
1380
+ : null;
1357
1381
  return {
1358
1382
  content: [
1359
1383
  {
1360
1384
  type: "text",
1361
- text: `No screen recordings found between ${startTime} and ${endTime}.`,
1385
+ text:
1386
+ `Video exported (with audio): ${data.output_path}\n` +
1387
+ `${data.frame_count ?? 0} frames | ${data.audio_chunk_count ?? 0} audio chunks` +
1388
+ (sizeMb ? ` | ${sizeMb} MB` : "") +
1389
+ (data.duration_secs ? ` | ${data.duration_secs}s` : "") +
1390
+ ` | ${startTime} → ${endTime}`,
1362
1391
  },
1363
1392
  ],
1364
1393
  };
1365
- }
1366
-
1367
- const frameIds: number[] = [];
1368
- const seenIds = new Set<number>();
1369
- for (const result of results) {
1370
- if (result.type === "OCR" && result.content?.frame_id) {
1371
- const frameId = result.content.frame_id;
1372
- if (!seenIds.has(frameId)) {
1373
- seenIds.add(frameId);
1374
- frameIds.push(frameId);
1375
- }
1376
- }
1377
- }
1378
-
1379
- if (frameIds.length === 0) {
1380
- return {
1381
- content: [{ type: "text", text: "No valid frame IDs found (audio-only?)." }],
1382
- };
1383
- }
1384
-
1385
- frameIds.sort((a, b) => a - b);
1386
-
1387
- const wsUrl = `ws://localhost:${port}/frames/export?fps=${fps}`;
1388
-
1389
- const exportResult = await new Promise<{
1390
- success: boolean;
1391
- filePath?: string;
1392
- error?: string;
1393
- frameCount?: number;
1394
- }>((resolve) => {
1395
- const ws = new WebSocket(wsUrl);
1396
- let resolved = false;
1397
-
1398
- const timeout = setTimeout(() => {
1399
- if (!resolved) {
1400
- resolved = true;
1401
- ws.close();
1402
- resolve({ success: false, error: "Export timed out after 5 minutes" });
1403
- }
1404
- }, 5 * 60 * 1000);
1405
-
1406
- ws.on("open", () => {
1407
- ws.send(JSON.stringify({ frame_ids: frameIds }));
1408
- });
1409
-
1410
- ws.on("error", (error) => {
1411
- if (!resolved) {
1412
- resolved = true;
1413
- clearTimeout(timeout);
1414
- resolve({ success: false, error: `WebSocket error: ${error.message}` });
1415
- }
1416
- });
1417
-
1418
- ws.on("close", () => {
1419
- if (!resolved) {
1420
- resolved = true;
1421
- clearTimeout(timeout);
1422
- resolve({ success: false, error: "Connection closed unexpectedly" });
1423
- }
1424
- });
1425
-
1426
- ws.on("message", (data) => {
1427
- try {
1428
- const message = JSON.parse(data.toString());
1429
- if (message.status === "completed" && message.video_data) {
1430
- const tempDir = os.tmpdir();
1431
- const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
1432
- const filename = `screenpipe_export_${timestamp}.mp4`;
1433
- const filePath = path.join(tempDir, filename);
1434
- fs.writeFileSync(filePath, Buffer.from(message.video_data));
1435
- resolved = true;
1436
- clearTimeout(timeout);
1437
- ws.close();
1438
- resolve({ success: true, filePath, frameCount: frameIds.length });
1439
- } else if (message.status === "error") {
1440
- resolved = true;
1441
- clearTimeout(timeout);
1442
- ws.close();
1443
- resolve({ success: false, error: message.error || "Export failed" });
1444
- }
1445
- } catch {
1446
- // Ignore parse errors for progress messages
1447
- }
1448
- });
1449
- });
1450
-
1451
- if (exportResult.success && exportResult.filePath) {
1394
+ } catch (err) {
1452
1395
  return {
1453
1396
  content: [
1454
1397
  {
1455
1398
  type: "text",
1456
- text:
1457
- `Video exported: ${exportResult.filePath}\n` +
1458
- `Frames: ${exportResult.frameCount} | ${startTime} → ${endTime} | ${fps} fps`,
1399
+ text: `Export failed: ${err instanceof Error ? err.message : String(err)}`,
1459
1400
  },
1460
1401
  ],
1461
1402
  };
1462
- } else {
1463
- return {
1464
- content: [{ type: "text", text: `Export failed: ${exportResult.error}` }],
1465
- };
1466
1403
  }
1467
1404
  }
1468
1405