screenpipe-mcp 0.3.1 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,9 +1,59 @@
1
1
  #!/usr/bin/env node
2
2
  "use strict";
3
// Module-interop helper (has the shape of the helper TypeScript emits for CommonJS output — TODO confirm origin).
// Exposes property `k` of module object `m` on object `o` under the name `k2` (which defaults to `k`).
// An already-present `this.__createBinding` is reused instead of defining a new one.
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
4
+ if (k2 === undefined) k2 = k;
5
+ var desc = Object.getOwnPropertyDescriptor(m, k);
6
// Substitute a live getter when the source property has no descriptor, is an accessor on a
// module not flagged __esModule, or is a writable/configurable data property.
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
7
+ desc = { enumerable: true, get: function() { return m[k]; } };
8
+ }
9
+ Object.defineProperty(o, k2, desc);
10
// Fallback used when Object.create is unavailable: copy the current value by plain assignment.
+ }) : (function(o, m, k, k2) {
11
+ if (k2 === undefined) k2 = k;
12
+ o[k2] = m[k];
13
+ }));
14
// Module-interop helper: stores `v` as the "default" property of `o`.
// An already-present `this.__setModuleDefault` is reused instead of defining a new one.
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
15
// Defined as an enumerable data property; writable/configurable are not specified here.
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
16
// Fallback used when Object.create is unavailable: plain assignment.
+ }) : function(o, v) {
17
+ o["default"] = v;
18
+ });
19
// Module-interop helper: wraps a required module so it can be used like a namespace import.
// Modules flagged __esModule are returned untouched; anything else gets a fresh object that
// re-exposes every own property except "default" and carries the module itself as "default".
// An already-present `this.__importStar` is reused instead of defining a new one.
+ var __importStar = (this && this.__importStar) || (function () {
20
// ownKeys replaces itself on first call with Object.getOwnPropertyNames, or with a
// for-in/hasOwnProperty fallback when that function is missing, then delegates to the replacement.
+ var ownKeys = function(o) {
21
+ ownKeys = Object.getOwnPropertyNames || function (o) {
22
+ var ar = [];
23
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
24
+ return ar;
25
+ };
26
+ return ownKeys(o);
27
+ };
28
+ return function (mod) {
29
+ if (mod && mod.__esModule) return mod;
30
+ var result = {};
31
// "default" is skipped here because it is assigned separately just below.
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
32
+ __setModuleDefault(result, mod);
33
+ return result;
34
+ };
35
+ })();
3
36
  Object.defineProperty(exports, "__esModule", { value: true });
4
37
  const index_js_1 = require("@modelcontextprotocol/sdk/server/index.js");
5
38
  const stdio_js_1 = require("@modelcontextprotocol/sdk/server/stdio.js");
6
39
  const types_js_1 = require("@modelcontextprotocol/sdk/types.js");
40
+ const ws_1 = require("ws");
41
+ const fs = __importStar(require("fs"));
42
+ const path = __importStar(require("path"));
43
+ const os = __importStar(require("os"));
44
/**
 * Describe an instant both as an ISO 8601 UTC string and as a long-form
 * en-US calendar date rendered in the process's local time zone.
 *
 * @param {Date} [now=new Date()] - Instant to describe. Defaults to the
 *   current time, which is what every zero-argument caller gets. Passing an
 *   explicit Date lets a caller derive several values from one clock reading.
 * @returns {{isoDate: string, localDate: string}} `isoDate` is
 *   `now.toISOString()`; `localDate` looks like "Monday, January 15, 2024".
 * @throws {RangeError} If `now` is an invalid Date (raised by toISOString).
 */
function getCurrentDateInfo(now = new Date()) {
    const longDateFormat = {
        weekday: "long",
        year: "numeric",
        month: "long",
        day: "numeric",
    };
    return {
        isoDate: now.toISOString(),
        localDate: now.toLocaleDateString("en-US", longDateFormat),
    };
}
7
57
  // Detect OS
8
58
  const CURRENT_OS = process.platform;
9
59
  const IS_MACOS = CURRENT_OS === "darwin";
@@ -21,60 +71,66 @@ const SCREENPIPE_API = `http://localhost:${port}`;
21
71
  // Initialize server
22
72
  const server = new index_js_1.Server({
23
73
  name: "screenpipe",
24
- version: "0.3.1",
74
+ version: "0.4.0",
25
75
  }, {
26
76
  capabilities: {
27
77
  tools: {},
78
+ prompts: {},
79
+ resources: {},
28
80
  },
29
81
  });
30
82
  // Tool definitions
31
83
  const BASE_TOOLS = [
32
84
  {
33
85
  name: "search-content",
34
- description: "Search through screenpipe recorded content (OCR text, audio transcriptions, UI elements). " +
35
- "Use this to find specific content that has appeared on your screen or been spoken. " +
36
- "Results include timestamps, app context, and the content itself. " +
37
- "Set include_frames=true to get screenshot images for visual analysis (OCR results only).",
86
+ description: "Search screenpipe's recorded content: screen text (OCR), audio transcriptions, and UI elements. " +
87
+ "Returns timestamped results with app context. " +
88
+ "Call with no parameters to get recent activity. " +
89
+ "Use the 'screenpipe://context' resource for current time when building time-based queries.",
90
+ annotations: {
91
+ title: "Search Content",
92
+ readOnlyHint: true,
93
+ },
38
94
  inputSchema: {
39
95
  type: "object",
40
96
  properties: {
41
97
  q: {
42
98
  type: "string",
43
- description: "Search query to find in recorded content",
99
+ description: "Search query. Optional - omit to return all recent content.",
44
100
  },
45
101
  content_type: {
46
102
  type: "string",
47
103
  enum: ["all", "ocr", "audio", "ui"],
48
- description: "Type of content to search: 'ocr' for screen text, 'audio' for spoken words, 'ui' for UI elements, or 'all' for everything",
104
+ description: "Content type filter. Default: 'all'",
49
105
  default: "all",
50
106
  },
51
107
  limit: {
52
108
  type: "integer",
53
- description: "Maximum number of results to return",
109
+ description: "Max results. Default: 10",
54
110
  default: 10,
55
111
  },
56
112
  offset: {
57
113
  type: "integer",
58
- description: "Number of results to skip (for pagination)",
114
+ description: "Skip N results for pagination. Default: 0",
59
115
  default: 0,
60
116
  },
61
117
  start_time: {
62
118
  type: "string",
63
119
  format: "date-time",
64
- description: "Start time in ISO format UTC (e.g. 2024-01-01T00:00:00Z). Filter results from this time onward.",
120
+ description: "ISO 8601 UTC start time (e.g., 2024-01-15T10:00:00Z)",
65
121
  },
66
122
  end_time: {
67
123
  type: "string",
68
124
  format: "date-time",
69
- description: "End time in ISO format UTC (e.g. 2024-01-01T00:00:00Z). Filter results up to this time.",
125
+ description: "ISO 8601 UTC end time (e.g., 2024-01-15T18:00:00Z)",
70
126
  },
71
127
  app_name: {
72
128
  type: "string",
73
- description: "Filter by application name (e.g. 'Chrome', 'Safari', 'Terminal')",
129
+ description: "Filter by app (e.g., 'Google Chrome', 'Slack', 'zoom.us')",
74
130
  },
75
131
  window_name: {
76
132
  type: "string",
77
- description: "Filter by window name or title",
133
+ description: "Filter by window title",
78
134
  },
79
135
  min_length: {
80
136
  type: "integer",
@@ -86,9 +142,7 @@ const BASE_TOOLS = [
86
142
  },
87
143
  include_frames: {
88
144
  type: "boolean",
89
- description: "Include screenshot images in results for visual analysis. Only applies to OCR results. " +
90
- "When true, returns base64-encoded images that can be analyzed with vision capabilities. " +
91
- "Note: Images are limited to ~1MB each. Default: false",
145
+ description: "Include base64 screenshots (OCR only). Default: false",
92
146
  default: false,
93
147
  },
94
148
  },
@@ -98,6 +152,10 @@ const BASE_TOOLS = [
98
152
  name: "pixel-control",
99
153
  description: "Control mouse and keyboard at the pixel level. This is a cross-platform tool that works on all operating systems. " +
100
154
  "Use this to type text, press keys, move the mouse, and click buttons.",
155
+ annotations: {
156
+ title: "Pixel Control",
157
+ destructiveHint: true,
158
+ },
101
159
  inputSchema: {
102
160
  type: "object",
103
161
  properties: {
@@ -138,6 +196,40 @@ const BASE_TOOLS = [
138
196
  required: ["action_type", "data"],
139
197
  },
140
198
  },
199
+ {
200
+ name: "export-video",
201
+ description: "Export a video of screen recordings for a specific time range. " +
202
+ "Creates an MP4 video from the recorded frames between the start and end times.\n\n" +
203
+ "IMPORTANT: Use ISO 8601 UTC timestamps (e.g., 2024-01-15T10:00:00Z)\n\n" +
204
+ "EXAMPLES:\n" +
205
+ "- Last 30 minutes: Calculate timestamps from current time\n" +
206
+ "- Specific meeting: Use the meeting's start and end times in UTC",
207
+ annotations: {
208
+ title: "Export Video",
209
+ destructiveHint: true,
210
+ },
211
+ inputSchema: {
212
+ type: "object",
213
+ properties: {
214
+ start_time: {
215
+ type: "string",
216
+ format: "date-time",
217
+ description: "Start time in ISO 8601 format UTC. MUST include timezone (Z for UTC). Example: '2024-01-15T10:00:00Z'",
218
+ },
219
+ end_time: {
220
+ type: "string",
221
+ format: "date-time",
222
+ description: "End time in ISO 8601 format UTC. MUST include timezone (Z for UTC). Example: '2024-01-15T10:30:00Z'",
223
+ },
224
+ fps: {
225
+ type: "number",
226
+ description: "Frames per second for the output video. Lower values (0.5-1.0) create smaller files, higher values (5-10) create smoother playback. Default: 1.0",
227
+ default: 1.0,
228
+ },
229
+ },
230
+ required: ["start_time", "end_time"],
231
+ },
232
+ },
141
233
  ];
142
234
  const MACOS_TOOLS = [
143
235
  {
@@ -151,6 +243,10 @@ const MACOS_TOOLS = [
151
243
  "- Clickable items: 'AXButton', 'AXMenuItem', 'AXMenuBarItem', 'AXImage', 'AXStaticText'\n" +
152
244
  "- Web content may use: 'AXWebArea', 'AXLink', 'AXHeading', 'AXRadioButton'\n\n" +
153
245
  "Use MacOS Accessibility Inspector app to identify the exact roles in your target application.",
246
+ annotations: {
247
+ title: "Find Elements",
248
+ readOnlyHint: true,
249
+ },
154
250
  inputSchema: {
155
251
  type: "object",
156
252
  properties: {
@@ -192,6 +288,10 @@ const MACOS_TOOLS = [
192
288
  {
193
289
  name: "click-element",
194
290
  description: "Click an element in an application using its id (MacOS only)",
291
+ annotations: {
292
+ title: "Click Element",
293
+ destructiveHint: true,
294
+ },
195
295
  inputSchema: {
196
296
  type: "object",
197
297
  properties: {
@@ -224,6 +324,10 @@ const MACOS_TOOLS = [
224
324
  {
225
325
  name: "fill-element",
226
326
  description: "Type text into an element in an application (MacOS only)",
327
+ annotations: {
328
+ title: "Fill Element",
329
+ destructiveHint: true,
330
+ },
227
331
  inputSchema: {
228
332
  type: "object",
229
333
  properties: {
@@ -260,6 +364,10 @@ const MACOS_TOOLS = [
260
364
  {
261
365
  name: "scroll-element",
262
366
  description: "Scroll an element in a specific direction (MacOS only)",
367
+ annotations: {
368
+ title: "Scroll Element",
369
+ destructiveHint: true,
370
+ },
263
371
  inputSchema: {
264
372
  type: "object",
265
373
  properties: {
@@ -301,6 +409,10 @@ const MACOS_TOOLS = [
301
409
  {
302
410
  name: "open-application",
303
411
  description: "Open an application by name",
412
+ annotations: {
413
+ title: "Open Application",
414
+ destructiveHint: true,
415
+ },
304
416
  inputSchema: {
305
417
  type: "object",
306
418
  properties: {
@@ -315,6 +427,10 @@ const MACOS_TOOLS = [
315
427
  {
316
428
  name: "open-url",
317
429
  description: "Open a URL in a browser",
430
+ annotations: {
431
+ title: "Open URL",
432
+ destructiveHint: true,
433
+ },
318
434
  inputSchema: {
319
435
  type: "object",
320
436
  properties: {
@@ -339,6 +455,209 @@ server.setRequestHandler(types_js_1.ListToolsRequestSchema, async () => {
339
455
  }
340
456
  return { tools };
341
457
  });
458
// MCP Resources - provide dynamic context data.
// The catalogue and each entry are frozen: the array is handed out by
// reference, so freezing guarantees no consumer can alter what is advertised.
const RESOURCES = Object.freeze([
    {
        uri: "screenpipe://context",
        name: "Current Context",
        description: "Current date/time and pre-computed timestamps for common time ranges",
        mimeType: "application/json",
    },
    {
        uri: "screenpipe://guide",
        name: "Usage Guide",
        description: "How to use screenpipe search effectively",
        mimeType: "text/markdown",
    },
].map((resource) => Object.freeze(resource)));
473
// Handler for ListResourcesRequestSchema requests: replies with the
// RESOURCES catalogue under the `resources` key.
server.setRequestHandler(types_js_1.ListResourcesRequestSchema, async () => ({
    resources: RESOURCES,
}));
477
// Read resource handler.
// Serves "screenpipe://context" (JSON with the current time and pre-computed
// timestamps) and "screenpipe://guide" (markdown usage notes). Any other URI
// is rejected by throwing an Error.
server.setRequestHandler(types_js_1.ReadResourceRequestSchema, async (request) => {
    const { uri } = request.params;
    const dateInfo = getCurrentDateInfo();
    // Anchor every derived timestamp to the instant captured in dateInfo.
    // Reading the clock again for each field would let the values disagree
    // (e.g. today_start landing on a different day than current_time when
    // the request straddles midnight UTC).
    const now = Date.parse(dateInfo.isoDate);
    const HOUR_MS = 60 * 60 * 1000;
    const DAY_MS = 24 * HOUR_MS;
    const isoAt = (ms) => new Date(ms).toISOString();
    // Midnight of the UTC calendar day containing `ms`. This is a UTC day
    // boundary, not local midnight.
    const utcDayStart = (ms) => `${isoAt(ms).split("T")[0]}T00:00:00Z`;
    switch (uri) {
        case "screenpipe://context":
            return {
                contents: [
                    {
                        uri,
                        mimeType: "application/json",
                        text: JSON.stringify({
                            current_time: dateInfo.isoDate,
                            current_date_local: dateInfo.localDate,
                            timezone: Intl.DateTimeFormat().resolvedOptions().timeZone,
                            timestamps: {
                                now: dateInfo.isoDate,
                                one_hour_ago: isoAt(now - HOUR_MS),
                                three_hours_ago: isoAt(now - 3 * HOUR_MS),
                                today_start: utcDayStart(now),
                                yesterday_start: utcDayStart(now - DAY_MS),
                                one_week_ago: isoAt(now - 7 * DAY_MS),
                            },
                            common_apps: ["Google Chrome", "Safari", "Slack", "zoom.us", "Microsoft Teams", "Code", "Terminal"],
                        }, null, 2),
                    },
                ],
            };
        case "screenpipe://guide":
            return {
                contents: [
                    {
                        uri,
                        mimeType: "text/markdown",
                        text: `# Screenpipe Search Guide

## Quick Start
- **Get recent activity**: Call search-content with no parameters
- **Search text**: \`{"q": "search term", "content_type": "ocr"}\`
- **Time filter**: Use start_time/end_time with ISO 8601 UTC timestamps

## Content Types
- \`ocr\`: Screen text (what you see)
- \`audio\`: Transcribed speech
- \`ui\`: UI element interactions
- \`all\`: Everything (default)

## Key Parameters
| Parameter | Description | Default |
|-----------|-------------|---------|
| q | Search query | (none - returns all) |
| content_type | ocr/audio/ui/all | all |
| limit | Max results | 10 |
| start_time | ISO 8601 UTC | (no filter) |
| end_time | ISO 8601 UTC | (no filter) |
| app_name | Filter by app | (no filter) |
| include_frames | Include screenshots | false |

## Tips
1. Read screenpipe://context first to get current timestamps
2. Omit \`q\` to get all content (useful for "what was I doing?")
3. Use \`limit: 50-100\` for comprehensive searches
4. Combine app_name + time filters for focused results`,
                    },
                ],
            };
        default:
            throw new Error(`Unknown resource: ${uri}`);
    }
});
548
// MCP Prompts - static interaction templates.
// The list, each prompt and each argument descriptor are frozen: the array is
// handed out by reference, so freezing keeps the advertised templates fixed.
const PROMPTS = Object.freeze([
    {
        name: "search-recent",
        description: "Search recent screen activity",
        arguments: [
            { name: "query", description: "Optional search term", required: false },
            { name: "hours", description: "Hours to look back (default: 1)", required: false },
        ],
    },
    {
        name: "find-in-app",
        description: "Find content from a specific application",
        arguments: [
            { name: "app", description: "App name (e.g., Chrome, Slack)", required: true },
            { name: "query", description: "Optional search term", required: false },
        ],
    },
    {
        name: "meeting-notes",
        description: "Get audio transcriptions from meetings",
        arguments: [
            { name: "hours", description: "Hours to look back (default: 3)", required: false },
        ],
    },
].map((prompt) => Object.freeze({
    ...prompt,
    arguments: Object.freeze(prompt.arguments.map((argument) => Object.freeze(argument))),
})));
574
// Handler for ListPromptsRequestSchema requests: replies with the PROMPTS
// templates under the `prompts` key.
server.setRequestHandler(types_js_1.ListPromptsRequestSchema, async () => ({
    prompts: PROMPTS,
}));
578
// Get prompt handler.
// Expands one of the named prompt templates ("search-recent", "find-in-app",
// "meeting-notes") into a single user message telling the model which
// search-content arguments to use. Unknown names are rejected with an Error.
server.setRequestHandler(types_js_1.GetPromptRequestSchema, async (request) => {
    const { name, arguments: promptArgs } = request.params;
    const dateInfo = getCurrentDateInfo();
    // Use the same instant for "Current time" and for the computed start_time.
    const now = Date.parse(dateInfo.isoDate);
    const HOUR_MS = 60 * 60 * 1000;
    // Turn the caller-supplied `hours` argument into a usable look-back window.
    // A missing, non-numeric, zero or negative value falls back to
    // `fallbackHours`, as does a value so large that the start time would fall
    // outside the representable Date range. Without this guard
    // `new Date(NaN).toISOString()` throws a RangeError and the prompt
    // request fails.
    const resolveLookback = (raw, fallbackHours) => {
        const parsed = Number.parseInt(raw ?? "", 10);
        const candidate = Number.isFinite(parsed) && parsed > 0 ? parsed : fallbackHours;
        const inRange = !Number.isNaN(new Date(now - candidate * HOUR_MS).getTime());
        const hours = inRange ? candidate : fallbackHours;
        return { hours, startTime: new Date(now - hours * HOUR_MS).toISOString() };
    };
    switch (name) {
        case "search-recent": {
            const query = promptArgs?.query || "";
            const { hours, startTime } = resolveLookback(promptArgs?.hours, 1);
            return {
                description: `Search recent activity (last ${hours} hour${hours > 1 ? "s" : ""})`,
                messages: [
                    {
                        role: "user",
                        content: {
                            type: "text",
                            text: `Search screenpipe for recent activity.

Current time: ${dateInfo.isoDate}

Use search-content with:
${query ? `- q: "${query}"` : "- No query filter (get all content)"}
- start_time: "${startTime}"
- limit: 50`,
                        },
                    },
                ],
            };
        }
        case "find-in-app": {
            const app = promptArgs?.app || "Google Chrome";
            const query = promptArgs?.query || "";
            return {
                description: `Find content from ${app}`,
                messages: [
                    {
                        role: "user",
                        content: {
                            type: "text",
                            text: `Search screenpipe for content from ${app}.

Current time: ${dateInfo.isoDate}

Use search-content with:
- app_name: "${app}"
${query ? `- q: "${query}"` : "- No query filter"}
- content_type: "ocr"
- limit: 50`,
                        },
                    },
                ],
            };
        }
        case "meeting-notes": {
            const { hours, startTime } = resolveLookback(promptArgs?.hours, 3);
            return {
                description: `Get meeting transcriptions (last ${hours} hours)`,
                messages: [
                    {
                        role: "user",
                        content: {
                            type: "text",
                            text: `Get audio transcriptions from recent meetings.

Current time: ${dateInfo.isoDate}

Use search-content with:
- content_type: "audio"
- start_time: "${startTime}"
- limit: 100

Common meeting apps: zoom.us, Microsoft Teams, Google Meet, Slack`,
                        },
                    },
                ],
            };
        }
        default:
            throw new Error(`Unknown prompt: ${name}`);
    }
});
342
661
  // Helper function to make HTTP requests
343
662
  async function fetchAPI(endpoint, options = {}) {
344
663
  const url = `${SCREENPIPE_API}${endpoint}`;
@@ -391,9 +710,15 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
391
710
  }
392
711
  const data = await response.json();
393
712
  const results = data.data || [];
713
+ const pagination = data.pagination || {};
394
714
  if (results.length === 0) {
395
715
  return {
396
- content: [{ type: "text", text: "No results found" }],
716
+ content: [
717
+ {
718
+ type: "text",
719
+ text: "No results found. Try: broader search terms, different content_type, or wider time range.",
720
+ },
721
+ ],
397
722
  };
398
723
  }
399
724
  // Build content array with text and optional images
@@ -405,57 +730,38 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
405
730
  if (!content)
406
731
  continue;
407
732
  if (result.type === "OCR") {
408
- const textResult = `OCR Text: ${content.text || "N/A"}\n` +
409
- `App: ${content.app_name || "N/A"}\n` +
410
- `Window: ${content.window_name || "N/A"}\n` +
411
- `Time: ${content.timestamp || "N/A"}\n` +
412
- `Frame ID: ${content.frame_id || "N/A"}\n` +
413
- "---";
414
- formattedResults.push(textResult);
415
- // Collect frame if available and requested
733
+ formattedResults.push(`[OCR] ${content.app_name || "?"} | ${content.window_name || "?"}\n` +
734
+ `${content.timestamp || ""}\n` +
735
+ `${content.text || ""}`);
416
736
  if (includeFrames && content.frame) {
417
737
  images.push({
418
738
  data: content.frame,
419
- context: `Screenshot from ${content.app_name || "unknown"} - ${content.window_name || "unknown"} at ${content.timestamp || "unknown"}`,
739
+ context: `${content.app_name} at ${content.timestamp}`,
420
740
  });
421
741
  }
422
742
  }
423
743
  else if (result.type === "Audio") {
424
- formattedResults.push(`Audio Transcription: ${content.transcription || "N/A"}\n` +
425
- `Device: ${content.device_name || "N/A"}\n` +
426
- `Time: ${content.timestamp || "N/A"}\n` +
427
- "---");
744
+ formattedResults.push(`[Audio] ${content.device_name || "?"}\n` +
745
+ `${content.timestamp || ""}\n` +
746
+ `${content.transcription || ""}`);
428
747
  }
429
748
  else if (result.type === "UI") {
430
- formattedResults.push(`UI Text: ${content.text || "N/A"}\n` +
431
- `App: ${content.app_name || "N/A"}\n` +
432
- `Window: ${content.window_name || "N/A"}\n` +
433
- `Time: ${content.timestamp || "N/A"}\n` +
434
- "---");
749
+ formattedResults.push(`[UI] ${content.app_name || "?"} | ${content.window_name || "?"}\n` +
750
+ `${content.timestamp || ""}\n` +
751
+ `${content.text || ""}`);
435
752
  }
436
753
  }
437
- // Add text results
754
+ // Header with pagination info
755
+ const header = `Results: ${results.length}/${pagination.total || "?"}` +
756
+ (pagination.total > results.length ? ` (use offset=${(pagination.offset || 0) + results.length} for more)` : "");
438
757
  contentItems.push({
439
758
  type: "text",
440
- text: "Search Results:\n\n" +
441
- formattedResults.join("\n") +
442
- (images.length > 0
443
- ? `\n\n${images.length} screenshot(s) included below for visual analysis:`
444
- : ""),
759
+ text: header + "\n\n" + formattedResults.join("\n---\n"),
445
760
  });
446
- // Add images if requested and available
761
+ // Add images if requested
447
762
  for (const img of images) {
448
- // Add context for the image
449
- contentItems.push({
450
- type: "text",
451
- text: `\n📷 ${img.context}`,
452
- });
453
- // Add the image itself
454
- contentItems.push({
455
- type: "image",
456
- data: img.data,
457
- mimeType: "image/png",
458
- });
763
+ contentItems.push({ type: "text", text: `\n📷 ${img.context}` });
764
+ contentItems.push({ type: "image", data: img.data, mimeType: "image/png" });
459
765
  }
460
766
  return { content: contentItems };
461
767
  }
@@ -500,6 +806,151 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
500
806
  content: [{ type: "text", text: resultText }],
501
807
  };
502
808
  }
809
+ case "export-video": {
810
+ const startTime = args.start_time;
811
+ const endTime = args.end_time;
812
+ const fps = args.fps || 1.0;
813
+ // Validate time inputs
814
+ if (!startTime || !endTime) {
815
+ return {
816
+ content: [
817
+ {
818
+ type: "text",
819
+ text: "Error: Both start_time and end_time are required in ISO 8601 format (e.g., '2024-01-15T10:00:00Z')",
820
+ },
821
+ ],
822
+ };
823
+ }
824
+ // Step 1: Query the search API to get frame IDs for the time range
825
+ const searchParams = new URLSearchParams({
826
+ content_type: "ocr",
827
+ start_time: startTime,
828
+ end_time: endTime,
829
+ limit: "10000", // Get all frames in range
830
+ });
831
+ const searchResponse = await fetchAPI(`/search?${searchParams.toString()}`);
832
+ if (!searchResponse.ok) {
833
+ throw new Error(`Failed to search for frames: HTTP ${searchResponse.status}`);
834
+ }
835
+ const searchData = await searchResponse.json();
836
+ const results = searchData.data || [];
837
+ if (results.length === 0) {
838
+ return {
839
+ content: [
840
+ {
841
+ type: "text",
842
+ text: `No screen recordings found between ${startTime} and ${endTime}. Make sure screenpipe was recording during this time period.`,
843
+ },
844
+ ],
845
+ };
846
+ }
847
+ // Extract unique frame IDs from OCR results
848
+ const frameIds = [];
849
+ const seenIds = new Set();
850
+ for (const result of results) {
851
+ if (result.type === "OCR" && result.content?.frame_id) {
852
+ const frameId = result.content.frame_id;
853
+ if (!seenIds.has(frameId)) {
854
+ seenIds.add(frameId);
855
+ frameIds.push(frameId);
856
+ }
857
+ }
858
+ }
859
+ if (frameIds.length === 0) {
860
+ return {
861
+ content: [
862
+ {
863
+ type: "text",
864
+ text: `Found ${results.length} results but no valid frame IDs. The recordings may be audio-only.`,
865
+ },
866
+ ],
867
+ };
868
+ }
869
+ // Sort frame IDs
870
+ frameIds.sort((a, b) => a - b);
871
+ // Step 2: Connect to WebSocket and export video
872
+ const wsUrl = `ws://localhost:${port}/frames/export?frame_ids=${frameIds.join(",")}&fps=${fps}`;
873
+ const exportResult = await new Promise((resolve) => {
874
+ const ws = new ws_1.WebSocket(wsUrl);
875
+ let resolved = false;
876
+ const timeout = setTimeout(() => {
877
+ if (!resolved) {
878
+ resolved = true;
879
+ ws.close();
880
+ resolve({ success: false, error: "Export timed out after 5 minutes" });
881
+ }
882
+ }, 5 * 60 * 1000); // 5 minute timeout
883
+ ws.on("error", (error) => {
884
+ if (!resolved) {
885
+ resolved = true;
886
+ clearTimeout(timeout);
887
+ resolve({ success: false, error: `WebSocket error: ${error.message}` });
888
+ }
889
+ });
890
+ ws.on("close", () => {
891
+ if (!resolved) {
892
+ resolved = true;
893
+ clearTimeout(timeout);
894
+ resolve({ success: false, error: "Connection closed unexpectedly" });
895
+ }
896
+ });
897
+ ws.on("message", (data) => {
898
+ try {
899
+ const message = JSON.parse(data.toString());
900
+ if (message.status === "completed" && message.video_data) {
901
+ // Save video to temp file
902
+ const tempDir = os.tmpdir();
903
+ const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
904
+ const filename = `screenpipe_export_${timestamp}.mp4`;
905
+ const filePath = path.join(tempDir, filename);
906
+ fs.writeFileSync(filePath, Buffer.from(message.video_data));
907
+ resolved = true;
908
+ clearTimeout(timeout);
909
+ ws.close();
910
+ resolve({
911
+ success: true,
912
+ filePath,
913
+ frameCount: frameIds.length,
914
+ });
915
+ }
916
+ else if (message.status === "error") {
917
+ resolved = true;
918
+ clearTimeout(timeout);
919
+ ws.close();
920
+ resolve({ success: false, error: message.error || "Export failed" });
921
+ }
922
+ // Ignore "extracting" and "encoding" status updates
923
+ }
924
+ catch (parseError) {
925
+ // Ignore parse errors for progress messages
926
+ }
927
+ });
928
+ });
929
+ if (exportResult.success && exportResult.filePath) {
930
+ return {
931
+ content: [
932
+ {
933
+ type: "text",
934
+ text: `Successfully exported video!\n\n` +
935
+ `File: ${exportResult.filePath}\n` +
936
+ `Frames: ${exportResult.frameCount}\n` +
937
+ `Time range: ${startTime} to ${endTime}\n` +
938
+ `FPS: ${fps}`,
939
+ },
940
+ ],
941
+ };
942
+ }
943
+ else {
944
+ return {
945
+ content: [
946
+ {
947
+ type: "text",
948
+ text: `Failed to export video: ${exportResult.error}`,
949
+ },
950
+ ],
951
+ };
952
+ }
953
+ }
503
954
  case "click-element": {
504
955
  const selector = {
505
956
  app_name: args.app,