screenpipe-mcp 0.3.1 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -5,8 +5,30 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"
5
5
  import {
6
6
  CallToolRequestSchema,
7
7
  ListToolsRequestSchema,
8
+ ListPromptsRequestSchema,
9
+ GetPromptRequestSchema,
10
+ ListResourcesRequestSchema,
11
+ ReadResourceRequestSchema,
8
12
  Tool,
9
13
  } from "@modelcontextprotocol/sdk/types.js";
14
+ import { WebSocket } from "ws";
15
+ import * as fs from "fs";
16
+ import * as path from "path";
17
+ import * as os from "os";
18
+
19
/**
 * Captures the current instant once and renders it two ways.
 *
 * @returns `isoDate` — the instant as an ISO 8601 UTC string
 *          (`Date.prototype.toISOString`); `localDate` — the same instant as a
 *          long-form en-US calendar date (weekday, month name, day, year).
 */
function getCurrentDateInfo(): { isoDate: string; localDate: string } {
  const timestamp = new Date();

  const longDateFormat: Intl.DateTimeFormatOptions = {
    weekday: "long",
    year: "numeric",
    month: "long",
    day: "numeric",
  };

  const isoDate = timestamp.toISOString();
  const localDate = timestamp.toLocaleDateString("en-US", longDateFormat);

  return { isoDate, localDate };
}
10
32
 
11
33
  // Detect OS
12
34
  const CURRENT_OS = process.platform;
@@ -29,11 +51,13 @@ const SCREENPIPE_API = `http://localhost:${port}`;
29
51
// MCP server instance. The first argument is the identity reported to clients;
// the second declares which capability groups (tools, prompts, resources)
// this server registers handlers for.
const server = new Server(
  {
    name: "screenpipe",
    // Kept in step with the published package version (0.4.1); the previous
    // value lagged one patch release behind.
    version: "0.4.1",
  },
  {
    capabilities: {
      tools: {},
      prompts: {},
      resources: {},
    },
  }
);
@@ -43,54 +67,54 @@ const BASE_TOOLS: Tool[] = [
43
67
  {
44
68
  name: "search-content",
45
69
  description:
46
- "Search through screenpipe recorded content (OCR text, audio transcriptions, UI elements). " +
47
- "Use this to find specific content that has appeared on your screen or been spoken. " +
48
- "Results include timestamps, app context, and the content itself. " +
49
- "Set include_frames=true to get screenshot images for visual analysis (OCR results only).",
70
+ "Search screenpipe's recorded content: screen text (OCR), audio transcriptions, and UI elements. " +
71
+ "Returns timestamped results with app context. " +
72
+ "Call with no parameters to get recent activity. " +
73
+ "Use the 'screenpipe://context' resource for current time when building time-based queries.",
74
+ annotations: {
75
+ title: "Search Content",
76
+ readOnlyHint: true,
77
+ },
50
78
  inputSchema: {
51
79
  type: "object",
52
80
  properties: {
53
81
  q: {
54
82
  type: "string",
55
- description: "Search query to find in recorded content",
83
+ description: "Search query. Optional - omit to return all recent content.",
56
84
  },
57
85
  content_type: {
58
86
  type: "string",
59
87
  enum: ["all", "ocr", "audio", "ui"],
60
- description:
61
- "Type of content to search: 'ocr' for screen text, 'audio' for spoken words, 'ui' for UI elements, or 'all' for everything",
88
+ description: "Content type filter. Default: 'all'",
62
89
  default: "all",
63
90
  },
64
91
  limit: {
65
92
  type: "integer",
66
- description: "Maximum number of results to return",
93
+ description: "Max results. Default: 10",
67
94
  default: 10,
68
95
  },
69
96
  offset: {
70
97
  type: "integer",
71
- description: "Number of results to skip (for pagination)",
98
+ description: "Skip N results for pagination. Default: 0",
72
99
  default: 0,
73
100
  },
74
101
  start_time: {
75
102
  type: "string",
76
103
  format: "date-time",
77
- description:
78
- "Start time in ISO format UTC (e.g. 2024-01-01T00:00:00Z). Filter results from this time onward.",
104
+ description: "ISO 8601 UTC start time (e.g., 2024-01-15T10:00:00Z)",
79
105
  },
80
106
  end_time: {
81
107
  type: "string",
82
108
  format: "date-time",
83
- description:
84
- "End time in ISO format UTC (e.g. 2024-01-01T00:00:00Z). Filter results up to this time.",
109
+ description: "ISO 8601 UTC end time (e.g., 2024-01-15T18:00:00Z)",
85
110
  },
86
111
  app_name: {
87
112
  type: "string",
88
- description:
89
- "Filter by application name (e.g. 'Chrome', 'Safari', 'Terminal')",
113
+ description: "Filter by app (e.g., 'Google Chrome', 'Slack', 'zoom.us')",
90
114
  },
91
115
  window_name: {
92
116
  type: "string",
93
- description: "Filter by window name or title",
117
+ description: "Filter by window title",
94
118
  },
95
119
  min_length: {
96
120
  type: "integer",
@@ -102,10 +126,7 @@ const BASE_TOOLS: Tool[] = [
102
126
  },
103
127
  include_frames: {
104
128
  type: "boolean",
105
- description:
106
- "Include screenshot images in results for visual analysis. Only applies to OCR results. " +
107
- "When true, returns base64-encoded images that can be analyzed with vision capabilities. " +
108
- "Note: Images are limited to ~1MB each. Default: false",
129
+ description: "Include base64 screenshots (OCR only). Default: false",
109
130
  default: false,
110
131
  },
111
132
  },
@@ -116,6 +137,10 @@ const BASE_TOOLS: Tool[] = [
116
137
  description:
117
138
  "Control mouse and keyboard at the pixel level. This is a cross-platform tool that works on all operating systems. " +
118
139
  "Use this to type text, press keys, move the mouse, and click buttons.",
140
+ annotations: {
141
+ title: "Pixel Control",
142
+ destructiveHint: true,
143
+ },
119
144
  inputSchema: {
120
145
  type: "object",
121
146
  properties: {
@@ -157,6 +182,44 @@ const BASE_TOOLS: Tool[] = [
157
182
  required: ["action_type", "data"],
158
183
  },
159
184
  },
185
+ {
186
+ name: "export-video",
187
+ description:
188
+ "Export a video of screen recordings for a specific time range. " +
189
+ "Creates an MP4 video from the recorded frames between the start and end times.\n\n" +
190
+ "IMPORTANT: Use ISO 8601 UTC timestamps (e.g., 2024-01-15T10:00:00Z)\n\n" +
191
+ "EXAMPLES:\n" +
192
+ "- Last 30 minutes: Calculate timestamps from current time\n" +
193
+ "- Specific meeting: Use the meeting's start and end times in UTC",
194
+ annotations: {
195
+ title: "Export Video",
196
+ destructiveHint: true,
197
+ },
198
+ inputSchema: {
199
+ type: "object",
200
+ properties: {
201
+ start_time: {
202
+ type: "string",
203
+ format: "date-time",
204
+ description:
205
+ "Start time in ISO 8601 format UTC. MUST include timezone (Z for UTC). Example: '2024-01-15T10:00:00Z'",
206
+ },
207
+ end_time: {
208
+ type: "string",
209
+ format: "date-time",
210
+ description:
211
+ "End time in ISO 8601 format UTC. MUST include timezone (Z for UTC). Example: '2024-01-15T10:30:00Z'",
212
+ },
213
+ fps: {
214
+ type: "number",
215
+ description:
216
+ "Frames per second for the output video. Lower values (0.5-1.0) create smaller files, higher values (5-10) create smoother playback. Default: 1.0",
217
+ default: 1.0,
218
+ },
219
+ },
220
+ required: ["start_time", "end_time"],
221
+ },
222
+ },
160
223
  ];
161
224
 
162
225
  const MACOS_TOOLS: Tool[] = [
@@ -172,6 +235,10 @@ const MACOS_TOOLS: Tool[] = [
172
235
  "- Clickable items: 'AXButton', 'AXMenuItem', 'AXMenuBarItem', 'AXImage', 'AXStaticText'\n" +
173
236
  "- Web content may use: 'AXWebArea', 'AXLink', 'AXHeading', 'AXRadioButton'\n\n" +
174
237
  "Use MacOS Accessibility Inspector app to identify the exact roles in your target application.",
238
+ annotations: {
239
+ title: "Find Elements",
240
+ readOnlyHint: true,
241
+ },
175
242
  inputSchema: {
176
243
  type: "object",
177
244
  properties: {
@@ -216,6 +283,10 @@ const MACOS_TOOLS: Tool[] = [
216
283
  name: "click-element",
217
284
  description:
218
285
  "Click an element in an application using its id (MacOS only)",
286
+ annotations: {
287
+ title: "Click Element",
288
+ destructiveHint: true,
289
+ },
219
290
  inputSchema: {
220
291
  type: "object",
221
292
  properties: {
@@ -248,6 +319,10 @@ const MACOS_TOOLS: Tool[] = [
248
319
  {
249
320
  name: "fill-element",
250
321
  description: "Type text into an element in an application (MacOS only)",
322
+ annotations: {
323
+ title: "Fill Element",
324
+ destructiveHint: true,
325
+ },
251
326
  inputSchema: {
252
327
  type: "object",
253
328
  properties: {
@@ -284,6 +359,10 @@ const MACOS_TOOLS: Tool[] = [
284
359
  {
285
360
  name: "scroll-element",
286
361
  description: "Scroll an element in a specific direction (MacOS only)",
362
+ annotations: {
363
+ title: "Scroll Element",
364
+ destructiveHint: true,
365
+ },
287
366
  inputSchema: {
288
367
  type: "object",
289
368
  properties: {
@@ -325,6 +404,10 @@ const MACOS_TOOLS: Tool[] = [
325
404
  {
326
405
  name: "open-application",
327
406
  description: "Open an application by name",
407
+ annotations: {
408
+ title: "Open Application",
409
+ destructiveHint: true,
410
+ },
328
411
  inputSchema: {
329
412
  type: "object",
330
413
  properties: {
@@ -339,6 +422,10 @@ const MACOS_TOOLS: Tool[] = [
339
422
  {
340
423
  name: "open-url",
341
424
  description: "Open a URL in a browser",
425
+ annotations: {
426
+ title: "Open URL",
427
+ destructiveHint: true,
428
+ },
342
429
  inputSchema: {
343
430
  type: "object",
344
431
  properties: {
@@ -365,6 +452,225 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
365
452
  return { tools };
366
453
  });
367
454
 
455
/**
 * Catalogue of MCP resources advertised by this server.
 *
 * Each entry is returned verbatim by the list-resources handler; the `uri`
 * values are the ones the read-resource handler switches on.
 */
const RESOURCES: Array<{
  uri: string;
  name: string;
  description: string;
  mimeType: string;
}> = [
  // JSON document with the current time and ready-made range boundaries.
  {
    uri: "screenpipe://context",
    name: "Current Context",
    description: "Current date/time and pre-computed timestamps for common time ranges",
    mimeType: "application/json",
  },
  // Markdown cheat-sheet for the search-content tool.
  {
    uri: "screenpipe://guide",
    name: "Usage Guide",
    description: "How to use screenpipe search effectively",
    mimeType: "text/markdown",
  },
];
470
+
471
// resources/list: reply with the static RESOURCES catalogue.
server.setRequestHandler(ListResourcesRequestSchema, async () => ({
  resources: RESOURCES,
}));
475
+
476
// Markdown body served for the screenpipe://guide resource.
const SEARCH_GUIDE_MARKDOWN = `# Screenpipe Search Guide

## Quick Start
- **Get recent activity**: Call search-content with no parameters
- **Search text**: \`{"q": "search term", "content_type": "ocr"}\`
- **Time filter**: Use start_time/end_time with ISO 8601 UTC timestamps

## Content Types
- \`ocr\`: Screen text (what you see)
- \`audio\`: Transcribed speech
- \`ui\`: UI element interactions
- \`all\`: Everything (default)

## Key Parameters
| Parameter | Description | Default |
|-----------|-------------|---------|
| q | Search query | (none - returns all) |
| content_type | ocr/audio/ui/all | all |
| limit | Max results | 10 |
| start_time | ISO 8601 UTC | (no filter) |
| end_time | ISO 8601 UTC | (no filter) |
| app_name | Filter by app | (no filter) |
| include_frames | Include screenshots | false |

## Tips
1. Read screenpipe://context first to get current timestamps
2. Omit \`q\` to get all content (useful for "what was I doing?")
3. Use \`limit: 50-100\` for comprehensive searches
4. Combine app_name + time filters for focused results`;

/**
 * Read resource handler.
 *
 * - `screenpipe://context` returns a JSON document with the current time, the
 *   local date, the resolved time zone, pre-computed range boundaries, and a
 *   list of common app names.
 * - `screenpipe://guide` returns the static markdown usage guide.
 *
 * @throws Error when the requested URI is not one of the above.
 */
server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
  const { uri } = request.params;

  switch (uri) {
    case "screenpipe://context": {
      // Take one clock reading and derive every value from it so the
      // timestamps in a single response are mutually consistent.
      const dateInfo = getCurrentDateInfo();
      const now = Date.parse(dateInfo.isoDate);
      const HOUR_MS = 60 * 60 * 1000;
      const DAY_MS = 24 * HOUR_MS;

      // "Today" and "yesterday" start at midnight in the machine's local time
      // zone (the same zone reported in `timezone`), then get expressed in
      // UTC. Cutting the UTC date string instead would shift the day boundary
      // by the zone offset.
      const todayStart = new Date(now);
      todayStart.setHours(0, 0, 0, 0);
      const yesterdayStart = new Date(todayStart);
      yesterdayStart.setDate(todayStart.getDate() - 1);

      return {
        contents: [
          {
            uri,
            mimeType: "application/json",
            text: JSON.stringify(
              {
                current_time: dateInfo.isoDate,
                current_date_local: dateInfo.localDate,
                timezone: Intl.DateTimeFormat().resolvedOptions().timeZone,
                timestamps: {
                  now: dateInfo.isoDate,
                  one_hour_ago: new Date(now - HOUR_MS).toISOString(),
                  three_hours_ago: new Date(now - 3 * HOUR_MS).toISOString(),
                  today_start: todayStart.toISOString(),
                  yesterday_start: yesterdayStart.toISOString(),
                  one_week_ago: new Date(now - 7 * DAY_MS).toISOString(),
                },
                common_apps: ["Google Chrome", "Safari", "Slack", "zoom.us", "Microsoft Teams", "Code", "Terminal"],
              },
              null,
              2
            ),
          },
        ],
      };
    }

    case "screenpipe://guide":
      return {
        contents: [
          {
            uri,
            mimeType: "text/markdown",
            text: SEARCH_GUIDE_MARKDOWN,
          },
        ],
      };

    default:
      throw new Error(`Unknown resource: ${uri}`);
  }
});
550
+
551
/**
 * Catalogue of MCP prompt templates advertised by this server.
 *
 * Returned verbatim by the list-prompts handler; the `name` values are the
 * ones the get-prompt handler switches on.
 */
const PROMPTS: Array<{
  name: string;
  description: string;
  arguments: Array<{ name: string; description: string; required: boolean }>;
}> = [
  // Look back over a recent time window, optionally filtered by a term.
  {
    name: "search-recent",
    description: "Search recent screen activity",
    arguments: [
      { name: "query", description: "Optional search term", required: false },
      { name: "hours", description: "Hours to look back (default: 1)", required: false },
    ],
  },
  // Restrict the search to one application.
  {
    name: "find-in-app",
    description: "Find content from a specific application",
    arguments: [
      { name: "app", description: "App name (e.g., Chrome, Slack)", required: true },
      { name: "query", description: "Optional search term", required: false },
    ],
  },
  // Audio transcriptions over a recent time window.
  {
    name: "meeting-notes",
    description: "Get audio transcriptions from meetings",
    arguments: [
      { name: "hours", description: "Hours to look back (default: 3)", required: false },
    ],
  },
];
577
+
578
// prompts/list: reply with the static PROMPTS catalogue.
server.setRequestHandler(ListPromptsRequestSchema, async () => ({
  prompts: PROMPTS,
}));
582
+
583
// Largest look-back window a prompt will accept (roughly ten years). The cap
// keeps `now - hours` inside the range a Date can represent, so
// toISOString() cannot throw on an absurdly large argument.
const MAX_LOOKBACK_HOURS = 24 * 366 * 10;

/**
 * Parses the `hours` prompt argument.
 *
 * @param raw           The argument as sent by the client (prompt arguments are strings).
 * @param fallbackHours Value used when `raw` is missing, empty, not a number,
 *                      zero, or negative.
 * @returns A whole number of hours in the range 1..MAX_LOOKBACK_HOURS.
 */
function resolveLookbackHours(raw: string | undefined, fallbackHours: number): number {
  const parsed = Number.parseInt(raw ?? "", 10);
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return fallbackHours;
  }
  return Math.min(parsed, MAX_LOOKBACK_HOURS);
}

/**
 * Get prompt handler.
 *
 * Expands one of the templates listed in PROMPTS into a single user message
 * that tells the model which search-content parameters to use.
 *
 * @throws Error when the prompt name is not recognised.
 */
server.setRequestHandler(GetPromptRequestSchema, async (request) => {
  const { name, arguments: promptArgs } = request.params;
  const dateInfo = getCurrentDateInfo();
  const now = Date.now();

  switch (name) {
    case "search-recent": {
      const query = promptArgs?.query || "";
      // Invalid `hours` values fall back to the documented default of 1
      // instead of producing an Invalid Date.
      const hours = resolveLookbackHours(promptArgs?.hours, 1);
      const startTime = new Date(now - hours * 60 * 60 * 1000).toISOString();

      return {
        description: `Search recent activity (last ${hours} hour${hours > 1 ? "s" : ""})`,
        messages: [
          {
            role: "user" as const,
            content: {
              type: "text" as const,
              text: `Search screenpipe for recent activity.

Current time: ${dateInfo.isoDate}

Use search-content with:
${query ? `- q: "${query}"` : "- No query filter (get all content)"}
- start_time: "${startTime}"
- limit: 50`,
            },
          },
        ],
      };
    }

    case "find-in-app": {
      const app = promptArgs?.app || "Google Chrome";
      const query = promptArgs?.query || "";

      return {
        description: `Find content from ${app}`,
        messages: [
          {
            role: "user" as const,
            content: {
              type: "text" as const,
              text: `Search screenpipe for content from ${app}.

Current time: ${dateInfo.isoDate}

Use search-content with:
- app_name: "${app}"
${query ? `- q: "${query}"` : "- No query filter"}
- content_type: "ocr"
- limit: 50`,
            },
          },
        ],
      };
    }

    case "meeting-notes": {
      // Invalid `hours` values fall back to the documented default of 3.
      const hours = resolveLookbackHours(promptArgs?.hours, 3);
      const startTime = new Date(now - hours * 60 * 60 * 1000).toISOString();

      return {
        description: `Get meeting transcriptions (last ${hours} hours)`,
        messages: [
          {
            role: "user" as const,
            content: {
              type: "text" as const,
              text: `Get audio transcriptions from recent meetings.

Current time: ${dateInfo.isoDate}

Use search-content with:
- content_type: "audio"
- start_time: "${startTime}"
- limit: 100

Common meeting apps: zoom.us, Microsoft Teams, Google Meet, Slack`,
            },
          },
        ],
      };
    }

    default:
      throw new Error(`Unknown prompt: ${name}`);
  }
});
673
+
368
674
  // Helper function to make HTTP requests
369
675
  async function fetchAPI(
370
676
  endpoint: string,
@@ -427,10 +733,16 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
427
733
 
428
734
  const data = await response.json();
429
735
  const results = data.data || [];
736
+ const pagination = data.pagination || {};
430
737
 
431
738
  if (results.length === 0) {
432
739
  return {
433
- content: [{ type: "text", text: "No results found" }],
740
+ content: [
741
+ {
742
+ type: "text",
743
+ text: "No results found. Try: broader search terms, different content_type, or wider time range.",
744
+ },
745
+ ],
434
746
  };
435
747
  }
436
748
 
@@ -448,64 +760,45 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
448
760
  if (!content) continue;
449
761
 
450
762
  if (result.type === "OCR") {
451
- const textResult =
452
- `OCR Text: ${content.text || "N/A"}\n` +
453
- `App: ${content.app_name || "N/A"}\n` +
454
- `Window: ${content.window_name || "N/A"}\n` +
455
- `Time: ${content.timestamp || "N/A"}\n` +
456
- `Frame ID: ${content.frame_id || "N/A"}\n` +
457
- "---";
458
- formattedResults.push(textResult);
459
-
460
- // Collect frame if available and requested
763
+ formattedResults.push(
764
+ `[OCR] ${content.app_name || "?"} | ${content.window_name || "?"}\n` +
765
+ `${content.timestamp || ""}\n` +
766
+ `${content.text || ""}`
767
+ );
461
768
  if (includeFrames && content.frame) {
462
769
  images.push({
463
770
  data: content.frame,
464
- context: `Screenshot from ${content.app_name || "unknown"} - ${content.window_name || "unknown"} at ${content.timestamp || "unknown"}`,
771
+ context: `${content.app_name} at ${content.timestamp}`,
465
772
  });
466
773
  }
467
774
  } else if (result.type === "Audio") {
468
775
  formattedResults.push(
469
- `Audio Transcription: ${content.transcription || "N/A"}\n` +
470
- `Device: ${content.device_name || "N/A"}\n` +
471
- `Time: ${content.timestamp || "N/A"}\n` +
472
- "---"
776
+ `[Audio] ${content.device_name || "?"}\n` +
777
+ `${content.timestamp || ""}\n` +
778
+ `${content.transcription || ""}`
473
779
  );
474
780
  } else if (result.type === "UI") {
475
781
  formattedResults.push(
476
- `UI Text: ${content.text || "N/A"}\n` +
477
- `App: ${content.app_name || "N/A"}\n` +
478
- `Window: ${content.window_name || "N/A"}\n` +
479
- `Time: ${content.timestamp || "N/A"}\n` +
480
- "---"
782
+ `[UI] ${content.app_name || "?"} | ${content.window_name || "?"}\n` +
783
+ `${content.timestamp || ""}\n` +
784
+ `${content.text || ""}`
481
785
  );
482
786
  }
483
787
  }
484
788
 
485
- // Add text results
789
+ // Header with pagination info
790
+ const header = `Results: ${results.length}/${pagination.total || "?"}` +
791
+ (pagination.total > results.length ? ` (use offset=${(pagination.offset || 0) + results.length} for more)` : "");
792
+
486
793
  contentItems.push({
487
794
  type: "text",
488
- text:
489
- "Search Results:\n\n" +
490
- formattedResults.join("\n") +
491
- (images.length > 0
492
- ? `\n\n${images.length} screenshot(s) included below for visual analysis:`
493
- : ""),
795
+ text: header + "\n\n" + formattedResults.join("\n---\n"),
494
796
  });
495
797
 
496
- // Add images if requested and available
798
+ // Add images if requested
497
799
  for (const img of images) {
498
- // Add context for the image
499
- contentItems.push({
500
- type: "text",
501
- text: `\n📷 ${img.context}`,
502
- });
503
- // Add the image itself
504
- contentItems.push({
505
- type: "image",
506
- data: img.data,
507
- mimeType: "image/png",
508
- });
800
+ contentItems.push({ type: "text", text: `\n📷 ${img.context}` });
801
+ contentItems.push({ type: "image", data: img.data, mimeType: "image/png" });
509
802
  }
510
803
 
511
804
  return { content: contentItems };
@@ -555,6 +848,172 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
555
848
  };
556
849
  }
557
850
 
851
case "export-video": {
  // Exports the frames recorded between two timestamps as an MP4 file:
  //   1. query /search for OCR results in the range and collect their frame IDs;
  //   2. open the /frames/export WebSocket with those IDs and wait for the
  //      encoded video, then write it to the OS temp directory.
  // Every failure is reported as a text result rather than thrown, except a
  // non-OK HTTP status from the search call, which throws.
  const startTime = args.start_time as string;
  const endTime = args.end_time as string;

  // Missing, zero, negative or non-numeric fps falls back to 1 frame/second.
  const requestedFps = Number(args.fps);
  const fps = Number.isFinite(requestedFps) && requestedFps > 0 ? requestedFps : 1.0;

  // Validate time inputs
  if (!startTime || !endTime) {
    return {
      content: [
        {
          type: "text",
          text: "Error: Both start_time and end_time are required in ISO 8601 format (e.g., '2024-01-15T10:00:00Z')",
        },
      ],
    };
  }

  // Reject unparseable or inverted ranges up front instead of sending them to
  // the search API and reporting a misleading "no recordings" result.
  const startMs = Date.parse(startTime);
  const endMs = Date.parse(endTime);
  if (Number.isNaN(startMs) || Number.isNaN(endMs)) {
    return {
      content: [
        {
          type: "text",
          text: "Error: start_time and end_time must be valid ISO 8601 timestamps (e.g., '2024-01-15T10:00:00Z')",
        },
      ],
    };
  }
  if (startMs >= endMs) {
    return {
      content: [
        {
          type: "text",
          text: `Error: start_time (${startTime}) must be earlier than end_time (${endTime})`,
        },
      ],
    };
  }

  // Step 1: Query the search API to get frame IDs for the time range
  const searchParams = new URLSearchParams({
    content_type: "ocr",
    start_time: startTime,
    end_time: endTime,
    limit: "10000", // Get all frames in range
  });

  const searchResponse = await fetchAPI(`/search?${searchParams.toString()}`);
  if (!searchResponse.ok) {
    throw new Error(`Failed to search for frames: HTTP ${searchResponse.status}`);
  }

  const searchData = await searchResponse.json();
  const results = searchData.data || [];

  if (results.length === 0) {
    return {
      content: [
        {
          type: "text",
          text: `No screen recordings found between ${startTime} and ${endTime}. Make sure screenpipe was recording during this time period.`,
        },
      ],
    };
  }

  // Collect the distinct frame IDs carried by OCR results, in ascending order.
  const uniqueFrameIds = new Set<number>();
  for (const result of results) {
    if (result.type === "OCR" && result.content?.frame_id) {
      uniqueFrameIds.add(result.content.frame_id);
    }
  }
  const frameIds = Array.from(uniqueFrameIds).sort((a, b) => a - b);

  if (frameIds.length === 0) {
    return {
      content: [
        {
          type: "text",
          text: `Found ${results.length} results but no valid frame IDs. The recordings may be audio-only.`,
        },
      ],
    };
  }

  // Step 2: Connect to WebSocket and export video
  const wsUrl = `ws://localhost:${port}/frames/export?frame_ids=${frameIds.join(",")}&fps=${fps}`;

  const exportResult = await new Promise<{
    success: boolean;
    filePath?: string;
    error?: string;
    frameCount?: number;
  }>((resolve) => {
    const ws = new WebSocket(wsUrl);
    let settled = false;

    // Single exit point: the first outcome wins, later events are ignored.
    const settle = (outcome: {
      success: boolean;
      filePath?: string;
      error?: string;
      frameCount?: number;
    }): void => {
      if (settled) return;
      settled = true;
      clearTimeout(timeoutHandle);
      resolve(outcome);
    };

    const timeoutHandle = setTimeout(() => {
      if (settled) return;
      ws.close();
      settle({ success: false, error: "Export timed out after 5 minutes" });
    }, 5 * 60 * 1000); // 5 minute timeout

    ws.on("error", (error) => {
      settle({ success: false, error: `WebSocket error: ${error.message}` });
    });

    ws.on("close", () => {
      settle({ success: false, error: "Connection closed unexpectedly" });
    });

    ws.on("message", (data) => {
      if (settled) return;

      // Only JSON parsing is best-effort: frames that are not JSON are
      // skipped. Errors while handling a parsed message are reported below
      // rather than swallowed.
      let message: { status?: unknown; video_data?: unknown; error?: unknown } | null;
      try {
        message = JSON.parse(data.toString());
      } catch {
        return;
      }
      if (message === null || typeof message !== "object") return;

      if (message.status === "completed" && message.video_data) {
        // Save video to temp file
        const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
        const filePath = path.join(os.tmpdir(), `screenpipe_export_${timestamp}.mp4`);

        try {
          // NOTE(review): assumes video_data is an array of byte values —
          // confirm against the export endpoint.
          fs.writeFileSync(filePath, Buffer.from(message.video_data as number[]));
        } catch (writeError: unknown) {
          // A failed write must end the request now; otherwise the caller
          // would wait for the 5 minute timeout.
          const reason = writeError instanceof Error ? writeError.message : String(writeError);
          ws.close();
          settle({ success: false, error: `Failed to write video file: ${reason}` });
          return;
        }

        ws.close();
        settle({ success: true, filePath, frameCount: frameIds.length });
      } else if (message.status === "error") {
        const reason =
          typeof message.error === "string" && message.error ? message.error : "Export failed";
        ws.close();
        settle({ success: false, error: reason });
      }
      // Ignore "extracting" and "encoding" status updates
    });
  });

  if (exportResult.success && exportResult.filePath) {
    return {
      content: [
        {
          type: "text",
          text:
            `Successfully exported video!\n\n` +
            `File: ${exportResult.filePath}\n` +
            `Frames: ${exportResult.frameCount}\n` +
            `Time range: ${startTime} to ${endTime}\n` +
            `FPS: ${fps}`,
        },
      ],
    };
  }

  return {
    content: [
      {
        type: "text",
        text: `Failed to export video: ${exportResult.error}`,
      },
    ],
  };
}
1016
+
558
1017
  case "click-element": {
559
1018
  const selector = {
560
1019
  app_name: args.app,