screenpipe-mcp 0.8.4 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -48,7 +48,7 @@ const SCREENPIPE_API = `http://localhost:${port}`;
48
48
  const server = new Server(
49
49
  {
50
50
  name: "screenpipe",
51
- version: "0.8.3",
51
+ version: "0.8.5",
52
52
  },
53
53
  {
54
54
  capabilities: {
@@ -68,10 +68,14 @@ const BASE_TOOLS: Tool[] = [
68
68
  "Returns timestamped results with app context. " +
69
69
  "Call with no parameters to get recent activity. " +
70
70
  "Use the 'screenpipe://context' resource for current time when building time-based queries.\n\n" +
71
+ "WHEN TO USE WHICH content_type:\n" +
72
+ "- For meetings/calls/conversations: content_type='audio', do NOT use q param (transcriptions are noisy, q filters too aggressively)\n" +
73
+ "- For screen text/reading: content_type='all' or 'accessibility'\n" +
74
+ "- For time spent/app usage questions: use activity-summary tool instead (this tool returns content, not time stats)\n\n" +
71
75
  "SEARCH STRATEGY: First search with ONLY time params (start_time/end_time) — no q, no app_name, no content_type. " +
72
76
  "This gives ground truth of what's recorded. Scan results to find correct app_name values, then narrow with filters using exact observed values. " +
73
- "App names are case-sensitive and may differ from user input (e.g. 'Discord' vs 'Discord.exe'). " +
74
- "The q param searches captured text (accessibility/OCR), NOT app names. NEVER report 'no data' after one filtered search — verify with unfiltered time-only search first.\n\n" +
77
+ "App names are case-sensitive and may differ from user input (e.g. 'Discord' vs 'Discord.exe'). " +
78
+ "The q param searches captured text, NOT app names. NEVER report 'no data' after one filtered search — verify with unfiltered time-only search first.\n\n" +
75
79
  "DEEP LINKS: When referencing specific moments, create clickable links using IDs from search results:\n" +
76
80
  "- OCR results (PREFERRED): [10:30 AM — Chrome](screenpipe://frame/12345) — use content.frame_id from the result\n" +
77
81
  "- Audio results: [meeting at 3pm](screenpipe://timeline?timestamp=2024-01-15T15:00:00Z) — use exact timestamp from result\n" +
@@ -85,12 +89,12 @@ const BASE_TOOLS: Tool[] = [
85
89
  properties: {
86
90
  q: {
87
91
  type: "string",
88
- description: "Search query. Optional - omit to return all recent content.",
92
+ description: "Search query (full-text search on captured text). Optional - omit to return all content in time range. IMPORTANT: Do NOT use q for audio/meeting searches — transcriptions are noisy and q filters too aggressively. Only use q when searching for specific text the user saw on screen.",
89
93
  },
90
94
  content_type: {
91
95
  type: "string",
92
96
  enum: ["all", "ocr", "audio", "input", "accessibility"],
93
- description: "Content type filter: 'ocr' (screen text via OCR, legacy fallback), 'audio' (transcriptions), 'input' (clicks, keystrokes, clipboard, app switches), 'accessibility' (accessibility tree text, preferred for screen content), 'all'. Default: 'all'.",
97
+ description: "Content type filter: 'audio' (transcriptions; use for meetings/calls/conversations), 'accessibility' (accessibility tree text, preferred for screen content), 'ocr' (screen text via OCR, legacy fallback), 'input' (clicks, keystrokes, clipboard, app switches), 'all'. Default: 'all'. For meeting/call queries, ALWAYS use 'audio'.",
94
98
  default: "all",
95
99
  },
96
100
  limit: {
@@ -229,9 +233,14 @@ const BASE_TOOLS: Tool[] = [
229
233
  "Get a lightweight compressed activity overview for a time range (~200-500 tokens). " +
230
234
  "Returns app usage (name, frame count, active minutes, first/last seen), recent accessibility texts, and audio speaker summary. " +
231
235
  "Minutes are based on active session time (consecutive frames with gaps < 5min count as active). " +
232
- "first_seen/last_seen show the wall-clock span per app. " +
233
- "Use this FIRST for broad questions like 'what was I doing?' before drilling into search-content or search-elements. " +
234
- "Much cheaper than search-content for getting an overview.",
236
+ "first_seen/last_seen show the wall-clock span per app.\n\n" +
237
+ "USE THIS TOOL (not search-content or raw SQL) for:\n" +
238
+ "- 'how long did I spend on X?' → active_minutes per app\n" +
239
+ "- 'which apps did I use today?' → app list sorted by active_minutes\n" +
240
+ "- 'what was I doing?' → broad overview before drilling deeper\n" +
241
+ "- Any time-spent or app-usage question\n\n" +
242
+ "WARNING: Do NOT estimate time from raw frame counts or SQL queries — those are inaccurate. " +
243
+ "This endpoint calculates actual active session time correctly.",
235
244
  annotations: {
236
245
  title: "Activity Summary",
237
246
  readOnlyHint: true,
@@ -364,6 +373,24 @@ const RESOURCES = [
364
373
  description: "Interactive search UI for exploring screen recordings and audio transcriptions",
365
374
  mimeType: "text/html",
366
375
  },
376
+ {
377
+ uri: "screenpipe://pipe-creation-guide",
378
+ name: "Pipe Creation Guide",
379
+ description: "How to create screenpipe pipes (scheduled AI automations): format, YAML frontmatter, schedule syntax, API parameters, and example templates",
380
+ mimeType: "text/markdown",
381
+ },
382
+ {
383
+ uri: "screenpipe://api-reference",
384
+ name: "REST API Reference",
385
+ description: "Full screenpipe REST API reference: search, activity-summary, elements, frames, export, retranscribe, raw SQL, connections, speakers (60+ endpoints)",
386
+ mimeType: "text/markdown",
387
+ },
388
+ {
389
+ uri: "screenpipe://cli-reference",
390
+ name: "CLI Reference",
391
+ description: "Screenpipe CLI commands: pipe management (list, enable, run, install, delete) and connection management (Telegram, Slack, Discord, etc.)",
392
+ mimeType: "text/markdown",
393
+ },
367
394
  ];
368
395
 
369
396
  // List resources handler
@@ -424,6 +451,20 @@ Screenpipe captures four types of data:
424
451
  - **Get keyboard input**: \`{"content_type": "input"}\`
425
452
  - **Get audio only**: \`{"content_type": "audio"}\`
426
453
 
454
+ ## Common User Requests → Correct Tool Choice
455
+ | User says | Use this tool | Key params |
456
+ |-----------|--------------|------------|
457
+ | "summarize my meeting/call" | search-content | content_type:"audio", NO q param, start_time |
458
+ | "what did they/I say about X" | search-content | content_type:"audio", NO q param (scan results manually) |
459
+ | "how long on X" / "which apps" / "time spent" | activity-summary | start_time, end_time |
460
+ | "what was I doing" | activity-summary | start_time, end_time (then drill into search-content) |
461
+ | "what was I reading/looking at" | search-content | content_type:"all", start_time |
462
+
463
+ ## Behavior Rules
464
+ - Act immediately on clear requests. NEVER ask "what time range?" or "which content type?" when the intent is obvious.
465
+ - If search returns empty, silently retry with wider time range or fewer filters. Do NOT ask the user what to change.
466
+ - For meetings: ALWAYS use content_type:"audio" and do NOT use the q param. Transcriptions are noisy — q filters too aggressively and misses relevant content.
467
+
427
468
  ## search-content
428
469
  | Parameter | Description | Default |
429
470
  |-----------|-------------|---------|
@@ -449,6 +490,19 @@ Screenpipe captures four types of data:
449
490
  4. **Fetch frame-context** for URLs and accessibility tree of specific frames
450
491
  5. **Screenshots** (include_frames=true) only when text isn't enough
451
492
 
493
+ ## Chat History
494
+ Previous screenpipe chat conversations are stored as individual JSON files in ~/.screenpipe/chats/{conversation-id}.json.
495
+ Each file contains: id, title, messages[], createdAt, updatedAt. You can read these files to reference or search previous conversations.
496
+
497
+ ## Speaker Management
498
+ screenpipe auto-identifies speakers in audio. API endpoints for managing them:
499
+ - \`GET /speakers/unnamed?limit=10\` — list unnamed speakers
500
+ - \`GET /speakers/search?name=John\` — search by name
501
+ - \`POST /speakers/update\` with \`{"id": 5, "name": "John"}\` — rename a speaker
502
+ - \`POST /speakers/merge\` with \`{"speaker_to_keep_id": 1, "speaker_to_merge_id": 2}\` — merge duplicates
503
+ - \`GET /speakers/similar?speaker_id=5\` — find similar speakers for merging
504
+ - \`POST /speakers/reassign\` — reassign audio chunk to different speaker
505
+
452
506
  ## Tips
453
507
  1. Read screenpipe://context first to get current timestamps
454
508
  2. Use activity-summary before search-content for broad overview questions
@@ -521,6 +575,491 @@ When showing search results to users, create clickable links so they can jump to
521
575
  };
522
576
  }
523
577
 
578
+ case "screenpipe://pipe-creation-guide":
579
+ return {
580
+ contents: [
581
+ {
582
+ uri,
583
+ mimeType: "text/markdown",
584
+ text: `# Screenpipe Pipe Creation Guide
585
+
586
+ ## What is a pipe?
587
+
588
+ A pipe is a scheduled AI agent defined as a single markdown file: \`~/.screenpipe/pipes/{name}/pipe.md\`
589
+ Every N minutes, screenpipe runs a coding agent (like pi or claude-code) with the pipe's prompt.
590
+ The agent can query your screen data, write files, call external APIs, send notifications, etc.
591
+
592
+ ## pipe.md format
593
+
594
+ The file starts with YAML frontmatter on the very first line (no blank lines before it), then the prompt body:
595
+
596
+ \`\`\`markdown
597
+ ---
598
+ schedule: every 30m
599
+ enabled: true
600
+ ---
601
+
602
+ Your prompt instructions here...
603
+ \`\`\`
604
+
605
+ ### Config fields
606
+
607
+ | Field | Values | Description |
608
+ |-------|--------|-------------|
609
+ | \`schedule\` | \`every 30m\`, \`every 1h\`, \`every day at 9am\`, \`every monday at 9am\`, \`manual\`, or cron: \`*/30 * * * *\` | When to run |
610
+ | \`enabled\` | \`true\` / \`false\` | Whether the pipe is active |
611
+ | \`preset\` | AI preset name (e.g. \`Oai\`) | Which AI model to use |
612
+ | \`history\` | \`true\` / \`false\` | Include previous output as context |
613
+ | \`connections\` | list of connection IDs | Required integrations (e.g. \`obsidian\`, \`telegram\`) |
614
+
615
+ ## Context header
616
+
617
+ Before execution, screenpipe prepends a context header to the prompt with:
618
+ - Time range (start/end timestamps based on the schedule interval)
619
+ - Current date and user's timezone
620
+ - Screenpipe API base URL (http://localhost:3030)
621
+ - Output directory
622
+
623
+ The AI agent uses this context to query the right time range. No template variables needed in the prompt.
624
+
625
+ ## Screenpipe search API
626
+
627
+ The agent queries screen data via the local REST API:
628
+
629
+ \`\`\`
630
+ curl "http://localhost:3030/search?limit=20&content_type=all&start_time=<ISO8601>&end_time=<ISO8601>"
631
+ \`\`\`
632
+
633
+ ### Query parameters
634
+
635
+ | Parameter | Description |
636
+ |-----------|-------------|
637
+ | \`q\` | Text search query (optional — skip for audio, transcriptions are noisy) |
638
+ | \`content_type\` | \`all\`, \`ocr\`, \`audio\`, \`input\`, \`accessibility\` (prefer \`all\` or \`accessibility\`) |
639
+ | \`limit\` | Max results (default 20) |
640
+ | \`offset\` | Pagination offset |
641
+ | \`start_time\` / \`end_time\` | ISO 8601 timestamps or relative (\`1h ago\`, \`now\`) |
642
+ | \`app_name\` | Filter by app (e.g. \`Google Chrome\`, \`Slack\`) |
643
+ | \`window_name\` | Filter by window title |
644
+ | \`browser_url\` | Filter by URL |
645
+ | \`min_length\` / \`max_length\` | Filter by text length |
646
+ | \`speaker_name\` | Filter audio by speaker |
647
+
648
+ Other useful endpoints:
649
+ - \`GET /activity-summary?start_time=...&end_time=...\` — lightweight overview (~200-500 tokens)
650
+ - \`GET /elements?q=...&role=AXButton&start_time=...\` — UI elements
651
+ - \`GET /connections/{id}\` — get integration credentials (telegram, slack, obsidian, etc.)
652
+ - \`POST /raw_sql\` — run SQL queries (always include LIMIT)
653
+
654
+ Full API reference: read the \`screenpipe://api-reference\` resource.
655
+
656
+ ## Installing and running
657
+
658
+ After creating the pipe.md file:
659
+
660
+ \`\`\`bash
661
+ bunx screenpipe@latest pipe install ~/.screenpipe/pipes/my-pipe
662
+ bunx screenpipe@latest pipe enable my-pipe
663
+ bunx screenpipe@latest pipe run my-pipe # test immediately
664
+ \`\`\`
665
+
666
+ ## Example pipes
667
+
668
+ ### Daily recap (manual trigger)
669
+ \`\`\`markdown
670
+ ---
671
+ schedule: manual
672
+ enabled: true
673
+ ---
674
+
675
+ Analyze my screen and audio recordings from today (last 16 hours). Use limit=10 per search, max 5 searches total.
676
+
677
+ ## Summary
678
+ One sentence: what I mainly did today.
679
+
680
+ ## Accomplishments
681
+ - Top 3 things I finished, with timestamps
682
+
683
+ ## Key Moments
684
+ - Important things I saw, said, or heard
685
+
686
+ ## Unfinished Work
687
+ - What I should continue tomorrow
688
+ \`\`\`
689
+
690
+ ### Obsidian sync (every hour)
691
+ \`\`\`markdown
692
+ ---
693
+ schedule: every 1h
694
+ enabled: true
695
+ connections:
696
+ - obsidian
697
+ ---
698
+
699
+ Sync screenpipe activity to Obsidian vault as a daily note.
700
+
701
+ 1. Get vault path from GET http://localhost:3030/connections/obsidian
702
+ 2. Read existing daily note (merge into it)
703
+ 3. Query search API in 30-minute chunks with min_length=50
704
+ 4. Synthesize activities, extract action items, write note
705
+ \`\`\`
706
+
707
+ ### Slack standup (every weekday at 9am)
708
+ \`\`\`markdown
709
+ ---
710
+ schedule: every weekday at 9am
711
+ enabled: true
712
+ connections:
713
+ - slack
714
+ ---
715
+
716
+ Generate standup update from yesterday's activity and post to Slack.
717
+
718
+ 1. Query activity-summary for yesterday
719
+ 2. Search for key accomplishments and blockers
720
+ 3. Format as: Done / Doing / Blocked
721
+ 4. POST to Slack webhook from GET http://localhost:3030/connections/slack
722
+ \`\`\`
723
+
724
+ ## Optimization tips
725
+
726
+ - Be specific about expected output format
727
+ - Give step-by-step instructions
728
+ - Add error handling: "if API returns empty, try content_type=accessibility instead of ocr"
729
+ - Add validation: "before writing, verify you have at least 3 entries"
730
+ - Specify exact file paths, API parameters, output structure
731
+ - Keep search limit low (10-20) and use time ranges from the context header
732
+ - Use \`min_length=50\` to skip noisy OCR fragments`,
733
+ },
734
+ ],
735
+ };
736
+
737
+ case "screenpipe://api-reference":
738
+ return {
739
+ contents: [
740
+ {
741
+ uri,
742
+ mimeType: "text/markdown",
743
+ text: `# Screenpipe REST API Reference
744
+
745
+ Local REST API at \`http://localhost:3030\`. Full reference (60+ endpoints): https://docs.screenpi.pe/llms-full.txt
746
+
747
+ ## Shell
748
+
749
+ - **macOS/Linux** → \`bash\`, \`curl\`
750
+ - **Windows** → \`powershell\`, \`curl.exe\` (not the alias)
751
+
752
+ ## Context Window Protection
753
+
754
+ API responses can be large. Always write curl output to a file first (\`curl ... -o /tmp/sp_result.json\`), check size (\`wc -c\`), and if over 5KB read only the first 50-100 lines. Extract what you need with \`jq\`. NEVER dump full large responses into context.
755
+
756
+ ---
757
+
758
+ ## 1. Search — \`GET /search\`
759
+
760
+ \`\`\`bash
761
+ curl "http://localhost:3030/search?q=QUERY&content_type=all&limit=10&start_time=1h%20ago"
762
+ \`\`\`
763
+
764
+ ### Parameters
765
+
766
+ | Parameter | Type | Required | Description |
767
+ |-----------|------|----------|-------------|
768
+ | \`q\` | string | No | Keywords. Do NOT use for audio — transcriptions are noisy. |
769
+ | \`content_type\` | string | No | \`all\` (default), \`ocr\`, \`audio\`, \`input\`, \`accessibility\` |
770
+ | \`limit\` | integer | No | Max 1-20. Default: 10 |
771
+ | \`offset\` | integer | No | Pagination. Default: 0 |
772
+ | \`start_time\` | ISO 8601 or relative | **Yes** | \`2024-01-15T10:00:00Z\` or \`16h ago\`, \`2d ago\`, \`30m ago\` |
773
+ | \`end_time\` | ISO 8601 or relative | No | Defaults to now. \`now\`, \`1h ago\` |
774
+ | \`app_name\` | string | No | e.g. "Google Chrome", "Slack", "zoom.us" |
775
+ | \`window_name\` | string | No | Window title substring |
776
+ | \`speaker_name\` | string | No | Filter audio by speaker (case-insensitive partial) |
777
+ | \`focused\` | boolean | No | Only focused windows |
778
+
779
+ ### Critical Rules
780
+
781
+ 1. **ALWAYS include \`start_time\`** — queries without time bounds WILL timeout
782
+ 2. **Start with 1-2 hour ranges** — expand only if no results
783
+ 3. **Use \`app_name\`** when user mentions a specific app
784
+ 4. **"recent"** = 30 min. **"today"** = since midnight. **"yesterday"** = yesterday's range
785
+
786
+ ### Response Format
787
+
788
+ \`\`\`json
789
+ {
790
+ "data": [
791
+ {"type": "OCR", "content": {"frame_id": 12345, "text": "...", "timestamp": "...", "app_name": "Chrome"}},
792
+ {"type": "Audio", "content": {"chunk_id": 678, "transcription": "...", "timestamp": "...", "speaker": {"name": "John"}}},
793
+ {"type": "UI", "content": {"id": 999, "text": "Clicked Submit", "timestamp": "...", "app_name": "Safari"}}
794
+ ],
795
+ "pagination": {"limit": 10, "offset": 0, "total": 42}
796
+ }
797
+ \`\`\`
798
+
799
+ ---
800
+
801
+ ## 2. Activity Summary — \`GET /activity-summary\`
802
+
803
+ \`\`\`bash
804
+ curl "http://localhost:3030/activity-summary?start_time=1h%20ago&end_time=now"
805
+ \`\`\`
806
+
807
+ Returns app usage with \`active_minutes\`, first/last seen, recent texts, audio summary. ~200-500 tokens. Best starting point.
808
+
809
+ ---
810
+
811
+ ## 3. Elements — \`GET /elements\`
812
+
813
+ Lightweight FTS search across UI elements (~100-500 bytes each).
814
+
815
+ \`\`\`bash
816
+ curl "http://localhost:3030/elements?q=Submit&role=AXButton&start_time=1h%20ago&limit=10"
817
+ \`\`\`
818
+
819
+ Parameters: \`q\`, \`frame_id\`, \`source\` (\`accessibility\`|\`ocr\`), \`role\`, \`start_time\`, \`end_time\`, \`app_name\`, \`limit\`, \`offset\`.
820
+
821
+ ### Frame Context — \`GET /frames/{id}/context\`
822
+
823
+ Returns accessibility text, parsed nodes, and extracted URLs for a frame.
824
+
825
+ Common roles: \`AXButton\`, \`AXStaticText\`, \`AXLink\`, \`AXTextField\`, \`AXTextArea\`, \`AXMenuItem\`, \`AXCheckBox\`
826
+
827
+ ---
828
+
829
+ ## 4. Frames — \`GET /frames/{frame_id}\`
830
+
831
+ Returns raw PNG screenshot. Never fetch more than 2-3 per query.
832
+
833
+ ---
834
+
835
+ ## 5. Media Export — \`POST /frames/export\`
836
+
837
+ \`\`\`bash
838
+ curl -X POST http://localhost:3030/frames/export \\
839
+ -H "Content-Type: application/json" \\
840
+ -d '{"start_time": "5m ago", "end_time": "now", "fps": 1.0}'
841
+ \`\`\`
842
+
843
+ FPS guidelines: 5min→1.0, 30min→0.5, 1h→0.2, 2h+→0.1. Max 10,000 frames.
844
+
845
+ ---
846
+
847
+ ## 6. Retranscribe — \`POST /audio/retranscribe\`
848
+
849
+ \`\`\`bash
850
+ curl -X POST http://localhost:3030/audio/retranscribe \\
851
+ -H "Content-Type: application/json" \\
852
+ -d '{"start": "1h ago", "end": "now"}'
853
+ \`\`\`
854
+
855
+ Optional: \`engine\`, \`vocabulary\` (array of \`{"word": "...", "replacement": "..."}\`), \`prompt\` (topic context).
856
+
857
+ ---
858
+
859
+ ## 7. Raw SQL — \`POST /raw_sql\`
860
+
861
+ \`\`\`bash
862
+ curl -X POST http://localhost:3030/raw_sql \\
863
+ -H "Content-Type: application/json" \\
864
+ -d '{"query": "SELECT ... LIMIT 100"}'
865
+ \`\`\`
866
+
867
+ Every SELECT needs LIMIT. Always filter by time. Read-only.
868
+
869
+ ### Schema
870
+
871
+ | Table | Key Columns | Time Column |
872
+ |-------|-------------|-------------|
873
+ | \`frames\` | \`app_name\`, \`window_name\`, \`browser_url\`, \`focused\` | \`timestamp\` |
874
+ | \`ocr_text\` | \`text\`, \`app_name\`, \`window_name\` | join via \`frame_id\` |
875
+ | \`elements\` | \`source\`, \`role\`, \`text\` | join via \`frame_id\` |
876
+ | \`audio_transcriptions\` | \`transcription\`, \`device\`, \`speaker_id\`, \`is_input_device\` | \`timestamp\` |
877
+ | \`speakers\` | \`name\`, \`metadata\` | — |
878
+ | \`ui_events\` | \`event_type\`, \`app_name\`, \`window_title\`, \`browser_url\` | \`timestamp\` |
879
+ | \`accessibility\` | \`app_name\`, \`window_name\`, \`text_content\` | \`timestamp\` |
880
+
881
+ ### Example Queries
882
+
883
+ \`\`\`sql
884
+ -- Most used apps (last 24h)
885
+ SELECT app_name, COUNT(*) as frames FROM frames
886
+ WHERE timestamp > datetime('now', '-24 hours') AND app_name IS NOT NULL
887
+ GROUP BY app_name ORDER BY frames DESC LIMIT 20
888
+
889
+ -- Speaker stats
890
+ SELECT COALESCE(NULLIF(s.name, ''), 'Unknown') as speaker, COUNT(*) as segments
891
+ FROM audio_transcriptions at LEFT JOIN speakers s ON at.speaker_id = s.id
892
+ WHERE at.timestamp > datetime('now', '-24 hours')
893
+ GROUP BY at.speaker_id ORDER BY segments DESC LIMIT 20
894
+ \`\`\`
895
+
896
+ ---
897
+
898
+ ## 8. Connections — \`GET /connections\`
899
+
900
+ \`\`\`bash
901
+ curl http://localhost:3030/connections # List all
902
+ curl http://localhost:3030/connections/telegram # Get credentials
903
+ \`\`\`
904
+
905
+ Services: Telegram (\`bot_token\` + \`chat_id\`), Slack (\`webhook_url\`), Discord (\`webhook_url\`), Todoist (\`api_token\`), Teams (\`webhook_url\`), Email (SMTP config).
906
+
907
+ ---
908
+
909
+ ## 9. Speakers
910
+
911
+ \`\`\`bash
912
+ curl "http://localhost:3030/speakers/search?name=John"
913
+ curl "http://localhost:3030/speakers/unnamed?limit=10"
914
+ curl -X POST http://localhost:3030/speakers/update -H "Content-Type: application/json" -d '{"id": 5, "name": "John"}'
915
+ curl -X POST http://localhost:3030/speakers/merge -H "Content-Type: application/json" -d '{"speaker_to_keep_id": 1, "speaker_to_merge_id": 2}'
916
+ \`\`\`
917
+
918
+ ---
919
+
920
+ ## 10. Other Endpoints
921
+
922
+ \`\`\`bash
923
+ curl http://localhost:3030/health # Health check
924
+ curl http://localhost:3030/audio/list # Audio devices
925
+ curl http://localhost:3030/vision/list # Monitors
926
+ \`\`\`
927
+
928
+ ## Pipes API
929
+
930
+ \`\`\`bash
931
+ curl http://localhost:3030/pipes/list # List all pipes
932
+ curl -X POST http://localhost:3030/pipes/enable -d '{"name":"..."}' # Enable
933
+ curl -X POST http://localhost:3030/pipes/disable -d '{"name":"..."}' # Disable
934
+ curl -X POST http://localhost:3030/pipes/run -d '{"name":"..."}' # Run once
935
+ curl "http://localhost:3030/pipes/{name}/executions?limit=5" # Execution history
936
+ \`\`\`
937
+
938
+ ## Deep Links
939
+
940
+ \`\`\`markdown
941
+ [10:30 AM — Chrome](screenpipe://frame/12345) # OCR results (use frame_id)
942
+ [meeting at 3pm](screenpipe://timeline?timestamp=ISO8601) # Audio results (use timestamp)
943
+ \`\`\`
944
+
945
+ Only use IDs/timestamps from actual search results. Never fabricate.`,
946
+ },
947
+ ],
948
+ };
949
+
950
+ case "screenpipe://cli-reference":
951
+ return {
952
+ contents: [
953
+ {
954
+ uri,
955
+ mimeType: "text/markdown",
956
+ text: `# Screenpipe CLI Reference
957
+
958
+ Use \`bunx screenpipe@latest\` to run CLI commands (or \`npx screenpipe@latest\`). No separate install needed.
959
+
960
+ ## Shell
961
+
962
+ - **macOS/Linux** → \`bash\`
963
+ - **Windows** → \`powershell\`
964
+
965
+ ---
966
+
967
+ ## Pipe Management
968
+
969
+ Pipes are markdown-based AI automations. Each pipe lives at \`~/.screenpipe/pipes/<name>/pipe.md\`.
970
+
971
+ ### Commands
972
+
973
+ \`\`\`bash
974
+ bunx screenpipe@latest pipe list # List all pipes (compact table)
975
+ bunx screenpipe@latest pipe enable <name> # Enable a pipe
976
+ bunx screenpipe@latest pipe disable <name> # Disable a pipe
977
+ bunx screenpipe@latest pipe run <name> # Run once immediately (for testing)
978
+ bunx screenpipe@latest pipe logs <name> # View execution logs
979
+ bunx screenpipe@latest pipe install <url-or-path> # Install from GitHub or local path
980
+ bunx screenpipe@latest pipe delete <name> # Delete a pipe
981
+ bunx screenpipe@latest pipe models list # View AI model presets
982
+ \`\`\`
983
+
984
+ ### Creating a Pipe
985
+
986
+ Create \`~/.screenpipe/pipes/<name>/pipe.md\` with YAML frontmatter + prompt:
987
+
988
+ \`\`\`markdown
989
+ ---
990
+ schedule: every 30m
991
+ enabled: true
992
+ preset: Oai
993
+ ---
994
+
995
+ Your prompt instructions here. The AI agent executes this on schedule.
996
+ \`\`\`
997
+
998
+ **Schedule syntax**: \`every 30m\`, \`every 1h\`, \`every day at 9am\`, \`every monday at 9am\`, \`manual\`, or cron: \`*/30 * * * *\`
999
+
1000
+ **Config fields**: \`schedule\`, \`enabled\` (bool), \`preset\` (AI preset name), \`history\` (bool — include previous output), \`connections\` (list of required integrations)
1001
+
1002
+ After creating:
1003
+ \`\`\`bash
1004
+ bunx screenpipe@latest pipe install ~/.screenpipe/pipes/my-pipe
1005
+ bunx screenpipe@latest pipe enable my-pipe
1006
+ bunx screenpipe@latest pipe run my-pipe # test immediately
1007
+ \`\`\`
1008
+
1009
+ ### Editing Config
1010
+
1011
+ Edit frontmatter in the pipe.md file directly, or via API:
1012
+
1013
+ \`\`\`bash
1014
+ curl -X POST http://localhost:3030/pipes/<name>/config \\
1015
+ -H "Content-Type: application/json" \\
1016
+ -d '{"config": {"schedule": "every 1h", "enabled": true}}'
1017
+ \`\`\`
1018
+
1019
+ ### Rules
1020
+
1021
+ 1. Use \`pipe list\` (not \`--json\`) — table output is compact
1022
+ 2. Never dump full pipe JSON — can be 15MB+
1023
+ 3. Check logs first when debugging: \`pipe logs <name>\`
1024
+ 4. Use \`pipe run <name>\` to test before waiting for schedule
1025
+
1026
+ ---
1027
+
1028
+ ## Connection Management
1029
+
1030
+ Manage integrations (Telegram, Slack, Discord, Email, Todoist, Teams) from the CLI.
1031
+
1032
+ ### Commands
1033
+
1034
+ \`\`\`bash
1035
+ bunx screenpipe@latest connection list # List all connections + status
1036
+ bunx screenpipe@latest connection list --json # JSON output
1037
+ bunx screenpipe@latest connection get <id> # Show saved credentials
1038
+ bunx screenpipe@latest connection set <id> key=val # Save credentials
1039
+ bunx screenpipe@latest connection test <id> # Test a connection
1040
+ bunx screenpipe@latest connection remove <id> # Remove credentials
1041
+ \`\`\`
1042
+
1043
+ ### Examples
1044
+
1045
+ \`\`\`bash
1046
+ # Set up Telegram
1047
+ bunx screenpipe@latest connection set telegram bot_token=123456:ABC-DEF chat_id=5776185278
1048
+
1049
+ # Set up Slack webhook
1050
+ bunx screenpipe@latest connection set slack webhook_url=https://hooks.slack.com/services/...
1051
+
1052
+ # Verify it works
1053
+ bunx screenpipe@latest connection test telegram
1054
+ \`\`\`
1055
+
1056
+ Connection IDs: \`telegram\`, \`slack\`, \`discord\`, \`email\`, \`todoist\`, \`teams\`, \`google-calendar\`, \`apple-intelligence\`, \`openclaw\`, \`obsidian\`
1057
+
1058
+ Credentials are stored locally at \`~/.screenpipe/connections.json\`.`,
1059
+ },
1060
+ ],
1061
+ };
1062
+
524
1063
  default:
525
1064
  throw new Error(`Unknown resource: ${uri}`);
526
1065
  }
@@ -551,6 +1090,14 @@ const PROMPTS = [
551
1090
  { name: "hours", description: "Hours to look back (default: 3)", required: false },
552
1091
  ],
553
1092
  },
1093
+ {
1094
+ name: "create-pipe",
1095
+ description: "Create a new screenpipe pipe (scheduled AI automation)",
1096
+ arguments: [
1097
+ { name: "description", description: "What the pipe should do", required: true },
1098
+ { name: "schedule", description: "Schedule (e.g., 'every 30m', 'every day at 9am', 'manual')", required: false },
1099
+ ],
1100
+ },
554
1101
  ];
555
1102
 
556
1103
  // List prompts handler
@@ -644,6 +1191,85 @@ Common meeting apps: zoom.us, Microsoft Teams, Google Meet, Slack`,
644
1191
  };
645
1192
  }
646
1193
 
1194
+ case "create-pipe": {
1195
+ const description = promptArgs?.description || "a useful automation";
1196
+ const schedule = promptArgs?.schedule || "every 30m";
1197
+
1198
+ return {
1199
+ description: `Create a new screenpipe pipe: ${description}`,
1200
+ messages: [
1201
+ {
1202
+ role: "user" as const,
1203
+ content: {
1204
+ type: "text" as const,
1205
+ text: `Create a new screenpipe pipe based on this description: "${description}"
1206
+ Schedule: ${schedule}
1207
+
1208
+ ## How to create a pipe
1209
+
1210
+ A pipe is a TypeScript file that runs on a schedule or manually. It uses the screenpipe API to access screen/audio data and can send notifications, call AI, etc.
1211
+
1212
+ ### Pipe structure
1213
+ \`\`\`typescript
1214
+ const pipe = () => import("https://raw.githubusercontent.com/nichochar/screenpipe/refs/heads/main/pipes/pipe-modules/pipe-core/index.ts");
1215
+
1216
+ async function main() {
1217
+ const sp = await pipe();
1218
+
1219
+ // Query recent screen/audio data
1220
+ const results = await sp.queryScreenpipe({
1221
+ q: "search term",
1222
+ contentType: "all", // "ocr" | "audio" | "all" | "ui"
1223
+ limit: 50,
1224
+ startTime: new Date(Date.now() - 30 * 60 * 1000).toISOString(),
1225
+ endTime: new Date().toISOString(),
1226
+ });
1227
+
1228
+ // Send notification
1229
+ await sp.sendDesktopNotification({ title: "Title", body: "Body" });
1230
+
1231
+ // Call AI (uses user's configured AI provider)
1232
+ const response = await sp.generateText({
1233
+ messages: [{ role: "user", content: "Analyze this data..." }],
1234
+ });
1235
+ }
1236
+
1237
+ main();
1238
+ \`\`\`
1239
+
1240
+ ### Key APIs available in pipes
1241
+ - \`queryScreenpipe(params)\` - Search screen text (OCR/UI), audio transcriptions
1242
+ - \`sendDesktopNotification({ title, body })\` - System notifications
1243
+ - \`generateText({ messages, model? })\` - AI text generation
1244
+ - \`generateObject({ messages, schema, model? })\` - AI structured output
1245
+ - \`loadPipeConfig()\` - Load pipe configuration
1246
+ - \`fetch()\` - HTTP requests to external services
1247
+
1248
+ ### pipe.json config
1249
+ \`\`\`json
1250
+ {
1251
+ "cron": "${schedule === "manual" ? "" : schedule.replace("every ", "*/").replace("m", " * * * *").replace("h", " * * *")}",
1252
+ "is_nextjs": false,
1253
+ "fields": [
1254
+ { "name": "setting_name", "type": "string", "default": "value", "description": "Setting description" }
1255
+ ]
1256
+ }
1257
+ \`\`\`
1258
+
1259
+ ### Important notes
1260
+ - Use \`contentType: "ui"\` for accessibility/structured text, \`"ocr"\` for raw screen text
1261
+ - Always handle empty results gracefully
1262
+ - Use \`startTime\`/\`endTime\` to scope queries
1263
+ - Pipes run in Bun runtime with full TypeScript support
1264
+ - For scheduled pipes, keep execution fast (< 30s)
1265
+
1266
+ Create the pipe with the necessary files (pipe.ts and pipe.json). Follow the patterns above exactly.`,
1267
+ },
1268
+ },
1269
+ ],
1270
+ };
1271
+ }
1272
+
647
1273
  default:
648
1274
  throw new Error(`Unknown prompt: ${name}`);
649
1275
  }