screenpipe-mcp 0.8.4 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +65 -1
- package/bun.lock +429 -0
- package/dist/index.js +629 -8
- package/package.json +10 -9
- package/src/index.ts +634 -8
package/src/index.ts
CHANGED
|
@@ -48,7 +48,7 @@ const SCREENPIPE_API = `http://localhost:${port}`;
|
|
|
48
48
|
const server = new Server(
|
|
49
49
|
{
|
|
50
50
|
name: "screenpipe",
|
|
51
|
-
version: "0.8.4",
|
|
51
|
+
version: "0.8.5",
|
|
52
52
|
},
|
|
53
53
|
{
|
|
54
54
|
capabilities: {
|
|
@@ -68,10 +68,14 @@ const BASE_TOOLS: Tool[] = [
|
|
|
68
68
|
"Returns timestamped results with app context. " +
|
|
69
69
|
"Call with no parameters to get recent activity. " +
|
|
70
70
|
"Use the 'screenpipe://context' resource for current time when building time-based queries.\n\n" +
|
|
71
|
+
"WHEN TO USE WHICH content_type:\n" +
|
|
72
|
+
"- For meetings/calls/conversations: content_type='audio', do NOT use q param (transcriptions are noisy, q filters too aggressively)\n" +
|
|
73
|
+
"- For screen text/reading: content_type='all' or 'accessibility'\n" +
|
|
74
|
+
"- For time spent/app usage questions: use activity-summary tool instead (this tool returns content, not time stats)\n\n" +
|
|
71
75
|
"SEARCH STRATEGY: First search with ONLY time params (start_time/end_time) — no q, no app_name, no content_type. " +
|
|
72
76
|
"This gives ground truth of what's recorded. Scan results to find correct app_name values, then narrow with filters using exact observed values. " +
|
|
73
|
-
"App names are case-sensitive
|
|
74
|
-
"The q param searches captured text
|
|
77
|
+
"App names are case-sensitive (e.g. 'Discord' vs 'Discord.exe'). " +
|
|
78
|
+
"The q param searches captured text, NOT app names. NEVER report 'no data' after one filtered search — verify with unfiltered time-only search first.\n\n" +
|
|
75
79
|
"DEEP LINKS: When referencing specific moments, create clickable links using IDs from search results:\n" +
|
|
76
80
|
"- OCR results (PREFERRED): [10:30 AM — Chrome](screenpipe://frame/12345) — use content.frame_id from the result\n" +
|
|
77
81
|
"- Audio results: [meeting at 3pm](screenpipe://timeline?timestamp=2024-01-15T15:00:00Z) — use exact timestamp from result\n" +
|
|
@@ -85,12 +89,12 @@ const BASE_TOOLS: Tool[] = [
|
|
|
85
89
|
properties: {
|
|
86
90
|
q: {
|
|
87
91
|
type: "string",
|
|
88
|
-
description: "Search query. Optional - omit to return all
|
|
92
|
+
description: "Search query (full-text search on captured text). Optional - omit to return all content in time range. IMPORTANT: Do NOT use q for audio/meeting searches — transcriptions are noisy and q filters too aggressively. Only use q when searching for specific text the user saw on screen.",
|
|
89
93
|
},
|
|
90
94
|
content_type: {
|
|
91
95
|
type: "string",
|
|
92
96
|
enum: ["all", "ocr", "audio", "input", "accessibility"],
|
|
93
|
-
description: "Content type filter: '
|
|
97
|
+
description: "Content type filter: 'audio' (transcriptions — use for meetings/calls/conversations), 'accessibility' (accessibility tree text, preferred for screen content), 'ocr' (screen text via OCR, legacy fallback), 'input' (clicks, keystrokes, clipboard, app switches), 'all'. Default: 'all'. For meeting/call queries, ALWAYS use 'audio'.",
|
|
94
98
|
default: "all",
|
|
95
99
|
},
|
|
96
100
|
limit: {
|
|
@@ -229,9 +233,14 @@ const BASE_TOOLS: Tool[] = [
|
|
|
229
233
|
"Get a lightweight compressed activity overview for a time range (~200-500 tokens). " +
|
|
230
234
|
"Returns app usage (name, frame count, active minutes, first/last seen), recent accessibility texts, and audio speaker summary. " +
|
|
231
235
|
"Minutes are based on active session time (consecutive frames with gaps < 5min count as active). " +
|
|
232
|
-
"first_seen/last_seen show the wall-clock span per app
|
|
233
|
-
"
|
|
234
|
-
"
|
|
236
|
+
"first_seen/last_seen show the wall-clock span per app.\n\n" +
|
|
237
|
+
"USE THIS TOOL (not search-content or raw SQL) for:\n" +
|
|
238
|
+
"- 'how long did I spend on X?' → active_minutes per app\n" +
|
|
239
|
+
"- 'which apps did I use today?' → app list sorted by active_minutes\n" +
|
|
240
|
+
"- 'what was I doing?' → broad overview before drilling deeper\n" +
|
|
241
|
+
"- Any time-spent or app-usage question\n\n" +
|
|
242
|
+
"WARNING: Do NOT estimate time from raw frame counts or SQL queries — those are inaccurate. " +
|
|
243
|
+
"This endpoint calculates actual active session time correctly.",
|
|
235
244
|
annotations: {
|
|
236
245
|
title: "Activity Summary",
|
|
237
246
|
readOnlyHint: true,
|
|
@@ -364,6 +373,24 @@ const RESOURCES = [
|
|
|
364
373
|
description: "Interactive search UI for exploring screen recordings and audio transcriptions",
|
|
365
374
|
mimeType: "text/html",
|
|
366
375
|
},
|
|
376
|
+
{
|
|
377
|
+
uri: "screenpipe://pipe-creation-guide",
|
|
378
|
+
name: "Pipe Creation Guide",
|
|
379
|
+
description: "How to create screenpipe pipes (scheduled AI automations): format, YAML frontmatter, schedule syntax, API parameters, and example templates",
|
|
380
|
+
mimeType: "text/markdown",
|
|
381
|
+
},
|
|
382
|
+
{
|
|
383
|
+
uri: "screenpipe://api-reference",
|
|
384
|
+
name: "REST API Reference",
|
|
385
|
+
description: "Full screenpipe REST API reference: search, activity-summary, elements, frames, export, retranscribe, raw SQL, connections, speakers (60+ endpoints)",
|
|
386
|
+
mimeType: "text/markdown",
|
|
387
|
+
},
|
|
388
|
+
{
|
|
389
|
+
uri: "screenpipe://cli-reference",
|
|
390
|
+
name: "CLI Reference",
|
|
391
|
+
description: "Screenpipe CLI commands: pipe management (list, enable, run, install, delete) and connection management (Telegram, Slack, Discord, etc.)",
|
|
392
|
+
mimeType: "text/markdown",
|
|
393
|
+
},
|
|
367
394
|
];
|
|
368
395
|
|
|
369
396
|
// List resources handler
|
|
@@ -424,6 +451,20 @@ Screenpipe captures four types of data:
|
|
|
424
451
|
- **Get keyboard input**: \`{"content_type": "input"}\`
|
|
425
452
|
- **Get audio only**: \`{"content_type": "audio"}\`
|
|
426
453
|
|
|
454
|
+
## Common User Requests → Correct Tool Choice
|
|
455
|
+
| User says | Use this tool | Key params |
|
|
456
|
+
|-----------|--------------|------------|
|
|
457
|
+
| "summarize my meeting/call" | search-content | content_type:"audio", NO q param, start_time |
|
|
458
|
+
| "what did they/I say about X" | search-content | content_type:"audio", NO q param (scan results manually) |
|
|
459
|
+
| "how long on X" / "which apps" / "time spent" | activity-summary | start_time, end_time |
|
|
460
|
+
| "what was I doing" | activity-summary | start_time, end_time (then drill into search-content) |
|
|
461
|
+
| "what was I reading/looking at" | search-content | content_type:"all", start_time |
|
|
462
|
+
|
|
463
|
+
## Behavior Rules
|
|
464
|
+
- Act immediately on clear requests. NEVER ask "what time range?" or "which content type?" when the intent is obvious.
|
|
465
|
+
- If search returns empty, silently retry with wider time range or fewer filters. Do NOT ask the user what to change.
|
|
466
|
+
- For meetings: ALWAYS use content_type:"audio" and do NOT use the q param. Transcriptions are noisy — q filters too aggressively and misses relevant content.
|
|
467
|
+
|
|
427
468
|
## search-content
|
|
428
469
|
| Parameter | Description | Default |
|
|
429
470
|
|-----------|-------------|---------|
|
|
@@ -449,6 +490,19 @@ Screenpipe captures four types of data:
|
|
|
449
490
|
4. **Fetch frame-context** for URLs and accessibility tree of specific frames
|
|
450
491
|
5. **Screenshots** (include_frames=true) only when text isn't enough
|
|
451
492
|
|
|
493
|
+
## Chat History
|
|
494
|
+
Previous screenpipe chat conversations are stored as individual JSON files in ~/.screenpipe/chats/{conversation-id}.json
|
|
495
|
+
Each file contains: id, title, messages[], createdAt, updatedAt. You can read these files to reference or search previous conversations.
|
|
496
|
+
|
|
497
|
+
## Speaker Management
|
|
498
|
+
screenpipe auto-identifies speakers in audio. API endpoints for managing them:
|
|
499
|
+
- \`GET /speakers/unnamed?limit=10\` — list unnamed speakers
|
|
500
|
+
- \`GET /speakers/search?name=John\` — search by name
|
|
501
|
+
- \`POST /speakers/update\` with \`{"id": 5, "name": "John"}\` — rename a speaker
|
|
502
|
+
- \`POST /speakers/merge\` with \`{"speaker_to_keep_id": 1, "speaker_to_merge_id": 2}\` — merge duplicates
|
|
503
|
+
- \`GET /speakers/similar?speaker_id=5\` — find similar speakers for merging
|
|
504
|
+
- \`POST /speakers/reassign\` — reassign audio chunk to different speaker
|
|
505
|
+
|
|
452
506
|
## Tips
|
|
453
507
|
1. Read screenpipe://context first to get current timestamps
|
|
454
508
|
2. Use activity-summary before search-content for broad overview questions
|
|
@@ -521,6 +575,491 @@ When showing search results to users, create clickable links so they can jump to
|
|
|
521
575
|
};
|
|
522
576
|
}
|
|
523
577
|
|
|
578
|
+
case "screenpipe://pipe-creation-guide":
|
|
579
|
+
return {
|
|
580
|
+
contents: [
|
|
581
|
+
{
|
|
582
|
+
uri,
|
|
583
|
+
mimeType: "text/markdown",
|
|
584
|
+
text: `# Screenpipe Pipe Creation Guide
|
|
585
|
+
|
|
586
|
+
## What is a pipe?
|
|
587
|
+
|
|
588
|
+
A pipe is a scheduled AI agent defined as a single markdown file: \`~/.screenpipe/pipes/{name}/pipe.md\`
|
|
589
|
+
Every N minutes, screenpipe runs a coding agent (like pi or claude-code) with the pipe's prompt.
|
|
590
|
+
The agent can query your screen data, write files, call external APIs, send notifications, etc.
|
|
591
|
+
|
|
592
|
+
## pipe.md format
|
|
593
|
+
|
|
594
|
+
The file starts with YAML frontmatter on the very first line (no blank lines before it), then the prompt body:
|
|
595
|
+
|
|
596
|
+
\`\`\`markdown
|
|
597
|
+
---
|
|
598
|
+
schedule: every 30m
|
|
599
|
+
enabled: true
|
|
600
|
+
---
|
|
601
|
+
|
|
602
|
+
Your prompt instructions here...
|
|
603
|
+
\`\`\`
|
|
604
|
+
|
|
605
|
+
### Config fields
|
|
606
|
+
|
|
607
|
+
| Field | Values | Description |
|
|
608
|
+
|-------|--------|-------------|
|
|
609
|
+
| \`schedule\` | \`every 30m\`, \`every 1h\`, \`every day at 9am\`, \`every monday at 9am\`, \`manual\`, or cron: \`*/30 * * * *\` | When to run |
|
|
610
|
+
| \`enabled\` | \`true\` / \`false\` | Whether the pipe is active |
|
|
611
|
+
| \`preset\` | AI preset name (e.g. \`Oai\`) | Which AI model to use |
|
|
612
|
+
| \`history\` | \`true\` / \`false\` | Include previous output as context |
|
|
613
|
+
| \`connections\` | list of connection IDs | Required integrations (e.g. \`obsidian\`, \`telegram\`) |
|
|
614
|
+
|
|
615
|
+
## Context header
|
|
616
|
+
|
|
617
|
+
Before execution, screenpipe prepends a context header to the prompt with:
|
|
618
|
+
- Time range (start/end timestamps based on the schedule interval)
|
|
619
|
+
- Current date and user's timezone
|
|
620
|
+
- Screenpipe API base URL (http://localhost:3030)
|
|
621
|
+
- Output directory
|
|
622
|
+
|
|
623
|
+
The AI agent uses this context to query the right time range. No template variables needed in the prompt.
|
|
624
|
+
|
|
625
|
+
## Screenpipe search API
|
|
626
|
+
|
|
627
|
+
The agent queries screen data via the local REST API:
|
|
628
|
+
|
|
629
|
+
\`\`\`
|
|
630
|
+
curl "http://localhost:3030/search?limit=20&content_type=all&start_time=<ISO8601>&end_time=<ISO8601>"
|
|
631
|
+
\`\`\`
|
|
632
|
+
|
|
633
|
+
### Query parameters
|
|
634
|
+
|
|
635
|
+
| Parameter | Description |
|
|
636
|
+
|-----------|-------------|
|
|
637
|
+
| \`q\` | Text search query (optional — skip for audio, transcriptions are noisy) |
|
|
638
|
+
| \`content_type\` | \`all\`, \`ocr\`, \`audio\`, \`input\`, \`accessibility\` (prefer \`all\` or \`accessibility\`) |
|
|
639
|
+
| \`limit\` | Max results (default 20) |
|
|
640
|
+
| \`offset\` | Pagination offset |
|
|
641
|
+
| \`start_time\` / \`end_time\` | ISO 8601 timestamps or relative (\`1h ago\`, \`now\`) |
|
|
642
|
+
| \`app_name\` | Filter by app (e.g. \`Google Chrome\`, \`Slack\`) |
|
|
643
|
+
| \`window_name\` | Filter by window title |
|
|
644
|
+
| \`browser_url\` | Filter by URL |
|
|
645
|
+
| \`min_length\` / \`max_length\` | Filter by text length |
|
|
646
|
+
| \`speaker_name\` | Filter audio by speaker |
|
|
647
|
+
|
|
648
|
+
Other useful endpoints:
|
|
649
|
+
- \`GET /activity-summary?start_time=...&end_time=...\` — lightweight overview (~200 tokens)
|
|
650
|
+
- \`GET /elements?q=...&role=AXButton&start_time=...\` — UI elements
|
|
651
|
+
- \`GET /connections/{id}\` — get integration credentials (telegram, slack, obsidian, etc.)
|
|
652
|
+
- \`POST /raw_sql\` — run SQL queries (always include LIMIT)
|
|
653
|
+
|
|
654
|
+
Full API reference: read the \`screenpipe://api-reference\` resource.
|
|
655
|
+
|
|
656
|
+
## Installing and running
|
|
657
|
+
|
|
658
|
+
After creating the pipe.md file:
|
|
659
|
+
|
|
660
|
+
\`\`\`bash
|
|
661
|
+
bunx screenpipe@latest pipe install ~/.screenpipe/pipes/my-pipe
|
|
662
|
+
bunx screenpipe@latest pipe enable my-pipe
|
|
663
|
+
bunx screenpipe@latest pipe run my-pipe # test immediately
|
|
664
|
+
\`\`\`
|
|
665
|
+
|
|
666
|
+
## Example pipes
|
|
667
|
+
|
|
668
|
+
### Daily recap (manual trigger)
|
|
669
|
+
\`\`\`markdown
|
|
670
|
+
---
|
|
671
|
+
schedule: manual
|
|
672
|
+
enabled: true
|
|
673
|
+
---
|
|
674
|
+
|
|
675
|
+
Analyze my screen and audio recordings from today (last 16 hours). Use limit=10 per search, max 5 searches total.
|
|
676
|
+
|
|
677
|
+
## Summary
|
|
678
|
+
One sentence: what I mainly did today.
|
|
679
|
+
|
|
680
|
+
## Accomplishments
|
|
681
|
+
- Top 3 things I finished, with timestamps
|
|
682
|
+
|
|
683
|
+
## Key Moments
|
|
684
|
+
- Important things I saw, said, or heard
|
|
685
|
+
|
|
686
|
+
## Unfinished Work
|
|
687
|
+
- What I should continue tomorrow
|
|
688
|
+
\`\`\`
|
|
689
|
+
|
|
690
|
+
### Obsidian sync (every hour)
|
|
691
|
+
\`\`\`markdown
|
|
692
|
+
---
|
|
693
|
+
schedule: every 1h
|
|
694
|
+
enabled: true
|
|
695
|
+
connections:
|
|
696
|
+
- obsidian
|
|
697
|
+
---
|
|
698
|
+
|
|
699
|
+
Sync screenpipe activity to Obsidian vault as a daily note.
|
|
700
|
+
|
|
701
|
+
1. Get vault path from GET http://localhost:3030/connections/obsidian
|
|
702
|
+
2. Read existing daily note (merge into it)
|
|
703
|
+
3. Query search API in 30-minute chunks with min_length=50
|
|
704
|
+
4. Synthesize activities, extract action items, write note
|
|
705
|
+
\`\`\`
|
|
706
|
+
|
|
707
|
+
### Slack standup (every weekday at 9am)
|
|
708
|
+
\`\`\`markdown
|
|
709
|
+
---
|
|
710
|
+
schedule: every weekday at 9am
|
|
711
|
+
enabled: true
|
|
712
|
+
connections:
|
|
713
|
+
- slack
|
|
714
|
+
---
|
|
715
|
+
|
|
716
|
+
Generate standup update from yesterday's activity and post to Slack.
|
|
717
|
+
|
|
718
|
+
1. Query activity-summary for yesterday
|
|
719
|
+
2. Search for key accomplishments and blockers
|
|
720
|
+
3. Format as: Done / Doing / Blocked
|
|
721
|
+
4. POST to Slack webhook from GET http://localhost:3030/connections/slack
|
|
722
|
+
\`\`\`
|
|
723
|
+
|
|
724
|
+
## Optimization tips
|
|
725
|
+
|
|
726
|
+
- Be specific about expected output format
|
|
727
|
+
- Give step-by-step instructions
|
|
728
|
+
- Add error handling: "if API returns empty, try content_type=accessibility instead of ocr"
|
|
729
|
+
- Add validation: "before writing, verify you have at least 3 entries"
|
|
730
|
+
- Specify exact file paths, API parameters, output structure
|
|
731
|
+
- Keep search limit low (10-20) and use time ranges from the context header
|
|
732
|
+
- Use \`min_length=50\` to skip noisy OCR fragments`,
|
|
733
|
+
},
|
|
734
|
+
],
|
|
735
|
+
};
|
|
736
|
+
|
|
737
|
+
case "screenpipe://api-reference":
|
|
738
|
+
return {
|
|
739
|
+
contents: [
|
|
740
|
+
{
|
|
741
|
+
uri,
|
|
742
|
+
mimeType: "text/markdown",
|
|
743
|
+
text: `# Screenpipe REST API Reference
|
|
744
|
+
|
|
745
|
+
Local REST API at \`http://localhost:3030\`. Full reference (60+ endpoints): https://docs.screenpi.pe/llms-full.txt
|
|
746
|
+
|
|
747
|
+
## Shell
|
|
748
|
+
|
|
749
|
+
- **macOS/Linux** → \`bash\`, \`curl\`
|
|
750
|
+
- **Windows** → \`powershell\`, \`curl.exe\` (not the alias)
|
|
751
|
+
|
|
752
|
+
## Context Window Protection
|
|
753
|
+
|
|
754
|
+
API responses can be large. Always write curl output to a file first (\`curl ... -o /tmp/sp_result.json\`), check size (\`wc -c\`), and if over 5KB read only the first 50-100 lines. Extract what you need with \`jq\`. NEVER dump full large responses into context.
|
|
755
|
+
|
|
756
|
+
---
|
|
757
|
+
|
|
758
|
+
## 1. Search — \`GET /search\`
|
|
759
|
+
|
|
760
|
+
\`\`\`bash
|
|
761
|
+
curl "http://localhost:3030/search?q=QUERY&content_type=all&limit=10&start_time=1h%20ago"
|
|
762
|
+
\`\`\`
|
|
763
|
+
|
|
764
|
+
### Parameters
|
|
765
|
+
|
|
766
|
+
| Parameter | Type | Required | Description |
|
|
767
|
+
|-----------|------|----------|-------------|
|
|
768
|
+
| \`q\` | string | No | Keywords. Do NOT use for audio — transcriptions are noisy. |
|
|
769
|
+
| \`content_type\` | string | No | \`all\` (default), \`ocr\`, \`audio\`, \`input\`, \`accessibility\` |
|
|
770
|
+
| \`limit\` | integer | No | Max 1-20. Default: 10 |
|
|
771
|
+
| \`offset\` | integer | No | Pagination. Default: 0 |
|
|
772
|
+
| \`start_time\` | ISO 8601 or relative | **Yes** | \`2024-01-15T10:00:00Z\` or \`16h ago\`, \`2d ago\`, \`30m ago\` |
|
|
773
|
+
| \`end_time\` | ISO 8601 or relative | No | Defaults to now. \`now\`, \`1h ago\` |
|
|
774
|
+
| \`app_name\` | string | No | e.g. "Google Chrome", "Slack", "zoom.us" |
|
|
775
|
+
| \`window_name\` | string | No | Window title substring |
|
|
776
|
+
| \`speaker_name\` | string | No | Filter audio by speaker (case-insensitive partial) |
|
|
777
|
+
| \`focused\` | boolean | No | Only focused windows |
|
|
778
|
+
|
|
779
|
+
### Critical Rules
|
|
780
|
+
|
|
781
|
+
1. **ALWAYS include \`start_time\`** — queries without time bounds WILL timeout
|
|
782
|
+
2. **Start with 1-2 hour ranges** — expand only if no results
|
|
783
|
+
3. **Use \`app_name\`** when user mentions a specific app
|
|
784
|
+
4. **"recent"** = 30 min. **"today"** = since midnight. **"yesterday"** = yesterday's range
|
|
785
|
+
|
|
786
|
+
### Response Format
|
|
787
|
+
|
|
788
|
+
\`\`\`json
|
|
789
|
+
{
|
|
790
|
+
"data": [
|
|
791
|
+
{"type": "OCR", "content": {"frame_id": 12345, "text": "...", "timestamp": "...", "app_name": "Chrome"}},
|
|
792
|
+
{"type": "Audio", "content": {"chunk_id": 678, "transcription": "...", "timestamp": "...", "speaker": {"name": "John"}}},
|
|
793
|
+
{"type": "UI", "content": {"id": 999, "text": "Clicked Submit", "timestamp": "...", "app_name": "Safari"}}
|
|
794
|
+
],
|
|
795
|
+
"pagination": {"limit": 10, "offset": 0, "total": 42}
|
|
796
|
+
}
|
|
797
|
+
\`\`\`
|
|
798
|
+
|
|
799
|
+
---
|
|
800
|
+
|
|
801
|
+
## 2. Activity Summary — \`GET /activity-summary\`
|
|
802
|
+
|
|
803
|
+
\`\`\`bash
|
|
804
|
+
curl "http://localhost:3030/activity-summary?start_time=1h%20ago&end_time=now"
|
|
805
|
+
\`\`\`
|
|
806
|
+
|
|
807
|
+
Returns app usage with \`active_minutes\`, first/last seen, recent texts, audio summary. ~200-500 tokens. Best starting point.
|
|
808
|
+
|
|
809
|
+
---
|
|
810
|
+
|
|
811
|
+
## 3. Elements — \`GET /elements\`
|
|
812
|
+
|
|
813
|
+
Lightweight FTS search across UI elements (~100-500 bytes each).
|
|
814
|
+
|
|
815
|
+
\`\`\`bash
|
|
816
|
+
curl "http://localhost:3030/elements?q=Submit&role=AXButton&start_time=1h%20ago&limit=10"
|
|
817
|
+
\`\`\`
|
|
818
|
+
|
|
819
|
+
Parameters: \`q\`, \`frame_id\`, \`source\` (\`accessibility\`|\`ocr\`), \`role\`, \`start_time\`, \`end_time\`, \`app_name\`, \`limit\`, \`offset\`.
|
|
820
|
+
|
|
821
|
+
### Frame Context — \`GET /frames/{id}/context\`
|
|
822
|
+
|
|
823
|
+
Returns accessibility text, parsed nodes, and extracted URLs for a frame.
|
|
824
|
+
|
|
825
|
+
Common roles: \`AXButton\`, \`AXStaticText\`, \`AXLink\`, \`AXTextField\`, \`AXTextArea\`, \`AXMenuItem\`, \`AXCheckBox\`
|
|
826
|
+
|
|
827
|
+
---
|
|
828
|
+
|
|
829
|
+
## 4. Frames — \`GET /frames/{frame_id}\`
|
|
830
|
+
|
|
831
|
+
Returns raw PNG screenshot. Never fetch more than 2-3 per query.
|
|
832
|
+
|
|
833
|
+
---
|
|
834
|
+
|
|
835
|
+
## 5. Media Export — \`POST /frames/export\`
|
|
836
|
+
|
|
837
|
+
\`\`\`bash
|
|
838
|
+
curl -X POST http://localhost:3030/frames/export \\
|
|
839
|
+
-H "Content-Type: application/json" \\
|
|
840
|
+
-d '{"start_time": "5m ago", "end_time": "now", "fps": 1.0}'
|
|
841
|
+
\`\`\`
|
|
842
|
+
|
|
843
|
+
FPS guidelines: 5min→1.0, 30min→0.5, 1h→0.2, 2h+→0.1. Max 10,000 frames.
|
|
844
|
+
|
|
845
|
+
---
|
|
846
|
+
|
|
847
|
+
## 6. Retranscribe — \`POST /audio/retranscribe\`
|
|
848
|
+
|
|
849
|
+
\`\`\`bash
|
|
850
|
+
curl -X POST http://localhost:3030/audio/retranscribe \\
|
|
851
|
+
-H "Content-Type: application/json" \\
|
|
852
|
+
-d '{"start": "1h ago", "end": "now"}'
|
|
853
|
+
\`\`\`
|
|
854
|
+
|
|
855
|
+
Optional: \`engine\`, \`vocabulary\` (array of \`{"word": "...", "replacement": "..."}\`), \`prompt\` (topic context).
|
|
856
|
+
|
|
857
|
+
---
|
|
858
|
+
|
|
859
|
+
## 7. Raw SQL — \`POST /raw_sql\`
|
|
860
|
+
|
|
861
|
+
\`\`\`bash
|
|
862
|
+
curl -X POST http://localhost:3030/raw_sql \\
|
|
863
|
+
-H "Content-Type: application/json" \\
|
|
864
|
+
-d '{"query": "SELECT ... LIMIT 100"}'
|
|
865
|
+
\`\`\`
|
|
866
|
+
|
|
867
|
+
Every SELECT needs LIMIT. Always filter by time. Read-only.
|
|
868
|
+
|
|
869
|
+
### Schema
|
|
870
|
+
|
|
871
|
+
| Table | Key Columns | Time Column |
|
|
872
|
+
|-------|-------------|-------------|
|
|
873
|
+
| \`frames\` | \`app_name\`, \`window_name\`, \`browser_url\`, \`focused\` | \`timestamp\` |
|
|
874
|
+
| \`ocr_text\` | \`text\`, \`app_name\`, \`window_name\` | join via \`frame_id\` |
|
|
875
|
+
| \`elements\` | \`source\`, \`role\`, \`text\` | join via \`frame_id\` |
|
|
876
|
+
| \`audio_transcriptions\` | \`transcription\`, \`device\`, \`speaker_id\`, \`is_input_device\` | \`timestamp\` |
|
|
877
|
+
| \`speakers\` | \`name\`, \`metadata\` | — |
|
|
878
|
+
| \`ui_events\` | \`event_type\`, \`app_name\`, \`window_title\`, \`browser_url\` | \`timestamp\` |
|
|
879
|
+
| \`accessibility\` | \`app_name\`, \`window_name\`, \`text_content\` | \`timestamp\` |
|
|
880
|
+
|
|
881
|
+
### Example Queries
|
|
882
|
+
|
|
883
|
+
\`\`\`sql
|
|
884
|
+
-- Most used apps (last 24h)
|
|
885
|
+
SELECT app_name, COUNT(*) as frames FROM frames
|
|
886
|
+
WHERE timestamp > datetime('now', '-24 hours') AND app_name IS NOT NULL
|
|
887
|
+
GROUP BY app_name ORDER BY frames DESC LIMIT 20
|
|
888
|
+
|
|
889
|
+
-- Speaker stats
|
|
890
|
+
SELECT COALESCE(NULLIF(s.name, ''), 'Unknown') as speaker, COUNT(*) as segments
|
|
891
|
+
FROM audio_transcriptions at LEFT JOIN speakers s ON at.speaker_id = s.id
|
|
892
|
+
WHERE at.timestamp > datetime('now', '-24 hours')
|
|
893
|
+
GROUP BY at.speaker_id ORDER BY segments DESC LIMIT 20
|
|
894
|
+
\`\`\`
|
|
895
|
+
|
|
896
|
+
---
|
|
897
|
+
|
|
898
|
+
## 8. Connections — \`GET /connections\`
|
|
899
|
+
|
|
900
|
+
\`\`\`bash
|
|
901
|
+
curl http://localhost:3030/connections # List all
|
|
902
|
+
curl http://localhost:3030/connections/telegram # Get credentials
|
|
903
|
+
\`\`\`
|
|
904
|
+
|
|
905
|
+
Services: Telegram (\`bot_token\` + \`chat_id\`), Slack (\`webhook_url\`), Discord (\`webhook_url\`), Todoist (\`api_token\`), Teams (\`webhook_url\`), Email (SMTP config).
|
|
906
|
+
|
|
907
|
+
---
|
|
908
|
+
|
|
909
|
+
## 9. Speakers
|
|
910
|
+
|
|
911
|
+
\`\`\`bash
|
|
912
|
+
curl "http://localhost:3030/speakers/search?name=John"
|
|
913
|
+
curl "http://localhost:3030/speakers/unnamed?limit=10"
|
|
914
|
+
curl -X POST http://localhost:3030/speakers/update -H "Content-Type: application/json" -d '{"id": 5, "name": "John"}'
|
|
915
|
+
curl -X POST http://localhost:3030/speakers/merge -H "Content-Type: application/json" -d '{"speaker_to_keep_id": 1, "speaker_to_merge_id": 2}'
|
|
916
|
+
\`\`\`
|
|
917
|
+
|
|
918
|
+
---
|
|
919
|
+
|
|
920
|
+
## 10. Other Endpoints
|
|
921
|
+
|
|
922
|
+
\`\`\`bash
|
|
923
|
+
curl http://localhost:3030/health # Health check
|
|
924
|
+
curl http://localhost:3030/audio/list # Audio devices
|
|
925
|
+
curl http://localhost:3030/vision/list # Monitors
|
|
926
|
+
\`\`\`
|
|
927
|
+
|
|
928
|
+
## Pipes API
|
|
929
|
+
|
|
930
|
+
\`\`\`bash
|
|
931
|
+
curl http://localhost:3030/pipes/list # List all pipes
|
|
932
|
+
curl -X POST http://localhost:3030/pipes/enable -d '{"name":"..."}' # Enable
|
|
933
|
+
curl -X POST http://localhost:3030/pipes/disable -d '{"name":"..."}' # Disable
|
|
934
|
+
curl -X POST http://localhost:3030/pipes/run -d '{"name":"..."}' # Run once
|
|
935
|
+
curl "http://localhost:3030/pipes/{name}/executions?limit=5" # Execution history
|
|
936
|
+
\`\`\`
|
|
937
|
+
|
|
938
|
+
## Deep Links
|
|
939
|
+
|
|
940
|
+
\`\`\`markdown
|
|
941
|
+
[10:30 AM — Chrome](screenpipe://frame/12345) # OCR results (use frame_id)
|
|
942
|
+
[meeting at 3pm](screenpipe://timeline?timestamp=ISO8601) # Audio results (use timestamp)
|
|
943
|
+
\`\`\`
|
|
944
|
+
|
|
945
|
+
Only use IDs/timestamps from actual search results. Never fabricate.`,
|
|
946
|
+
},
|
|
947
|
+
],
|
|
948
|
+
};
|
|
949
|
+
|
|
950
|
+
case "screenpipe://cli-reference":
|
|
951
|
+
return {
|
|
952
|
+
contents: [
|
|
953
|
+
{
|
|
954
|
+
uri,
|
|
955
|
+
mimeType: "text/markdown",
|
|
956
|
+
text: `# Screenpipe CLI Reference
|
|
957
|
+
|
|
958
|
+
Use \`bunx screenpipe@latest\` to run CLI commands (or \`npx screenpipe@latest\`). No separate install needed.
|
|
959
|
+
|
|
960
|
+
## Shell
|
|
961
|
+
|
|
962
|
+
- **macOS/Linux** → \`bash\`
|
|
963
|
+
- **Windows** → \`powershell\`
|
|
964
|
+
|
|
965
|
+
---
|
|
966
|
+
|
|
967
|
+
## Pipe Management
|
|
968
|
+
|
|
969
|
+
Pipes are markdown-based AI automations. Each pipe lives at \`~/.screenpipe/pipes/<name>/pipe.md\`.
|
|
970
|
+
|
|
971
|
+
### Commands
|
|
972
|
+
|
|
973
|
+
\`\`\`bash
|
|
974
|
+
bunx screenpipe@latest pipe list # List all pipes (compact table)
|
|
975
|
+
bunx screenpipe@latest pipe enable <name> # Enable a pipe
|
|
976
|
+
bunx screenpipe@latest pipe disable <name> # Disable a pipe
|
|
977
|
+
bunx screenpipe@latest pipe run <name> # Run once immediately (for testing)
|
|
978
|
+
bunx screenpipe@latest pipe logs <name> # View execution logs
|
|
979
|
+
bunx screenpipe@latest pipe install <url-or-path> # Install from GitHub or local path
|
|
980
|
+
bunx screenpipe@latest pipe delete <name> # Delete a pipe
|
|
981
|
+
bunx screenpipe@latest pipe models list # View AI model presets
|
|
982
|
+
\`\`\`
|
|
983
|
+
|
|
984
|
+
### Creating a Pipe
|
|
985
|
+
|
|
986
|
+
Create \`~/.screenpipe/pipes/<name>/pipe.md\` with YAML frontmatter + prompt:
|
|
987
|
+
|
|
988
|
+
\`\`\`markdown
|
|
989
|
+
---
|
|
990
|
+
schedule: every 30m
|
|
991
|
+
enabled: true
|
|
992
|
+
preset: Oai
|
|
993
|
+
---
|
|
994
|
+
|
|
995
|
+
Your prompt instructions here. The AI agent executes this on schedule.
|
|
996
|
+
\`\`\`
|
|
997
|
+
|
|
998
|
+
**Schedule syntax**: \`every 30m\`, \`every 1h\`, \`every day at 9am\`, \`every monday at 9am\`, \`manual\`, or cron: \`*/30 * * * *\`
|
|
999
|
+
|
|
1000
|
+
**Config fields**: \`schedule\`, \`enabled\` (bool), \`preset\` (AI preset name), \`history\` (bool — include previous output), \`connections\` (list of required integrations)
|
|
1001
|
+
|
|
1002
|
+
After creating:
|
|
1003
|
+
\`\`\`bash
|
|
1004
|
+
bunx screenpipe@latest pipe install ~/.screenpipe/pipes/my-pipe
|
|
1005
|
+
bunx screenpipe@latest pipe enable my-pipe
|
|
1006
|
+
bunx screenpipe@latest pipe run my-pipe # test immediately
|
|
1007
|
+
\`\`\`
|
|
1008
|
+
|
|
1009
|
+
### Editing Config
|
|
1010
|
+
|
|
1011
|
+
Edit frontmatter in the pipe.md file directly, or via API:
|
|
1012
|
+
|
|
1013
|
+
\`\`\`bash
|
|
1014
|
+
curl -X POST http://localhost:3030/pipes/<name>/config \\
|
|
1015
|
+
-H "Content-Type: application/json" \\
|
|
1016
|
+
-d '{"config": {"schedule": "every 1h", "enabled": true}}'
|
|
1017
|
+
\`\`\`
|
|
1018
|
+
|
|
1019
|
+
### Rules
|
|
1020
|
+
|
|
1021
|
+
1. Use \`pipe list\` (not \`--json\`) — table output is compact
|
|
1022
|
+
2. Never dump full pipe JSON — can be 15MB+
|
|
1023
|
+
3. Check logs first when debugging: \`pipe logs <name>\`
|
|
1024
|
+
4. Use \`pipe run <name>\` to test before waiting for schedule
|
|
1025
|
+
|
|
1026
|
+
---
|
|
1027
|
+
|
|
1028
|
+
## Connection Management
|
|
1029
|
+
|
|
1030
|
+
Manage integrations (Telegram, Slack, Discord, Email, Todoist, Teams) from the CLI.
|
|
1031
|
+
|
|
1032
|
+
### Commands
|
|
1033
|
+
|
|
1034
|
+
\`\`\`bash
|
|
1035
|
+
bunx screenpipe@latest connection list # List all connections + status
|
|
1036
|
+
bunx screenpipe@latest connection list --json # JSON output
|
|
1037
|
+
bunx screenpipe@latest connection get <id> # Show saved credentials
|
|
1038
|
+
bunx screenpipe@latest connection set <id> key=val # Save credentials
|
|
1039
|
+
bunx screenpipe@latest connection test <id> # Test a connection
|
|
1040
|
+
bunx screenpipe@latest connection remove <id> # Remove credentials
|
|
1041
|
+
\`\`\`
|
|
1042
|
+
|
|
1043
|
+
### Examples
|
|
1044
|
+
|
|
1045
|
+
\`\`\`bash
|
|
1046
|
+
# Set up Telegram
|
|
1047
|
+
bunx screenpipe@latest connection set telegram bot_token=123456:ABC-DEF chat_id=5776185278
|
|
1048
|
+
|
|
1049
|
+
# Set up Slack webhook
|
|
1050
|
+
bunx screenpipe@latest connection set slack webhook_url=https://hooks.slack.com/services/...
|
|
1051
|
+
|
|
1052
|
+
# Verify it works
|
|
1053
|
+
bunx screenpipe@latest connection test telegram
|
|
1054
|
+
\`\`\`
|
|
1055
|
+
|
|
1056
|
+
Connection IDs: \`telegram\`, \`slack\`, \`discord\`, \`email\`, \`todoist\`, \`teams\`, \`google-calendar\`, \`apple-intelligence\`, \`openclaw\`, \`obsidian\`
|
|
1057
|
+
|
|
1058
|
+
Credentials are stored locally at \`~/.screenpipe/connections.json\`.`,
|
|
1059
|
+
},
|
|
1060
|
+
],
|
|
1061
|
+
};
|
|
1062
|
+
|
|
524
1063
|
default:
|
|
525
1064
|
throw new Error(`Unknown resource: ${uri}`);
|
|
526
1065
|
}
|
|
@@ -551,6 +1090,14 @@ const PROMPTS = [
|
|
|
551
1090
|
{ name: "hours", description: "Hours to look back (default: 3)", required: false },
|
|
552
1091
|
],
|
|
553
1092
|
},
|
|
1093
|
+
{
|
|
1094
|
+
name: "create-pipe",
|
|
1095
|
+
description: "Create a new screenpipe pipe (scheduled AI automation)",
|
|
1096
|
+
arguments: [
|
|
1097
|
+
{ name: "description", description: "What the pipe should do", required: true },
|
|
1098
|
+
{ name: "schedule", description: "Schedule (e.g., 'every 30m', 'every day at 9am', 'manual')", required: false },
|
|
1099
|
+
],
|
|
1100
|
+
},
|
|
554
1101
|
];
|
|
555
1102
|
|
|
556
1103
|
// List prompts handler
|
|
@@ -644,6 +1191,85 @@ Common meeting apps: zoom.us, Microsoft Teams, Google Meet, Slack`,
|
|
|
644
1191
|
};
|
|
645
1192
|
}
|
|
646
1193
|
|
|
1194
|
+
case "create-pipe": {
|
|
1195
|
+
const description = promptArgs?.description || "a useful automation";
|
|
1196
|
+
const schedule = promptArgs?.schedule || "every 30m";
|
|
1197
|
+
|
|
1198
|
+
return {
|
|
1199
|
+
description: `Create a new screenpipe pipe: ${description}`,
|
|
1200
|
+
messages: [
|
|
1201
|
+
{
|
|
1202
|
+
role: "user" as const,
|
|
1203
|
+
content: {
|
|
1204
|
+
type: "text" as const,
|
|
1205
|
+
text: `Create a new screenpipe pipe based on this description: "${description}"
|
|
1206
|
+
Schedule: ${schedule}
|
|
1207
|
+
|
|
1208
|
+
## How to create a pipe
|
|
1209
|
+
|
|
1210
|
+
A pipe is a TypeScript file that runs on a schedule or manually. It uses the screenpipe API to access screen/audio data and can send notifications, call AI, etc.
|
|
1211
|
+
|
|
1212
|
+
### Pipe structure
|
|
1213
|
+
\`\`\`typescript
|
|
1214
|
+
const pipe = () => import("https://raw.githubusercontent.com/nichochar/screenpipe/refs/heads/main/pipes/pipe-modules/pipe-core/index.ts");
|
|
1215
|
+
|
|
1216
|
+
async function main() {
|
|
1217
|
+
const sp = await pipe();
|
|
1218
|
+
|
|
1219
|
+
// Query recent screen/audio data
|
|
1220
|
+
const results = await sp.queryScreenpipe({
|
|
1221
|
+
q: "search term",
|
|
1222
|
+
contentType: "all", // "ocr" | "audio" | "all" | "ui"
|
|
1223
|
+
limit: 50,
|
|
1224
|
+
startTime: new Date(Date.now() - 30 * 60 * 1000).toISOString(),
|
|
1225
|
+
endTime: new Date().toISOString(),
|
|
1226
|
+
});
|
|
1227
|
+
|
|
1228
|
+
// Send notification
|
|
1229
|
+
await sp.sendDesktopNotification({ title: "Title", body: "Body" });
|
|
1230
|
+
|
|
1231
|
+
// Call AI (uses user's configured AI provider)
|
|
1232
|
+
const response = await sp.generateText({
|
|
1233
|
+
messages: [{ role: "user", content: "Analyze this data..." }],
|
|
1234
|
+
});
|
|
1235
|
+
}
|
|
1236
|
+
|
|
1237
|
+
main();
|
|
1238
|
+
\`\`\`
|
|
1239
|
+
|
|
1240
|
+
### Key APIs available in pipes
|
|
1241
|
+
- \`queryScreenpipe(params)\` - Search screen text (OCR/UI), audio transcriptions
|
|
1242
|
+
- \`sendDesktopNotification({ title, body })\` - System notifications
|
|
1243
|
+
- \`generateText({ messages, model? })\` - AI text generation
|
|
1244
|
+
- \`generateObject({ messages, schema, model? })\` - AI structured output
|
|
1245
|
+
- \`loadPipeConfig()\` - Load pipe configuration
|
|
1246
|
+
- \`fetch()\` - HTTP requests to external services
|
|
1247
|
+
|
|
1248
|
+
### pipe.json config
|
|
1249
|
+
\`\`\`json
|
|
1250
|
+
{
|
|
1251
|
+
"cron": "${schedule === "manual" ? "" : schedule.replace("every ", "*/").replace("m", " * * * *").replace("h", " * * *")}",
|
|
1252
|
+
"is_nextjs": false,
|
|
1253
|
+
"fields": [
|
|
1254
|
+
{ "name": "setting_name", "type": "string", "default": "value", "description": "Setting description" }
|
|
1255
|
+
]
|
|
1256
|
+
}
|
|
1257
|
+
\`\`\`
|
|
1258
|
+
|
|
1259
|
+
### Important notes
|
|
1260
|
+
- Use \`contentType: "ui"\` for accessibility/structured text, \`"ocr"\` for raw screen text
|
|
1261
|
+
- Always handle empty results gracefully
|
|
1262
|
+
- Use \`startTime\`/\`endTime\` to scope queries
|
|
1263
|
+
- Pipes run in Bun runtime with full TypeScript support
|
|
1264
|
+
- For scheduled pipes, keep execution fast (< 30s)
|
|
1265
|
+
|
|
1266
|
+
Create the pipe with the necessary files (pipe.ts and pipe.json). Follow the patterns above exactly.`,
|
|
1267
|
+
},
|
|
1268
|
+
},
|
|
1269
|
+
],
|
|
1270
|
+
};
|
|
1271
|
+
}
|
|
1272
|
+
|
|
647
1273
|
default:
|
|
648
1274
|
throw new Error(`Unknown prompt: ${name}`);
|
|
649
1275
|
}
|