stagent 0.3.6 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/README.md +70 -23
  2. package/dist/cli.js +44 -10
  3. package/docs/.last-generated +1 -1
  4. package/docs/features/chat.md +54 -49
  5. package/docs/features/schedules.md +38 -32
  6. package/docs/features/settings.md +105 -50
  7. package/docs/manifest.json +8 -8
  8. package/docs/superpowers/specs/2026-03-27-chat-screenshot-display-design.md +303 -0
  9. package/drizzle.config.ts +3 -1
  10. package/package.json +5 -1
  11. package/src/app/api/book/bookmarks/route.ts +73 -0
  12. package/src/app/api/book/progress/route.ts +79 -0
  13. package/src/app/api/book/regenerate/route.ts +111 -0
  14. package/src/app/api/book/stage/route.ts +13 -0
  15. package/src/app/api/chat/conversations/[id]/messages/route.ts +3 -2
  16. package/src/app/api/chat/conversations/[id]/respond/route.ts +19 -20
  17. package/src/app/api/chat/conversations/[id]/route.ts +2 -1
  18. package/src/app/api/chat/entities/search/route.ts +97 -0
  19. package/src/app/api/documents/[id]/file/route.ts +4 -1
  20. package/src/app/api/documents/[id]/route.ts +34 -2
  21. package/src/app/api/documents/route.ts +91 -0
  22. package/src/app/api/projects/[id]/route.ts +119 -9
  23. package/src/app/api/projects/__tests__/delete-project.test.ts +170 -0
  24. package/src/app/api/settings/browser-tools/route.ts +68 -0
  25. package/src/app/api/settings/runtime/route.ts +29 -8
  26. package/src/app/book/page.tsx +14 -0
  27. package/src/app/chat/page.tsx +7 -1
  28. package/src/app/globals.css +375 -0
  29. package/src/app/projects/[id]/page.tsx +31 -6
  30. package/src/app/settings/page.tsx +2 -0
  31. package/src/app/{playbook → user-guide}/[slug]/page.tsx +12 -2
  32. package/src/app/{playbook → user-guide}/page.tsx +2 -2
  33. package/src/app/workflows/[id]/page.tsx +28 -2
  34. package/src/components/book/book-reader.tsx +801 -0
  35. package/src/components/book/chapter-generation-bar.tsx +109 -0
  36. package/src/components/book/content-blocks.tsx +432 -0
  37. package/src/components/book/path-progress.tsx +33 -0
  38. package/src/components/book/path-selector.tsx +42 -0
  39. package/src/components/book/try-it-now.tsx +164 -0
  40. package/src/components/chat/chat-activity-indicator.tsx +92 -0
  41. package/src/components/chat/chat-command-popover.tsx +277 -0
  42. package/src/components/chat/chat-input.tsx +85 -10
  43. package/src/components/chat/chat-message-list.tsx +3 -0
  44. package/src/components/chat/chat-message.tsx +29 -7
  45. package/src/components/chat/chat-permission-request.tsx +5 -1
  46. package/src/components/chat/chat-question.tsx +3 -0
  47. package/src/components/chat/chat-shell.tsx +159 -24
  48. package/src/components/chat/conversation-list.tsx +8 -2
  49. package/src/components/chat/screenshot-gallery.tsx +96 -0
  50. package/src/components/monitoring/log-entry.tsx +61 -27
  51. package/src/components/playbook/adoption-heatmap.tsx +1 -1
  52. package/src/components/playbook/journey-card.tsx +1 -1
  53. package/src/components/playbook/playbook-card.tsx +1 -1
  54. package/src/components/playbook/playbook-detail-view.tsx +15 -5
  55. package/src/components/playbook/playbook-homepage.tsx +1 -1
  56. package/src/components/playbook/playbook-updated-badge.tsx +1 -1
  57. package/src/components/projects/project-detail.tsx +160 -27
  58. package/src/components/projects/project-form-sheet.tsx +6 -2
  59. package/src/components/projects/project-list.tsx +1 -1
  60. package/src/components/schedules/schedule-create-sheet.tsx +24 -330
  61. package/src/components/schedules/schedule-detail-sheet.tsx +37 -21
  62. package/src/components/schedules/schedule-edit-sheet.tsx +159 -0
  63. package/src/components/schedules/schedule-form.tsx +410 -0
  64. package/src/components/schedules/schedule-list.tsx +16 -0
  65. package/src/components/settings/browser-tools-section.tsx +247 -0
  66. package/src/components/settings/runtime-timeout-section.tsx +117 -37
  67. package/src/components/shared/app-sidebar.tsx +7 -1
  68. package/src/components/shared/command-palette.tsx +4 -33
  69. package/src/components/shared/screenshot-lightbox.tsx +151 -0
  70. package/src/hooks/use-caret-position.ts +104 -0
  71. package/src/hooks/use-chapter-generation.ts +255 -0
  72. package/src/hooks/use-chat-autocomplete.ts +290 -0
  73. package/src/lib/agents/__tests__/browser-mcp.test.ts +175 -0
  74. package/src/lib/agents/__tests__/claude-agent.test.ts +3 -0
  75. package/src/lib/agents/browser-mcp.ts +119 -0
  76. package/src/lib/agents/claude-agent.ts +78 -14
  77. package/src/lib/book/chapter-generator.ts +193 -0
  78. package/src/lib/book/chapter-mapping.ts +91 -0
  79. package/src/lib/book/content.ts +251 -0
  80. package/src/lib/book/markdown-parser.ts +317 -0
  81. package/src/lib/book/reading-paths.ts +82 -0
  82. package/src/lib/book/types.ts +152 -0
  83. package/src/lib/book/update-detector.ts +157 -0
  84. package/src/lib/chat/codex-engine.ts +537 -0
  85. package/src/lib/chat/command-data.ts +50 -0
  86. package/src/lib/chat/context-builder.ts +145 -7
  87. package/src/lib/chat/engine.ts +207 -49
  88. package/src/lib/chat/model-discovery.ts +13 -5
  89. package/src/lib/chat/permission-bridge.ts +14 -2
  90. package/src/lib/chat/slash-commands.ts +191 -0
  91. package/src/lib/chat/stagent-tools.ts +2 -0
  92. package/src/lib/chat/system-prompt.ts +16 -1
  93. package/src/lib/chat/tool-catalog.ts +185 -0
  94. package/src/lib/chat/tools/chat-history-tools.ts +177 -0
  95. package/src/lib/chat/tools/document-tools.ts +241 -0
  96. package/src/lib/chat/tools/settings-tools.ts +29 -3
  97. package/src/lib/chat/types.ts +19 -2
  98. package/src/lib/constants/settings.ts +5 -0
  99. package/src/lib/data/chat.ts +83 -2
  100. package/src/lib/data/clear.ts +24 -4
  101. package/src/lib/db/bootstrap.ts +29 -0
  102. package/src/lib/db/migrations/0012_add_screenshot_columns.sql +5 -0
  103. package/src/lib/db/schema.ts +37 -0
  104. package/src/lib/docs/types.ts +9 -0
  105. package/src/lib/screenshots/__tests__/persist.test.ts +104 -0
  106. package/src/lib/screenshots/persist.ts +114 -0
  107. package/src/lib/utils/stagent-paths.ts +4 -0
  108. package/src/app/api/{playbook → user-guide}/status/route.ts +0 -0
@@ -3,83 +3,138 @@ title: "Settings"
3
3
  category: "feature-reference"
4
4
  section: "settings"
5
5
  route: "/settings"
6
- tags: [settings, authentication, budget, permissions, data, providers, oauth, api-key, codex]
7
- features: ["tool-permission-persistence", "provider-runtime-abstraction", "spend-budget-guardrails", "tool-permission-presets", "openai-codex-app-server"]
8
- screengrabCount: 4
9
- lastUpdated: "2026-03-21"
6
+ tags: ["settings", "configuration", "auth", "runtime", "browser-tools", "permissions", "budget"]
7
+ features: ["session-management", "tool-permission-persistence", "tool-permission-presets", "browser-use", "spend-budget-guardrails", "settings-interactive-controls"]
8
+ screengrabCount: 5
9
+ lastUpdated: "2026-03-27"
10
10
  ---
11
11
 
12
12
  # Settings
13
13
 
14
- Configure authentication, budgets, tool permissions, and data management from a single settings page. Settings supports two provider runtimes -- Claude (Agent SDK with OAuth or API key) and Codex (App Server with WebSocket JSON-RPC) -- along with budget guardrails, permission presets with risk-level badges, and data management tools for clearing or exporting workspace data.
14
+ The Settings page is the central configuration hub for Stagent. From a single scrollable page you can manage authentication for both Claude and Codex runtimes, tune how long agents are allowed to run, pick a default chat model, enable browser automation, set monthly cost caps, choose permission presets, review individually approved tools, and reset workspace data. Each section saves changes immediately with confirmation feedback.
15
15
 
16
16
  ## Screenshots
17
17
 
18
- ![Settings page overview with authentication section](../screengrabs/settings-list.png)
19
- *The settings page showing the authentication section with provider configuration, OAuth vs API key selection, and connection test.*
18
+ ![Settings page overview showing authentication and runtime sections](../screengrabs/settings-list.png)
19
+ *Full settings page with authentication, Codex runtime, chat defaults, runtime configuration, and browser tools sections visible.*
20
20
 
21
- ![Budget settings section](../screengrabs/settings-budget.png)
22
- *Budget configuration with overall spend cap, monthly split, OAuth billing indicator, and current pacing meter.*
21
+ ![Browser tools section with Chrome DevTools and Playwright toggles](../screengrabs/settings-browser-tools.png)
22
+ *Browser Tools section showing independent toggles for Chrome DevTools and Playwright browser automation.*
23
23
 
24
- ![Permission presets section](../screengrabs/settings-presets.png)
25
- *Tool permission presets showing Read Only, Git Safe, and Full Auto tiers with risk-level badges.*
24
+ ![Budget guardrails section with spend caps and split configuration](../screengrabs/settings-budget.png)
25
+ *Cost and Usage Guardrails with overall spend cap, monthly split, billing indicator, and pacing meter.*
26
26
 
27
- ![Data management section](../screengrabs/settings-data.png)
28
- *Data management section with clear data and export options.*
27
+ ![Permission presets with risk badges and toggle controls](../screengrabs/settings-presets.png)
28
+ *Permission Presets showing Read Only, Git Safe, and Full Auto tiers with color-coded risk badges.*
29
+
30
+ ![Data management section with clear and populate options](../screengrabs/settings-data.png)
31
+ *Data Management section for resetting or populating workspace data.*
29
32
 
30
33
  ## Key Features
31
34
 
32
35
  ### Authentication
33
- Configure how Stagent authenticates with provider runtimes. For Claude, choose between OAuth (uses your Max subscription with no additional API charges) and API Key (uses your Anthropic API key from `.env.local`). For Codex, configure the App Server connection endpoint. A connection test button validates that your credentials and endpoints are working.
34
36
 
35
- ### Provider Runtime Abstraction
36
- Two provider runtimes are supported out of the box. Claude uses the Anthropic Agent SDK and supports both OAuth and API key authentication modes. Codex connects via the App Server using WebSocket JSON-RPC for real-time communication. The runtime abstraction means tasks and profiles work identically regardless of which provider executes them.
37
+ Choose how Stagent connects to Claude. **OAuth** uses your existing Max subscription at no additional API cost. **API Key** uses the Anthropic key stored in your environment. A **Test Connection** button validates whichever method you select. A separate section configures the Codex App Server endpoint for tasks that run through the Codex runtime.
38
+
39
+ ### Runtime Configuration
40
+
41
+ Two controls govern how agents behave during execution:
42
+
43
+ - **SDK Timeout** -- how many seconds an individual agent call is allowed to run before timing out. Lower values return faster; higher values give the agent more time for complex reasoning.
44
+ - **Max Turns** -- how many back-and-forth tool-use cycles the agent can perform in a single run. Fewer turns suit quick lookups; more turns allow extended multi-step work.
45
+
46
+ Both controls are planned for an upgrade to interactive sliders with contextual labels and recommended-range indicators (see the Settings Interactive Controls feature, currently pending).
47
+
48
+ ### Chat Defaults
49
+
50
+ Pick the default model for new chat conversations. The selector shows available Claude and Codex models with relative cost tiers so you can balance capability against spend before starting a conversation.
51
+
52
+ ### Browser Tools
37
53
 
38
- ### Budget Configuration
39
- Set an overall spend cap to limit total workspace costs. Configure monthly splits to distribute the budget across billing periods. The OAuth billing indicator shows whether the current authentication method incurs API charges. A pacing meter visualizes current spend against the budget, with color-coded status for healthy, warning, and critical spend levels.
54
+ Enable browser automation for chat and task execution without leaving Stagent. Two independent toggles control complementary capabilities:
55
+
56
+ - **Chrome DevTools** -- connects to a running Chrome window. Useful for debugging your own app, inspecting network traffic, running performance audits, and taking screenshots of live pages.
57
+ - **Playwright** -- launches its own headless browser. Useful for autonomous web research, page scraping, structured analysis, and cross-browser testing.
58
+
59
+ When enabled, read-only browser actions (screenshots, page snapshots, console reads) are auto-approved. Actions that change page state (clicking, typing, navigating) go through the normal permission approval flow. Both toggles are off by default -- no background processes are spawned when unused.
60
+
61
+ ### Cost and Usage Guardrails
62
+
63
+ Set spend caps to prevent runaway costs from autonomous agent work:
64
+
65
+ - **Overall spend cap** -- a hard monthly ceiling across all providers.
66
+ - **Monthly split** -- distribute the budget across billing periods.
67
+ - **Per-provider caps** -- optional daily and monthly limits for Claude and Codex independently, with advanced token-level overrides.
68
+
69
+ A pacing meter shows current spend against the cap with color-coded health (green, amber, red). When usage crosses 80% of a configured cap, an inbox notification is sent. After the cap is exceeded, new agent work is blocked with an explicit message -- already-running tasks are allowed to finish. The next reset time is displayed so you know when the budget window rolls over.
40
70
 
41
71
  ### Permission Presets
42
- Three permission tiers control what tools agents are allowed to use. **Read Only** grants access to file reading and search tools with no write permissions -- the lowest risk tier. **Git Safe** adds version-controlled write operations (file edits, git commits) with moderate risk. **Full Auto** enables all tools including shell commands, network access, and file system writes -- the highest risk tier. Each tier displays a risk badge for clear visibility.
43
72
 
44
- ### Tool Permission Persistence
45
- The "Always Allow" feature remembers tool permission decisions across sessions. When you approve a tool for a given permission tier, the decision is stored in the settings table so agents do not prompt for the same permission again.
73
+ Three one-click bundles set tool permissions in bulk, reducing first-run friction:
74
+
75
+ | Preset | What it allows | Risk |
76
+ |--------|---------------|------|
77
+ | **Read Only** | File reading, search, directory listing | Lowest |
78
+ | **Git Safe** | Everything in Read Only plus file edits and git commands | Medium |
79
+ | **Full Auto** | All tools except direct user questions | Highest |
80
+
81
+ Each preset shows a color-coded risk badge. Presets are additive -- enabling Git Safe automatically includes Read Only tools. Disabling a preset removes only its unique additions without affecting tools you approved individually.
82
+
83
+ ### Tool Permissions
84
+
85
+ Below the presets, a list shows every individually approved tool pattern. Patterns follow the format used by Claude Code:
86
+
87
+ - **Tool-level**: `Read`, `Write` -- blanket approval for any invocation.
88
+ - **Pattern-level**: `Bash(command:git *)` -- approve only when the command starts with `git`.
89
+ - **Browser tools**: `mcp__playwright__browser_snapshot` -- approve a specific browser action.
90
+
91
+ Each pattern has a **Revoke** button. Revoking a pattern means the agent will prompt for permission again the next time it tries to use that tool. The special `AskUserQuestion` tool is never auto-approved regardless of presets or saved patterns.
46
92
 
47
93
  ### Data Management
48
- Clear workspace data or export it for backup. The clear data function removes tasks, logs, documents, and other workspace content while preserving settings. Export creates a snapshot of your workspace data for external storage or migration.
94
+
95
+ Two operations for managing workspace content:
96
+
97
+ - **Clear Data** -- removes tasks, logs, documents, schedules, and other workspace content. Settings and permissions are preserved.
98
+ - **Populate Sample Data** -- seeds the workspace with example projects, tasks, and documents for exploration or demo purposes.
49
99
 
50
100
  ## How To
51
101
 
52
- ### Configure Claude Authentication
53
- 1. Navigate to `/settings` from the sidebar under the **Configure** group.
54
- 2. In the **Authentication** section, select either **OAuth** or **API Key** for the Claude runtime.
55
- 3. For OAuth, ensure you have an active Claude Max subscription. For API Key, verify that `ANTHROPIC_API_KEY` is set in `.env.local`.
56
- 4. Click **Test Connection** to validate the configuration.
57
-
58
- ### Set Up Codex Runtime
59
- 1. Open the **Authentication** section in settings.
60
- 2. Locate the Codex App Server configuration.
61
- 3. Enter the WebSocket endpoint for the Codex App Server.
62
- 4. Test the connection to verify connectivity.
63
-
64
- ### Configure Budget Guardrails
65
- 1. Navigate to the **Budget** section in settings.
66
- 2. Enter the overall spend cap amount.
67
- 3. Set the monthly split to distribute the budget.
68
- 4. Monitor the pacing meter to track spend against the cap.
69
- 5. Alerts will notify you when spend approaches the limit.
70
-
71
- ### Choose a Permission Preset
72
- 1. Open the **Permission Presets** section in settings.
73
- 2. Review the three tiers: Read Only, Git Safe, and Full Auto.
74
- 3. Note the risk badge on each tier to understand the permission scope.
75
- 4. Select the tier that matches your risk tolerance for agent operations.
102
+ ### Enable Browser Automation
103
+
104
+ 1. Open **Settings** from the sidebar (under the Configure group).
105
+ 2. Scroll to the **Browser Tools** section.
106
+ 3. Toggle **Chrome DevTools** on if you want to debug pages in your running Chrome browser.
107
+ 4. Toggle **Playwright** on if you want agents to launch their own headless browser for research and scraping.
108
+ 5. Both can be enabled at the same time. Changes take effect immediately for the next chat message or task execution.
109
+
110
+ ### Set a Monthly Budget
111
+
112
+ 1. Open **Settings** and scroll to **Cost and Usage Guardrails**.
113
+ 2. Enter an overall monthly spend cap (in dollars).
114
+ 3. Optionally set per-provider daily or monthly caps for finer control.
115
+ 4. Watch the pacing meter to track spend throughout the month.
116
+ 5. You will receive an inbox notification at 80% usage and a hard stop at 100%.
117
+
118
+ ### Configure Permission Presets
119
+
120
+ 1. Open **Settings** and scroll to **Permission Presets**.
121
+ 2. Review the three tiers and their risk badges.
122
+ 3. Toggle on the preset that matches your comfort level -- Read Only for cautious use, Git Safe for development workflows, Full Auto for fully autonomous operation.
123
+ 4. The preset's tools are added to your approved list immediately. You can still revoke individual tools below if needed.
124
+
125
+ ### Change the Default Chat Model
126
+
127
+ 1. Open **Settings** and find the **Chat Defaults** section.
128
+ 2. Select a model from the dropdown. Cost tier labels help you compare options.
129
+ 3. New conversations will use this model by default. You can still switch models per-conversation from the chat input bar.
76
130
 
77
131
  ### Clear Workspace Data
78
- 1. Scroll to the **Data Management** section in settings.
79
- 2. Click **Clear Data** to remove workspace content (tasks, logs, documents).
80
- 3. Confirm the action. Settings are preserved; only workspace data is cleared.
132
+
133
+ 1. Scroll to **Data Management** at the bottom of Settings.
134
+ 2. Click **Clear Data**.
135
+ 3. Confirm the action. All tasks, logs, documents, and schedules are removed. Your settings, permissions, and authentication configuration are preserved.
81
136
 
82
137
  ## Related
138
+
83
139
  - [Cost & Usage](./cost-usage.md)
84
140
  - [Tool Permissions](./tool-permissions.md)
85
- - [Provider Runtimes](./provider-runtimes.md)
@@ -1,5 +1,5 @@
1
1
  {
2
- "generated": "2026-03-21T21:00:00Z",
2
+ "generated": "2026-03-27T17:45:00Z",
3
3
  "version": 1,
4
4
  "sections": [
5
5
  {
@@ -38,7 +38,7 @@
38
38
  "category": "feature-reference",
39
39
  "path": "features/chat.md",
40
40
  "route": "/chat",
41
- "tags": ["chat", "conversations", "ai", "model-selection", "suggested-prompts", "quick-access"],
41
+ "tags": ["chat", "conversations", "ai", "tool-catalog", "mentions", "model-selection"],
42
42
  "features": ["chat-data-layer", "chat-engine", "chat-api-routes", "chat-ui-shell", "chat-message-rendering", "chat-input-composer"],
43
43
  "screengrabCount": 5
44
44
  },
@@ -118,9 +118,9 @@
118
118
  "category": "feature-reference",
119
119
  "path": "features/settings.md",
120
120
  "route": "/settings",
121
- "tags": ["settings", "authentication", "permissions", "presets", "budgets"],
122
- "features": ["session-management", "tool-permission-persistence", "tool-permission-presets", "data-management"],
123
- "screengrabCount": 2
121
+ "tags": ["settings", "authentication", "permissions", "presets", "budgets", "browser-tools"],
122
+ "features": ["session-management", "tool-permission-persistence", "tool-permission-presets", "browser-use", "spend-budget-guardrails"],
123
+ "screengrabCount": 5
124
124
  },
125
125
  {
126
126
  "slug": "playbook",
@@ -233,8 +233,8 @@
233
233
  ],
234
234
  "metadata": {
235
235
  "totalDocs": 25,
236
- "totalScreengrabs": 37,
237
- "featuresCovered": 56,
238
- "appSections": 13
236
+ "totalScreengrabs": 44,
237
+ "featuresCovered": 58,
238
+ "appSections": 15
239
239
  }
240
240
  }
@@ -0,0 +1,303 @@
1
+ # Chat Screenshot Display — Design Spec
2
+
3
+ **Date:** 2026-03-27
4
+ **Scope mode:** HOLD
5
+ **Approach:** Metadata-Driven Screenshot Attachments (Approach A)
6
+
7
+ ## Overview
8
+
9
+ When the agent uses browser MCP tools (Chrome DevTools or Playwright) during chat conversations or task execution, screenshots are persisted to disk and the documents table, then displayed inline in the chat UI and task log views. Clicking a thumbnail opens a lightbox overlay with the full-resolution image.
10
+
11
+ ## Requirements
12
+
13
+ - Screenshots from `take_screenshot` (Chrome DevTools) and `browser_take_screenshot` (Playwright) displayed inline
14
+ - Dual surface: chat messages AND task log views
15
+ - Persisted to disk + documents table (survives restart, visible in Documents manager)
16
+ - Original + 800px-wide thumbnail stored (thumbnail for inline, original for lightbox)
17
+ - Inline in assistant messages at point of capture
18
+ - Lightbox overlay on click (zoom, pan, Escape to close)
19
+
20
+ ## Data Flow
21
+
22
+ ```
23
+ ── Chat Path (engine.ts) ──────────────────────────────
24
+
25
+ 1. Agent SDK calls take_screenshot via MCP server
26
+ 2. SDK stream yields assistant event with tool_use block
27
+ └─ Capture tool name for screenshot detection
28
+ 3. SDK stream yields tool_result with image content block
29
+ └─ content: [{ type: "image", source: { type: "base64", data: "..." } }]
30
+ 4. NEW → Detect screenshot tool names:
31
+ └─ mcp__chrome-devtools__take_screenshot
32
+ └─ mcp__playwright__browser_take_screenshot
33
+ 5. NEW → persistScreenshot(base64, metadata)
34
+ ├─ Decode base64 → Buffer
35
+ ├─ Write ~/.stagent/screenshots/{uuid}.png (original)
36
+ ├─ Generate thumbnail → {uuid}_thumb.png (800px wide, sharp)
37
+ └─ INSERT into documents table (source="screenshot")
38
+ 6. NEW → Yield SSE: { type: "screenshot", documentId, thumbnailUrl, ... }
39
+ 7. Accumulate in attachments[] array
40
+ 8. On stream complete → merge into message metadata.attachments
41
+
42
+ ── Task Path (claude-agent.ts) ────────────────────────
43
+
44
+ 1. Same SDK stream event detection
45
+ 2. NEW → Call same persistScreenshot() module
46
+ 3. NEW → Log as event: "screenshot" in agent_logs
47
+ └─ payload: { documentId, thumbnailUrl, toolName }
48
+ ```
49
+
50
+ ## Schema Changes
51
+
52
+ ### documents table — new columns
53
+
54
+ ```sql
55
+ ALTER TABLE documents ADD COLUMN source TEXT DEFAULT 'upload';
56
+ -- "upload" | "screenshot"
57
+
58
+ ALTER TABLE documents ADD COLUMN conversation_id TEXT REFERENCES conversations(id);
59
+ -- Links screenshot to chat context
60
+
61
+ ALTER TABLE documents ADD COLUMN message_id TEXT;
62
+ -- Links to the assistant message that generated it
63
+ ```
64
+
65
+ Existing columns reused:
66
+ - `taskId` → for task execution screenshots
67
+ - `processedPath` → thumbnail path
68
+ - `direction` → "output"
69
+ - `category` → "screenshot"
70
+
71
+ ### Drizzle schema update (schema.ts)
72
+
73
+ Add to `documents` table definition:
74
+ ```typescript
75
+ source: text("source").default("upload"),
76
+ conversationId: text("conversation_id").references(() => conversations.id),
77
+ messageId: text("message_id"),
78
+ ```
79
+
80
+ ### ChatStreamEvent (types.ts) — new variant
81
+
82
+ ```typescript
83
+ | { type: "screenshot";
84
+ documentId: string;
85
+ thumbnailUrl: string;
86
+ originalUrl: string;
87
+ width: number;
88
+ height: number; }
89
+ ```
90
+
91
+ ### Message metadata — attachments field
92
+
93
+ ```typescript
94
+ interface ScreenshotAttachment {
95
+ documentId: string;
96
+ thumbnailUrl: string; // /api/documents/{id}/file?inline=1&thumb=1
97
+ originalUrl: string; // /api/documents/{id}/file?inline=1
98
+ width: number;
99
+ height: number;
100
+ }
101
+
102
+ // Added to existing metadata JSON:
103
+ {
104
+ modelId?: string,
105
+ quickAccess?: QuickAccessItem[],
106
+ attachments?: ScreenshotAttachment[] // NEW
107
+ }
108
+ ```
109
+
110
+ ## New Module: src/lib/screenshots/persist.ts
111
+
112
+ ```typescript
113
+ persistScreenshot(base64: string, opts: {
114
+ conversationId?: string,
115
+ messageId?: string,
116
+ taskId?: string,
117
+ projectId?: string,
118
+ toolName: string
119
+ }): Promise<ScreenshotAttachment | null>
120
+ ```
121
+
122
+ **Behavior:**
123
+ 1. Ensure `~/.stagent/screenshots/` directory exists (`mkdirSync` with `recursive: true` on first call)
124
+ 2. Validate base64 length (reject > 20MB)
125
+ 3. Decode to Buffer
126
+ 4. Extract dimensions via `image-size`
127
+ 5. Write original to `~/.stagent/screenshots/{uuid}.png`
128
+ 6. Generate 800px-wide thumbnail via `sharp` (optional dep with fallback)
129
+ 7. Write thumbnail to `~/.stagent/screenshots/{uuid}_thumb.png`
130
+ 8. Insert document record with `source: "screenshot"`, `direction: "output"`, `category: "screenshot"`
131
+ 9. Return `{ documentId, thumbnailUrl, originalUrl, width, height }` or `null` on failure
132
+
133
+ **sharp fallback:** If `sharp` is unavailable (try/catch dynamic import), skip thumbnail generation. Set `processedPath = storagePath`. Frontend serves original with CSS `max-width` constraint.
134
+
135
+ ## File Serving Update
136
+
137
+ `src/app/api/documents/[id]/file/route.ts` — add `?thumb=1` query parameter:
138
+
139
+ Add `processedPath` to the existing `select()` clause (currently only fetches `originalName`, `mimeType`, `storagePath`):
140
+
141
+ ```typescript
142
+ const [doc] = await db.select({
143
+ originalName: documents.originalName,
144
+ mimeType: documents.mimeType,
145
+ storagePath: documents.storagePath,
146
+ processedPath: documents.processedPath, // NEW
147
+ }).from(documents).where(eq(documents.id, id));
148
+
149
+ const thumb = req.nextUrl.searchParams.get("thumb") === "1";
150
+ // If thumb=1 and processedPath exists, read from processedPath
151
+ // Otherwise fall back to storagePath
152
+ const filePath = (thumb && doc.processedPath) ? doc.processedPath : doc.storagePath;
153
+ ```
154
+
155
+ ## Frontend Components
156
+
157
+ ### New: ScreenshotGallery (src/components/chat/screenshot-gallery.tsx)
158
+
159
+ - **Props:** `attachments: ScreenshotAttachment[]`
160
+ - **Renders:** flex-wrap grid of clickable thumbnails
161
+ - **Thumbnail sizing:** `max-w-[200px]`, `max-h-[150px]`, `object-cover`
162
+ - **Loading state:** skeleton placeholder until `<img onLoad>`
163
+ - **Overflow:** if `attachments.length > 4`, collapse with "Show N screenshots" toggle
164
+ - **Hover:** border highlight (primary color)
165
+ - **Click:** opens ScreenshotLightbox
166
+
167
+ ### New: ScreenshotLightbox (src/components/shared/screenshot-lightbox.tsx)
168
+
169
+ - **Props:** `open: boolean`, `onClose: () => void`, `imageUrl: string`, `width: number`, `height: number`
170
+ - **Built on:** shadcn Dialog component
171
+ - **Features:**
172
+ - Full-res image loaded on open
173
+ - Fit-to-viewport with preserved aspect ratio
174
+ - Mouse wheel → zoom in/out
175
+ - Drag to pan when zoomed
176
+ - Loading skeleton while image fetches
177
+ - Footer: dimensions, "Open in new tab" link
178
+ - Escape or overlay click to close
179
+ - **Shared:** Used by both chat messages and task log entries
180
+
181
+ ### Modified: chat-message.tsx
182
+
183
+ Parse `metadata.attachments` and render `<ScreenshotGallery>` after `<ChatMessageMarkdown>`, before the streaming cursor:
184
+
185
+ ```tsx
186
+ // Inside assistant message rendering
187
+ <ChatMessageMarkdown content={message.content} />
188
+ {attachments.length > 0 && (
189
+ <ScreenshotGallery attachments={attachments} />
190
+ )}
191
+ {isStreaming && message.content && (
192
+ <span className="... animate-pulse" />
193
+ )}
194
+ ```
195
+
196
+ ### Modified: chat-shell.tsx
197
+
198
+ New SSE event handler alongside existing delta, status, done, etc.:
199
+
200
+ ```typescript
201
+ else if (event.type === "screenshot") {
202
+ setMessages(prev => prev.map(m =>
203
+ m.id === assistantMsgId
204
+ ? { ...m, metadata: mergeAttachment(m.metadata, {
205
+ documentId: event.documentId,
206
+ thumbnailUrl: event.thumbnailUrl,
207
+ originalUrl: event.originalUrl,
208
+ width: event.width,
209
+ height: event.height
210
+ }) }
211
+ : m
212
+ ));
213
+ }
214
+ ```
215
+
216
+ ### Modified: log-entry.tsx
217
+
218
+ Detect `event: "screenshot"` and render inline thumbnail:
219
+
220
+ ```typescript
221
+ // In LogEntry component
222
+ if (entry.event === "screenshot" && parsed.documentId) {
223
+ return (
224
+ // Thumbnail with click-to-lightbox, alongside timestamp and tool name
225
+ );
226
+ }
227
+ ```
228
+
229
+ Add `"screenshot"` to `eventColors` map with primary color.
230
+
231
+ ## Error & Rescue Registry
232
+
233
+ | Error | Trigger | Impact | Rescue |
234
+ |-------|---------|--------|--------|
235
+ | Base64 decode fails | Corrupted/truncated MCP result | Screenshot lost, text unaffected | Log warning, skip persist, continue stream |
236
+ | Disk write fails | Disk full, permissions error | Screenshot not persisted | Catch in `persistScreenshot()`, return null, engine skips |
237
+ | sharp unavailable | Native module build fails | No thumbnails | Optional dep, fallback to serving original with CSS max-width |
238
+ | Oversized screenshot | Full-page 4K (15-20MB base64) | Memory spike | Reject base64 > 20MB, log warning with size |
239
+ | DB insert fails | WAL lock, schema mismatch | Orphan file on disk | Catch, cleanup written files, return null |
240
+ | Thumbnail load 404 | File deleted, path mismatch | Broken thumbnail | `<img onError>` → fallback to original URL → placeholder |
241
+ | SSE event lost | Network hiccup during stream | Screenshot not shown live | Self-heals on reload: metadata.attachments is authoritative |
242
+ | SDK event shape change | Agent SDK update | Detection stops working | Defensive extraction, multiple known paths, unit tests with fixtures |
243
+ | Rapid screenshots | 10+ screenshots in succession | I/O contention, UI jank | Sequential persist, collapsible gallery for 4+ items |
244
+ | Conversation reload | User switches/refreshes | Must reconstruct from DB | metadata.attachments persisted with message, renders on load |
245
+
246
+ **Core invariant:** Screenshot failures NEVER break the chat stream or task execution. Every failure path returns null; the engine skips the screenshot and the text conversation continues.
247
+
248
+ ## What Already Exists (reuse)
249
+
250
+ - **documents table** (`schema.ts:109-140`) — image MIME, storagePath, processedPath, direction, category
251
+ - **Document file API** (`api/documents/[id]/file/route.ts`) — serves files inline with MIME headers
252
+ - **Image processor** (`lib/documents/processors/image.ts`) — dimensions extraction via image-size
253
+ - **shadcn Dialog** (`components/ui/dialog.tsx`) — overlay, close-on-escape, portal
254
+ - **SSE infrastructure** (`engine.ts` + `chat-shell.tsx`) — event types, streaming, side-channel
255
+ - **Browser tool detection** (`engine.ts:isBrowserReadOnly()`) — screenshot tool name matching
256
+ - **STAGENT_DATA_DIR** (`lib/documents/processor.ts`) — centralized data dir resolution
257
+
258
+ ## NOT In Scope
259
+
260
+ - **Screenshot annotations** — Drawing/highlighting on screenshots. Reason: no user request; significant complexity for uncertain value.
261
+ - **Screenshot comparison/diff** — Visual diff between before/after. Reason: requires separate image processing pipeline.
262
+ - **User-pasted images in chat input** — Clipboard paste into chat. Reason: different flow (user→agent); separate feature.
263
+ - **Screenshot gallery page** — Dedicated /screenshots route. Reason: Documents page with source filter covers this.
264
+ - **Video/GIF capture** — Recording browser interactions. Reason: fundamentally different data type.
265
+ - **Screenshot retention policy** — Auto-cleanup of old screenshots. Reason: premature optimization.
266
+ - **Codex (OpenAI) path** — Screenshot handling for Codex runtime. Reason: different event model; address when that runtime supports browser tools.
267
+
268
+ ## File Manifest
269
+
270
+ ### New files (5)
271
+ - `src/lib/screenshots/persist.ts` — core persistence module
272
+ - `src/lib/screenshots/__tests__/persist.test.ts` — unit tests
273
+ - `src/components/chat/screenshot-gallery.tsx` — inline thumbnail grid
274
+ - `src/components/shared/screenshot-lightbox.tsx` — fullscreen viewer
275
+ - `src/lib/db/migrations/XXXX_add_screenshot_columns.sql` — schema migration
276
+
277
+ ### Modified files (11)
278
+ - `src/lib/db/schema.ts` — add source, conversationId, messageId columns
279
+ - `src/lib/db/index.ts` — bootstrap new columns
280
+ - `src/lib/chat/types.ts` — add screenshot SSE event type + ScreenshotAttachment interface
281
+ - `src/lib/chat/engine.ts` — intercept screenshot tool results, persist, emit SSE
282
+ - `src/lib/agents/claude-agent.ts` — intercept screenshot tool results, persist, log
283
+ - `src/app/api/documents/[id]/file/route.ts` — add ?thumb=1 support via processedPath
284
+ - `src/components/chat/chat-message.tsx` — render ScreenshotGallery from metadata.attachments
285
+ - `src/components/chat/chat-shell.tsx` — handle screenshot SSE event type
286
+ - `src/components/monitoring/log-entry.tsx` — render screenshot log events with thumbnail
287
+ - `src/lib/data/clear.ts` — add `~/.stagent/screenshots/` filesystem cleanup (default location; presumably resolved under `STAGENT_DATA_DIR` when set; no new table, only columns added)
288
+ - `package.json` — add sharp dependency
289
+
290
+ ## Verification Plan
291
+
292
+ 1. **Unit tests:** `persist.test.ts` — mock fs/sharp, test base64 decode, thumbnail generation, DB insert, error paths, size rejection
293
+ 2. **Integration test:** Start dev server, open chat, enable Chrome DevTools MCP, send prompt that triggers `take_screenshot`, verify:
294
+ - Screenshot file exists at `~/.stagent/screenshots/` (default location; presumably under `STAGENT_DATA_DIR` when set)
295
+ - Thumbnail file exists alongside original
296
+ - Document record in DB with `source: "screenshot"`
297
+ - SSE event received by frontend
298
+ - Thumbnail renders inline in chat message
299
+ - Click opens lightbox with full-res image
300
+ 3. **Task path:** Create a task that uses browser tools, verify screenshot appears in log entry view
301
+ 4. **Reload test:** Refresh page, switch conversations, verify screenshots persist from metadata
302
+ 5. **Error paths:** Test with sharp uninstalled, with oversized image mock, with disk permission error
303
+ 6. **Documents surface:** Verify screenshots appear in /documents with source=screenshot filter
package/drizzle.config.ts CHANGED
@@ -2,11 +2,13 @@ import { defineConfig } from "drizzle-kit";
2
2
  import { homedir } from "os";
3
3
  import { join } from "path";
4
4
 
5
+ const dataDir = process.env.STAGENT_DATA_DIR || join(homedir(), ".stagent");
6
+
5
7
  export default defineConfig({
6
8
  schema: "./src/lib/db/schema.ts",
7
9
  out: "./src/lib/db/migrations",
8
10
  dialect: "sqlite",
9
11
  dbCredentials: {
10
- url: join(homedir(), ".stagent", "stagent.db"),
12
+ url: join(dataDir, "stagent.db"),
11
13
  },
12
14
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "stagent",
3
- "version": "0.3.6",
3
+ "version": "0.5.0",
4
4
  "description": "Governed AI agent workspace for supervised local execution, workflows, documents, and provider runtimes.",
5
5
  "keywords": [
6
6
  "ai",
@@ -50,6 +50,7 @@
50
50
  "test:e2e": "vitest run --config vitest.config.e2e.ts",
51
51
  "test:ui": "vitest --ui",
52
52
  "validate:tokens": "npx tsx design-system/validate-tokens.ts",
53
+ "sync-worktree": "bash bin/sync-worktree.sh",
53
54
  "prepublishOnly": "npm run build:cli"
54
55
  },
55
56
  "engines": {
@@ -86,8 +87,10 @@
86
87
  "react-hook-form": "^7.71.2",
87
88
  "react-markdown": "^10.1.0",
88
89
  "remark-gfm": "^4.0.1",
90
+ "sharp": "^0.34.5",
89
91
  "smol-toml": "^1.6.0",
90
92
  "sonner": "^2.0.7",
93
+ "sugar-high": "^1.0.0",
91
94
  "tailwind-merge": "^3",
92
95
  "tailwindcss": "^4",
93
96
  "tw-animate-css": "^1",
@@ -104,6 +107,7 @@
104
107
  "@types/node": "^22",
105
108
  "@types/react": "^19",
106
109
  "@types/react-dom": "^19",
110
+ "@types/sharp": "^0.31.1",
107
111
  "@vitejs/plugin-react": "^5.1.4",
108
112
  "@vitest/coverage-v8": "^4.0.18",
109
113
  "drizzle-kit": "^0.30",