npm - stagent - Versions diffs - 0.3.6 → 0.5.0 - Mend

stagent 0.3.6 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (108) hide show

package/README.md +70 -23
package/dist/cli.js +44 -10
package/docs/.last-generated +1 -1
package/docs/features/chat.md +54 -49
package/docs/features/schedules.md +38 -32
package/docs/features/settings.md +105 -50
package/docs/manifest.json +8 -8
package/docs/superpowers/specs/2026-03-27-chat-screenshot-display-design.md +303 -0
package/drizzle.config.ts +3 -1
package/package.json +5 -1
package/src/app/api/book/bookmarks/route.ts +73 -0
package/src/app/api/book/progress/route.ts +79 -0
package/src/app/api/book/regenerate/route.ts +111 -0
package/src/app/api/book/stage/route.ts +13 -0
package/src/app/api/chat/conversations/[id]/messages/route.ts +3 -2
package/src/app/api/chat/conversations/[id]/respond/route.ts +19 -20
package/src/app/api/chat/conversations/[id]/route.ts +2 -1
package/src/app/api/chat/entities/search/route.ts +97 -0
package/src/app/api/documents/[id]/file/route.ts +4 -1
package/src/app/api/documents/[id]/route.ts +34 -2
package/src/app/api/documents/route.ts +91 -0
package/src/app/api/projects/[id]/route.ts +119 -9
package/src/app/api/projects/__tests__/delete-project.test.ts +170 -0
package/src/app/api/settings/browser-tools/route.ts +68 -0
package/src/app/api/settings/runtime/route.ts +29 -8
package/src/app/book/page.tsx +14 -0
package/src/app/chat/page.tsx +7 -1
package/src/app/globals.css +375 -0
package/src/app/projects/[id]/page.tsx +31 -6
package/src/app/settings/page.tsx +2 -0
package/src/app/{playbook → user-guide}/[slug]/page.tsx +12 -2
package/src/app/{playbook → user-guide}/page.tsx +2 -2
package/src/app/workflows/[id]/page.tsx +28 -2
package/src/components/book/book-reader.tsx +801 -0
package/src/components/book/chapter-generation-bar.tsx +109 -0
package/src/components/book/content-blocks.tsx +432 -0
package/src/components/book/path-progress.tsx +33 -0
package/src/components/book/path-selector.tsx +42 -0
package/src/components/book/try-it-now.tsx +164 -0
package/src/components/chat/chat-activity-indicator.tsx +92 -0
package/src/components/chat/chat-command-popover.tsx +277 -0
package/src/components/chat/chat-input.tsx +85 -10
package/src/components/chat/chat-message-list.tsx +3 -0
package/src/components/chat/chat-message.tsx +29 -7
package/src/components/chat/chat-permission-request.tsx +5 -1
package/src/components/chat/chat-question.tsx +3 -0
package/src/components/chat/chat-shell.tsx +159 -24
package/src/components/chat/conversation-list.tsx +8 -2
package/src/components/chat/screenshot-gallery.tsx +96 -0
package/src/components/monitoring/log-entry.tsx +61 -27
package/src/components/playbook/adoption-heatmap.tsx +1 -1
package/src/components/playbook/journey-card.tsx +1 -1
package/src/components/playbook/playbook-card.tsx +1 -1
package/src/components/playbook/playbook-detail-view.tsx +15 -5
package/src/components/playbook/playbook-homepage.tsx +1 -1
package/src/components/playbook/playbook-updated-badge.tsx +1 -1
package/src/components/projects/project-detail.tsx +160 -27
package/src/components/projects/project-form-sheet.tsx +6 -2
package/src/components/projects/project-list.tsx +1 -1
package/src/components/schedules/schedule-create-sheet.tsx +24 -330
package/src/components/schedules/schedule-detail-sheet.tsx +37 -21
package/src/components/schedules/schedule-edit-sheet.tsx +159 -0
package/src/components/schedules/schedule-form.tsx +410 -0
package/src/components/schedules/schedule-list.tsx +16 -0
package/src/components/settings/browser-tools-section.tsx +247 -0
package/src/components/settings/runtime-timeout-section.tsx +117 -37
package/src/components/shared/app-sidebar.tsx +7 -1
package/src/components/shared/command-palette.tsx +4 -33
package/src/components/shared/screenshot-lightbox.tsx +151 -0
package/src/hooks/use-caret-position.ts +104 -0
package/src/hooks/use-chapter-generation.ts +255 -0
package/src/hooks/use-chat-autocomplete.ts +290 -0
package/src/lib/agents/__tests__/browser-mcp.test.ts +175 -0
package/src/lib/agents/__tests__/claude-agent.test.ts +3 -0
package/src/lib/agents/browser-mcp.ts +119 -0
package/src/lib/agents/claude-agent.ts +78 -14
package/src/lib/book/chapter-generator.ts +193 -0
package/src/lib/book/chapter-mapping.ts +91 -0
package/src/lib/book/content.ts +251 -0
package/src/lib/book/markdown-parser.ts +317 -0
package/src/lib/book/reading-paths.ts +82 -0
package/src/lib/book/types.ts +152 -0
package/src/lib/book/update-detector.ts +157 -0
package/src/lib/chat/codex-engine.ts +537 -0
package/src/lib/chat/command-data.ts +50 -0
package/src/lib/chat/context-builder.ts +145 -7
package/src/lib/chat/engine.ts +207 -49
package/src/lib/chat/model-discovery.ts +13 -5
package/src/lib/chat/permission-bridge.ts +14 -2
package/src/lib/chat/slash-commands.ts +191 -0
package/src/lib/chat/stagent-tools.ts +2 -0
package/src/lib/chat/system-prompt.ts +16 -1
package/src/lib/chat/tool-catalog.ts +185 -0
package/src/lib/chat/tools/chat-history-tools.ts +177 -0
package/src/lib/chat/tools/document-tools.ts +241 -0
package/src/lib/chat/tools/settings-tools.ts +29 -3
package/src/lib/chat/types.ts +19 -2
package/src/lib/constants/settings.ts +5 -0
package/src/lib/data/chat.ts +83 -2
package/src/lib/data/clear.ts +24 -4
package/src/lib/db/bootstrap.ts +29 -0
package/src/lib/db/migrations/0012_add_screenshot_columns.sql +5 -0
package/src/lib/db/schema.ts +37 -0
package/src/lib/docs/types.ts +9 -0
package/src/lib/screenshots/__tests__/persist.test.ts +104 -0
package/src/lib/screenshots/persist.ts +114 -0
package/src/lib/utils/stagent-paths.ts +4 -0
/package/src/app/api/{playbook → user-guide}/status/route.ts +0 -0

package/docs/features/settings.md CHANGED Viewed

@@ -3,83 +3,138 @@ title: "Settings"
 category: "feature-reference"
 section: "settings"
 route: "/settings"
-tags: [settings, authentication, budget, permissions, data, providers, oauth, api-key, codex]
-features: ["tool-permission-persistence", "provider-runtime-abstraction", "spend-budget-guardrails", "tool-permission-presets", "openai-codex-app-server"]
-screengrabCount: 4
-lastUpdated: "2026-03-21"
+tags: ["settings", "configuration", "auth", "runtime", "browser-tools", "permissions", "budget"]
+features: ["session-management", "tool-permission-persistence", "tool-permission-presets", "browser-use", "spend-budget-guardrails", "settings-interactive-controls"]
+screengrabCount: 5
+lastUpdated: "2026-03-27"
 ---
 # Settings
-Configure authentication, budgets, tool permissions, and data management from a single settings page. Settings supports two provider runtimes -- Claude (Agent SDK with OAuth or API key) and Codex (App Server with WebSocket JSON-RPC) -- along with budget guardrails, permission presets with risk-level badges, and data management tools for clearing or exporting workspace data.
+The Settings page is the central configuration hub for Stagent. From a single scrollable page you can manage authentication for both Claude and Codex runtimes, tune how long agents are allowed to run, pick a default chat model, enable browser automation, set monthly cost caps, choose permission presets, review individually approved tools, and reset workspace data. Each section saves changes immediately with confirmation feedback.
 ## Screenshots
-![Settings page overview with authentication section](../screengrabs/settings-list.png)
-*The settings page showing the authentication section with provider configuration, OAuth vs API key selection, and connection test.*
+![Settings page overview showing authentication and runtime sections](../screengrabs/settings-list.png)
+*Full settings page with authentication, Codex runtime, chat defaults, runtime configuration, and browser tools sections visible.*
-![Budget settings section](../screengrabs/settings-budget.png)
-*Budget configuration with overall spend cap, monthly split, OAuth billing indicator, and current pacing meter.*
+![Browser tools section with Chrome DevTools and Playwright toggles](../screengrabs/settings-browser-tools.png)
+*Browser Tools section showing independent toggles for Chrome DevTools and Playwright browser automation.*
-![Permission presets section](../screengrabs/settings-presets.png)
-*Tool permission presets showing Read Only, Git Safe, and Full Auto tiers with risk-level badges.*
+![Budget guardrails section with spend caps and split configuration](../screengrabs/settings-budget.png)
+*Cost and Usage Guardrails with overall spend cap, monthly split, billing indicator, and pacing meter.*
-![Data management section](../screengrabs/settings-data.png)
-*Data management section with clear data and export options.*
+![Permission presets with risk badges and toggle controls](../screengrabs/settings-presets.png)
+*Permission Presets showing Read Only, Git Safe, and Full Auto tiers with color-coded risk badges.*
+![Data management section with clear and populate options](../screengrabs/settings-data.png)
+*Data Management section for resetting or populating workspace data.*
 ## Key Features
 ### Authentication
-Configure how Stagent authenticates with provider runtimes. For Claude, choose between OAuth (uses your Max subscription with no additional API charges) and API Key (uses your Anthropic API key from `.env.local`). For Codex, configure the App Server connection endpoint. A connection test button validates that your credentials and endpoints are working.
-### Provider Runtime Abstraction
-Two provider runtimes are supported out of the box. Claude uses the Anthropic Agent SDK and supports both OAuth and API key authentication modes. Codex connects via the App Server using WebSocket JSON-RPC for real-time communication. The runtime abstraction means tasks and profiles work identically regardless of which provider executes them.
+Choose how Stagent connects to Claude. **OAuth** uses your existing Max subscription at no additional API cost. **API Key** uses the Anthropic key stored in your environment. A **Test Connection** button validates whichever method you select. A separate section configures the Codex App Server endpoint for tasks that run through the Codex runtime.
+### Runtime Configuration
+Two controls govern how agents behave during execution:
+- **SDK Timeout** -- how many seconds an individual agent call is allowed to run before timing out. Lower values return faster; higher values give the agent more time for complex reasoning.
+- **Max Turns** -- how many back-and-forth tool-use cycles the agent can perform in a single run. Fewer turns suit quick lookups; more turns allow extended multi-step work.
+Both controls are planned for an upgrade to interactive sliders with contextual labels and recommended-range indicators (see the Settings Interactive Controls feature, currently pending).
+### Chat Defaults
+Pick the default model for new chat conversations. The selector shows available Claude and Codex models with relative cost tiers so you can balance capability against spend before starting a conversation.
+### Browser Tools
-### Budget Configuration
-Set an overall spend cap to limit total workspace costs. Configure monthly splits to distribute the budget across billing periods. The OAuth billing indicator shows whether the current authentication method incurs API charges. A pacing meter visualizes current spend against the budget, with color-coded status for healthy, warning, and critical spend levels.
+Enable browser automation for chat and task execution without leaving Stagent. Two independent toggles control complementary capabilities:
+- **Chrome DevTools** -- connects to a running Chrome window. Useful for debugging your own app, inspecting network traffic, running performance audits, and taking screenshots of live pages.
+- **Playwright** -- launches its own headless browser. Useful for autonomous web research, page scraping, structured analysis, and cross-browser testing.
+When enabled, read-only browser actions (screenshots, page snapshots, console reads) are auto-approved. Actions that change page state (clicking, typing, navigating) go through the normal permission approval flow. Both toggles are off by default -- no background processes are spawned when unused.
+### Cost and Usage Guardrails
+Set spend caps to prevent runaway costs from autonomous agent work:
+- **Overall spend cap** -- a hard monthly ceiling across all providers.
+- **Monthly split** -- distribute the budget across billing periods.
+- **Per-provider caps** -- optional daily and monthly limits for Claude and Codex independently, with advanced token-level overrides.
+A pacing meter shows current spend against the cap with color-coded health (green, amber, red). When usage crosses 80% of a configured cap an inbox notification is sent. After the cap is exceeded, new agent work is blocked with an explicit message -- already-running tasks are allowed to finish. The next reset time is displayed so you know when the budget window rolls over.
 ### Permission Presets
-Three permission tiers control what tools agents are allowed to use. **Read Only** grants access to file reading and search tools with no write permissions -- the lowest risk tier. **Git Safe** adds version-controlled write operations (file edits, git commits) with moderate risk. **Full Auto** enables all tools including shell commands, network access, and file system writes -- the highest risk tier. Each tier displays a risk badge for clear visibility.
-### Tool Permission Persistence
-The "Always Allow" feature remembers tool permission decisions across sessions. When you approve a tool for a given permission tier, the decision is stored in the settings table so agents do not prompt for the same permission again.
+Three one-click bundles set tool permissions in bulk, reducing first-run friction:
+| Preset | What it allows | Risk |
+|--------|---------------|------|
+| **Read Only** | File reading, search, directory listing | Lowest |
+| **Git Safe** | Everything in Read Only plus file edits and git commands | Medium |
+| **Full Auto** | All tools except direct user questions | Highest |
+Each preset shows a color-coded risk badge. Presets are additive -- enabling Git Safe automatically includes Read Only tools. Disabling a preset removes only its unique additions without affecting tools you approved individually.
+### Tool Permissions
+Below the presets, a list shows every individually approved tool pattern. Patterns follow the format used by Claude Code:
+- **Tool-level**: `Read`, `Write` -- blanket approval for any invocation.
+- **Pattern-level**: `Bash(command:git *)` -- approve only when the command starts with `git`.
+- **Browser tools**: `mcp__playwright__browser_snapshot` -- approve a specific browser action.
+Each pattern has a **Revoke** button. Revoking a pattern means the agent will prompt for permission again the next time it tries to use that tool. The special `AskUserQuestion` tool is never auto-approved regardless of presets or saved patterns.
 ### Data Management
-Clear workspace data or export it for backup. The clear data function removes tasks, logs, documents, and other workspace content while preserving settings. Export creates a snapshot of your workspace data for external storage or migration.
+Two operations for managing workspace content:
+- **Clear Data** -- removes tasks, logs, documents, schedules, and other workspace content. Settings and permissions are preserved.
+- **Populate Sample Data** -- seeds the workspace with example projects, tasks, and documents for exploration or demo purposes.
 ## How To
-### Configure Claude Authentication
-1. Navigate to `/settings` from the sidebar under the **Configure** group.
-2. In the **Authentication** section, select either **OAuth** or **API Key** for the Claude runtime.
-3. For OAuth, ensure you have an active Claude Max subscription. For API Key, verify that `ANTHROPIC_API_KEY` is set in `.env.local`.
-4. Click **Test Connection** to validate the configuration.
-### Set Up Codex Runtime
-1. Open the **Authentication** section in settings.
-2. Locate the Codex App Server configuration.
-3. Enter the WebSocket endpoint for the Codex App Server.
-4. Test the connection to verify connectivity.
-### Configure Budget Guardrails
-1. Navigate to the **Budget** section in settings.
-2. Enter the overall spend cap amount.
-3. Set the monthly split to distribute the budget.
-4. Monitor the pacing meter to track spend against the cap.
-5. Alerts will notify you when spend approaches the limit.
-### Choose a Permission Preset
-1. Open the **Permission Presets** section in settings.
-2. Review the three tiers: Read Only, Git Safe, and Full Auto.
-3. Note the risk badge on each tier to understand the permission scope.
-4. Select the tier that matches your risk tolerance for agent operations.
+### Enable Browser Automation
+1. Open **Settings** from the sidebar (under the Configure group).
+2. Scroll to the **Browser Tools** section.
+3. Toggle **Chrome DevTools** on if you want to debug pages in your running Chrome browser.
+4. Toggle **Playwright** on if you want agents to launch their own headless browser for research and scraping.
+5. Both can be enabled at the same time. Changes take effect immediately for the next chat message or task execution.
+### Set a Monthly Budget
+1. Open **Settings** and scroll to **Cost and Usage Guardrails**.
+2. Enter an overall monthly spend cap (in dollars).
+3. Optionally set per-provider daily or monthly caps for finer control.
+4. Watch the pacing meter to track spend throughout the month.
+5. You will receive an inbox notification at 80% usage; once the cap is exceeded, new agent work is blocked until the budget window resets.
+### Configure Permission Presets
+1. Open **Settings** and scroll to **Permission Presets**.
+2. Review the three tiers and their risk badges.
+3. Toggle on the preset that matches your comfort level -- Read Only for cautious use, Git Safe for development workflows, Full Auto for fully autonomous operation.
+4. The preset's tools are added to your approved list immediately. You can still revoke individual tools below if needed.
+### Change the Default Chat Model
+1. Open **Settings** and find the **Chat Defaults** section.
+2. Select a model from the dropdown. Cost tier labels help you compare options.
+3. New conversations will use this model by default. You can still switch models per-conversation from the chat input bar.
 ### Clear Workspace Data
-1. Scroll to the **Data Management** section in settings.
-2. Click **Clear Data** to remove workspace content (tasks, logs, documents).
-3. Confirm the action. Settings are preserved; only workspace data is cleared.
+1. Scroll to **Data Management** at the bottom of Settings.
+2. Click **Clear Data**.
+3. Confirm the action. All tasks, logs, documents, and schedules are removed. Your settings, permissions, and authentication configuration are preserved.
 ## Related
 - [Cost & Usage](./cost-usage.md)
 - [Tool Permissions](./tool-permissions.md)
-- [Provider Runtimes](./provider-runtimes.md)

package/docs/manifest.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "generated": "2026-03-21T21:00:00Z",
+  "generated": "2026-03-27T17:45:00Z",
   "version": 1,
   "sections": [
     {
@@ -38,7 +38,7 @@
       "category": "feature-reference",
       "path": "features/chat.md",
       "route": "/chat",
-      "tags": ["chat", "conversations", "ai", "model-selection", "suggested-prompts", "quick-access"],
+      "tags": ["chat", "conversations", "ai", "tool-catalog", "mentions", "model-selection"],
       "features": ["chat-data-layer", "chat-engine", "chat-api-routes", "chat-ui-shell", "chat-message-rendering", "chat-input-composer"],
       "screengrabCount": 5
     },
@@ -118,9 +118,9 @@
       "category": "feature-reference",
       "path": "features/settings.md",
       "route": "/settings",
-      "tags": ["settings", "authentication", "permissions", "presets", "budgets"],
-      "features": ["session-management", "tool-permission-persistence", "tool-permission-presets", "data-management"],
-      "screengrabCount": 2
+      "tags": ["settings", "authentication", "permissions", "presets", "budgets", "browser-tools"],
+      "features": ["session-management", "tool-permission-persistence", "tool-permission-presets", "browser-use", "spend-budget-guardrails"],
+      "screengrabCount": 5
     },
     {
       "slug": "playbook",
@@ -233,8 +233,8 @@
   ],
   "metadata": {
     "totalDocs": 25,
-    "totalScreengrabs": 37,
-    "featuresCovered": 56,
-    "appSections": 13
+    "totalScreengrabs": 44,
+    "featuresCovered": 58,
+    "appSections": 15
   }
 }

package/docs/superpowers/specs/2026-03-27-chat-screenshot-display-design.md ADDED Viewed

@@ -0,0 +1,303 @@
+# Chat Screenshot Display — Design Spec
+**Date:** 2026-03-27
+**Scope mode:** HOLD
+**Approach:** Metadata-Driven Screenshot Attachments (Approach A)
+## Overview
+When the agent uses browser MCP tools (Chrome DevTools or Playwright) during chat conversations or task execution, screenshots are persisted to disk and the documents table, then displayed inline in the chat UI and task log views. Clicking a thumbnail opens a lightbox overlay with the full-resolution image.
+## Requirements
+- Screenshots from `take_screenshot` (Chrome DevTools) and `browser_take_screenshot` (Playwright) displayed inline
+- Dual surface: chat messages AND task log views
+- Persisted to disk + documents table (survives restart, visible in Documents manager)
+- Original + 800px-wide thumbnail stored (thumbnail for inline, original for lightbox)
+- Inline in assistant messages at point of capture
+- Lightbox overlay on click (zoom, pan, Escape to close)
+## Data Flow
+```
+── Chat Path (engine.ts) ──────────────────────────────
+1. Agent SDK calls take_screenshot via MCP server
+2. SDK stream yields assistant event with tool_use block
+   └─ Capture tool name for screenshot detection
+3. SDK stream yields tool_result with image content block
+   └─ content: [{ type: "image", source: { type: "base64", data: "..." } }]
+4. NEW → Detect screenshot tool names:
+   └─ mcp__chrome-devtools__take_screenshot
+   └─ mcp__playwright__browser_take_screenshot
+5. NEW → persistScreenshot(base64, metadata)
+   ├─ Decode base64 → Buffer
+   ├─ Write ~/.stagent/screenshots/{uuid}.png (original)
+   ├─ Generate thumbnail → {uuid}_thumb.png (800px wide, sharp)
+   └─ INSERT into documents table (source="screenshot")
+6. NEW → Yield SSE: { type: "screenshot", documentId, thumbnailUrl, ... }
+7. Accumulate in attachments[] array
+8. On stream complete → merge into message metadata.attachments
+── Task Path (claude-agent.ts) ────────────────────────
+1. Same SDK stream event detection
+2. NEW → Call same persistScreenshot() module
+3. NEW → Log as event: "screenshot" in agent_logs
+   └─ payload: { documentId, thumbnailUrl, toolName }
+```
+## Schema Changes
+### documents table — new columns
+```sql
+ALTER TABLE documents ADD COLUMN source TEXT DEFAULT 'upload';
+-- "upload" | "screenshot"
+ALTER TABLE documents ADD COLUMN conversation_id TEXT REFERENCES conversations(id);
+-- Links screenshot to chat context
+ALTER TABLE documents ADD COLUMN message_id TEXT;
+-- Links to the assistant message that generated it
+```
+Existing columns reused:
+- `taskId` → for task execution screenshots
+- `processedPath` → thumbnail path
+- `direction` → "output"
+- `category` → "screenshot"
+### Drizzle schema update (schema.ts)
+Add to `documents` table definition:
+```typescript
+source: text("source").default("upload"),
+conversationId: text("conversation_id").references(() => conversations.id),
+messageId: text("message_id"),
+```
+### ChatStreamEvent (types.ts) — new variant
+```typescript
+| { type: "screenshot";
+    documentId: string;
+    thumbnailUrl: string;
+    originalUrl: string;
+    width: number;
+    height: number; }
+```
+### Message metadata — attachments field
+```typescript
+interface ScreenshotAttachment {
+  documentId: string;
+  thumbnailUrl: string;   // /api/documents/{id}/file?inline=1&thumb=1
+  originalUrl: string;    // /api/documents/{id}/file?inline=1
+  width: number;
+  height: number;
+}
+// Added to existing metadata JSON:
+{
+  modelId?: string,
+  quickAccess?: QuickAccessItem[],
+  attachments?: ScreenshotAttachment[]  // NEW
+}
+```
+## New Module: src/lib/screenshots/persist.ts
+```typescript
+persistScreenshot(base64: string, opts: {
+  conversationId?: string,
+  messageId?: string,
+  taskId?: string,
+  projectId?: string,
+  toolName: string
+}): Promise<ScreenshotAttachment | null>
+```
+**Behavior:**
+1. Ensure `~/.stagent/screenshots/` directory exists (`mkdirSync` with `recursive: true` on first call)
+2. Validate base64 length (reject > 20MB)
+3. Decode to Buffer
+4. Extract dimensions via `image-size`
+5. Write original to `~/.stagent/screenshots/{uuid}.png`
+6. Generate 800px-wide thumbnail via `sharp` (optional dep with fallback)
+7. Write thumbnail to `~/.stagent/screenshots/{uuid}_thumb.png`
+8. Insert document record with `source: "screenshot"`, `direction: "output"`, `category: "screenshot"`
+9. Return `{ documentId, thumbnailUrl, originalUrl, width, height }` or `null` on failure
+**sharp fallback:** If `sharp` is unavailable (try/catch dynamic import), skip thumbnail generation. Set `processedPath = storagePath`. Frontend serves original with CSS `max-width` constraint.
+## File Serving Update
+`src/app/api/documents/[id]/file/route.ts` — add `?thumb=1` query parameter:
+Add `processedPath` to the existing `select()` clause (currently only fetches `originalName`, `mimeType`, `storagePath`):
+```typescript
+const [doc] = await db.select({
+  originalName: documents.originalName,
+  mimeType: documents.mimeType,
+  storagePath: documents.storagePath,
+  processedPath: documents.processedPath,  // NEW
+}).from(documents).where(eq(documents.id, id));
+const thumb = req.nextUrl.searchParams.get("thumb") === "1";
+// If thumb=1 and processedPath exists, read from processedPath
+// Otherwise fall back to storagePath
+const filePath = (thumb && doc.processedPath) ? doc.processedPath : doc.storagePath;
+```
+## Frontend Components
+### New: ScreenshotGallery (src/components/chat/screenshot-gallery.tsx)
+- **Props:** `attachments: ScreenshotAttachment[]`
+- **Renders:** flex-wrap grid of clickable thumbnails
+- **Thumbnail sizing:** `max-w-[200px]`, `max-h-[150px]`, `object-cover`
+- **Loading state:** skeleton placeholder until `<img onLoad>`
+- **Overflow:** if `attachments.length > 4`, collapse with "Show N screenshots" toggle
+- **Hover:** border highlight (primary color)
+- **Click:** opens ScreenshotLightbox
+### New: ScreenshotLightbox (src/components/shared/screenshot-lightbox.tsx)
+- **Props:** `open: boolean`, `onClose: () => void`, `imageUrl: string`, `width: number`, `height: number`
+- **Built on:** shadcn Dialog component
+- **Features:**
+  - Full-res image loaded on open
+  - Fit-to-viewport with preserved aspect ratio
+  - Mouse wheel → zoom in/out
+  - Drag to pan when zoomed
+  - Loading skeleton while image fetches
+  - Footer: dimensions, "Open in new tab" link
+  - Escape or overlay click to close
+- **Shared:** Used by both chat messages and task log entries
+### Modified: chat-message.tsx
+Parse `metadata.attachments` and render `<ScreenshotGallery>` after `<ChatMessageMarkdown>`, before the streaming cursor:
+```tsx
+// Inside assistant message rendering
+<ChatMessageMarkdown content={message.content} />
+{attachments.length > 0 && (
+  <ScreenshotGallery attachments={attachments} />
+)}
+{isStreaming && message.content && (
+  <span className="... animate-pulse" />
+)}
+```
+### Modified: chat-shell.tsx
+New SSE event handler alongside existing delta, status, done, etc.:
+```typescript
+else if (event.type === "screenshot") {
+  setMessages(prev => prev.map(m =>
+    m.id === assistantMsgId
+      ? { ...m, metadata: mergeAttachment(m.metadata, {
+          documentId: event.documentId,
+          thumbnailUrl: event.thumbnailUrl,
+          originalUrl: event.originalUrl,
+          width: event.width,
+          height: event.height
+        }) }
+      : m
+  ));
+}
+```
+### Modified: log-entry.tsx
+Detect `event: "screenshot"` and render inline thumbnail:
+```typescript
+// In LogEntry component
+if (entry.event === "screenshot" && parsed.documentId) {
+  return (
+    // Thumbnail with click-to-lightbox, alongside timestamp and tool name
+  );
+}
+```
+Add `"screenshot"` to `eventColors` map with primary color.
+## Error & Rescue Registry
+| Error | Trigger | Impact | Rescue |
+|-------|---------|--------|--------|
+| Base64 decode fails | Corrupted/truncated MCP result | Screenshot lost, text unaffected | Log warning, skip persist, continue stream |
+| Disk write fails | Disk full, permissions error | Screenshot not persisted | Catch in `persistScreenshot()`, return null, engine skips |
+| sharp unavailable | Native module build fails | No thumbnails | Optional dep, fallback to serving original with CSS max-width |
+| Oversized screenshot | Full-page 4K (15-20MB base64) | Memory spike | Reject base64 > 20MB, log warning with size |
+| DB insert fails | WAL lock, schema mismatch | Orphan file on disk | Catch, cleanup written files, return null |
+| Thumbnail load 404 | File deleted, path mismatch | Broken thumbnail | `<img onError>` → fallback to original URL → placeholder |
+| SSE event lost | Network hiccup during stream | Screenshot not shown live | Self-heals on reload: metadata.attachments is authoritative |
+| SDK event shape change | Agent SDK update | Detection stops working | Defensive extraction, multiple known paths, unit tests with fixtures |
+| Rapid screenshots | 10+ screenshots in succession | I/O contention, UI jank | Sequential persist, collapsible gallery for more than 4 items |
+| Conversation reload | User switches/refreshes | Must reconstruct from DB | metadata.attachments persisted with message, renders on load |
+**Core invariant:** Screenshot failures NEVER break the chat stream or task execution. Every failure returns null, engine skips, text conversation continues.
+## What Already Exists (reuse)
+- **documents table** (`schema.ts:109-140`) — image MIME, storagePath, processedPath, direction, category
+- **Document file API** (`api/documents/[id]/file/route.ts`) — serves files inline with MIME headers
+- **Image processor** (`lib/documents/processors/image.ts`) — dimensions extraction via image-size
+- **shadcn Dialog** (`components/ui/dialog.tsx`) — overlay, close-on-escape, portal
+- **SSE infrastructure** (`engine.ts` + `chat-shell.tsx`) — event types, streaming, side-channel
+- **Browser tool detection** (`engine.ts:isBrowserReadOnly()`) — screenshot tool name matching
+- **STAGENT_DATA_DIR** (`lib/documents/processor.ts`) — centralized data dir resolution
+## NOT In Scope
+- **Screenshot annotations** — Drawing/highlighting on screenshots. Reason: no user request; significant complexity for uncertain value.
+- **Screenshot comparison/diff** — Visual diff between before/after. Reason: requires separate image processing pipeline.
+- **User-pasted images in chat input** — Clipboard paste into chat. Reason: different flow (user→agent); separate feature.
+- **Screenshot gallery page** — Dedicated /screenshots route. Reason: Documents page with source filter covers this.
+- **Video/GIF capture** — Recording browser interactions. Reason: fundamentally different data type.
+- **Screenshot retention policy** — Auto-cleanup of old screenshots. Reason: premature optimization.
+- **Codex (OpenAI) path** — Screenshot handling for Codex runtime. Reason: different event model; address when that runtime supports browser tools.
+## File Manifest
+### New files (5)
+- `src/lib/screenshots/persist.ts` — core persistence module
+- `src/lib/screenshots/__tests__/persist.test.ts` — unit tests
+- `src/components/chat/screenshot-gallery.tsx` — inline thumbnail grid
+- `src/components/shared/screenshot-lightbox.tsx` — fullscreen viewer
+- `src/lib/db/migrations/0012_add_screenshot_columns.sql` — schema migration
+### Modified files (11)
+- `src/lib/db/schema.ts` — add source, conversationId, messageId columns
+- `src/lib/db/index.ts` — bootstrap new columns
+- `src/lib/chat/types.ts` — add screenshot SSE event type + ScreenshotAttachment interface
+- `src/lib/chat/engine.ts` — intercept screenshot tool results, persist, emit SSE
+- `src/lib/agents/claude-agent.ts` — intercept screenshot tool results, persist, log
+- `src/app/api/documents/[id]/file/route.ts` — add ?thumb=1 support via processedPath
+- `src/components/chat/chat-message.tsx` — render ScreenshotGallery from metadata.attachments
+- `src/components/chat/chat-shell.tsx` — handle screenshot SSE event type
+- `src/components/monitoring/log-entry.tsx` — render screenshot log events with thumbnail
+- `src/lib/data/clear.ts` — add `~/.stagent/screenshots/` filesystem cleanup (no new table, only columns added)
+- `package.json` — add sharp dependency
+## Verification Plan
+1. **Unit tests:** `persist.test.ts` — mock fs/sharp, test base64 decode, thumbnail generation, DB insert, error paths, size rejection
+2. **Integration test:** Start dev server, open chat, enable Chrome DevTools MCP, send prompt that triggers `take_screenshot`, verify:
+   - Screenshot file exists at `~/.stagent/screenshots/`
+   - Thumbnail file exists alongside original
+   - Document record in DB with `source: "screenshot"`
+   - SSE event received by frontend
+   - Thumbnail renders inline in chat message
+   - Click opens lightbox with full-res image
+3. **Task path:** Create a task that uses browser tools, verify screenshot appears in log entry view
+4. **Reload test:** Refresh page, switch conversations, verify screenshots persist from metadata
+5. **Error paths:** Test with sharp uninstalled, with oversized image mock, with disk permission error
+6. **Documents surface:** Verify screenshots appear in /documents with source=screenshot filter

package/drizzle.config.ts CHANGED Viewed

@@ -2,11 +2,13 @@ import { defineConfig } from "drizzle-kit";
 import { homedir } from "os";
 import { join } from "path";
+const dataDir = process.env.STAGENT_DATA_DIR || join(homedir(), ".stagent");
 export default defineConfig({
   schema: "./src/lib/db/schema.ts",
   out: "./src/lib/db/migrations",
   dialect: "sqlite",
   dbCredentials: {
-    url: join(homedir(), ".stagent", "stagent.db"),
+    url: join(dataDir, "stagent.db"),
   },
 });

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "stagent",
-  "version": "0.3.6",
+  "version": "0.5.0",
   "description": "Governed AI agent workspace for supervised local execution, workflows, documents, and provider runtimes.",
   "keywords": [
     "ai",
@@ -50,6 +50,7 @@
     "test:e2e": "vitest run --config vitest.config.e2e.ts",
     "test:ui": "vitest --ui",
     "validate:tokens": "npx tsx design-system/validate-tokens.ts",
+    "sync-worktree": "bash bin/sync-worktree.sh",
     "prepublishOnly": "npm run build:cli"
   },
   "engines": {
@@ -86,8 +87,10 @@
     "react-hook-form": "^7.71.2",
     "react-markdown": "^10.1.0",
     "remark-gfm": "^4.0.1",
+    "sharp": "^0.34.5",
     "smol-toml": "^1.6.0",
     "sonner": "^2.0.7",
+    "sugar-high": "^1.0.0",
     "tailwind-merge": "^3",
     "tailwindcss": "^4",
     "tw-animate-css": "^1",
@@ -104,6 +107,7 @@
     "@types/node": "^22",
     "@types/react": "^19",
     "@types/react-dom": "^19",
+    "@types/sharp": "^0.31.1",
     "@vitejs/plugin-react": "^5.1.4",
     "@vitest/coverage-v8": "^4.0.18",
     "drizzle-kit": "^0.30",