stagent 0.1.11 → 0.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -49
- package/package.json +3 -2
- package/public/readme/cost-usage-list.png +0 -0
- package/public/readme/dashboard-bulk-select.png +0 -0
- package/public/readme/dashboard-card-edit.png +0 -0
- package/public/readme/dashboard-create-form-ai-applied.png +0 -0
- package/public/readme/dashboard-create-form-ai-assist.png +0 -0
- package/public/readme/dashboard-create-form-empty.png +0 -0
- package/public/readme/dashboard-create-form-filled.png +0 -0
- package/public/readme/dashboard-filtered.png +0 -0
- package/public/readme/dashboard-list.png +0 -0
- package/public/readme/dashboard-workflow-confirm.png +0 -0
- package/public/readme/home-below-fold.png +0 -0
- package/public/readme/home-list.png +0 -0
- package/public/readme/inbox-list.png +0 -0
- package/public/readme/playbook-list.png +0 -0
- package/public/readme/profiles-list.png +0 -0
- package/public/readme/settings-list.png +0 -0
- package/public/readme/workflows-list.png +0 -0
- package/src/__tests__/e2e/blueprint.test.ts +63 -0
- package/src/__tests__/e2e/cross-runtime.test.ts +77 -0
- package/src/__tests__/e2e/helpers.ts +286 -0
- package/src/__tests__/e2e/parallel-workflow.test.ts +120 -0
- package/src/__tests__/e2e/sequence-workflow.test.ts +109 -0
- package/src/__tests__/e2e/setup.ts +156 -0
- package/src/__tests__/e2e/single-task.test.ts +170 -0
- package/src/app/api/command-palette/recent/route.ts +41 -18
- package/src/app/api/context/batch/route.ts +44 -0
- package/src/app/api/permissions/presets/route.ts +80 -0
- package/src/app/api/playbook/status/route.ts +15 -0
- package/src/app/api/profiles/route.ts +23 -20
- package/src/app/api/settings/pricing/route.ts +15 -0
- package/src/app/api/tasks/[id]/route.ts +54 -3
- package/src/app/api/workflows/[id]/route.ts +43 -4
- package/src/app/api/workflows/[id]/status/route.ts +70 -2
- package/src/app/api/workflows/from-assist/route.ts +6 -32
- package/src/app/costs/page.tsx +53 -43
- package/src/app/dashboard/page.tsx +59 -21
- package/src/app/documents/[id]/page.tsx +10 -8
- package/src/app/globals.css +11 -0
- package/src/app/page.tsx +60 -3
- package/src/app/playbook/[slug]/page.tsx +76 -0
- package/src/app/playbook/page.tsx +54 -0
- package/src/app/profiles/page.tsx +7 -4
- package/src/app/settings/page.tsx +2 -2
- package/src/app/tasks/[id]/page.tsx +22 -2
- package/src/components/costs/cost-dashboard.tsx +226 -320
- package/src/components/dashboard/activity-feed.tsx +6 -2
- package/src/components/dashboard/greeting.tsx +3 -1
- package/src/components/dashboard/priority-queue.tsx +58 -9
- package/src/components/dashboard/stats-cards.tsx +16 -2
- package/src/components/documents/document-chip-bar.tsx +183 -0
- package/src/components/documents/document-content-renderer.tsx +146 -0
- package/src/components/documents/document-detail-view.tsx +16 -239
- package/src/components/documents/image-zoom-view.tsx +60 -0
- package/src/components/documents/smart-extracted-text.tsx +47 -0
- package/src/components/documents/utils.ts +70 -0
- package/src/components/notifications/batch-proposal-review.tsx +150 -0
- package/src/components/notifications/inbox-list.tsx +4 -5
- package/src/components/notifications/notification-item.tsx +73 -6
- package/src/components/notifications/pending-approval-host.tsx +63 -14
- package/src/components/playbook/adoption-heatmap.tsx +69 -0
- package/src/components/playbook/journey-card.tsx +110 -0
- package/src/components/playbook/playbook-action-button.tsx +22 -0
- package/src/components/playbook/playbook-browser.tsx +143 -0
- package/src/components/playbook/playbook-card.tsx +102 -0
- package/src/components/playbook/playbook-detail-view.tsx +225 -0
- package/src/components/playbook/playbook-homepage.tsx +142 -0
- package/src/components/playbook/playbook-toc.tsx +90 -0
- package/src/components/playbook/playbook-updated-badge.tsx +23 -0
- package/src/components/playbook/related-docs.tsx +30 -0
- package/src/components/profiles/__tests__/learned-context-panel.test.tsx +175 -0
- package/src/components/profiles/context-proposal-review.tsx +7 -3
- package/src/components/profiles/learned-context-panel.tsx +116 -8
- package/src/components/profiles/profile-browser.tsx +1 -0
- package/src/components/profiles/profile-card.tsx +16 -8
- package/src/components/profiles/profile-detail-view.tsx +12 -4
- package/src/components/settings/__tests__/auth-config-section.test.tsx +147 -0
- package/src/components/settings/api-key-form.tsx +5 -43
- package/src/components/settings/auth-config-section.tsx +10 -6
- package/src/components/settings/auth-status-badge.tsx +8 -0
- package/src/components/settings/budget-guardrails-section.tsx +403 -620
- package/src/components/settings/connection-test-control.tsx +63 -0
- package/src/components/settings/permissions-section.tsx +85 -75
- package/src/components/settings/permissions-sections.tsx +24 -0
- package/src/components/settings/presets-section.tsx +159 -0
- package/src/components/settings/pricing-registry-panel.tsx +164 -0
- package/src/components/shared/app-sidebar.tsx +4 -2
- package/src/components/shared/command-palette.tsx +30 -0
- package/src/components/shared/light-markdown.tsx +134 -0
- package/src/components/tasks/__tests__/kanban-board-accessibility.test.tsx +1 -1
- package/src/components/tasks/ai-assist-panel.tsx +108 -78
- package/src/components/tasks/content-preview.tsx +2 -1
- package/src/components/tasks/kanban-board.tsx +57 -5
- package/src/components/tasks/kanban-column.tsx +34 -23
- package/src/components/tasks/task-bento-cell.tsx +50 -0
- package/src/components/tasks/task-bento-grid.tsx +155 -0
- package/src/components/tasks/task-card.tsx +14 -16
- package/src/components/tasks/task-chip-bar.tsx +207 -0
- package/src/components/tasks/task-detail-view.tsx +42 -190
- package/src/components/tasks/task-result-renderer.tsx +33 -0
- package/src/components/workflows/blueprint-gallery.tsx +19 -12
- package/src/components/workflows/blueprint-preview.tsx +8 -1
- package/src/components/workflows/loop-status-view.tsx +2 -4
- package/src/components/workflows/swarm-dashboard.tsx +2 -3
- package/src/components/workflows/workflow-confirmation-view.tsx +2 -7
- package/src/components/workflows/workflow-full-output.tsx +80 -0
- package/src/components/workflows/workflow-kanban-card.tsx +121 -0
- package/src/components/workflows/workflow-list.tsx +47 -42
- package/src/components/workflows/workflow-status-view.tsx +163 -16
- package/src/lib/agents/learned-context.ts +27 -15
- package/src/lib/agents/learning-session.ts +354 -0
- package/src/lib/agents/pattern-extractor.ts +19 -0
- package/src/lib/agents/profiles/__tests__/sort.test.ts +42 -0
- package/src/lib/agents/profiles/sort.ts +7 -0
- package/src/lib/constants/card-icons.tsx +202 -0
- package/src/lib/constants/prose-styles.ts +7 -0
- package/src/lib/constants/settings.ts +1 -0
- package/src/lib/constants/task-status.ts +3 -0
- package/src/lib/db/schema.ts +3 -0
- package/src/lib/docs/adoption.ts +105 -0
- package/src/lib/docs/journey-tracker.ts +21 -0
- package/src/lib/docs/reader.ts +107 -0
- package/src/lib/docs/types.ts +54 -0
- package/src/lib/docs/usage-stage.ts +60 -0
- package/src/lib/documents/context-builder.ts +41 -0
- package/src/lib/notifications/actionable.ts +18 -10
- package/src/lib/queries/chart-data.ts +20 -1
- package/src/lib/settings/__tests__/budget-guardrails.test.ts +86 -24
- package/src/lib/settings/budget-guardrails.ts +213 -85
- package/src/lib/settings/permission-presets.ts +150 -0
- package/src/lib/settings/runtime-setup.ts +71 -0
- package/src/lib/usage/__tests__/ledger.test.ts +2 -2
- package/src/lib/usage/__tests__/pricing-registry.test.ts +78 -0
- package/src/lib/usage/ledger.ts +1 -1
- package/src/lib/usage/pricing-registry.ts +570 -0
- package/src/lib/usage/pricing.ts +15 -95
- package/src/lib/utils/__tests__/learned-context-history.test.ts +171 -0
- package/src/lib/utils/learned-context-history.ts +150 -0
- package/src/lib/validators/__tests__/settings.test.ts +23 -16
- package/src/lib/validators/settings.ts +3 -9
- package/src/lib/workflows/engine.ts +75 -61
- package/src/lib/workflows/types.ts +2 -0
- package/tsconfig.json +2 -1
- package/src/components/documents/document-preview.tsx +0 -68
package/README.md
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
# Stagent
|
|
2
2
|
|
|
3
|
-
>
|
|
3
|
+
> Governed AI Agent Workspace — Supervised Local Execution, Workflows, Documents, and Provider Runtimes.
|
|
4
4
|
|
|
5
|
-
[](https://nextjs.org/) [](https://react.dev/) [](https://www.typescriptlang.org/) [](https://docs.anthropic.com/) [](https://developers.openai.com/codex/app-server) [](LICENSE)
|
|
5
|
+
[](https://www.npmjs.com/package/stagent) [](https://nextjs.org/) [](https://react.dev/) [](https://www.typescriptlang.org/) [](https://docs.anthropic.com/) [](https://developers.openai.com/codex/app-server) [](LICENSE)
|
|
6
|
+
|
|
7
|
+
**[stagent.io](https://stagent.io)** · **[GitHub](https://github.com/navam-io/stagent)**
|
|
6
8
|
|
|
7
9
|
## Quick Start
|
|
8
10
|
|
|
@@ -10,9 +12,9 @@
|
|
|
10
12
|
npx stagent
|
|
11
13
|
```
|
|
12
14
|
|
|
13
|
-
Open [localhost:3000](http://localhost:3000).
|
|
15
|
+
Open [localhost:3000](http://localhost:3000). That's it — zero config, local SQLite, own your data.
|
|
14
16
|
|
|
15
|
-
**Profiles & Policies** · **Blueprints & Schedules** · **Open Source**
|
|
17
|
+
**Profiles & Policies** · **Blueprints & Schedules** · **Built-in Playbook** · **Open Source**
|
|
16
18
|
|
|
17
19
|
<img src="https://raw.githubusercontent.com/navam-io/stagent/main/public/readme/home-list.png" alt="Stagent home workspace" width="1200" />
|
|
18
20
|
|
|
@@ -24,13 +26,20 @@ Open [localhost:3000](http://localhost:3000).
|
|
|
24
26
|
|
|
25
27
|
## Why Stagent
|
|
26
28
|
|
|
27
|
-
AI agents are powerful — but production use breaks down when teams cannot see what the agent is doing, which rules it follows, or intervene before an unsafe action lands. Stagent gives you a governed operations workspace where every run is visible, every profile is reusable, and every approval is auditable.
|
|
29
|
+
AI agents are powerful — but production use breaks down when teams cannot see what the agent is doing, which rules it follows, or intervene before an unsafe action lands. Stagent gives you a **governed operations workspace** where every run is visible, every profile is reusable, and every approval is auditable.
|
|
30
|
+
|
|
31
|
+
- **Local-first** — SQLite database, no cloud dependency, `npx stagent` and go
|
|
32
|
+
- **Multi-provider** — Claude Code + OpenAI Codex App Server behind one runtime registry
|
|
33
|
+
- **Human-in-the-loop** — Inbox approvals, ambient toasts, tool permission policies
|
|
34
|
+
- **Reusable profiles** — 13+ agent profiles with instructions, tool policies, and runtime tuning
|
|
35
|
+
- **Workflow orchestration** — 6 patterns (sequence, planner-executor, checkpoint, parallel, loop, swarm)
|
|
36
|
+
- **Cost governance** — Usage metering, budgets, and spend visibility per provider and model
|
|
28
37
|
|
|
29
38
|
---
|
|
30
39
|
|
|
31
40
|
## Runtime Bridge
|
|
32
41
|
|
|
33
|
-
Stagent ships a shared runtime registry that routes tasks, schedules, and workflow steps through two governed execution backends: **Claude Code** (Anthropic Claude Agent SDK) and **OpenAI Codex App Server**. Both land in the same inbox, monitoring, and task-state surfaces —
|
|
42
|
+
Stagent ships a shared runtime registry that routes tasks, schedules, and workflow steps through two governed execution backends: **Claude Code** (Anthropic Claude Agent SDK) and **OpenAI Codex App Server**. Both land in the same inbox, monitoring, and task-state surfaces — switching providers is a config change, not a rewrite.
|
|
34
43
|
|
|
35
44
|
---
|
|
36
45
|
|
|
@@ -56,6 +65,10 @@ Stagent ships a shared runtime registry that routes tasks, schedules, and workfl
|
|
|
56
65
|
| 📋 | **[Kanban Board](#kanban-board-operations)** | Inline editing, bulk operations, and persistent board state |
|
|
57
66
|
| 🤖 | **[AI Assist → Workflows](#ai-assist--workflow-creation)** | Bridge task assist recommendations into governed workflow execution |
|
|
58
67
|
| 🧬 | **[Agent Self-Improvement](#agent-self-improvement)** | Agents learn patterns from execution history with human-approved context evolution |
|
|
68
|
+
| 🎯 | **[Tool Permission Presets](#tool-permission-presets)** | Pre-configured permission bundles (read-only, git-safe, full-auto) with layered apply/remove |
|
|
69
|
+
| 📦 | **[Workflow Context Batching](#workflow-context-batching)** | Workflow-scoped proposal buffering with batch approve/reject for learned context |
|
|
70
|
+
| 🧪 | **[E2E Test Automation](#e2e-test-automation)** | API-level end-to-end test suite covering both runtimes, 4 profiles, and 4 workflow patterns |
|
|
71
|
+
| 📖 | **[Playbook](#playbook)** | Built-in documentation with usage-stage awareness, adoption heatmap, and guided learning journeys |
|
|
59
72
|
|
|
60
73
|
---
|
|
61
74
|
|
|
@@ -73,6 +86,7 @@ Stagent ships a shared runtime registry that routes tasks, schedules, and workfl
|
|
|
73
86
|
- **Reusable agent profiles** — Profiles define instructions, allowed tools, runtime tuning, and MCP configs for repeated use
|
|
74
87
|
- **Permission pre-check** — Saved "Always Allow" patterns bypass the notification loop for trusted tools
|
|
75
88
|
- **Learned context loop** — Pattern extraction → human approval → versioned context injection creates a supervised self-improvement cycle
|
|
89
|
+
- **Permission presets** — Layered preset bundles (read-only ⊂ git-safe ⊂ full-auto) that compose with individual "Always Allow" patterns
|
|
76
90
|
|
|
77
91
|
---
|
|
78
92
|
|
|
@@ -115,9 +129,9 @@ Claude Agent SDK integration with the `canUseTool` polling pattern remains the d
|
|
|
115
129
|
OpenAI Codex App Server is integrated as Stagent's second governed runtime. Codex-backed tasks preserve project working directories, document context, resumable thread IDs, inbox approval requests, user questions, and provider-labeled logs. The same runtime can also power task assist, scheduled firings, and workflow child tasks.
|
|
116
130
|
|
|
117
131
|
#### Agent Profiles
|
|
118
|
-
Profile-backed execution with specialist definitions for different job types. Each profile packages instructions, allowed tools, max turns, and output format so teams can reuse behavior intentionally instead of relying on ad hoc prompts. Workflow steps and schedules can reference profiles directly, and runtimes can be selected independently when provider support differs.
|
|
132
|
+
Profile-backed execution with specialist definitions for different job types. Each profile packages instructions, allowed tools, max turns, and output format so teams can reuse behavior intentionally instead of relying on ad hoc prompts. Profile cards display role-based icon circles with keyword-inferred colors (blue for work, purple for personal), alongside domain tags, runtime badges, and tool counts. Workflow steps and schedules can reference profiles directly, and runtimes can be selected independently when provider support differs.
|
|
119
133
|
|
|
120
|
-
<img src="https://raw.githubusercontent.com/navam-io/stagent/main/public/readme/profiles-list.png" alt="Stagent agent profiles" width="1200" />
|
|
134
|
+
<img src="https://raw.githubusercontent.com/navam-io/stagent/main/public/readme/profiles-list.png" alt="Stagent agent profiles with role-based icon circles" width="1200" />
|
|
121
135
|
|
|
122
136
|
#### Workflows
|
|
123
137
|
Multi-step task orchestration with six patterns:
|
|
@@ -152,6 +166,9 @@ Bridge from AI task assist to workflow engine: when task assist recommends a mul
|
|
|
152
166
|
#### Agent Self-Improvement
|
|
153
167
|
Agents learn from execution history through a human-approved instruction evolution loop. After each task completion, the pattern extractor analyzes logs and proposes context updates — concise behavioral rules the agent should follow in future runs. Operators approve, reject, or edit proposals before they take effect. Learned context is versioned with rollback support and size-limited summarization to prevent unbounded growth. A sweep agent can audit the codebase for improvement opportunities and create prioritized tasks from its findings.
|
|
154
168
|
|
|
169
|
+
#### Workflow Context Batching
|
|
170
|
+
During workflow execution, the pattern extractor buffers context proposals into a learning session instead of creating individual notifications per proposal. When the workflow completes, all proposals are surfaced as a single batch for review. Operators can approve all, reject all, or review individually — reducing notification noise from multi-step workflows while preserving human oversight. The batch review component integrates into the existing pending approval host.
|
|
171
|
+
|
|
155
172
|
#### Session Management
|
|
156
173
|
Resume failed or cancelled agent tasks with one click. Tracks retry counts (limit: 3), detects expired sessions, and provides atomic claim to prevent duplicate runs.
|
|
157
174
|
|
|
@@ -159,12 +176,12 @@ Resume failed or cancelled agent tasks with one click. Tracks retry counts (limi
|
|
|
159
176
|
Iterative agent loop pattern with four stop conditions: max iterations, time budget, human cancel, and agent-signaled completion. Each iteration creates a child task with previous output as context. Loop status view with iteration timeline, progress bar, and expandable results. Pause/resume via DB status polling.
|
|
160
177
|
|
|
161
178
|
#### Agent Profile Catalog
|
|
162
|
-
Curated agent profiles across work and personal domains, built as portable Claude Code skill directories with `profile.yaml` sidecars. The profile gallery supports domain filtering and search, while YAML customization, GitHub import, and behavioral smoke tests keep profile behavior inspectable and reusable.
|
|
179
|
+
Curated agent profiles across work and personal domains, built as portable Claude Code skill directories with `profile.yaml` sidecars. The profile gallery displays role-based icon circles with keyword-inferred colors and supports domain filtering and search, while YAML customization, GitHub import, and behavioral smoke tests keep profile behavior inspectable and reusable.
|
|
163
180
|
|
|
164
181
|
#### Workflow Blueprints
|
|
165
|
-
Pre-configured workflow templates across work and personal domains. Browse blueprints in a gallery with
|
|
182
|
+
Pre-configured workflow templates across work and personal domains. Browse blueprints in a gallery with pattern-colored icon circles, domain tags, and difficulty badges. Preview steps and required variables, fill in a dynamic form, and create draft workflows with resolved prompts and profile assignments. Create custom blueprints via YAML or import from GitHub URLs. Lineage tracking connects workflows back to their source blueprint.
|
|
166
183
|
|
|
167
|
-
<img src="https://raw.githubusercontent.com/navam-io/stagent/main/public/readme/workflows-list.png" alt="Stagent
|
|
184
|
+
<img src="https://raw.githubusercontent.com/navam-io/stagent/main/public/readme/workflows-list.png" alt="Stagent workflows with keyword-inferred icon circles" width="1200" />
|
|
168
185
|
|
|
169
186
|
### Documents
|
|
170
187
|
|
|
@@ -181,6 +198,11 @@ Automatic text extraction on upload for five file types: text, PDF (pdf-parse),
|
|
|
181
198
|
#### Agent Document Context
|
|
182
199
|
Documents linked to a task are automatically injected into the agent's prompt as context. The context builder aggregates extracted text from all linked documents, giving agents access to uploaded reference material without manual copy-paste.
|
|
183
200
|
|
|
201
|
+
### Knowledge
|
|
202
|
+
|
|
203
|
+
#### Playbook
|
|
204
|
+
Built-in documentation system at `/playbook` with usage-stage awareness that adapts content to your experience level (new, early, active, power user). Browse feature reference docs and guided learning journeys organized by persona (Personal, Work, Power User, Developer). Adoption heatmap tracks which features you've explored, while journey cards show progress through multi-step learning paths. Markdown rendering with automatic internal link resolution, table of contents, related docs, and screengrab embedding.
|
|
205
|
+
|
|
184
206
|
### Platform
|
|
185
207
|
|
|
186
208
|
#### Tool Permission Persistence
|
|
@@ -189,6 +211,9 @@ Documents linked to a task are automatically injected into the agent's prompt as
|
|
|
189
211
|
#### Ambient Approvals
|
|
190
212
|
Pending permission requests now surface through a shell-level approval presenter on any route, so operators can respond without leaving the page they are working on. Inbox remains the durable queue and source of truth, while the ambient surface provides the fast path for active supervision.
|
|
191
213
|
|
|
214
|
+
#### Tool Permission Presets
|
|
215
|
+
Pre-configured permission bundles that reduce friction for common tool approval patterns. Three layered presets — read-only (file reads, glob, grep), git-safe (adds git operations), and full-auto (adds write, edit, bash) — compose with existing "Always Allow" patterns. Presets are layered: enabling git-safe automatically includes read-only patterns; removing git-safe only strips its unique additions. Risk badges indicate the trust level of each preset. Manage presets from the Settings page alongside individual tool permissions.
|
|
216
|
+
|
|
192
217
|
#### Schedules
|
|
193
218
|
Time-based scheduling for agent tasks with human-friendly intervals (`5m`, `2h`, `1d`) and raw 5-field cron expressions. One-shot and recurring modes with pause/resume lifecycle, expiry limits, and max firings. Each firing creates a child task through the shared execution pipeline, and schedules can now target a runtime explicitly. Scheduler runs as a poll-based engine started via Next.js instrumentation hook.
|
|
194
219
|
|
|
@@ -218,7 +243,9 @@ Real-time agent log streaming via Server-Sent Events. Filter by task or event ty
|
|
|
218
243
|
File upload with drag-and-drop in task creation. Type-aware content preview for text, markdown (via react-markdown), code, and JSON. Copy-to-clipboard and download-as-file for task outputs.
|
|
219
244
|
|
|
220
245
|
#### Settings
|
|
221
|
-
Configuration hub with provider-aware sections: Claude authentication (API key or OAuth), OpenAI Codex runtime API-key management, tool permissions (saved "Always Allow" patterns with revoke), and data management.
|
|
246
|
+
Configuration hub with provider-aware sections: Claude authentication (API key or OAuth), OpenAI Codex runtime API-key management, tool permissions (saved "Always Allow" patterns with revoke), permission presets, budget configuration, and data management.
|
|
247
|
+
|
|
248
|
+
<img src="https://raw.githubusercontent.com/navam-io/stagent/main/public/readme/settings-list.png" alt="Stagent settings" width="1200" />
|
|
222
249
|
|
|
223
250
|
#### CLI
|
|
224
251
|
The `npx stagent` entry point boots a Next.js server from the published npm package. It is built from `bin/cli.ts` into `dist/cli.js` using tsup, and serves as the primary distribution channel — no clone required.
|
|
@@ -227,7 +254,10 @@ The `npx stagent` entry point boots a Next.js server from the published npm pack
|
|
|
227
254
|
SQLite with WAL mode via better-sqlite3 + Drizzle ORM. Ten tables: `projects`, `tasks`, `workflows`, `agent_logs`, `notifications`, `documents`, `schedules`, `settings`, `learned_context`, `usage_ledger`. Self-healing bootstrap — tables are created on startup if missing.
|
|
228
255
|
|
|
229
256
|
#### App Shell
|
|
230
|
-
Responsive sidebar with collapsible icon-only mode, custom Stagent logo, tooltip navigation, dark/light/system theme, and OKLCH hue 250 blue-indigo color palette. Built on shadcn/ui (New York style) with PWA manifest and app icons. Routes: Home, Dashboard,
|
|
257
|
+
Responsive sidebar with collapsible icon-only mode, custom Stagent logo, tooltip navigation, dark/light/system theme, and OKLCH hue 250 blue-indigo color palette. Built on shadcn/ui (New York style) with PWA manifest and app icons. Routes: Home, Dashboard, Inbox, Monitor, Projects, Workflows, Documents, Profiles, Schedules, Cost & Usage, Playbook, Settings.
|
|
258
|
+
|
|
259
|
+
#### E2E Test Automation
|
|
260
|
+
API-level end-to-end test suite built on Vitest with 120-second timeouts and sequential execution. Five test files cover single-task execution, sequence workflows, parallel workflows, blueprints, and cross-runtime scenarios across both Claude and Codex backends. Tests skip gracefully when runtimes are not configured, preventing CI failures. Run with `npm run test:e2e`.
|
|
231
261
|
|
|
232
262
|
---
|
|
233
263
|
|
|
@@ -256,11 +286,13 @@ npm run dev # Next.js dev server (Turbopack)
|
|
|
256
286
|
npm run build:cli # Build CLI → dist/cli.js
|
|
257
287
|
npm test # Run Vitest
|
|
258
288
|
npm run test:coverage # Coverage report
|
|
289
|
+
npm run test:e2e # E2E integration tests (requires runtime credentials)
|
|
259
290
|
```
|
|
260
291
|
|
|
261
292
|
### Project Structure
|
|
262
293
|
|
|
263
294
|
```
|
|
295
|
+
docs/ # Playbook markdown docs + manifest.json
|
|
264
296
|
src/
|
|
265
297
|
├── app/ # Next.js App Router pages
|
|
266
298
|
│ ├── dashboard/ # Task kanban board
|
|
@@ -271,6 +303,7 @@ src/
|
|
|
271
303
|
│ ├── workflows/ # Workflow management + blueprints
|
|
272
304
|
│ ├── schedules/ # Schedule management
|
|
273
305
|
│ ├── costs/ # Cost & usage dashboard
|
|
306
|
+
│ ├── playbook/ # Documentation & learning journeys
|
|
274
307
|
│ ├── inbox/ # Notifications
|
|
275
308
|
│ ├── monitor/ # Log streaming
|
|
276
309
|
│ └── settings/ # Configuration
|
|
@@ -281,6 +314,7 @@ src/
|
|
|
281
314
|
│ ├── workflows/ # Workflow UI + blueprints + swarm
|
|
282
315
|
│ ├── documents/ # Document browser + upload
|
|
283
316
|
│ ├── costs/ # Cost dashboard + filters
|
|
317
|
+
│ ├── playbook/ # Playbook docs + journeys + adoption
|
|
284
318
|
│ ├── schedules/ # Schedule management
|
|
285
319
|
│ ├── monitoring/ # Log viewer
|
|
286
320
|
│ ├── notifications/ # Inbox + permission actions
|
|
@@ -290,6 +324,7 @@ src/
|
|
|
290
324
|
└── lib/
|
|
291
325
|
├── agents/ # Runtime adapters, profiles, learned context, pattern extraction
|
|
292
326
|
├── db/ # Schema, migrations
|
|
327
|
+
├── docs/ # Playbook reader, adoption, usage-stage, journey tracker
|
|
293
328
|
├── documents/ # Preprocessing + context builder
|
|
294
329
|
├── workflows/ # Engine + types + blueprints
|
|
295
330
|
├── schedules/ # Scheduler engine + interval parser
|
|
@@ -301,7 +336,7 @@ src/
|
|
|
301
336
|
└── utils/ # Shared helpers
|
|
302
337
|
```
|
|
303
338
|
|
|
304
|
-
### API Endpoints (
|
|
339
|
+
### API Endpoints (52 routes)
|
|
305
340
|
|
|
306
341
|
| Domain | Endpoint | Method | Purpose |
|
|
307
342
|
|--------|----------|--------|---------|
|
|
@@ -349,7 +384,10 @@ src/
|
|
|
349
384
|
| | `/api/settings/test` | POST | Provider-aware runtime connectivity test |
|
|
350
385
|
| | `/api/settings/budgets` | GET/POST | Budget configuration |
|
|
351
386
|
| | `/api/permissions` | GET/POST/DELETE | Tool permission patterns |
|
|
387
|
+
| | `/api/permissions/presets` | GET/POST/DELETE | Permission preset bundles |
|
|
388
|
+
| **Context** | `/api/context/batch` | POST | Batch approve/reject context proposals |
|
|
352
389
|
| **Monitoring** | `/api/logs/stream` | GET | SSE agent log stream |
|
|
390
|
+
| **Playbook** | `/api/playbook/status` | GET | Playbook adoption status and usage stage |
|
|
353
391
|
| **Platform** | `/api/command-palette/recent` | GET | Recent command palette items |
|
|
354
392
|
| | `/api/data/clear` | POST | Clear all data |
|
|
355
393
|
| | `/api/data/seed` | POST | Seed sample data |
|
|
@@ -368,40 +406,23 @@ All 14 features shipped across three layers:
|
|
|
368
406
|
| **Core** | Project management, task board, agent integration, inbox notifications, monitoring dashboard |
|
|
369
407
|
| **Polish** | Homepage dashboard, UX fixes, workflow engine, AI task assist, content handling, session management |
|
|
370
408
|
|
|
371
|
-
### Post-MVP —
|
|
372
|
-
|
|
373
|
-
| Category |
|
|
374
|
-
|
|
375
|
-
| **Documents** | File
|
|
376
|
-
|
|
|
377
|
-
| | Agent
|
|
378
|
-
|
|
|
379
|
-
| |
|
|
380
|
-
| **
|
|
381
|
-
|
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
|
386
|
-
|
|
387
|
-
|
|
|
388
|
-
| | Micro-Visualizations | Sparklines, mini bars, donut rings — zero-dependency SVG charts |
|
|
389
|
-
| | Command Palette | ⌘K palette with navigation, create actions, recent items, theme toggle |
|
|
390
|
-
| | Operational Surface | Cross-route composition with consistent layout, density, and interaction patterns |
|
|
391
|
-
| | Profile Surface | Profile gallery stability, detail views, and behavioral testing UI |
|
|
392
|
-
| | Accessibility | ARIA labels, keyboard navigation, focus management, screen reader support |
|
|
393
|
-
| | UI Density Refinement | Tightened spacing, typography, and visual hierarchy across all routes |
|
|
394
|
-
| | Kanban Board Operations | Inline editing, bulk operations, card animations, edit dialog |
|
|
395
|
-
| | Board Context Persistence | Persisted filters, sort order, and project selection across sessions |
|
|
396
|
-
| **Platform** | Scheduled Prompt Loops | Cron + human-friendly intervals, one-shot/recurring, pause/resume lifecycle |
|
|
397
|
-
| | Tool Permission Persistence | "Always Allow" patterns, pre-check bypass, Settings management |
|
|
398
|
-
| | Provider Runtimes | Shared runtime registry with Claude Code and OpenAI Codex App Server adapters |
|
|
399
|
-
| | OpenAI Codex Runtime | Codex App Server integration with inbox approvals, logs, and thread resumption |
|
|
400
|
-
| | Cross-Provider Profiles | Profile compatibility layer ensuring profiles work across Claude and Codex runtimes |
|
|
401
|
-
| | Parallel Fork/Join | 2-5 concurrent research branches with synthesis step |
|
|
402
|
-
| **Governance** | Usage Metering Ledger | Provider-normalized token and spend tracking across all execution paths |
|
|
403
|
-
| | Spend Budget Guardrails | Per-project and global budgets with enforcement and alerts |
|
|
404
|
-
| | Cost & Usage Dashboard | Summary cards, trend views, provider/model breakdowns, budget audit visibility |
|
|
409
|
+
### Post-MVP — 31 features shipped
|
|
410
|
+
|
|
411
|
+
| Category | Features |
|
|
412
|
+
|----------|---------|
|
|
413
|
+
| **Documents** (5) | File attachments, preprocessing (5 formats), agent context injection, document browser, output generation |
|
|
414
|
+
| **Agent Intelligence** (6) | Multi-agent routing, autonomous loops, multi-agent swarm, AI assist→workflows, agent self-improvement, workflow context batching |
|
|
415
|
+
| **Agent Profiles** (2) | Agent profile catalog (13+ profiles), workflow blueprints (8 templates) |
|
|
416
|
+
| **UI Enhancement** (13) | Ambient approvals, learned context UX, micro-visualizations, command palette, operational surface, profile surface, accessibility, UI density, kanban operations, board persistence, detail view redesign, playbook documentation, workflow UX overhaul (in-progress) |
|
|
417
|
+
| **Platform** (8) | Scheduled prompt loops, tool permissions, provider runtimes, OpenAI Codex runtime, cross-provider profiles, parallel fork/join, tool permission presets, npm publish (deferred) |
|
|
418
|
+
| **Runtime Quality** (2) | SDK runtime hardening, E2E test automation |
|
|
419
|
+
| **Governance** (3) | Usage metering ledger, spend budget guardrails, cost & usage dashboard |
|
|
420
|
+
|
|
421
|
+
### In Progress
|
|
422
|
+
|
|
423
|
+
| Feature | Description |
|
|
424
|
+
|---------|-------------|
|
|
425
|
+
| Workflow UX Overhaul | Document context propagation, output readability, dashboard visibility, AI assist guidance |
|
|
405
426
|
|
|
406
427
|
---
|
|
407
428
|
|
|
@@ -410,7 +431,7 @@ All 14 features shipped across three layers:
|
|
|
410
431
|
### Contributor Setup
|
|
411
432
|
|
|
412
433
|
```bash
|
|
413
|
-
git clone
|
|
434
|
+
git clone https://github.com/navam-io/stagent.git && cd stagent && npm install
|
|
414
435
|
|
|
415
436
|
# Set up one or both runtime credentials
|
|
416
437
|
cat > .env.local <<'EOF'
|
|
@@ -432,6 +453,10 @@ npm run dev
|
|
|
432
453
|
|
|
433
454
|
See `AGENTS.md` for architecture details and development conventions.
|
|
434
455
|
|
|
456
|
+
---
|
|
457
|
+
|
|
435
458
|
## License
|
|
436
459
|
|
|
437
460
|
Licensed under the [Apache License 2.0](LICENSE).
|
|
461
|
+
|
|
462
|
+
Copyright 2025-2026 [Navam](https://navam.io)
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "stagent",
|
|
3
|
-
"version": "0.1.11",
|
|
3
|
+
"version": "0.1.13",
|
|
4
4
|
"description": "Governed AI agent workspace for supervised local execution, workflows, documents, and provider runtimes.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai",
|
|
@@ -35,7 +35,7 @@
|
|
|
35
35
|
"bugs": {
|
|
36
36
|
"url": "https://github.com/navam-io/stagent/issues"
|
|
37
37
|
},
|
|
38
|
-
"homepage": "https://
|
|
38
|
+
"homepage": "https://stagent.io",
|
|
39
39
|
"scripts": {
|
|
40
40
|
"dev": "next dev --turbopack",
|
|
41
41
|
"build": "next build",
|
|
@@ -43,6 +43,7 @@
|
|
|
43
43
|
"test": "vitest run",
|
|
44
44
|
"test:watch": "vitest",
|
|
45
45
|
"test:coverage": "vitest run --coverage",
|
|
46
|
+
"test:e2e": "vitest run --config vitest.config.e2e.ts",
|
|
46
47
|
"test:ui": "vitest --ui",
|
|
47
48
|
"prepublishOnly": "npm run build:cli"
|
|
48
49
|
},
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* E2E: Blueprint instantiation and execution.
|
|
3
|
+
*
|
|
4
|
+
* Tests that blueprints can be listed, instantiated with variables,
|
|
5
|
+
* and executed as workflows with variable resolution.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import {
|
|
9
|
+
setupE2E,
|
|
10
|
+
teardownE2E,
|
|
11
|
+
testProjectId,
|
|
12
|
+
claudeAvailable,
|
|
13
|
+
} from "./setup";
|
|
14
|
+
import {
|
|
15
|
+
listBlueprints,
|
|
16
|
+
instantiateBlueprint,
|
|
17
|
+
executeWorkflow,
|
|
18
|
+
pollWorkflowUntilDone,
|
|
19
|
+
} from "./helpers";
|
|
20
|
+
|
|
21
|
+
beforeAll(async () => {
|
|
22
|
+
await setupE2E();
|
|
23
|
+
});
|
|
24
|
+
|
|
25
|
+
afterAll(async () => {
|
|
26
|
+
await teardownE2E();
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
describe("Blueprint — Gallery & Instantiation", () => {
|
|
30
|
+
it("lists available blueprints", async () => {
|
|
31
|
+
const { ok, data } = await listBlueprints();
|
|
32
|
+
expect(ok).toBe(true);
|
|
33
|
+
expect(Array.isArray(data)).toBe(true);
|
|
34
|
+
expect(data!.length).toBeGreaterThan(0);
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
it.skipIf(!claudeAvailable)(
|
|
38
|
+
"instantiates and executes documentation-generation blueprint",
|
|
39
|
+
async () => {
|
|
40
|
+
// Instantiate with variables
|
|
41
|
+
const { ok: instOk, data: instData } = await instantiateBlueprint(
|
|
42
|
+
"documentation-generation",
|
|
43
|
+
{
|
|
44
|
+
target: "src/index.ts and src/utils.ts",
|
|
45
|
+
docType: "API Documentation",
|
|
46
|
+
},
|
|
47
|
+
testProjectId
|
|
48
|
+
);
|
|
49
|
+
expect(instOk).toBe(true);
|
|
50
|
+
|
|
51
|
+
const workflow = instData?.workflow;
|
|
52
|
+
expect(workflow).toBeTruthy();
|
|
53
|
+
expect(workflow!.status).toBe("draft");
|
|
54
|
+
|
|
55
|
+
// Execute the instantiated workflow
|
|
56
|
+
const exec = await executeWorkflow(workflow!.id);
|
|
57
|
+
expect(exec.status).toBe(202);
|
|
58
|
+
|
|
59
|
+
const result = await pollWorkflowUntilDone(workflow!.id);
|
|
60
|
+
expect(result.status).toBe("completed");
|
|
61
|
+
}
|
|
62
|
+
);
|
|
63
|
+
});
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* E2E: Cross-runtime comparison.
|
|
3
|
+
*
|
|
4
|
+
* Tests that the same task produces valid results on both Claude Code
|
|
5
|
+
* and Codex runtimes, verifying runtime parity.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import {
|
|
9
|
+
setupE2E,
|
|
10
|
+
teardownE2E,
|
|
11
|
+
testProjectId,
|
|
12
|
+
claudeAvailable,
|
|
13
|
+
codexAvailable,
|
|
14
|
+
} from "./setup";
|
|
15
|
+
import {
|
|
16
|
+
createTask,
|
|
17
|
+
executeTask,
|
|
18
|
+
pollTaskUntilDone,
|
|
19
|
+
updateTask,
|
|
20
|
+
} from "./helpers";
|
|
21
|
+
|
|
22
|
+
beforeAll(async () => {
|
|
23
|
+
await setupE2E();
|
|
24
|
+
});
|
|
25
|
+
|
|
26
|
+
afterAll(async () => {
|
|
27
|
+
await teardownE2E();
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
describe("Cross-Runtime Comparison", () => {
|
|
31
|
+
const bothAvailable = () => claudeAvailable && codexAvailable;
|
|
32
|
+
|
|
33
|
+
it.skipIf(!bothAvailable())(
|
|
34
|
+
"same task produces valid results on both runtimes",
|
|
35
|
+
async () => {
|
|
36
|
+
const taskPrompt =
|
|
37
|
+
"Describe the TypeScript code in src/index.ts. List the exported functions and any bugs.";
|
|
38
|
+
|
|
39
|
+
// Create and execute on Claude
|
|
40
|
+
const { data: claudeTask } = await createTask({
|
|
41
|
+
title: "Cross-runtime test (Claude)",
|
|
42
|
+
description: taskPrompt,
|
|
43
|
+
projectId: testProjectId,
|
|
44
|
+
agentProfile: "general",
|
|
45
|
+
});
|
|
46
|
+
await updateTask(claudeTask!.id, { status: "queued" });
|
|
47
|
+
await executeTask(claudeTask!.id);
|
|
48
|
+
|
|
49
|
+
// Create and execute on Codex
|
|
50
|
+
const { data: codexTask } = await createTask({
|
|
51
|
+
title: "Cross-runtime test (Codex)",
|
|
52
|
+
description: taskPrompt,
|
|
53
|
+
projectId: testProjectId,
|
|
54
|
+
assignedAgent: "codex",
|
|
55
|
+
agentProfile: "general",
|
|
56
|
+
});
|
|
57
|
+
await updateTask(codexTask!.id, { status: "queued" });
|
|
58
|
+
await executeTask(codexTask!.id);
|
|
59
|
+
|
|
60
|
+
// Wait for both
|
|
61
|
+
const [claudeResult, codexResult] = await Promise.all([
|
|
62
|
+
pollTaskUntilDone(claudeTask!.id),
|
|
63
|
+
pollTaskUntilDone(codexTask!.id),
|
|
64
|
+
]);
|
|
65
|
+
|
|
66
|
+
// Both should complete
|
|
67
|
+
expect(claudeResult.status).toBe("completed");
|
|
68
|
+
expect(codexResult.status).toBe("completed");
|
|
69
|
+
|
|
70
|
+
// Both should produce non-empty results
|
|
71
|
+
expect(claudeResult.result).toBeTruthy();
|
|
72
|
+
expect(codexResult.result).toBeTruthy();
|
|
73
|
+
expect(claudeResult.result!.length).toBeGreaterThan(50);
|
|
74
|
+
expect(codexResult.result!.length).toBeGreaterThan(50);
|
|
75
|
+
}
|
|
76
|
+
);
|
|
77
|
+
});
|