stagent 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. package/README.md +58 -27
  2. package/package.json +3 -3
  3. package/src/__tests__/e2e/blueprint.test.ts +63 -0
  4. package/src/__tests__/e2e/cross-runtime.test.ts +77 -0
  5. package/src/__tests__/e2e/helpers.ts +286 -0
  6. package/src/__tests__/e2e/parallel-workflow.test.ts +120 -0
  7. package/src/__tests__/e2e/sequence-workflow.test.ts +109 -0
  8. package/src/__tests__/e2e/setup.ts +156 -0
  9. package/src/__tests__/e2e/single-task.test.ts +170 -0
  10. package/src/app/api/command-palette/recent/route.ts +41 -18
  11. package/src/app/api/context/batch/route.ts +44 -0
  12. package/src/app/api/permissions/presets/route.ts +80 -0
  13. package/src/app/api/playbook/status/route.ts +15 -0
  14. package/src/app/api/profiles/route.ts +23 -21
  15. package/src/app/api/settings/pricing/route.ts +15 -0
  16. package/src/app/costs/page.tsx +53 -43
  17. package/src/app/globals.css +0 -5
  18. package/src/app/playbook/[slug]/page.tsx +76 -0
  19. package/src/app/playbook/page.tsx +54 -0
  20. package/src/app/profiles/page.tsx +7 -4
  21. package/src/app/settings/page.tsx +2 -2
  22. package/src/app/tasks/page.tsx +5 -0
  23. package/src/components/costs/cost-dashboard.tsx +226 -320
  24. package/src/components/dashboard/activity-feed.tsx +6 -2
  25. package/src/components/notifications/batch-proposal-review.tsx +150 -0
  26. package/src/components/notifications/notification-item.tsx +6 -3
  27. package/src/components/notifications/pending-approval-host.tsx +57 -11
  28. package/src/components/playbook/adoption-heatmap.tsx +69 -0
  29. package/src/components/playbook/journey-card.tsx +110 -0
  30. package/src/components/playbook/playbook-action-button.tsx +22 -0
  31. package/src/components/playbook/playbook-browser.tsx +143 -0
  32. package/src/components/playbook/playbook-card.tsx +102 -0
  33. package/src/components/playbook/playbook-detail-view.tsx +223 -0
  34. package/src/components/playbook/playbook-homepage.tsx +142 -0
  35. package/src/components/playbook/playbook-toc.tsx +90 -0
  36. package/src/components/playbook/playbook-updated-badge.tsx +23 -0
  37. package/src/components/playbook/related-docs.tsx +30 -0
  38. package/src/components/profiles/__tests__/learned-context-panel.test.tsx +175 -0
  39. package/src/components/profiles/context-proposal-review.tsx +7 -3
  40. package/src/components/profiles/learned-context-panel.tsx +116 -8
  41. package/src/components/profiles/profile-detail-view.tsx +7 -19
  42. package/src/components/profiles/profile-form-view.tsx +0 -22
  43. package/src/components/settings/__tests__/auth-config-section.test.tsx +147 -0
  44. package/src/components/settings/api-key-form.tsx +5 -43
  45. package/src/components/settings/auth-config-section.tsx +10 -6
  46. package/src/components/settings/auth-status-badge.tsx +8 -0
  47. package/src/components/settings/budget-guardrails-section.tsx +403 -620
  48. package/src/components/settings/connection-test-control.tsx +63 -0
  49. package/src/components/settings/permissions-section.tsx +85 -75
  50. package/src/components/settings/permissions-sections.tsx +24 -0
  51. package/src/components/settings/presets-section.tsx +159 -0
  52. package/src/components/settings/pricing-registry-panel.tsx +164 -0
  53. package/src/components/shared/app-sidebar.tsx +2 -0
  54. package/src/components/shared/command-palette.tsx +30 -0
  55. package/src/components/shared/light-markdown.tsx +134 -0
  56. package/src/components/workflows/loop-status-view.tsx +8 -4
  57. package/src/components/workflows/workflow-status-view.tsx +16 -9
  58. package/src/lib/agents/__tests__/claude-agent.test.ts +7 -2
  59. package/src/lib/agents/__tests__/learned-context.test.ts +500 -0
  60. package/src/lib/agents/__tests__/pattern-extractor.test.ts +243 -0
  61. package/src/lib/agents/__tests__/sweep.test.ts +202 -0
  62. package/src/lib/agents/claude-agent.ts +104 -78
  63. package/src/lib/agents/learned-context.ts +32 -28
  64. package/src/lib/agents/learning-session.ts +234 -0
  65. package/src/lib/agents/pattern-extractor.ts +34 -64
  66. package/src/lib/agents/profiles/__tests__/sort.test.ts +42 -0
  67. package/src/lib/agents/profiles/builtins/code-reviewer/profile.yaml +0 -1
  68. package/src/lib/agents/profiles/builtins/data-analyst/profile.yaml +0 -1
  69. package/src/lib/agents/profiles/builtins/devops-engineer/profile.yaml +0 -1
  70. package/src/lib/agents/profiles/builtins/document-writer/profile.yaml +0 -1
  71. package/src/lib/agents/profiles/builtins/general/profile.yaml +0 -1
  72. package/src/lib/agents/profiles/builtins/health-fitness-coach/profile.yaml +0 -1
  73. package/src/lib/agents/profiles/builtins/learning-coach/profile.yaml +0 -1
  74. package/src/lib/agents/profiles/builtins/project-manager/profile.yaml +0 -1
  75. package/src/lib/agents/profiles/builtins/researcher/profile.yaml +0 -1
  76. package/src/lib/agents/profiles/builtins/shopping-assistant/profile.yaml +0 -1
  77. package/src/lib/agents/profiles/builtins/sweep/profile.yaml +0 -1
  78. package/src/lib/agents/profiles/builtins/technical-writer/profile.yaml +0 -1
  79. package/src/lib/agents/profiles/builtins/travel-planner/profile.yaml +0 -1
  80. package/src/lib/agents/profiles/builtins/wealth-manager/profile.yaml +0 -1
  81. package/src/lib/agents/profiles/registry.ts +0 -1
  82. package/src/lib/agents/profiles/sort.ts +7 -0
  83. package/src/lib/agents/profiles/types.ts +0 -1
  84. package/src/lib/agents/runtime/catalog.ts +1 -1
  85. package/src/lib/agents/runtime/claude.ts +66 -0
  86. package/src/lib/constants/settings.ts +1 -0
  87. package/src/lib/constants/task-status.ts +6 -0
  88. package/src/lib/data/seed-data/profiles.ts +0 -3
  89. package/src/lib/db/schema.ts +3 -0
  90. package/src/lib/docs/adoption.ts +105 -0
  91. package/src/lib/docs/journey-tracker.ts +21 -0
  92. package/src/lib/docs/reader.ts +102 -0
  93. package/src/lib/docs/types.ts +54 -0
  94. package/src/lib/docs/usage-stage.ts +60 -0
  95. package/src/lib/notifications/actionable.ts +18 -10
  96. package/src/lib/settings/__tests__/budget-guardrails.test.ts +86 -24
  97. package/src/lib/settings/budget-guardrails.ts +213 -85
  98. package/src/lib/settings/permission-presets.ts +150 -0
  99. package/src/lib/settings/runtime-setup.ts +71 -0
  100. package/src/lib/usage/__tests__/ledger.test.ts +29 -5
  101. package/src/lib/usage/__tests__/pricing-registry.test.ts +78 -0
  102. package/src/lib/usage/ledger.ts +4 -2
  103. package/src/lib/usage/pricing-registry.ts +570 -0
  104. package/src/lib/usage/pricing.ts +15 -41
  105. package/src/lib/utils/__tests__/learned-context-history.test.ts +171 -0
  106. package/src/lib/utils/learned-context-history.ts +150 -0
  107. package/src/lib/validators/__tests__/profile.test.ts +0 -15
  108. package/src/lib/validators/__tests__/settings.test.ts +23 -16
  109. package/src/lib/validators/profile.ts +0 -1
  110. package/src/lib/validators/settings.ts +3 -9
  111. package/src/lib/workflows/__tests__/engine.test.ts +2 -0
  112. package/src/lib/workflows/engine.ts +20 -1
package/README.md CHANGED
@@ -12,7 +12,7 @@ npx stagent
12
12
 
13
13
  Open [localhost:3000](http://localhost:3000).
14
14
 
15
- **Profiles & Policies** · **Blueprints & Schedules** · **Open Source**
15
+ **Profiles & Policies** · **Blueprints & Schedules** · **Built-in Playbook** · **Open Source**
16
16
 
17
17
  <img src="https://raw.githubusercontent.com/navam-io/stagent/main/public/readme/home-list.png" alt="Stagent home workspace" width="1200" />
18
18
 
@@ -54,7 +54,12 @@ Stagent ships a shared runtime registry that routes tasks, schedules, and workfl
54
54
  | 🚨 | **[Ambient Approvals](#ambient-approvals)** | Shell-level approval prompts that keep Inbox as the durable supervision queue |
55
55
  | 🔒 | **[Tool Permissions](#tool-permission-persistence)** | Trusted-tool policies with explicit "Always Allow" rules |
56
56
  | 📋 | **[Kanban Board](#kanban-board-operations)** | Inline editing, bulk operations, and persistent board state |
57
- | 🤖 | **[AI Assist → Workflows](#ai-assist--workflow-creation)** | Bridge task assist recommendations into governed workflow execution *(in progress)* |
57
+ | 🤖 | **[AI Assist → Workflows](#ai-assist--workflow-creation)** | Bridge task assist recommendations into governed workflow execution |
58
+ | 🧬 | **[Agent Self-Improvement](#agent-self-improvement)** | Agents learn patterns from execution history with human-approved context evolution |
59
+ | 🎯 | **[Tool Permission Presets](#tool-permission-presets)** | Pre-configured permission bundles (read-only, git-safe, full-auto) with layered apply/remove |
60
+ | 📦 | **[Workflow Context Batching](#workflow-context-batching)** | Workflow-scoped proposal buffering with batch approve/reject for learned context |
61
+ | 🧪 | **[E2E Test Automation](#e2e-test-automation)** | API-level end-to-end test suite covering both runtimes, 4 profiles, and 4 workflow patterns |
62
+ | 📖 | **[Playbook](#playbook)** | Built-in documentation with usage-stage awareness, adoption heatmap, and guided learning journeys |
58
63
 
59
64
  ---
60
65
 
@@ -71,6 +76,8 @@ Stagent ships a shared runtime registry that routes tasks, schedules, and workfl
71
76
  - **Provider runtime abstraction** — Tasks, schedules, workflows, task assist, and health checks route through shared runtime adapters instead of provider-specific entry points
72
77
  - **Reusable agent profiles** — Profiles define instructions, allowed tools, runtime tuning, and MCP configs for repeated use
73
78
  - **Permission pre-check** — Saved "Always Allow" patterns bypass the notification loop for trusted tools
79
+ - **Learned context loop** — Pattern extraction → human approval → versioned context injection creates a supervised self-improvement cycle
80
+ - **Permission presets** — Layered preset bundles (read-only ⊂ git-safe ⊂ full-auto) that compose with individual "Always Allow" patterns
74
81
 
75
82
  ---
76
83
 
@@ -113,7 +120,7 @@ Claude Agent SDK integration with the `canUseTool` polling pattern remains the d
113
120
  OpenAI Codex App Server is integrated as Stagent's second governed runtime. Codex-backed tasks preserve project working directories, document context, resumable thread IDs, inbox approval requests, user questions, and provider-labeled logs. The same runtime can also power task assist, scheduled firings, and workflow child tasks.
114
121
 
115
122
  #### Agent Profiles
116
- Profile-backed execution with specialist definitions for different job types. Each profile packages instructions, allowed tools, runtime tuning, and MCP server configuration so teams can reuse behavior intentionally instead of relying on ad hoc prompts. Workflow steps and schedules can reference profiles directly, and runtimes can be selected independently when provider support differs.
123
+ Profile-backed execution with specialist definitions for different job types. Each profile packages instructions, allowed tools, max turns, and output format so teams can reuse behavior intentionally instead of relying on ad hoc prompts. Workflow steps and schedules can reference profiles directly, and runtimes can be selected independently when provider support differs.
117
124
 
118
125
  <img src="https://raw.githubusercontent.com/navam-io/stagent/main/public/readme/profiles-list.png" alt="Stagent agent profiles" width="1200" />
119
126
 
@@ -143,10 +150,16 @@ AI-powered task creation: generate improved descriptions, break tasks into sub-t
143
150
  | <img src="https://raw.githubusercontent.com/navam-io/stagent/main/public/readme/dashboard-create-form-empty.png" alt="Empty task creation form" width="380" /> | <img src="https://raw.githubusercontent.com/navam-io/stagent/main/public/readme/dashboard-create-form-ai-assist.png" alt="AI Assist suggestions panel" width="380" /> | <img src="https://raw.githubusercontent.com/navam-io/stagent/main/public/readme/dashboard-create-form-ai-applied.png" alt="AI suggestions applied to form" width="380" /> |
144
151
 
145
152
  #### AI Assist → Workflow Creation
146
- *(In progress)* Bridge from AI task assist to workflow engine: when task assist recommends a multi-step plan, a "Create as Workflow" button converts the recommendation into a validated workflow definition with per-step profile assignments, dependency ordering, and pattern selection across all six workflow types. The `WorkflowConfirmationSheet` lets operators review and edit steps, profiles, and configuration before creating the workflow. A keyword-based profile suggestion fallback ensures steps get reasonable profile assignments even without the AI classifier.
153
+ Bridge from AI task assist to workflow engine: when task assist recommends a multi-step plan, a "Create as Workflow" button converts the recommendation into a validated workflow definition with per-step profile assignments, dependency ordering, and pattern selection across all six workflow types. The `WorkflowConfirmationSheet` lets operators review and edit steps, profiles, and configuration before creating the workflow. A keyword-based profile suggestion fallback ensures steps get reasonable profile assignments even without the AI classifier.
147
154
 
148
155
  <img src="https://raw.githubusercontent.com/navam-io/stagent/main/public/readme/dashboard-workflow-confirm.png" alt="Workflow creation from AI Assist" width="1200" />
149
156
 
157
+ #### Agent Self-Improvement
158
+ Agents learn from execution history through a human-approved instruction evolution loop. After each task completion, the pattern extractor analyzes logs and proposes context updates — concise behavioral rules the agent should follow in future runs. Operators approve, reject, or edit proposals before they take effect. Learned context is versioned with rollback support and size-limited summarization to prevent unbounded growth. A sweep agent can audit the codebase for improvement opportunities and create prioritized tasks from its findings.
159
+
160
+ #### Workflow Context Batching
161
+ During workflow execution, the pattern extractor buffers context proposals into a learning session instead of creating individual notifications per proposal. When the workflow completes, all proposals are surfaced as a single batch for review. Operators can approve all, reject all, or review individually — reducing notification noise from multi-step workflows while preserving human oversight. The batch review component integrates into the existing pending approval host.
162
+
150
163
  #### Session Management
151
164
  Resume failed or cancelled agent tasks with one click. Tracks retry counts (limit: 3), detects expired sessions, and provides atomic claim to prevent duplicate runs.
152
165
 
@@ -176,6 +189,11 @@ Automatic text extraction on upload for five file types: text, PDF (pdf-parse),
176
189
  #### Agent Document Context
177
190
  Documents linked to a task are automatically injected into the agent's prompt as context. The context builder aggregates extracted text from all linked documents, giving agents access to uploaded reference material without manual copy-paste.
178
191
 
192
+ ### Knowledge
193
+
194
+ #### Playbook
195
+ Built-in documentation system at `/playbook` with usage-stage awareness that adapts content to your experience level (new, early, active, power user). Browse feature reference docs and guided learning journeys organized by persona (Personal, Work, Power User, Developer). Adoption heatmap tracks which features you've explored, while journey cards show progress through multi-step learning paths. Markdown rendering with automatic internal link resolution, table of contents, related docs, and screengrab embedding.
196
+
179
197
  ### Platform
180
198
 
181
199
  #### Tool Permission Persistence
@@ -184,6 +202,9 @@ Documents linked to a task are automatically injected into the agent's prompt as
184
202
  #### Ambient Approvals
185
203
  Pending permission requests now surface through a shell-level approval presenter on any route, so operators can respond without leaving the page they are working on. Inbox remains the durable queue and source of truth, while the ambient surface provides the fast path for active supervision.
186
204
 
205
+ #### Tool Permission Presets
206
+ Pre-configured permission bundles that reduce friction for common tool approval patterns. Three layered presets — read-only (file reads, glob, grep), git-safe (adds git operations), and full-auto (adds write, edit, bash) — compose with existing "Always Allow" patterns. Presets are layered: enabling git-safe automatically includes read-only patterns; removing git-safe only strips its unique additions. Risk badges indicate the trust level of each preset. Manage presets from the Settings page alongside individual tool permissions.
207
+
187
208
  #### Schedules
188
209
  Time-based scheduling for agent tasks with human-friendly intervals (`5m`, `2h`, `1d`) and raw 5-field cron expressions. One-shot and recurring modes with pause/resume lifecycle, expiry limits, and max firings. Each firing creates a child task through the shared execution pipeline, and schedules can now target a runtime explicitly. Scheduler runs as a poll-based engine started via Next.js instrumentation hook.
189
210
 
@@ -219,10 +240,13 @@ Configuration hub with provider-aware sections: Claude authentication (API key o
219
240
  The `npx stagent` entry point boots a Next.js server from the published npm package. It is built from `bin/cli.ts` into `dist/cli.js` using tsup, and serves as the primary distribution channel — no clone required.
220
241
 
221
242
  #### Database
222
- SQLite with WAL mode via better-sqlite3 + Drizzle ORM. Eight tables: `projects`, `tasks`, `workflows`, `agent_logs`, `notifications`, `documents`, `schedules`, `settings`. Self-healing bootstrap — tables are created on startup if missing.
243
+ SQLite with WAL mode via better-sqlite3 + Drizzle ORM. Ten tables: `projects`, `tasks`, `workflows`, `agent_logs`, `notifications`, `documents`, `schedules`, `settings`, `learned_context`, `usage_ledger`. Self-healing bootstrap — tables are created on startup if missing.
223
244
 
224
245
  #### App Shell
225
- Responsive sidebar with collapsible icon-only mode, custom Stagent logo, tooltip navigation, dark/light/system theme, and OKLCH hue 250 blue-indigo color palette. Built on shadcn/ui (New York style) with PWA manifest and app icons. Routes: Home, Dashboard, Projects, Documents, Workflows, Profiles, Schedules, Inbox, Monitor, Settings.
246
+ Responsive sidebar with collapsible icon-only mode, custom Stagent logo, tooltip navigation, dark/light/system theme, and OKLCH hue 250 blue-indigo color palette. Built on shadcn/ui (New York style) with PWA manifest and app icons. Routes: Home, Dashboard, Inbox, Monitor, Projects, Workflows, Documents, Profiles, Schedules, Cost & Usage, Playbook, Settings.
247
+
248
+ #### E2E Test Automation
249
+ API-level end-to-end test suite built on Vitest with 120-second timeouts and sequential execution. Five test files cover single-task execution, sequence workflows, parallel workflows, blueprints, and cross-runtime scenarios across both Claude and Codex backends. Tests skip gracefully when runtimes are not configured, preventing CI failures. Run with `npm run test:e2e`.
226
250
 
227
251
  ---
228
252
 
@@ -251,46 +275,57 @@ npm run dev # Next.js dev server (Turbopack)
251
275
  npm run build:cli # Build CLI → dist/cli.js
252
276
  npm test # Run Vitest
253
277
  npm run test:coverage # Coverage report
278
+ npm run test:e2e # E2E integration tests (requires runtime credentials)
254
279
  ```
255
280
 
256
281
  ### Project Structure
257
282
 
258
283
  ```
284
+ docs/ # Playbook markdown docs + manifest.json
259
285
  src/
260
286
  ├── app/ # Next.js App Router pages
261
- │ ├── dashboard/ # Project overview
287
+ │ ├── dashboard/ # Task kanban board
262
288
  │ ├── projects/[id]/ # Project detail
289
+ │ ├── tasks/ # Task detail + creation (redirects to dashboard)
290
+ │ ├── profiles/ # Agent profile gallery + detail + creation
263
291
  │ ├── documents/ # Document browser
264
- │ ├── workflows/ # Workflow management
292
+ │ ├── workflows/ # Workflow management + blueprints
265
293
  │ ├── schedules/ # Schedule management
294
+ │ ├── costs/ # Cost & usage dashboard
295
+ │ ├── playbook/ # Documentation & learning journeys
266
296
  │ ├── inbox/ # Notifications
267
297
  │ ├── monitor/ # Log streaming
268
298
  │ └── settings/ # Configuration
269
299
  ├── components/
270
300
  │ ├── dashboard/ # Homepage widgets + charts
271
301
  │ ├── tasks/ # Board, cards, panels
272
- │ ├── workflows/ # Workflow UI
302
+ │ ├── profiles/ # Profile gallery, detail, form, learned context
303
+ │ ├── workflows/ # Workflow UI + blueprints + swarm
273
304
  │ ├── documents/ # Document browser + upload
305
+ │ ├── costs/ # Cost dashboard + filters
306
+ │ ├── playbook/ # Playbook docs + journeys + adoption
274
307
  │ ├── schedules/ # Schedule management
275
308
  │ ├── monitoring/ # Log viewer
276
309
  │ ├── notifications/ # Inbox + permission actions
277
- │ ├── settings/ # Auth, permissions, data mgmt
310
+ │ ├── settings/ # Auth, permissions, budgets, data mgmt
278
311
  │ ├── shared/ # App shell, sidebar
279
312
  │ └── ui/ # shadcn/ui primitives
280
313
  └── lib/
281
- ├── agents/ # Runtime adapters, provider integrations, profiles
314
+ ├── agents/ # Runtime adapters, profiles, learned context, pattern extraction
282
315
  ├── db/ # Schema, migrations
316
+ ├── docs/ # Playbook reader, adoption, usage-stage, journey tracker
283
317
  ├── documents/ # Preprocessing + context builder
284
318
  ├── workflows/ # Engine + types + blueprints
285
319
  ├── schedules/ # Scheduler engine + interval parser
286
320
  ├── settings/ # Auth, permissions, helpers
321
+ ├── usage/ # Metering ledger + pricing registry
287
322
  ├── constants/ # Status transitions, colors
288
323
  ├── queries/ # Chart data aggregation
289
324
  ├── validators/ # Zod schemas
290
325
  └── utils/ # Shared helpers
291
326
  ```
292
327
 
293
- ### API Endpoints (48 routes)
328
+ ### API Endpoints (52 routes)
294
329
 
295
330
  | Domain | Endpoint | Method | Purpose |
296
331
  |--------|----------|--------|---------|
@@ -324,7 +359,7 @@ src/
324
359
  | **Profiles** | `/api/profiles` | GET | List agent profiles |
325
360
  | | `/api/profiles/[id]` | GET/PUT/DELETE | Profile CRUD |
326
361
  | | `/api/profiles/[id]/test` | POST | Run behavioral tests on a profile |
327
- | | `/api/profiles/[id]/context` | GET | Profile context for agent execution |
362
+ | | `/api/profiles/[id]/context` | GET/POST/PATCH | Learned context: version history, manual add, approve/reject/rollback |
328
363
  | | `/api/profiles/import` | POST | Import profile from GitHub URL |
329
364
  | **Notifications** | `/api/notifications` | GET/POST | List and create notifications |
330
365
  | | `/api/notifications/[id]` | PATCH/DELETE | Update and delete notification |
@@ -338,7 +373,10 @@ src/
338
373
  | | `/api/settings/test` | POST | Provider-aware runtime connectivity test |
339
374
  | | `/api/settings/budgets` | GET/POST | Budget configuration |
340
375
  | | `/api/permissions` | GET/POST/DELETE | Tool permission patterns |
376
+ | | `/api/permissions/presets` | GET/POST/DELETE | Permission preset bundles |
377
+ | **Context** | `/api/context/batch` | POST | Batch approve/reject context proposals |
341
378
  | **Monitoring** | `/api/logs/stream` | GET | SSE agent log stream |
379
+ | **Playbook** | `/api/playbook/status` | GET | Playbook adoption status and usage stage |
342
380
  | **Platform** | `/api/command-palette/recent` | GET | Recent command palette items |
343
381
  | | `/api/data/clear` | POST | Clear all data |
344
382
  | | `/api/data/seed` | POST | Seed sample data |
@@ -357,7 +395,7 @@ All 14 features shipped across three layers:
357
395
  | **Core** | Project management, task board, agent integration, inbox notifications, monitoring dashboard |
358
396
  | **Polish** | Homepage dashboard, UX fixes, workflow engine, AI task assist, content handling, session management |
359
397
 
360
- ### Post-MVP — Complete (25 features)
398
+ ### Post-MVP — Complete (31 features)
361
399
 
362
400
  | Category | Feature | What shipped |
363
401
  |----------|---------|-------------|
@@ -369,6 +407,9 @@ All 14 features shipped across three layers:
369
407
  | **Agent Intelligence** | Multi-Agent Routing | Profile registry (4 profiles), task classifier, per-step profile assignment |
370
408
  | | Autonomous Loop Execution | 4 stop conditions, iteration context chaining, pause/resume, loop status view |
371
409
  | | Multi-Agent Swarm | Mayor → worker pool → refinery orchestration with retryable stages |
410
+ | | AI Assist → Workflows | Bridge task assist into workflow engine with profile assignment and pattern selection |
411
+ | | Agent Self-Improvement | Pattern extraction from logs, human-approved context evolution, versioned rollback |
412
+ | | Workflow Context Batching | Workflow-scoped proposal buffering with batch approve/reject |
372
413
  | **Agent Profiles** | Agent Profile Catalog | 13 domain-specific profiles, GitHub import, behavioral testing, MCP passthrough |
373
414
  | | Workflow Blueprints | 8 templates, gallery, YAML editor, dynamic forms, GitHub import, lineage tracking |
374
415
  | **UI Enhancement** | Ambient Approvals | Shell-level approval presenter on any route for fast supervision |
@@ -382,27 +423,17 @@ All 14 features shipped across three layers:
382
423
  | | Board Context Persistence | Persisted filters, sort order, and project selection across sessions |
383
424
  | **Platform** | Scheduled Prompt Loops | Cron + human-friendly intervals, one-shot/recurring, pause/resume lifecycle |
384
425
  | | Tool Permission Persistence | "Always Allow" patterns, pre-check bypass, Settings management |
426
+ | | Tool Permission Presets | 3 layered presets (read-only, git-safe, full-auto) with risk badges |
385
427
  | | Provider Runtimes | Shared runtime registry with Claude Code and OpenAI Codex App Server adapters |
386
428
  | | OpenAI Codex Runtime | Codex App Server integration with inbox approvals, logs, and thread resumption |
387
- | | npm Publish Readiness | `npx stagent` distribution channel with CLI bundling and package config |
388
429
  | | Cross-Provider Profiles | Profile compatibility layer ensuring profiles work across Claude and Codex runtimes |
389
430
  | | Parallel Fork/Join | 2-5 concurrent research branches with synthesis step |
431
+ | **Runtime Quality** | E2E Test Automation | API-level test suite covering both runtimes, 4 profiles, 4 workflow patterns |
432
+ | **Knowledge** | Playbook | Built-in documentation with usage-stage awareness, adoption heatmap, guided learning journeys |
390
433
  | **Governance** | Usage Metering Ledger | Provider-normalized token and spend tracking across all execution paths |
391
434
  | | Spend Budget Guardrails | Per-project and global budgets with enforcement and alerts |
392
435
  | | Cost & Usage Dashboard | Summary cards, trend views, provider/model breakdowns, budget audit visibility |
393
436
 
394
- ### In Progress
395
-
396
- | Feature | Description |
397
- |---------|-------------|
398
- | **AI Assist → Workflow Creation** | Bridge AI task assist recommendations into the workflow engine with profile assignment and pattern selection |
399
-
400
- ### Planned
401
-
402
- | Feature | Description |
403
- |---------|-------------|
404
- | **Agent Self-Improvement** | Agents learn patterns and update context with human approval |
405
-
406
437
  ---
407
438
 
408
439
  ## Contributing
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "stagent",
3
- "version": "0.1.10",
3
+ "version": "0.1.12",
4
4
  "description": "Governed AI agent workspace for supervised local execution, workflows, documents, and provider runtimes.",
5
5
  "keywords": [
6
6
  "ai",
@@ -35,7 +35,7 @@
35
35
  "bugs": {
36
36
  "url": "https://github.com/navam-io/stagent/issues"
37
37
  },
38
- "homepage": "https://github.com/navam-io/stagent#readme",
38
+ "homepage": "https://stagent.io",
39
39
  "scripts": {
40
40
  "dev": "next dev --turbopack",
41
41
  "build": "next build",
@@ -43,6 +43,7 @@
43
43
  "test": "vitest run",
44
44
  "test:watch": "vitest",
45
45
  "test:coverage": "vitest run --coverage",
46
+ "test:e2e": "vitest run --config vitest.config.e2e.ts",
46
47
  "test:ui": "vitest --ui",
47
48
  "prepublishOnly": "npm run build:cli"
48
49
  },
@@ -51,7 +52,6 @@
51
52
  },
52
53
  "dependencies": {
53
54
  "@anthropic-ai/claude-agent-sdk": "^0.2.71",
54
- "@anthropic-ai/sdk": "^0.78.0",
55
55
  "@dnd-kit/core": "^6.3.1",
56
56
  "@dnd-kit/sortable": "^10.0.0",
57
57
  "@dnd-kit/utilities": "^3.2.2",
package/src/__tests__/e2e/blueprint.test.ts ADDED
@@ -0,0 +1,63 @@
1
+ /**
2
+ * E2E: Blueprint instantiation and execution.
3
+ *
4
+ * Tests that blueprints can be listed, instantiated with variables,
5
+ * and executed as workflows with variable resolution.
6
+ */
7
+
8
+ import {
9
+ setupE2E,
10
+ teardownE2E,
11
+ testProjectId,
12
+ claudeAvailable,
13
+ } from "./setup";
14
+ import {
15
+ listBlueprints,
16
+ instantiateBlueprint,
17
+ executeWorkflow,
18
+ pollWorkflowUntilDone,
19
+ } from "./helpers";
20
+
21
+ beforeAll(async () => {
22
+ await setupE2E();
23
+ });
24
+
25
+ afterAll(async () => {
26
+ await teardownE2E();
27
+ });
28
+
29
+ describe("Blueprint — Gallery & Instantiation", () => {
30
+ it("lists available blueprints", async () => {
31
+ const { ok, data } = await listBlueprints();
32
+ expect(ok).toBe(true);
33
+ expect(Array.isArray(data)).toBe(true);
34
+ expect(data!.length).toBeGreaterThan(0);
35
+ });
36
+
37
+ it.skipIf(!claudeAvailable)(
38
+ "instantiates and executes documentation-generation blueprint",
39
+ async () => {
40
+ // Instantiate with variables
41
+ const { ok: instOk, data: instData } = await instantiateBlueprint(
42
+ "documentation-generation",
43
+ {
44
+ target: "src/index.ts and src/utils.ts",
45
+ docType: "API Documentation",
46
+ },
47
+ testProjectId
48
+ );
49
+ expect(instOk).toBe(true);
50
+
51
+ const workflow = instData?.workflow;
52
+ expect(workflow).toBeTruthy();
53
+ expect(workflow!.status).toBe("draft");
54
+
55
+ // Execute the instantiated workflow
56
+ const exec = await executeWorkflow(workflow!.id);
57
+ expect(exec.status).toBe(202);
58
+
59
+ const result = await pollWorkflowUntilDone(workflow!.id);
60
+ expect(result.status).toBe("completed");
61
+ }
62
+ );
63
+ });
package/src/__tests__/e2e/cross-runtime.test.ts ADDED
@@ -0,0 +1,77 @@
1
+ /**
2
+ * E2E: Cross-runtime comparison.
3
+ *
4
+ * Tests that the same task produces valid results on both Claude Code
5
+ * and Codex runtimes, verifying runtime parity.
6
+ */
7
+
8
+ import {
9
+ setupE2E,
10
+ teardownE2E,
11
+ testProjectId,
12
+ claudeAvailable,
13
+ codexAvailable,
14
+ } from "./setup";
15
+ import {
16
+ createTask,
17
+ executeTask,
18
+ pollTaskUntilDone,
19
+ updateTask,
20
+ } from "./helpers";
21
+
22
+ beforeAll(async () => {
23
+ await setupE2E();
24
+ });
25
+
26
+ afterAll(async () => {
27
+ await teardownE2E();
28
+ });
29
+
30
+ describe("Cross-Runtime Comparison", () => {
31
+ const bothAvailable = () => claudeAvailable && codexAvailable;
32
+
33
+ it.skipIf(!bothAvailable())(
34
+ "same task produces valid results on both runtimes",
35
+ async () => {
36
+ const taskPrompt =
37
+ "Describe the TypeScript code in src/index.ts. List the exported functions and any bugs.";
38
+
39
+ // Create and execute on Claude
40
+ const { data: claudeTask } = await createTask({
41
+ title: "Cross-runtime test (Claude)",
42
+ description: taskPrompt,
43
+ projectId: testProjectId,
44
+ agentProfile: "general",
45
+ });
46
+ await updateTask(claudeTask!.id, { status: "queued" });
47
+ await executeTask(claudeTask!.id);
48
+
49
+ // Create and execute on Codex
50
+ const { data: codexTask } = await createTask({
51
+ title: "Cross-runtime test (Codex)",
52
+ description: taskPrompt,
53
+ projectId: testProjectId,
54
+ assignedAgent: "codex",
55
+ agentProfile: "general",
56
+ });
57
+ await updateTask(codexTask!.id, { status: "queued" });
58
+ await executeTask(codexTask!.id);
59
+
60
+ // Wait for both
61
+ const [claudeResult, codexResult] = await Promise.all([
62
+ pollTaskUntilDone(claudeTask!.id),
63
+ pollTaskUntilDone(codexTask!.id),
64
+ ]);
65
+
66
+ // Both should complete
67
+ expect(claudeResult.status).toBe("completed");
68
+ expect(codexResult.status).toBe("completed");
69
+
70
+ // Both should produce non-empty results
71
+ expect(claudeResult.result).toBeTruthy();
72
+ expect(codexResult.result).toBeTruthy();
73
+ expect(claudeResult.result!.length).toBeGreaterThan(50);
74
+ expect(codexResult.result!.length).toBeGreaterThan(50);
75
+ }
76
+ );
77
+ });