stagent 0.1.11 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -4
- package/package.json +3 -2
- package/src/__tests__/e2e/blueprint.test.ts +63 -0
- package/src/__tests__/e2e/cross-runtime.test.ts +77 -0
- package/src/__tests__/e2e/helpers.ts +286 -0
- package/src/__tests__/e2e/parallel-workflow.test.ts +120 -0
- package/src/__tests__/e2e/sequence-workflow.test.ts +109 -0
- package/src/__tests__/e2e/setup.ts +156 -0
- package/src/__tests__/e2e/single-task.test.ts +170 -0
- package/src/app/api/command-palette/recent/route.ts +41 -18
- package/src/app/api/context/batch/route.ts +44 -0
- package/src/app/api/permissions/presets/route.ts +80 -0
- package/src/app/api/playbook/status/route.ts +15 -0
- package/src/app/api/profiles/route.ts +23 -20
- package/src/app/api/settings/pricing/route.ts +15 -0
- package/src/app/costs/page.tsx +53 -43
- package/src/app/playbook/[slug]/page.tsx +76 -0
- package/src/app/playbook/page.tsx +54 -0
- package/src/app/profiles/page.tsx +7 -4
- package/src/app/settings/page.tsx +2 -2
- package/src/components/costs/cost-dashboard.tsx +226 -320
- package/src/components/dashboard/activity-feed.tsx +6 -2
- package/src/components/notifications/batch-proposal-review.tsx +150 -0
- package/src/components/notifications/notification-item.tsx +6 -3
- package/src/components/notifications/pending-approval-host.tsx +57 -11
- package/src/components/playbook/adoption-heatmap.tsx +69 -0
- package/src/components/playbook/journey-card.tsx +110 -0
- package/src/components/playbook/playbook-action-button.tsx +22 -0
- package/src/components/playbook/playbook-browser.tsx +143 -0
- package/src/components/playbook/playbook-card.tsx +102 -0
- package/src/components/playbook/playbook-detail-view.tsx +223 -0
- package/src/components/playbook/playbook-homepage.tsx +142 -0
- package/src/components/playbook/playbook-toc.tsx +90 -0
- package/src/components/playbook/playbook-updated-badge.tsx +23 -0
- package/src/components/playbook/related-docs.tsx +30 -0
- package/src/components/profiles/__tests__/learned-context-panel.test.tsx +175 -0
- package/src/components/profiles/context-proposal-review.tsx +7 -3
- package/src/components/profiles/learned-context-panel.tsx +116 -8
- package/src/components/profiles/profile-detail-view.tsx +6 -3
- package/src/components/settings/__tests__/auth-config-section.test.tsx +147 -0
- package/src/components/settings/api-key-form.tsx +5 -43
- package/src/components/settings/auth-config-section.tsx +10 -6
- package/src/components/settings/auth-status-badge.tsx +8 -0
- package/src/components/settings/budget-guardrails-section.tsx +403 -620
- package/src/components/settings/connection-test-control.tsx +63 -0
- package/src/components/settings/permissions-section.tsx +85 -75
- package/src/components/settings/permissions-sections.tsx +24 -0
- package/src/components/settings/presets-section.tsx +159 -0
- package/src/components/settings/pricing-registry-panel.tsx +164 -0
- package/src/components/shared/app-sidebar.tsx +2 -0
- package/src/components/shared/command-palette.tsx +30 -0
- package/src/components/shared/light-markdown.tsx +134 -0
- package/src/components/workflows/loop-status-view.tsx +8 -4
- package/src/components/workflows/workflow-status-view.tsx +16 -9
- package/src/lib/agents/learned-context.ts +27 -15
- package/src/lib/agents/learning-session.ts +234 -0
- package/src/lib/agents/pattern-extractor.ts +19 -0
- package/src/lib/agents/profiles/__tests__/sort.test.ts +42 -0
- package/src/lib/agents/profiles/sort.ts +7 -0
- package/src/lib/constants/settings.ts +1 -0
- package/src/lib/db/schema.ts +3 -0
- package/src/lib/docs/adoption.ts +105 -0
- package/src/lib/docs/journey-tracker.ts +21 -0
- package/src/lib/docs/reader.ts +102 -0
- package/src/lib/docs/types.ts +54 -0
- package/src/lib/docs/usage-stage.ts +60 -0
- package/src/lib/notifications/actionable.ts +18 -10
- package/src/lib/settings/__tests__/budget-guardrails.test.ts +86 -24
- package/src/lib/settings/budget-guardrails.ts +213 -85
- package/src/lib/settings/permission-presets.ts +150 -0
- package/src/lib/settings/runtime-setup.ts +71 -0
- package/src/lib/usage/__tests__/ledger.test.ts +2 -2
- package/src/lib/usage/__tests__/pricing-registry.test.ts +78 -0
- package/src/lib/usage/ledger.ts +1 -1
- package/src/lib/usage/pricing-registry.ts +570 -0
- package/src/lib/usage/pricing.ts +15 -95
- package/src/lib/utils/__tests__/learned-context-history.test.ts +171 -0
- package/src/lib/utils/learned-context-history.ts +150 -0
- package/src/lib/validators/__tests__/settings.test.ts +23 -16
- package/src/lib/validators/settings.ts +3 -9
- package/src/lib/workflows/engine.ts +18 -0
package/README.md
CHANGED
|
@@ -12,7 +12,7 @@ npx stagent
|
|
|
12
12
|
|
|
13
13
|
Open [localhost:3000](http://localhost:3000).
|
|
14
14
|
|
|
15
|
-
**Profiles & Policies** · **Blueprints & Schedules** · **Open Source**
|
|
15
|
+
**Profiles & Policies** · **Blueprints & Schedules** · **Built-in Playbook** · **Open Source**
|
|
16
16
|
|
|
17
17
|
<img src="https://raw.githubusercontent.com/navam-io/stagent/main/public/readme/home-list.png" alt="Stagent home workspace" width="1200" />
|
|
18
18
|
|
|
@@ -56,6 +56,10 @@ Stagent ships a shared runtime registry that routes tasks, schedules, and workfl
|
|
|
56
56
|
| 📋 | **[Kanban Board](#kanban-board-operations)** | Inline editing, bulk operations, and persistent board state |
|
|
57
57
|
| 🤖 | **[AI Assist → Workflows](#ai-assist--workflow-creation)** | Bridge task assist recommendations into governed workflow execution |
|
|
58
58
|
| 🧬 | **[Agent Self-Improvement](#agent-self-improvement)** | Agents learn patterns from execution history with human-approved context evolution |
|
|
59
|
+
| 🎯 | **[Tool Permission Presets](#tool-permission-presets)** | Pre-configured permission bundles (read-only, git-safe, full-auto) with layered apply/remove |
|
|
60
|
+
| 📦 | **[Workflow Context Batching](#workflow-context-batching)** | Workflow-scoped proposal buffering with batch approve/reject for learned context |
|
|
61
|
+
| 🧪 | **[E2E Test Automation](#e2e-test-automation)** | API-level end-to-end test suite covering both runtimes, 4 profiles, and 4 workflow patterns |
|
|
62
|
+
| 📖 | **[Playbook](#playbook)** | Built-in documentation with usage-stage awareness, adoption heatmap, and guided learning journeys |
|
|
59
63
|
|
|
60
64
|
---
|
|
61
65
|
|
|
@@ -73,6 +77,7 @@ Stagent ships a shared runtime registry that routes tasks, schedules, and workfl
|
|
|
73
77
|
- **Reusable agent profiles** — Profiles define instructions, allowed tools, runtime tuning, and MCP configs for repeated use
|
|
74
78
|
- **Permission pre-check** — Saved "Always Allow" patterns bypass the notification loop for trusted tools
|
|
75
79
|
- **Learned context loop** — Pattern extraction → human approval → versioned context injection creates a supervised self-improvement cycle
|
|
80
|
+
- **Permission presets** — Layered preset bundles (read-only ⊂ git-safe ⊂ full-auto) that compose with individual "Always Allow" patterns
|
|
76
81
|
|
|
77
82
|
---
|
|
78
83
|
|
|
@@ -152,6 +157,9 @@ Bridge from AI task assist to workflow engine: when task assist recommends a mul
|
|
|
152
157
|
#### Agent Self-Improvement
|
|
153
158
|
Agents learn from execution history through a human-approved instruction evolution loop. After each task completion, the pattern extractor analyzes logs and proposes context updates — concise behavioral rules the agent should follow in future runs. Operators approve, reject, or edit proposals before they take effect. Learned context is versioned with rollback support and size-limited summarization to prevent unbounded growth. A sweep agent can audit the codebase for improvement opportunities and create prioritized tasks from its findings.
|
|
154
159
|
|
|
160
|
+
#### Workflow Context Batching
|
|
161
|
+
During workflow execution, the pattern extractor buffers context proposals into a learning session instead of creating individual notifications per proposal. When the workflow completes, all proposals are surfaced as a single batch for review. Operators can approve all, reject all, or review individually — reducing notification noise from multi-step workflows while preserving human oversight. The batch review component integrates into the existing pending approval host.
|
|
162
|
+
|
|
155
163
|
#### Session Management
|
|
156
164
|
Resume failed or cancelled agent tasks with one click. Tracks retry counts (limit: 3), detects expired sessions, and provides atomic claim to prevent duplicate runs.
|
|
157
165
|
|
|
@@ -181,6 +189,11 @@ Automatic text extraction on upload for five file types: text, PDF (pdf-parse),
|
|
|
181
189
|
#### Agent Document Context
|
|
182
190
|
Documents linked to a task are automatically injected into the agent's prompt as context. The context builder aggregates extracted text from all linked documents, giving agents access to uploaded reference material without manual copy-paste.
|
|
183
191
|
|
|
192
|
+
### Knowledge
|
|
193
|
+
|
|
194
|
+
#### Playbook
|
|
195
|
+
Built-in documentation system at `/playbook` with usage-stage awareness that adapts content to your experience level (new, early, active, power user). Browse feature reference docs and guided learning journeys organized by persona (Personal, Work, Power User, Developer). Adoption heatmap tracks which features you've explored, while journey cards show progress through multi-step learning paths. Markdown rendering with automatic internal link resolution, table of contents, related docs, and screengrab embedding.
|
|
196
|
+
|
|
184
197
|
### Platform
|
|
185
198
|
|
|
186
199
|
#### Tool Permission Persistence
|
|
@@ -189,6 +202,9 @@ Documents linked to a task are automatically injected into the agent's prompt as
|
|
|
189
202
|
#### Ambient Approvals
|
|
190
203
|
Pending permission requests now surface through a shell-level approval presenter on any route, so operators can respond without leaving the page they are working on. Inbox remains the durable queue and source of truth, while the ambient surface provides the fast path for active supervision.
|
|
191
204
|
|
|
205
|
+
#### Tool Permission Presets
|
|
206
|
+
Pre-configured permission bundles that reduce friction for common tool approval patterns. Three layered presets — read-only (file reads, glob, grep), git-safe (adds git operations), and full-auto (adds write, edit, bash) — compose with existing "Always Allow" patterns. Presets are layered: enabling git-safe automatically includes read-only patterns; removing git-safe only strips its unique additions. Risk badges indicate the trust level of each preset. Manage presets from the Settings page alongside individual tool permissions.
|
|
207
|
+
|
|
192
208
|
#### Schedules
|
|
193
209
|
Time-based scheduling for agent tasks with human-friendly intervals (`5m`, `2h`, `1d`) and raw 5-field cron expressions. One-shot and recurring modes with pause/resume lifecycle, expiry limits, and max firings. Each firing creates a child task through the shared execution pipeline, and schedules can now target a runtime explicitly. Scheduler runs as a poll-based engine started via Next.js instrumentation hook.
|
|
194
210
|
|
|
@@ -227,7 +243,10 @@ The `npx stagent` entry point boots a Next.js server from the published npm pack
|
|
|
227
243
|
SQLite with WAL mode via better-sqlite3 + Drizzle ORM. Ten tables: `projects`, `tasks`, `workflows`, `agent_logs`, `notifications`, `documents`, `schedules`, `settings`, `learned_context`, `usage_ledger`. Self-healing bootstrap — tables are created on startup if missing.
|
|
228
244
|
|
|
229
245
|
#### App Shell
|
|
230
|
-
Responsive sidebar with collapsible icon-only mode, custom Stagent logo, tooltip navigation, dark/light/system theme, and OKLCH hue 250 blue-indigo color palette. Built on shadcn/ui (New York style) with PWA manifest and app icons. Routes: Home, Dashboard,
|
|
246
|
+
Responsive sidebar with collapsible icon-only mode, custom Stagent logo, tooltip navigation, dark/light/system theme, and OKLCH hue 250 blue-indigo color palette. Built on shadcn/ui (New York style) with PWA manifest and app icons. Routes: Home, Dashboard, Inbox, Monitor, Projects, Workflows, Documents, Profiles, Schedules, Cost & Usage, Playbook, Settings.
|
|
247
|
+
|
|
248
|
+
#### E2E Test Automation
|
|
249
|
+
API-level end-to-end test suite built on Vitest with 120-second timeouts and sequential execution. Five test files cover single-task execution, sequence workflows, parallel workflows, blueprints, and cross-runtime scenarios across both Claude and Codex backends. Tests skip gracefully when runtimes are not configured, preventing CI failures. Run with `npm run test:e2e`.
|
|
231
250
|
|
|
232
251
|
---
|
|
233
252
|
|
|
@@ -256,11 +275,13 @@ npm run dev # Next.js dev server (Turbopack)
|
|
|
256
275
|
npm run build:cli # Build CLI → dist/cli.js
|
|
257
276
|
npm test # Run Vitest
|
|
258
277
|
npm run test:coverage # Coverage report
|
|
278
|
+
npm run test:e2e # E2E integration tests (requires runtime credentials)
|
|
259
279
|
```
|
|
260
280
|
|
|
261
281
|
### Project Structure
|
|
262
282
|
|
|
263
283
|
```
|
|
284
|
+
docs/ # Playbook markdown docs + manifest.json
|
|
264
285
|
src/
|
|
265
286
|
├── app/ # Next.js App Router pages
|
|
266
287
|
│ ├── dashboard/ # Task kanban board
|
|
@@ -271,6 +292,7 @@ src/
|
|
|
271
292
|
│ ├── workflows/ # Workflow management + blueprints
|
|
272
293
|
│ ├── schedules/ # Schedule management
|
|
273
294
|
│ ├── costs/ # Cost & usage dashboard
|
|
295
|
+
│ ├── playbook/ # Documentation & learning journeys
|
|
274
296
|
│ ├── inbox/ # Notifications
|
|
275
297
|
│ ├── monitor/ # Log streaming
|
|
276
298
|
│ └── settings/ # Configuration
|
|
@@ -281,6 +303,7 @@ src/
|
|
|
281
303
|
│ ├── workflows/ # Workflow UI + blueprints + swarm
|
|
282
304
|
│ ├── documents/ # Document browser + upload
|
|
283
305
|
│ ├── costs/ # Cost dashboard + filters
|
|
306
|
+
│ ├── playbook/ # Playbook docs + journeys + adoption
|
|
284
307
|
│ ├── schedules/ # Schedule management
|
|
285
308
|
│ ├── monitoring/ # Log viewer
|
|
286
309
|
│ ├── notifications/ # Inbox + permission actions
|
|
@@ -290,6 +313,7 @@ src/
|
|
|
290
313
|
└── lib/
|
|
291
314
|
├── agents/ # Runtime adapters, profiles, learned context, pattern extraction
|
|
292
315
|
├── db/ # Schema, migrations
|
|
316
|
+
├── docs/ # Playbook reader, adoption, usage-stage, journey tracker
|
|
293
317
|
├── documents/ # Preprocessing + context builder
|
|
294
318
|
├── workflows/ # Engine + types + blueprints
|
|
295
319
|
├── schedules/ # Scheduler engine + interval parser
|
|
@@ -301,7 +325,7 @@ src/
|
|
|
301
325
|
└── utils/ # Shared helpers
|
|
302
326
|
```
|
|
303
327
|
|
|
304
|
-
### API Endpoints (
|
|
328
|
+
### API Endpoints (52 routes)
|
|
305
329
|
|
|
306
330
|
| Domain | Endpoint | Method | Purpose |
|
|
307
331
|
|--------|----------|--------|---------|
|
|
@@ -349,7 +373,10 @@ src/
|
|
|
349
373
|
| | `/api/settings/test` | POST | Provider-aware runtime connectivity test |
|
|
350
374
|
| | `/api/settings/budgets` | GET/POST | Budget configuration |
|
|
351
375
|
| | `/api/permissions` | GET/POST/DELETE | Tool permission patterns |
|
|
376
|
+
| | `/api/permissions/presets` | GET/POST/DELETE | Permission preset bundles |
|
|
377
|
+
| **Context** | `/api/context/batch` | POST | Batch approve/reject context proposals |
|
|
352
378
|
| **Monitoring** | `/api/logs/stream` | GET | SSE agent log stream |
|
|
379
|
+
| **Playbook** | `/api/playbook/status` | GET | Playbook adoption status and usage stage |
|
|
353
380
|
| **Platform** | `/api/command-palette/recent` | GET | Recent command palette items |
|
|
354
381
|
| | `/api/data/clear` | POST | Clear all data |
|
|
355
382
|
| | `/api/data/seed` | POST | Seed sample data |
|
|
@@ -368,7 +395,7 @@ All 14 features shipped across three layers:
|
|
|
368
395
|
| **Core** | Project management, task board, agent integration, inbox notifications, monitoring dashboard |
|
|
369
396
|
| **Polish** | Homepage dashboard, UX fixes, workflow engine, AI task assist, content handling, session management |
|
|
370
397
|
|
|
371
|
-
### Post-MVP — Complete (
|
|
398
|
+
### Post-MVP — Complete (31 features)
|
|
372
399
|
|
|
373
400
|
| Category | Feature | What shipped |
|
|
374
401
|
|----------|---------|-------------|
|
|
@@ -382,6 +409,7 @@ All 14 features shipped across three layers:
|
|
|
382
409
|
| | Multi-Agent Swarm | Mayor → worker pool → refinery orchestration with retryable stages |
|
|
383
410
|
| | AI Assist → Workflows | Bridge task assist into workflow engine with profile assignment and pattern selection |
|
|
384
411
|
| | Agent Self-Improvement | Pattern extraction from logs, human-approved context evolution, versioned rollback |
|
|
412
|
+
| | Workflow Context Batching | Workflow-scoped proposal buffering with batch approve/reject |
|
|
385
413
|
| **Agent Profiles** | Agent Profile Catalog | 13 domain-specific profiles, GitHub import, behavioral testing, MCP passthrough |
|
|
386
414
|
| | Workflow Blueprints | 8 templates, gallery, YAML editor, dynamic forms, GitHub import, lineage tracking |
|
|
387
415
|
| **UI Enhancement** | Ambient Approvals | Shell-level approval presenter on any route for fast supervision |
|
|
@@ -395,10 +423,13 @@ All 14 features shipped across three layers:
|
|
|
395
423
|
| | Board Context Persistence | Persisted filters, sort order, and project selection across sessions |
|
|
396
424
|
| **Platform** | Scheduled Prompt Loops | Cron + human-friendly intervals, one-shot/recurring, pause/resume lifecycle |
|
|
397
425
|
| | Tool Permission Persistence | "Always Allow" patterns, pre-check bypass, Settings management |
|
|
426
|
+
| | Tool Permission Presets | 3 layered presets (read-only, git-safe, full-auto) with risk badges |
|
|
398
427
|
| | Provider Runtimes | Shared runtime registry with Claude Code and OpenAI Codex App Server adapters |
|
|
399
428
|
| | OpenAI Codex Runtime | Codex App Server integration with inbox approvals, logs, and thread resumption |
|
|
400
429
|
| | Cross-Provider Profiles | Profile compatibility layer ensuring profiles work across Claude and Codex runtimes |
|
|
401
430
|
| | Parallel Fork/Join | 2-5 concurrent research branches with synthesis step |
|
|
431
|
+
| **Runtime Quality** | E2E Test Automation | API-level test suite covering both runtimes, 4 profiles, 4 workflow patterns |
|
|
432
|
+
| **Knowledge** | Playbook | Built-in documentation with usage-stage awareness, adoption heatmap, guided learning journeys |
|
|
402
433
|
| **Governance** | Usage Metering Ledger | Provider-normalized token and spend tracking across all execution paths |
|
|
403
434
|
| | Spend Budget Guardrails | Per-project and global budgets with enforcement and alerts |
|
|
404
435
|
| | Cost & Usage Dashboard | Summary cards, trend views, provider/model breakdowns, budget audit visibility |
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "stagent",
|
|
3
|
-
"version": "0.1.11",
|
|
3
|
+
"version": "0.1.12",
|
|
4
4
|
"description": "Governed AI agent workspace for supervised local execution, workflows, documents, and provider runtimes.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai",
|
|
@@ -35,7 +35,7 @@
|
|
|
35
35
|
"bugs": {
|
|
36
36
|
"url": "https://github.com/navam-io/stagent/issues"
|
|
37
37
|
},
|
|
38
|
-
"homepage": "https://
|
|
38
|
+
"homepage": "https://stagent.io",
|
|
39
39
|
"scripts": {
|
|
40
40
|
"dev": "next dev --turbopack",
|
|
41
41
|
"build": "next build",
|
|
@@ -43,6 +43,7 @@
|
|
|
43
43
|
"test": "vitest run",
|
|
44
44
|
"test:watch": "vitest",
|
|
45
45
|
"test:coverage": "vitest run --coverage",
|
|
46
|
+
"test:e2e": "vitest run --config vitest.config.e2e.ts",
|
|
46
47
|
"test:ui": "vitest --ui",
|
|
47
48
|
"prepublishOnly": "npm run build:cli"
|
|
48
49
|
},
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* E2E: Blueprint instantiation and execution.
|
|
3
|
+
*
|
|
4
|
+
* Tests that blueprints can be listed, instantiated with variables,
|
|
5
|
+
* and executed as workflows with variable resolution.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import {
|
|
9
|
+
setupE2E,
|
|
10
|
+
teardownE2E,
|
|
11
|
+
testProjectId,
|
|
12
|
+
claudeAvailable,
|
|
13
|
+
} from "./setup";
|
|
14
|
+
import {
|
|
15
|
+
listBlueprints,
|
|
16
|
+
instantiateBlueprint,
|
|
17
|
+
executeWorkflow,
|
|
18
|
+
pollWorkflowUntilDone,
|
|
19
|
+
} from "./helpers";
|
|
20
|
+
|
|
21
|
+
// Bring the shared E2E fixture up once for this file and tear it down afterwards.
beforeAll(async () => {
  await setupE2E();
});

afterAll(async () => {
  await teardownE2E();
});

describe("Blueprint — Gallery & Instantiation", () => {
  it("lists available blueprints", async () => {
    const listing = await listBlueprints();

    expect(listing.ok).toBe(true);
    expect(Array.isArray(listing.data)).toBe(true);
    expect(listing.data!.length).toBeGreaterThan(0);
  });

  // NOTE(review): the skipIf condition is evaluated when the test is registered,
  // i.e. before the beforeAll hook above has run. Confirm that `claudeAvailable`
  // already holds its final value at import time of "./setup".
  it.skipIf(!claudeAvailable)(
    "instantiates and executes documentation-generation blueprint",
    async () => {
      // Step 1: turn the blueprint into a concrete workflow.
      const variables = {
        target: "src/index.ts and src/utils.ts",
        docType: "API Documentation",
      };
      const instantiated = await instantiateBlueprint(
        "documentation-generation",
        variables,
        testProjectId
      );
      expect(instantiated.ok).toBe(true);

      const workflow = instantiated.data?.workflow;
      expect(workflow).toBeTruthy();
      expect(workflow!.status).toBe("draft");

      // Step 2: start the workflow; the endpoint is expected to answer 202.
      const { status: execStatus } = await executeWorkflow(workflow!.id);
      expect(execStatus).toBe(202);

      // Step 3: wait for a terminal status and require success.
      const finished = await pollWorkflowUntilDone(workflow!.id);
      expect(finished.status).toBe("completed");
    }
  );
});
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* E2E: Cross-runtime comparison.
|
|
3
|
+
*
|
|
4
|
+
* Tests that the same task produces valid results on both Claude Code
|
|
5
|
+
* and Codex runtimes, verifying runtime parity.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import {
|
|
9
|
+
setupE2E,
|
|
10
|
+
teardownE2E,
|
|
11
|
+
testProjectId,
|
|
12
|
+
claudeAvailable,
|
|
13
|
+
codexAvailable,
|
|
14
|
+
} from "./setup";
|
|
15
|
+
import {
|
|
16
|
+
createTask,
|
|
17
|
+
executeTask,
|
|
18
|
+
pollTaskUntilDone,
|
|
19
|
+
updateTask,
|
|
20
|
+
} from "./helpers";
|
|
21
|
+
|
|
22
|
+
// Bring the shared E2E fixture up once for this file and tear it down afterwards.
beforeAll(async () => {
  await setupE2E();
});

afterAll(async () => {
  await teardownE2E();
});

describe("Cross-Runtime Comparison", () => {
  const bothAvailable = () => claudeAvailable && codexAvailable;

  /**
   * Creates a task, moves it to "queued" and triggers its execution.
   *
   * Task creation is asserted explicitly so that a rejected create request
   * fails with a readable expectation error instead of a TypeError from
   * dereferencing a missing response body.
   *
   * @param payload - Task fields forwarded to `createTask`.
   * @returns The id of the created task.
   */
  async function launchTask(
    payload: Parameters<typeof createTask>[0]
  ): Promise<string> {
    const created = await createTask(payload);
    expect(created.ok).toBe(true);
    expect(created.data).toBeTruthy();

    const taskId = created.data!.id;
    await updateTask(taskId, { status: "queued" });
    await executeTask(taskId);
    return taskId;
  }

  // NOTE(review): the skipIf condition is evaluated when the test is registered,
  // i.e. before the beforeAll hook above has run. Confirm that the availability
  // flags already hold their final values at import time of "./setup".
  it.skipIf(!bothAvailable())(
    "same task produces valid results on both runtimes",
    async () => {
      const taskPrompt =
        "Describe the TypeScript code in src/index.ts. List the exported functions and any bugs.";

      // Create and execute on Claude (no explicit assignedAgent is sent).
      const claudeTaskId = await launchTask({
        title: "Cross-runtime test (Claude)",
        description: taskPrompt,
        projectId: testProjectId,
        agentProfile: "general",
      });

      // Create and execute on Codex
      const codexTaskId = await launchTask({
        title: "Cross-runtime test (Codex)",
        description: taskPrompt,
        projectId: testProjectId,
        assignedAgent: "codex",
        agentProfile: "general",
      });

      // Wait for both tasks concurrently.
      const [claudeResult, codexResult] = await Promise.all([
        pollTaskUntilDone(claudeTaskId),
        pollTaskUntilDone(codexTaskId),
      ]);

      // Both should complete
      expect(claudeResult.status).toBe("completed");
      expect(codexResult.status).toBe("completed");

      // Both should produce non-empty results
      expect(claudeResult.result).toBeTruthy();
      expect(codexResult.result).toBeTruthy();
      expect(claudeResult.result!.length).toBeGreaterThan(50);
      expect(codexResult.result!.length).toBeGreaterThan(50);
    }
  );
});
|
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* E2E test helpers — HTTP client utilities for calling Stagent API endpoints.
|
|
3
|
+
*
|
|
4
|
+
* These helpers call the live Next.js dev/prod server. The base URL defaults
|
|
5
|
+
* to http://localhost:3000 and can be overridden via E2E_BASE_URL env var.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/** Origin prepended to every request path; E2E_BASE_URL overrides the localhost default. */
export const BASE_URL = process.env.E2E_BASE_URL ?? "http://localhost:3000";
|
|
9
|
+
|
|
10
|
+
// ---------------------------------------------------------------------------
|
|
11
|
+
// Generic fetch wrapper
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
|
|
14
|
+
/**
 * Result envelope produced by the `api` helper for every request.
 */
interface ApiResponse<T = unknown> {
  /** HTTP status code of the response. */
  status: number;
  /** Copied from the fetch response's `ok` flag. */
  ok: boolean;
  /** Parsed JSON body; `api` substitutes `null` when the body is not valid JSON. */
  data: T;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Sends a request to the Stagent server and returns status, ok flag and parsed JSON body.
 *
 * A JSON `Content-Type` header is applied unless the caller supplies its own.
 * Caller headers are normalised through `Headers`, so plain objects, tuple
 * arrays and `Headers` instances are all honoured and header names are
 * matched case-insensitively.
 *
 * @param path - Request path appended to `BASE_URL` (e.g. "/api/tasks").
 * @param options - Optional fetch options; `headers` are merged as described above.
 * @returns The response status, `ok` flag and parsed body. `data` is `null`
 *          when the body cannot be parsed as JSON.
 */
async function api<T = unknown>(
  path: string,
  options?: RequestInit
): Promise<ApiResponse<T>> {
  const headers = new Headers(options?.headers);
  if (!headers.has("Content-Type")) {
    headers.set("Content-Type", "application/json");
  }

  const res = await fetch(`${BASE_URL}${path}`, { ...options, headers });

  // Empty or non-JSON bodies (e.g. some DELETE responses) must not throw here.
  const data = (await res.json().catch(() => null)) as T;
  return { status: res.status, ok: res.ok, data };
}
|
|
34
|
+
|
|
35
|
+
// ---------------------------------------------------------------------------
|
|
36
|
+
// Projects
|
|
37
|
+
// ---------------------------------------------------------------------------
|
|
38
|
+
|
|
39
|
+
/** Request body accepted by `createProject`. */
export interface ProjectPayload {
  /** Project name (required). */
  name: string;
  /** Optional free-text description. */
  description?: string;
  /** Optional working directory for the project. */
  workingDirectory?: string;
}
|
|
44
|
+
|
|
45
|
+
/**
 * POSTs a new project to `/api/projects`.
 *
 * @param payload - Project fields, serialised as the JSON request body.
 * @returns The API response; `data` is typed as the created project's id and name.
 */
export async function createProject(payload: ProjectPayload) {
  const body = JSON.stringify(payload);
  return api<{ id: string; name: string }>("/api/projects", {
    method: "POST",
    body,
  });
}
|
|
51
|
+
|
|
52
|
+
/**
 * Sends a DELETE request for the project with the given id.
 *
 * @param id - Project identifier, interpolated into the request path.
 */
export async function deleteProject(id: string) {
  const endpoint = `/api/projects/${id}`;
  return api(endpoint, { method: "DELETE" });
}
|
|
55
|
+
|
|
56
|
+
// ---------------------------------------------------------------------------
|
|
57
|
+
// Tasks
|
|
58
|
+
// ---------------------------------------------------------------------------
|
|
59
|
+
|
|
60
|
+
/** Request body accepted by `createTask` (and, partially, by `updateTask`). */
export interface TaskPayload {
  /** Task title (required). */
  title: string;
  /** Optional task description. */
  description?: string;
  /** Optional id of the project the task belongs to. */
  projectId?: string;
  /** Optional numeric priority. */
  priority?: number;
  /** Optional agent assignment, e.g. "codex". */
  assignedAgent?: string;
  /** Optional agent profile id, e.g. "general". */
  agentProfile?: string;
}
|
|
68
|
+
|
|
69
|
+
/** Task record shape the task helpers expect back from the API. */
export interface TaskRow {
  /** Task identifier. */
  id: string;
  /** Task title. */
  title: string;
  /** Current status; polling treats "completed", "failed" and "cancelled" as terminal. */
  status: string;
  /** Result text, or null when none is present. */
  result: string | null;
  /** Assigned agent, or null. */
  assignedAgent: string | null;
  /** Agent profile id, or null. */
  agentProfile: string | null;
  /** Owning project id, or null. */
  projectId: string | null;
  /** Creation timestamp as a string. */
  createdAt: string;
  /** Last-update timestamp as a string. */
  updatedAt: string;
}
|
|
80
|
+
|
|
81
|
+
/**
 * POSTs a new task to `/api/tasks`.
 *
 * @param payload - Task fields, serialised as the JSON request body.
 * @returns The API response with `data` typed as `TaskRow`.
 */
export async function createTask(payload: TaskPayload) {
  const body = JSON.stringify(payload);
  return api<TaskRow>("/api/tasks", { method: "POST", body });
}
|
|
87
|
+
|
|
88
|
+
/**
 * Fetches a single task by id.
 *
 * @param id - Task identifier, interpolated into the request path.
 * @returns The API response with `data` typed as `TaskRow`.
 */
export async function getTask(id: string) {
  const endpoint = `/api/tasks/${id}`;
  return api<TaskRow>(endpoint);
}
|
|
91
|
+
|
|
92
|
+
/**
 * PATCHes a task with a partial set of fields.
 *
 * @param id - Task identifier, interpolated into the request path.
 * @param payload - Any subset of the task payload fields plus `status`.
 * @returns The API response with `data` typed as `TaskRow`.
 */
export async function updateTask(id: string, payload: Partial<TaskPayload & { status: string }>) {
  const endpoint = `/api/tasks/${id}`;
  const body = JSON.stringify(payload);
  return api<TaskRow>(endpoint, { method: "PATCH", body });
}
|
|
98
|
+
|
|
99
|
+
/**
 * Sends a DELETE request for the task with the given id.
 *
 * @param id - Task identifier, interpolated into the request path.
 */
export async function deleteTask(id: string) {
  const endpoint = `/api/tasks/${id}`;
  return api(endpoint, { method: "DELETE" });
}
|
|
102
|
+
|
|
103
|
+
/**
 * Triggers execution of a task via POST `/api/tasks/{id}/execute` (no request body).
 *
 * @param id - Task identifier, interpolated into the request path.
 * @returns The API response; `data` is typed as an object with a `message` string.
 */
export async function executeTask(id: string) {
  const endpoint = `/api/tasks/${id}/execute`;
  return api<{ message: string }>(endpoint, { method: "POST" });
}
|
|
108
|
+
|
|
109
|
+
// ---------------------------------------------------------------------------
|
|
110
|
+
// Workflows
|
|
111
|
+
// ---------------------------------------------------------------------------
|
|
112
|
+
|
|
113
|
+
/** One step inside a workflow definition sent to the API. */
export interface WorkflowStep {
  /** Step identifier. */
  id: string;
  /** Step name. */
  name: string;
  /** Prompt text for the step. */
  prompt: string;
  /** Optional approval flag. */
  requiresApproval?: boolean;
  /** Optional list of strings — presumably ids of prerequisite steps; confirm against the engine. */
  dependsOn?: string[];
  /** Optional agent assignment for this step. */
  assignedAgent?: string;
  /** Optional agent profile id for this step. */
  agentProfile?: string;
}
|
|
122
|
+
|
|
123
|
+
/** Request body accepted by `createWorkflow`. */
export interface WorkflowPayload {
  /** Workflow name (required). */
  name: string;
  /** Optional id of the owning project. */
  projectId?: string;
  /** Workflow definition: a pattern name plus its steps. */
  definition: {
    /** Pattern identifier as a string. */
    pattern: string;
    /** Steps that make up the workflow. */
    steps: WorkflowStep[];
  };
}
|
|
131
|
+
|
|
132
|
+
/** Workflow record shape the workflow helpers expect back from the API. */
export interface WorkflowRow {
  /** Workflow identifier. */
  id: string;
  /** Workflow name. */
  name: string;
  /** Current status; polling treats "completed", "failed" and "cancelled" as terminal. */
  status: string;
  /** Definition as a string — presumably serialised JSON; confirm against the API. */
  definition: string;
  /** Owning project id, or null. */
  projectId: string | null;
  /** Creation timestamp as a string. */
  createdAt: string;
  /** Last-update timestamp as a string. */
  updatedAt: string;
}
|
|
141
|
+
|
|
142
|
+
/**
 * POSTs a new workflow to `/api/workflows`.
 *
 * @param payload - Workflow fields, serialised as the JSON request body.
 * @returns The API response with `data` typed as `WorkflowRow`.
 */
export async function createWorkflow(payload: WorkflowPayload) {
  const body = JSON.stringify(payload);
  return api<WorkflowRow>("/api/workflows", { method: "POST", body });
}
|
|
148
|
+
|
|
149
|
+
/**
 * Fetches a single workflow by id.
 *
 * @param id - Workflow identifier, interpolated into the request path.
 * @returns The API response with `data` typed as `WorkflowRow`.
 */
export async function getWorkflow(id: string) {
  const endpoint = `/api/workflows/${id}`;
  return api<WorkflowRow>(endpoint);
}
|
|
152
|
+
|
|
153
|
+
/**
 * Triggers execution of a workflow via POST `/api/workflows/{id}/execute` (no request body).
 *
 * @param id - Workflow identifier, interpolated into the request path.
 * @returns The API response; `data` is typed as `{ status, workflowId }`.
 */
export async function executeWorkflow(id: string) {
  const endpoint = `/api/workflows/${id}/execute`;
  return api<{ status: string; workflowId: string }>(endpoint, {
    method: "POST",
  });
}
|
|
159
|
+
|
|
160
|
+
// ---------------------------------------------------------------------------
|
|
161
|
+
// Blueprints
|
|
162
|
+
// ---------------------------------------------------------------------------
|
|
163
|
+
|
|
164
|
+
/**
 * Requests the blueprint list from `/api/blueprints`.
 *
 * @returns The API response with `data` typed as an array of unknown items.
 */
export async function listBlueprints() {
  const endpoint = "/api/blueprints";
  return api<unknown[]>(endpoint);
}
|
|
167
|
+
|
|
168
|
+
/**
 * Instantiates a blueprint via POST `/api/blueprints/{blueprintId}/instantiate`.
 *
 * @param blueprintId - Blueprint identifier, interpolated into the request path.
 * @param variables - String variables sent in the request body.
 * @param projectId - Optional project id sent alongside the variables.
 * @returns The API response; `data` is typed as an object holding the created `workflow`.
 */
export async function instantiateBlueprint(
  blueprintId: string,
  variables: Record<string, string>,
  projectId?: string
) {
  const endpoint = `/api/blueprints/${blueprintId}/instantiate`;
  const body = JSON.stringify({ variables, projectId });
  return api<{ workflow: WorkflowRow }>(endpoint, { method: "POST", body });
}
|
|
181
|
+
|
|
182
|
+
// ---------------------------------------------------------------------------
|
|
183
|
+
// Profiles
|
|
184
|
+
// ---------------------------------------------------------------------------
|
|
185
|
+
|
|
186
|
+
/**
 * Requests the profile list from `/api/profiles`.
 *
 * @returns The API response with `data` typed as an array of unknown items.
 */
export async function listProfiles() {
  const endpoint = "/api/profiles";
  return api<unknown[]>(endpoint);
}
|
|
189
|
+
|
|
190
|
+
// ---------------------------------------------------------------------------
|
|
191
|
+
// Runtime connectivity
|
|
192
|
+
// ---------------------------------------------------------------------------
|
|
193
|
+
|
|
194
|
+
/**
 * Queries `/api/settings/connectivity` for the given runtime.
 *
 * The runtime id is URL-encoded so that ids containing reserved characters
 * (spaces, `&`, `=`, `#`, ...) cannot corrupt the query string.
 *
 * @param runtimeId - Runtime identifier sent as the `runtime` query parameter.
 * @returns The API response; `data` is typed as `{ connected, method? }`.
 */
export async function checkRuntimeConnectivity(runtimeId: string) {
  const runtime = encodeURIComponent(runtimeId);
  return api<{ connected: boolean; method?: string }>(
    `/api/settings/connectivity?runtime=${runtime}`
  );
}
|
|
199
|
+
|
|
200
|
+
// ---------------------------------------------------------------------------
|
|
201
|
+
// Polling helpers
|
|
202
|
+
// ---------------------------------------------------------------------------
|
|
203
|
+
|
|
204
|
+
/** Delay between two consecutive status polls, in milliseconds. */
const POLL_INTERVAL_MS = 3 * 1000;

/** Default overall polling budget, in milliseconds. */
const DEFAULT_TIMEOUT_MS = 120 * 1000;

/** Task statuses after which polling stops. */
const TERMINAL_TASK_STATUSES = new Set<string>(["completed", "failed", "cancelled"]);

/** Workflow statuses after which polling stops. */
const TERMINAL_WORKFLOW_STATUSES = new Set<string>(["completed", "failed", "cancelled"]);
|
|
218
|
+
|
|
219
|
+
/**
 * Polls a task until its status is one of the terminal task statuses.
 *
 * Responses without a body (or with a non-terminal status) are ignored and
 * the task is fetched again after `POLL_INTERVAL_MS`.
 *
 * @param taskId - Id of the task to watch.
 * @param timeoutMs - Polling budget in milliseconds; defaults to `DEFAULT_TIMEOUT_MS`.
 * @returns The task row as last fetched, in a terminal status.
 * @throws Error when no terminal status is observed before the budget is used up.
 */
export async function pollTaskUntilDone(
  taskId: string,
  timeoutMs = DEFAULT_TIMEOUT_MS
): Promise<TaskRow> {
  const deadline = Date.now() + timeoutMs;

  while (Date.now() < deadline) {
    const task = (await getTask(taskId)).data;
    const finished = Boolean(task) && TERMINAL_TASK_STATUSES.has(task.status);
    if (finished) {
      return task;
    }
    await sleep(POLL_INTERVAL_MS);
  }

  const message = `Task ${taskId} did not reach a terminal status within ${timeoutMs}ms`;
  throw new Error(message);
}
|
|
235
|
+
|
|
236
|
+
/**
 * Polls a workflow until its status is one of the terminal workflow statuses.
 *
 * Responses without a body (or with a non-terminal status) are ignored and
 * the workflow is fetched again after `POLL_INTERVAL_MS`.
 *
 * @param workflowId - Id of the workflow to watch.
 * @param timeoutMs - Polling budget in milliseconds; defaults to `DEFAULT_TIMEOUT_MS`.
 * @returns The workflow row as last fetched, in a terminal status.
 * @throws Error when no terminal status is observed before the budget is used up.
 */
export async function pollWorkflowUntilDone(
  workflowId: string,
  timeoutMs = DEFAULT_TIMEOUT_MS
): Promise<WorkflowRow> {
  const deadline = Date.now() + timeoutMs;

  while (Date.now() < deadline) {
    const workflow = (await getWorkflow(workflowId)).data;
    const finished =
      Boolean(workflow) && TERMINAL_WORKFLOW_STATUSES.has(workflow.status);
    if (finished) {
      return workflow;
    }
    await sleep(POLL_INTERVAL_MS);
  }

  const message = `Workflow ${workflowId} did not reach a terminal status within ${timeoutMs}ms`;
  throw new Error(message);
}
|
|
252
|
+
|
|
253
|
+
// ---------------------------------------------------------------------------
|
|
254
|
+
// Utilities
|
|
255
|
+
// ---------------------------------------------------------------------------
|
|
256
|
+
|
|
257
|
+
/**
 * Returns a promise that resolves after the given delay.
 *
 * @param ms - Delay in milliseconds, passed straight to `setTimeout`.
 */
export function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
|
260
|
+
|
|
261
|
+
/**
 * Reports whether a runtime is usable according to the connectivity endpoint.
 *
 * @param runtimeId - Runtime identifier forwarded to `checkRuntimeConnectivity`.
 * @returns `true` only when the request succeeds and the body reports
 *          `connected`; `false` otherwise, including when the request throws.
 */
export async function isRuntimeAvailable(
  runtimeId: string
): Promise<boolean> {
  try {
    const probe = await checkRuntimeConnectivity(runtimeId);
    return probe.ok && Boolean(probe.data?.connected);
  } catch {
    // Best-effort probe: any error simply means "not available".
    return false;
  }
}
|
|
275
|
+
|
|
276
|
+
/**
 * Probes the Stagent server by requesting `/api/projects`.
 *
 * @returns The response's `ok` flag, or `false` when the request itself throws.
 */
export async function isServerReachable(): Promise<boolean> {
  try {
    const { ok } = await fetch(`${BASE_URL}/api/projects`);
    return ok;
  } catch {
    // Best-effort probe: a failed request means the server is not reachable.
    return false;
  }
}
|