stagent 0.1.10 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/README.md +24 -24
  2. package/package.json +1 -2
  3. package/src/app/api/profiles/route.ts +0 -1
  4. package/src/app/globals.css +0 -5
  5. package/src/app/tasks/page.tsx +5 -0
  6. package/src/components/profiles/profile-detail-view.tsx +1 -16
  7. package/src/components/profiles/profile-form-view.tsx +0 -22
  8. package/src/lib/agents/__tests__/claude-agent.test.ts +7 -2
  9. package/src/lib/agents/__tests__/learned-context.test.ts +500 -0
  10. package/src/lib/agents/__tests__/pattern-extractor.test.ts +243 -0
  11. package/src/lib/agents/__tests__/sweep.test.ts +202 -0
  12. package/src/lib/agents/claude-agent.ts +104 -78
  13. package/src/lib/agents/learned-context.ts +5 -13
  14. package/src/lib/agents/pattern-extractor.ts +15 -64
  15. package/src/lib/agents/profiles/builtins/code-reviewer/profile.yaml +0 -1
  16. package/src/lib/agents/profiles/builtins/data-analyst/profile.yaml +0 -1
  17. package/src/lib/agents/profiles/builtins/devops-engineer/profile.yaml +0 -1
  18. package/src/lib/agents/profiles/builtins/document-writer/profile.yaml +0 -1
  19. package/src/lib/agents/profiles/builtins/general/profile.yaml +0 -1
  20. package/src/lib/agents/profiles/builtins/health-fitness-coach/profile.yaml +0 -1
  21. package/src/lib/agents/profiles/builtins/learning-coach/profile.yaml +0 -1
  22. package/src/lib/agents/profiles/builtins/project-manager/profile.yaml +0 -1
  23. package/src/lib/agents/profiles/builtins/researcher/profile.yaml +0 -1
  24. package/src/lib/agents/profiles/builtins/shopping-assistant/profile.yaml +0 -1
  25. package/src/lib/agents/profiles/builtins/sweep/profile.yaml +0 -1
  26. package/src/lib/agents/profiles/builtins/technical-writer/profile.yaml +0 -1
  27. package/src/lib/agents/profiles/builtins/travel-planner/profile.yaml +0 -1
  28. package/src/lib/agents/profiles/builtins/wealth-manager/profile.yaml +0 -1
  29. package/src/lib/agents/profiles/registry.ts +0 -1
  30. package/src/lib/agents/profiles/types.ts +0 -1
  31. package/src/lib/agents/runtime/catalog.ts +1 -1
  32. package/src/lib/agents/runtime/claude.ts +66 -0
  33. package/src/lib/constants/task-status.ts +6 -0
  34. package/src/lib/data/seed-data/profiles.ts +0 -3
  35. package/src/lib/usage/__tests__/ledger.test.ts +29 -5
  36. package/src/lib/usage/ledger.ts +3 -1
  37. package/src/lib/usage/pricing.ts +61 -7
  38. package/src/lib/validators/__tests__/profile.test.ts +0 -15
  39. package/src/lib/validators/profile.ts +0 -1
  40. package/src/lib/workflows/__tests__/engine.test.ts +2 -0
  41. package/src/lib/workflows/engine.ts +2 -1
package/README.md CHANGED
@@ -54,7 +54,8 @@ Stagent ships a shared runtime registry that routes tasks, schedules, and workfl
54
54
  | 🚨 | **[Ambient Approvals](#ambient-approvals)** | Shell-level approval prompts that keep Inbox as the durable supervision queue |
55
55
  | 🔒 | **[Tool Permissions](#tool-permission-persistence)** | Trusted-tool policies with explicit "Always Allow" rules |
56
56
  | 📋 | **[Kanban Board](#kanban-board-operations)** | Inline editing, bulk operations, and persistent board state |
57
- | 🤖 | **[AI Assist → Workflows](#ai-assist--workflow-creation)** | Bridge task assist recommendations into governed workflow execution *(in progress)* |
57
+ | 🤖 | **[AI Assist → Workflows](#ai-assist--workflow-creation)** | Bridge task assist recommendations into governed workflow execution |
58
+ | 🧬 | **[Agent Self-Improvement](#agent-self-improvement)** | Agents learn patterns from execution history with human-approved context evolution |
58
59
 
59
60
  ---
60
61
 
@@ -71,6 +72,7 @@ Stagent ships a shared runtime registry that routes tasks, schedules, and workfl
71
72
  - **Provider runtime abstraction** — Tasks, schedules, workflows, task assist, and health checks route through shared runtime adapters instead of provider-specific entry points
72
73
  - **Reusable agent profiles** — Profiles define instructions, allowed tools, runtime tuning, and MCP configs for repeated use
73
74
  - **Permission pre-check** — Saved "Always Allow" patterns bypass the notification loop for trusted tools
75
+ - **Learned context loop** — Pattern extraction → human approval → versioned context injection creates a supervised self-improvement cycle
74
76
 
75
77
  ---
76
78
 
@@ -113,7 +115,7 @@ Claude Agent SDK integration with the `canUseTool` polling pattern remains the d
113
115
  OpenAI Codex App Server is integrated as Stagent's second governed runtime. Codex-backed tasks preserve project working directories, document context, resumable thread IDs, inbox approval requests, user questions, and provider-labeled logs. The same runtime can also power task assist, scheduled firings, and workflow child tasks.
114
116
 
115
117
  #### Agent Profiles
116
- Profile-backed execution with specialist definitions for different job types. Each profile packages instructions, allowed tools, runtime tuning, and MCP server configuration so teams can reuse behavior intentionally instead of relying on ad hoc prompts. Workflow steps and schedules can reference profiles directly, and runtimes can be selected independently when provider support differs.
118
+ Profile-backed execution with specialist definitions for different job types. Each profile packages instructions, allowed tools, max turns, and output format so teams can reuse behavior intentionally instead of relying on ad hoc prompts. Workflow steps and schedules can reference profiles directly, and runtimes can be selected independently when provider support differs.
117
119
 
118
120
  <img src="https://raw.githubusercontent.com/navam-io/stagent/main/public/readme/profiles-list.png" alt="Stagent agent profiles" width="1200" />
119
121
 
@@ -143,10 +145,13 @@ AI-powered task creation: generate improved descriptions, break tasks into sub-t
143
145
  | <img src="https://raw.githubusercontent.com/navam-io/stagent/main/public/readme/dashboard-create-form-empty.png" alt="Empty task creation form" width="380" /> | <img src="https://raw.githubusercontent.com/navam-io/stagent/main/public/readme/dashboard-create-form-ai-assist.png" alt="AI Assist suggestions panel" width="380" /> | <img src="https://raw.githubusercontent.com/navam-io/stagent/main/public/readme/dashboard-create-form-ai-applied.png" alt="AI suggestions applied to form" width="380" /> |
144
146
 
145
147
  #### AI Assist → Workflow Creation
146
- *(In progress)* Bridge from AI task assist to workflow engine: when task assist recommends a multi-step plan, a "Create as Workflow" button converts the recommendation into a validated workflow definition with per-step profile assignments, dependency ordering, and pattern selection across all six workflow types. The `WorkflowConfirmationSheet` lets operators review and edit steps, profiles, and configuration before creating the workflow. A keyword-based profile suggestion fallback ensures steps get reasonable profile assignments even without the AI classifier.
148
+ Bridge from AI task assist to workflow engine: when task assist recommends a multi-step plan, a "Create as Workflow" button converts the recommendation into a validated workflow definition with per-step profile assignments, dependency ordering, and pattern selection across all six workflow types. The `WorkflowConfirmationSheet` lets operators review and edit steps, profiles, and configuration before creating the workflow. A keyword-based profile suggestion fallback ensures steps get reasonable profile assignments even without the AI classifier.
147
149
 
148
150
  <img src="https://raw.githubusercontent.com/navam-io/stagent/main/public/readme/dashboard-workflow-confirm.png" alt="Workflow creation from AI Assist" width="1200" />
149
151
 
152
+ #### Agent Self-Improvement
153
+ Agents learn from execution history through a human-approved instruction evolution loop. After each task completion, the pattern extractor analyzes logs and proposes context updates — concise behavioral rules the agent should follow in future runs. Operators approve, reject, or edit proposals before they take effect. Learned context is versioned with rollback support and size-limited summarization to prevent unbounded growth. A sweep agent can audit the codebase for improvement opportunities and create prioritized tasks from its findings.
154
+
150
155
  #### Session Management
151
156
  Resume failed or cancelled agent tasks with one click. Tracks retry counts (limit: 3), detects expired sessions, and provides atomic claim to prevent duplicate runs.
152
157
 
@@ -219,7 +224,7 @@ Configuration hub with provider-aware sections: Claude authentication (API key o
219
224
  The `npx stagent` entry point boots a Next.js server from the published npm package. It is built from `bin/cli.ts` into `dist/cli.js` using tsup, and serves as the primary distribution channel — no clone required.
220
225
 
221
226
  #### Database
222
- SQLite with WAL mode via better-sqlite3 + Drizzle ORM. Eight tables: `projects`, `tasks`, `workflows`, `agent_logs`, `notifications`, `documents`, `schedules`, `settings`. Self-healing bootstrap — tables are created on startup if missing.
227
+ SQLite with WAL mode via better-sqlite3 + Drizzle ORM. Ten tables: `projects`, `tasks`, `workflows`, `agent_logs`, `notifications`, `documents`, `schedules`, `settings`, `learned_context`, `usage_ledger`. Self-healing bootstrap — tables are created on startup if missing.
223
228
 
224
229
  #### App Shell
225
230
  Responsive sidebar with collapsible icon-only mode, custom Stagent logo, tooltip navigation, dark/light/system theme, and OKLCH hue 250 blue-indigo color palette. Built on shadcn/ui (New York style) with PWA manifest and app icons. Routes: Home, Dashboard, Projects, Documents, Workflows, Profiles, Schedules, Inbox, Monitor, Settings.
@@ -258,32 +263,38 @@ npm run test:coverage # Coverage report
258
263
  ```
259
264
  src/
260
265
  ├── app/ # Next.js App Router pages
261
- │ ├── dashboard/ # Project overview
266
+ │ ├── dashboard/ # Task kanban board
262
267
  │ ├── projects/[id]/ # Project detail
268
+ │ ├── tasks/ # Task detail + creation (redirects to dashboard)
269
+ │ ├── profiles/ # Agent profile gallery + detail + creation
263
270
  │ ├── documents/ # Document browser
264
- │ ├── workflows/ # Workflow management
271
+ │ ├── workflows/ # Workflow management + blueprints
265
272
  │ ├── schedules/ # Schedule management
273
+ │ ├── costs/ # Cost & usage dashboard
266
274
  │ ├── inbox/ # Notifications
267
275
  │ ├── monitor/ # Log streaming
268
276
  │ └── settings/ # Configuration
269
277
  ├── components/
270
278
  │ ├── dashboard/ # Homepage widgets + charts
271
279
  │ ├── tasks/ # Board, cards, panels
272
- │ ├── workflows/ # Workflow UI
280
+ │ ├── profiles/ # Profile gallery, detail, form, learned context
281
+ │ ├── workflows/ # Workflow UI + blueprints + swarm
273
282
  │ ├── documents/ # Document browser + upload
283
+ │ ├── costs/ # Cost dashboard + filters
274
284
  │ ├── schedules/ # Schedule management
275
285
  │ ├── monitoring/ # Log viewer
276
286
  │ ├── notifications/ # Inbox + permission actions
277
- │ ├── settings/ # Auth, permissions, data mgmt
287
+ │ ├── settings/ # Auth, permissions, budgets, data mgmt
278
288
  │ ├── shared/ # App shell, sidebar
279
289
  │ └── ui/ # shadcn/ui primitives
280
290
  └── lib/
281
- ├── agents/ # Runtime adapters, provider integrations, profiles
291
+ ├── agents/ # Runtime adapters, profiles, learned context, pattern extraction
282
292
  ├── db/ # Schema, migrations
283
293
  ├── documents/ # Preprocessing + context builder
284
294
  ├── workflows/ # Engine + types + blueprints
285
295
  ├── schedules/ # Scheduler engine + interval parser
286
296
  ├── settings/ # Auth, permissions, helpers
297
+ ├── usage/ # Metering ledger + pricing registry
287
298
  ├── constants/ # Status transitions, colors
288
299
  ├── queries/ # Chart data aggregation
289
300
  ├── validators/ # Zod schemas
@@ -324,7 +335,7 @@ src/
324
335
  | **Profiles** | `/api/profiles` | GET | List agent profiles |
325
336
  | | `/api/profiles/[id]` | GET/PUT/DELETE | Profile CRUD |
326
337
  | | `/api/profiles/[id]/test` | POST | Run behavioral tests on a profile |
327
- | | `/api/profiles/[id]/context` | GET | Profile context for agent execution |
338
+ | | `/api/profiles/[id]/context` | GET/POST/PATCH | Learned context: version history, manual add, approve/reject/rollback |
328
339
  | | `/api/profiles/import` | POST | Import profile from GitHub URL |
329
340
  | **Notifications** | `/api/notifications` | GET/POST | List and create notifications |
330
341
  | | `/api/notifications/[id]` | PATCH/DELETE | Update and delete notification |
@@ -357,7 +368,7 @@ All 14 features shipped across three layers:
357
368
  | **Core** | Project management, task board, agent integration, inbox notifications, monitoring dashboard |
358
369
  | **Polish** | Homepage dashboard, UX fixes, workflow engine, AI task assist, content handling, session management |
359
370
 
360
- ### Post-MVP — Complete (25 features)
371
+ ### Post-MVP — Complete (27 features)
361
372
 
362
373
  | Category | Feature | What shipped |
363
374
  |----------|---------|-------------|
@@ -369,6 +380,8 @@ All 14 features shipped across three layers:
369
380
  | **Agent Intelligence** | Multi-Agent Routing | Profile registry (4 profiles), task classifier, per-step profile assignment |
370
381
  | | Autonomous Loop Execution | 4 stop conditions, iteration context chaining, pause/resume, loop status view |
371
382
  | | Multi-Agent Swarm | Mayor → worker pool → refinery orchestration with retryable stages |
383
+ | | AI Assist → Workflows | Bridge task assist into workflow engine with profile assignment and pattern selection |
384
+ | | Agent Self-Improvement | Pattern extraction from logs, human-approved context evolution, versioned rollback |
372
385
  | **Agent Profiles** | Agent Profile Catalog | 13 domain-specific profiles, GitHub import, behavioral testing, MCP passthrough |
373
386
  | | Workflow Blueprints | 8 templates, gallery, YAML editor, dynamic forms, GitHub import, lineage tracking |
374
387
  | **UI Enhancement** | Ambient Approvals | Shell-level approval presenter on any route for fast supervision |
@@ -384,25 +397,12 @@ All 14 features shipped across three layers:
384
397
  | | Tool Permission Persistence | "Always Allow" patterns, pre-check bypass, Settings management |
385
398
  | | Provider Runtimes | Shared runtime registry with Claude Code and OpenAI Codex App Server adapters |
386
399
  | | OpenAI Codex Runtime | Codex App Server integration with inbox approvals, logs, and thread resumption |
387
- | | npm Publish Readiness | `npx stagent` distribution channel with CLI bundling and package config |
388
400
  | | Cross-Provider Profiles | Profile compatibility layer ensuring profiles work across Claude and Codex runtimes |
389
401
  | | Parallel Fork/Join | 2-5 concurrent research branches with synthesis step |
390
402
  | **Governance** | Usage Metering Ledger | Provider-normalized token and spend tracking across all execution paths |
391
403
  | | Spend Budget Guardrails | Per-project and global budgets with enforcement and alerts |
392
404
  | | Cost & Usage Dashboard | Summary cards, trend views, provider/model breakdowns, budget audit visibility |
393
405
 
394
- ### In Progress
395
-
396
- | Feature | Description |
397
- |---------|-------------|
398
- | **AI Assist → Workflow Creation** | Bridge AI task assist recommendations into the workflow engine with profile assignment and pattern selection |
399
-
400
- ### Planned
401
-
402
- | Feature | Description |
403
- |---------|-------------|
404
- | **Agent Self-Improvement** | Agents learn patterns and update context with human approval |
405
-
406
406
  ---
407
407
 
408
408
  ## Contributing
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "stagent",
3
- "version": "0.1.10",
3
+ "version": "0.1.11",
4
4
  "description": "Governed AI agent workspace for supervised local execution, workflows, documents, and provider runtimes.",
5
5
  "keywords": [
6
6
  "ai",
@@ -51,7 +51,6 @@
51
51
  },
52
52
  "dependencies": {
53
53
  "@anthropic-ai/claude-agent-sdk": "^0.2.71",
54
- "@anthropic-ai/sdk": "^0.78.0",
55
54
  "@dnd-kit/core": "^6.3.1",
56
55
  "@dnd-kit/sortable": "^10.0.0",
57
56
  "@dnd-kit/utilities": "^3.2.2",
package/src/app/api/profiles/route.ts CHANGED
@@ -13,7 +13,6 @@ export async function GET() {
13
13
  allowedTools: p.allowedTools,
14
14
  mcpServers: p.mcpServers,
15
15
  canUseToolPolicy: p.canUseToolPolicy,
16
- temperature: p.temperature,
17
16
  maxTurns: p.maxTurns,
18
17
  outputFormat: p.outputFormat,
19
18
  version: p.version,
package/src/app/globals.css CHANGED
@@ -446,11 +446,6 @@
446
446
  box-shadow: var(--glass-shadow-sm);
447
447
  }
448
448
 
449
- /* Temperature slider gradient track */
450
- .slider-temperature [data-slot="slider-range"] {
451
- background: linear-gradient(90deg, oklch(0.6 0.18 260), oklch(0.7 0.15 55));
452
- }
453
-
454
449
  [data-slot="popover-content"],
455
450
  [data-slot="dropdown-menu-content"],
456
451
  [data-slot="select-content"] {
package/src/app/tasks/page.tsx CHANGED
@@ -0,0 +1,5 @@
1
+ import { redirect } from "next/navigation";
2
+
3
+ export default function TasksIndexPage() {
4
+ redirect("/dashboard");
5
+ }
package/src/components/profiles/profile-detail-view.tsx CHANGED
@@ -18,7 +18,6 @@ import {
18
18
  Sparkles,
19
19
  Tag,
20
20
  User,
21
- Thermometer,
22
21
  Repeat,
23
22
  FileOutput,
24
23
  Wrench,
@@ -262,20 +261,6 @@ export function ProfileDetailView({ profileId, isBuiltin, initialProfile }: Prof
262
261
  <CardTitle className="text-sm font-medium">Configuration</CardTitle>
263
262
  </CardHeader>
264
263
  <CardContent className="space-y-3">
265
- {/* Temperature Gauge */}
266
- {profile.temperature !== undefined && (
267
- <div className="flex items-center gap-2">
268
- <Thermometer className="h-3.5 w-3.5 text-muted-foreground shrink-0" />
269
- <span className="text-xs text-muted-foreground w-20">Temperature</span>
270
- <div className="flex-1 h-1.5 rounded-full bg-muted">
271
- <div
272
- className="h-full rounded-full bg-primary"
273
- style={{ width: `${(profile.temperature / 2) * 100}%` }}
274
- />
275
- </div>
276
- <span className="text-xs font-medium w-8 text-right">{profile.temperature}</span>
277
- </div>
278
- )}
279
264
  {/* Max Turns */}
280
265
  {profile.maxTurns !== undefined && (
281
266
  <div className="flex items-center gap-2">
@@ -294,7 +279,7 @@ export function ProfileDetailView({ profileId, isBuiltin, initialProfile }: Prof
294
279
  </Badge>
295
280
  </div>
296
281
  )}
297
- {!profile.temperature && !profile.maxTurns && !profile.outputFormat && (
282
+ {!profile.maxTurns && !profile.outputFormat && (
298
283
  <p className="text-sm text-muted-foreground">Default configuration</p>
299
284
  )}
300
285
  </CardContent>
package/src/components/profiles/profile-form-view.tsx CHANGED
@@ -70,7 +70,6 @@ export function ProfileFormView({
70
70
  ]);
71
71
  const [codexInstructions, setCodexInstructions] = useState("");
72
72
  const [allowedTools, setAllowedTools] = useState("");
73
- const [temperature, setTemperature] = useState(0.5);
74
73
  const [maxTurns, setMaxTurns] = useState(30);
75
74
  const [outputFormat, setOutputFormat] = useState("");
76
75
  const [submitting, setSubmitting] = useState(false);
@@ -94,7 +93,6 @@ export function ProfileFormView({
94
93
  profile.runtimeOverrides?.["openai-codex-app-server"]?.instructions ?? ""
95
94
  );
96
95
  setAllowedTools(profile.allowedTools?.join(", ") ?? "");
97
- setTemperature(profile.temperature ?? 0.5);
98
96
  setMaxTurns(profile.maxTurns ?? 30);
99
97
  setOutputFormat(profile.outputFormat ?? "");
100
98
  })
@@ -145,7 +143,6 @@ export function ProfileFormView({
145
143
  }
146
144
  : undefined,
147
145
  allowedTools: parseCommaSeparated(allowedTools),
148
- temperature,
149
146
  maxTurns,
150
147
  outputFormat: outputFormat.trim() || undefined,
151
148
  };
@@ -307,25 +304,6 @@ export function ProfileFormView({
307
304
  {/* Model Tuning */}
308
305
  <FormSectionCard icon={SlidersHorizontal} title="Model Tuning">
309
306
  <div className="space-y-4">
310
- <div className="space-y-2">
311
- <div className="flex items-center justify-between">
312
- <Label htmlFor="profile-temp">Temperature</Label>
313
- <Badge variant="secondary" className="tabular-nums text-xs">
314
- {temperature.toFixed(2)}
315
- </Badge>
316
- </div>
317
- <div className="slider-temperature">
318
- <Slider
319
- id="profile-temp"
320
- min={0}
321
- max={1}
322
- step={0.05}
323
- value={[temperature]}
324
- onValueChange={([v]) => setTemperature(v)}
325
- />
326
- </div>
327
- <p className="text-xs text-muted-foreground">Lower = deterministic, higher = creative</p>
328
- </div>
329
307
  <div className="space-y-2">
330
308
  <div className="flex items-center justify-between">
331
309
  <Label htmlFor="profile-turns">Max Turns</Label>
package/src/lib/agents/__tests__/claude-agent.test.ts CHANGED
@@ -315,12 +315,17 @@ describe("executeClaudeTask", () => {
315
315
 
316
316
  await executeClaudeTask("task-1");
317
317
 
318
- // query prompt should include output instructions and fall back to the title
318
+ // F1: prompt contains only user task text (title fallback); system instructions in systemPrompt
319
319
  expect(mockQuery).toHaveBeenCalledWith(
320
320
  expect.objectContaining({
321
- prompt: "Write outputs to /tmp/stagent-outputs/task-1\n\nTest Task",
321
+ prompt: "Test Task",
322
322
  })
323
323
  );
324
+ // System instructions (including output instructions) are in the systemPrompt option
325
+ const callOptions = mockQuery.mock.calls[0][0].options;
326
+ expect(callOptions.systemPrompt).toBeDefined();
327
+ expect(callOptions.maxTurns).toBeDefined();
328
+ expect(callOptions.maxBudgetUsd).toBeDefined();
324
329
  });
325
330
  });
326
331