stagent 0.9.3 → 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/dist/cli.js +36 -1
  2. package/docs/superpowers/specs/2026-04-06-workflow-intelligence-stack-design.md +388 -0
  3. package/package.json +1 -1
  4. package/src/app/api/license/route.ts +3 -2
  5. package/src/app/api/workflows/[id]/debug/route.ts +18 -0
  6. package/src/app/api/workflows/[id]/execute/route.ts +39 -8
  7. package/src/app/api/workflows/optimize/route.ts +30 -0
  8. package/src/app/layout.tsx +4 -2
  9. package/src/components/chat/chat-message-markdown.tsx +78 -3
  10. package/src/components/chat/chat-message.tsx +12 -4
  11. package/src/components/settings/cloud-account-section.tsx +14 -12
  12. package/src/components/workflows/error-timeline.tsx +83 -0
  13. package/src/components/workflows/step-live-metrics.tsx +182 -0
  14. package/src/components/workflows/step-progress-bar.tsx +77 -0
  15. package/src/components/workflows/workflow-debug-panel.tsx +192 -0
  16. package/src/components/workflows/workflow-optimizer-panel.tsx +227 -0
  17. package/src/lib/agents/claude-agent.ts +4 -4
  18. package/src/lib/agents/runtime/anthropic-direct.ts +3 -3
  19. package/src/lib/agents/runtime/catalog.ts +30 -1
  20. package/src/lib/agents/runtime/openai-direct.ts +3 -3
  21. package/src/lib/billing/products.ts +6 -6
  22. package/src/lib/book/chapter-mapping.ts +6 -0
  23. package/src/lib/book/content.ts +10 -0
  24. package/src/lib/book/reading-paths.ts +1 -1
  25. package/src/lib/chat/__tests__/engine-stream-helpers.test.ts +57 -0
  26. package/src/lib/chat/engine.ts +68 -7
  27. package/src/lib/chat/stagent-tools.ts +2 -0
  28. package/src/lib/chat/tools/runtime-tools.ts +28 -0
  29. package/src/lib/chat/tools/schedule-tools.ts +44 -1
  30. package/src/lib/chat/tools/settings-tools.ts +40 -10
  31. package/src/lib/chat/tools/workflow-tools.ts +93 -4
  32. package/src/lib/chat/types.ts +21 -0
  33. package/src/lib/data/clear.ts +3 -0
  34. package/src/lib/db/bootstrap.ts +38 -0
  35. package/src/lib/db/migrations/0022_workflow_intelligence_phase1.sql +5 -0
  36. package/src/lib/db/migrations/0023_add_execution_stats.sql +15 -0
  37. package/src/lib/db/schema.ts +41 -1
  38. package/src/lib/license/__tests__/manager.test.ts +64 -0
  39. package/src/lib/license/manager.ts +80 -25
  40. package/src/lib/schedules/__tests__/interval-parser.test.ts +87 -0
  41. package/src/lib/schedules/__tests__/prompt-analyzer.test.ts +51 -0
  42. package/src/lib/schedules/interval-parser.ts +187 -0
  43. package/src/lib/schedules/prompt-analyzer.ts +87 -0
  44. package/src/lib/schedules/scheduler.ts +179 -9
  45. package/src/lib/workflows/cost-estimator.ts +141 -0
  46. package/src/lib/workflows/engine.ts +245 -45
  47. package/src/lib/workflows/error-analysis.ts +249 -0
  48. package/src/lib/workflows/execution-stats.ts +252 -0
  49. package/src/lib/workflows/optimizer.ts +193 -0
  50. package/src/lib/workflows/types.ts +6 -0
package/dist/cli.js CHANGED
@@ -130,7 +130,8 @@ var STAGENT_TABLES = [
130
130
  "user_table_triggers",
131
131
  "user_table_row_history",
132
132
  "snapshots",
133
- "license"
133
+ "license",
134
+ "workflow_execution_stats"
134
135
  ];
135
136
  function bootstrapStagentDatabase(sqlite2) {
136
137
  sqlite2.exec(`
@@ -158,6 +159,7 @@ function bootstrapStagentDatabase(sqlite2) {
158
159
  session_id TEXT,
159
160
  resume_count INTEGER DEFAULT 0 NOT NULL,
160
161
  workflow_run_number INTEGER,
162
+ max_budget_usd REAL,
161
163
  created_at INTEGER NOT NULL,
162
164
  updated_at INTEGER NOT NULL,
163
165
  FOREIGN KEY (project_id) REFERENCES projects(id) ON UPDATE NO ACTION ON DELETE NO ACTION,
@@ -172,6 +174,7 @@ function bootstrapStagentDatabase(sqlite2) {
172
174
  definition TEXT NOT NULL,
173
175
  status TEXT DEFAULT 'draft' NOT NULL,
174
176
  run_number INTEGER DEFAULT 0 NOT NULL,
177
+ runtime_id TEXT,
175
178
  created_at INTEGER NOT NULL,
176
179
  updated_at INTEGER NOT NULL,
177
180
  FOREIGN KEY (project_id) REFERENCES projects(id) ON UPDATE NO ACTION ON DELETE NO ACTION
@@ -260,6 +263,10 @@ function bootstrapStagentDatabase(sqlite2) {
260
263
  heartbeat_budget_per_day INTEGER,
261
264
  heartbeat_spent_today INTEGER DEFAULT 0 NOT NULL,
262
265
  heartbeat_budget_reset_at INTEGER,
266
+ avg_turns_per_firing INTEGER,
267
+ last_turn_count INTEGER,
268
+ failure_streak INTEGER DEFAULT 0 NOT NULL,
269
+ last_failure_reason TEXT,
263
270
  created_at INTEGER NOT NULL,
264
271
  updated_at INTEGER NOT NULL,
265
272
  FOREIGN KEY (project_id) REFERENCES projects(id) ON UPDATE NO ACTION ON DELETE NO ACTION
@@ -593,6 +600,10 @@ function bootstrapStagentDatabase(sqlite2) {
593
600
  CREATE INDEX IF NOT EXISTS idx_channel_configs_type ON channel_configs(channel_type);
594
601
  `);
595
602
  addColumnIfMissing(`ALTER TABLE schedules ADD COLUMN delivery_channels TEXT;`);
603
+ addColumnIfMissing(`ALTER TABLE schedules ADD COLUMN avg_turns_per_firing INTEGER;`);
604
+ addColumnIfMissing(`ALTER TABLE schedules ADD COLUMN last_turn_count INTEGER;`);
605
+ addColumnIfMissing(`ALTER TABLE schedules ADD COLUMN failure_streak INTEGER DEFAULT 0 NOT NULL;`);
606
+ addColumnIfMissing(`ALTER TABLE schedules ADD COLUMN last_failure_reason TEXT;`);
596
607
  addColumnIfMissing(`ALTER TABLE channel_configs ADD COLUMN direction TEXT DEFAULT 'outbound' NOT NULL;`);
597
608
  sqlite2.exec(`
598
609
  CREATE TABLE IF NOT EXISTS channel_bindings (
@@ -905,7 +916,31 @@ function bootstrapStagentDatabase(sqlite2) {
905
916
  created_at INTEGER NOT NULL,
906
917
  updated_at INTEGER NOT NULL
907
918
  );
919
+
920
+ CREATE TABLE IF NOT EXISTS workflow_execution_stats (
921
+ id TEXT PRIMARY KEY,
922
+ pattern TEXT NOT NULL,
923
+ step_count INTEGER NOT NULL,
924
+ avg_docs_per_step REAL,
925
+ avg_cost_per_step_micros INTEGER,
926
+ avg_duration_per_step_ms INTEGER,
927
+ success_rate REAL,
928
+ common_failures TEXT,
929
+ runtime_breakdown TEXT,
930
+ sample_count INTEGER NOT NULL DEFAULT 0,
931
+ last_updated TEXT NOT NULL,
932
+ created_at TEXT NOT NULL DEFAULT (datetime('now'))
933
+ );
908
934
  `);
935
+ for (const alter of [
936
+ "ALTER TABLE tasks ADD COLUMN max_budget_usd REAL",
937
+ "ALTER TABLE workflows ADD COLUMN runtime_id TEXT"
938
+ ]) {
939
+ try {
940
+ sqlite2.exec(alter);
941
+ } catch {
942
+ }
943
+ }
909
944
  }
910
945
  function hasLegacyStagentTables(sqlite2) {
911
946
  const placeholders = STAGENT_TABLES.map(() => "?").join(", ");
@@ -0,0 +1,388 @@
1
+ # Workflow Intelligence Stack — Design Spec
2
+
3
+ **Date:** 2026-04-06
4
+ **Scope:** EXPAND — reactive fixes + proactive optimization
5
+ **Source:** `ideas/analysis-chat-issues.md` (9 cascading failures from investor research session)
6
+ **Approach:** 4 features, 2 phases (Phase 1 — Close the Gaps, then Phase 2 — Intelligence Stack)
7
+
8
+ ---
9
+
10
+ ## Context
11
+
12
+ A real user session — researching seed investors for Stagent with a 3-step workflow and 6 attached documents — surfaced 9 cascading failures across workflow execution, budget management, model routing, and chat intelligence. Every execution attempt failed. Investigation revealed that much of the required infrastructure already exists but is not wired to user-facing surfaces (dead constants, unexposed DB columns, unused function parameters). Beyond fixing these gaps, we're expanding into proactive workflow optimization: a co-pilot that generates optimal definitions, a live execution dashboard, embedded debugging, and execution-informed learning from past runs.
13
+
14
+ ---
15
+
16
+ ## Phase 1 — Close the Gaps (P1, parallel features)
17
+
18
+ ### Feature 1: Workflow Budget Governance
19
+
20
+ **Problem:** `WORKFLOW_STEP_MAX_BUDGET_USD = 5.0` is dead code. All steps use `DEFAULT_MAX_BUDGET_USD = 2.0`. Budget settings aren't writable via chat. No pre-flight cost warning exists.
21
+
22
+ **Design:**
23
+
24
+ Budget resolution precedence (highest to lowest):
25
+ 1. Per-task override (future: `workflow.definition.budget`)
26
+ 2. User setting: `budget_max_cost_per_task`
27
+ 3. Workflow constant: `WORKFLOW_STEP_MAX_BUDGET_USD` ($5)
28
+ 4. Default: `DEFAULT_MAX_BUDGET_USD` ($2)
29
+
30
+ Pre-flight estimation flow:
31
+ ```
32
+ executeWorkflow() → estimateWorkflowCost()
33
+ ├─ For each step: calculate document context size
34
+ ├─ Estimate input tokens via estimateTokens(text)
35
+ ├─ Project cost = tokens × model pricing lookup
36
+ ├─ Compare projected vs budget cap
37
+ │ OVER → store warning in workflow _state.costEstimate
38
+ │ (advisory — does not block execution)
39
+ │ UI: WorkflowStatusView shows warning banner before run
40
+ │ Chat: execute_workflow tool response includes warning text
41
+ │ OK → proceed
42
+ └─ Store estimate in workflow _state.costEstimate for UI display
43
+ ```
44
+
45
+ **Changes:**
46
+
47
+ | File | Change | Risk |
48
+ |------|--------|------|
49
+ | `src/lib/workflows/engine.ts` | Import `WORKFLOW_STEP_MAX_BUDGET_USD`, pass as `maxBudgetUsd` to `executeChildTask` | Low |
50
+ | `src/lib/agents/claude-agent.ts` | Accept optional `maxBudgetUsd` param in `executeClaudeTask`/`resumeClaudeTask`, override `DEFAULT_MAX_BUDGET_USD` | Low |
51
+ | `src/lib/chat/tools/settings-tools.ts` | Add `budget_max_cost_per_task` (positive number, max 50), `budget_max_tokens_per_task` (positive int), `budget_max_daily_cost` (positive number) to `WRITABLE_SETTINGS` | Low |
52
+ | `src/lib/workflows/engine.ts` | Add `estimateWorkflowCost()` — pre-flight estimation before execution | Medium |
53
+ | `src/lib/documents/context-builder.ts` | Export `estimateStepTokens(workflowId, stepId)` for pre-flight use | Low |
54
+ | `src/lib/agents/claude-agent.ts` | Read `budget_max_cost_per_task` setting, use as override when present | Low |
55
+
56
+ **Acceptance Criteria:**
57
+ - [ ] Workflow steps use $5 budget by default (not $2)
58
+ - [ ] `budget_max_cost_per_task` is writable via `set_settings` chat tool
59
+ - [ ] `budget_max_tokens_per_task` and `budget_max_daily_cost` are writable
60
+ - [ ] User budget setting overrides the $5 constant when set
61
+ - [ ] Pre-flight estimation calculates per-step token cost from document context
62
+ - [ ] Estimation stored in workflow `_state` for UI consumption
63
+ - [ ] Chat tool `set_settings` validates budget values (positive, max 50 for cost)
64
+
65
+ ---
66
+
67
+ ### Feature 2: Workflow Runtime & Model Configuration
68
+
69
+ **Problem:** Workflows can't specify runtime. 5 adapters exist but chat can't discover them. Model IDs are fragmented across 3 registries. Chat hallucinates about models.
70
+
71
+ **Design:**
72
+
73
+ Unified model catalog — extend `RuntimeCatalogEntry`:
74
+ ```typescript
75
+ // catalog.ts — add to RuntimeCatalogEntry interface
76
+ models: {
77
+ default: string; // e.g., "gpt-5.4"
78
+ supported: string[]; // e.g., ["gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex"]
79
+ }
80
+ ```
81
+
82
+ Runtime resolution precedence:
83
+ 1. Step agent profile `preferredRuntime` (existing)
84
+ 2. `workflow.runtimeId` (new column)
85
+ 3. System `routing.preference` setting (existing)
86
+ 4. `DEFAULT_AGENT_RUNTIME` (existing)
87
+
88
+ **Changes:**
89
+
90
+ | File | Change | Risk |
91
+ |------|--------|------|
92
+ | `src/lib/db/schema.ts` | Add `runtimeId` text column to workflows table (nullable) | Low |
93
+ | `src/lib/db/bootstrap.ts` | Add `runtime_id` to CREATE TABLE for workflows | Low |
94
+ | Migration `00XX_add_workflow_runtime.sql` | `ALTER TABLE workflows ADD COLUMN runtime_id TEXT` | Low |
95
+ | `src/lib/agents/runtime/catalog.ts` | Add `models: { default, supported[] }` to `RuntimeCatalogEntry` and each registered runtime | Low |
96
+ | `src/lib/agents/runtime/openai-direct.ts` | Replace `?? "gpt-4.1"` with `catalog.models.default` | Low |
97
+ | `src/lib/agents/runtime/anthropic-direct.ts` | Replace `?? "claude-sonnet-4-20250514"` with `catalog.models.default` | Low |
98
+ | `src/lib/chat/tools/workflow-tools.ts` | Add `runtime` param to `create_workflow` tool, save to `workflows.runtimeId` | Low |
99
+ | `src/lib/workflows/engine.ts` | Read `workflow.runtimeId`, pass to `executeTaskWithRuntime(taskId, runtimeId)` | Low |
100
+ | `src/lib/chat/tools/` (new file) | New `list_runtimes` tool — returns catalog with models, capabilities | Low |
101
+ | `src/lib/chat/tools/settings-tools.ts` | Tag each key in `get_settings` response with `writable: true/false` | Low |
102
+ | `src/lib/chat/types.ts` | Derive `CHAT_MODELS` from catalog or validate at startup | Medium |
103
+
104
+ **Acceptance Criteria:**
105
+ - [ ] `RuntimeCatalogEntry` has `models.default` and `models.supported[]`
106
+ - [ ] Adapter fallbacks use `catalog.models.default` (no hardcoded model strings)
107
+ - [ ] `workflows` table has nullable `runtime_id` column
108
+ - [ ] `create_workflow` tool accepts optional `runtime` parameter
109
+ - [ ] Workflow execution passes `runtimeId` to `executeTaskWithRuntime`
110
+ - [ ] `list_runtimes` chat tool returns all 5 runtimes with models and capabilities
111
+ - [ ] `get_settings` response tags each key with `writable: true/false`
112
+ - [ ] `CHAT_MODELS` is validated against catalog at startup (no stale model IDs)
113
+
114
+ ---
115
+
116
+ ### Feature 3: Workflow Execution Resilience
117
+
118
+ **Problem:** 4 compounding state machine failures: step state written before task creation, errors swallowed, re-execution blocked for crashed workflows, `updateWorkflowState` silent on missing workflow. Per-step document binding exists in DB but isn't exposed.
119
+
120
+ **Design:**
121
+
122
+ State machine fix — deferred writes + explicit rollback:
123
+ ```
124
+ BEFORE: write state → create task → execute (failure = stuck state)
125
+ AFTER: create task → execute → write state (failure = rollback to "failed")
126
+ ```
127
+
128
+ Re-execution from any terminal-ish state:
129
+ ```
130
+ draft → active (normal)
131
+ completed|failed → reset → draft → active (re-execute)
132
+ active (no live tasks) → reset → draft → active (crash recovery)
133
+
134
+ Reset procedure:
135
+ 1. Cancel orphaned tasks (status IN running, queued)
136
+ 2. Delete _state and _loopState from definition
137
+ 3. Reset ALL step states to "pending"
138
+ 4. Set workflow status to "draft"
139
+ 5. Atomic claim to "active"
140
+ ```
141
+
142
+ Per-step document binding:
143
+ ```
144
+ create_workflow({
145
+ steps: [
146
+ { name: "Step 1", prompt: "...", documentIds: ["doc1", "doc2"] }, // step-scoped
147
+ { name: "Step 2", prompt: "...", documentIds: ["doc3"] }
148
+ ],
149
+ documentIds: ["doc4"] // global — available to all steps
150
+ })
151
+
152
+ // DB: workflowDocumentInputs
153
+ // stepId = null → global, stepId = "step-uuid" → step-scoped
154
+ // buildPoolDocumentContext(workflowId, stepId) already handles both!
155
+ ```
156
+
157
+ **Changes:**
158
+
159
+ | File | Change | Risk |
160
+ |------|--------|------|
161
+ | `src/lib/workflows/engine.ts` | Defer step state write until after task creation + execution start | **High** — core state machine |
162
+ | `src/lib/workflows/engine.ts` | Add explicit rollback in catch: step → "failed", error propagated | Medium |
163
+ | `src/lib/workflows/engine.ts` | Make `updateWorkflowState` throw on missing workflow | Medium |
164
+ | `src/app/api/workflows/[id]/execute/route.ts` | Allow re-execution from "active" if no live tasks (query tasks table) | Medium |
165
+ | `src/app/api/workflows/[id]/execute/route.ts` | On re-execute: reset ALL step states, cancel orphaned tasks | Medium |
166
+ | `src/lib/chat/tools/workflow-tools.ts` | Accept per-step `documentIds` in step definitions, write to `workflowDocumentInputs` with stepId | Low |
167
+
168
+ **Error & Rescue Registry:**
169
+
170
+ | Error | Trigger | Rescue |
171
+ |-------|---------|--------|
172
+ | Step stuck "running" | Task creation fails after state write | Deferred state write |
173
+ | Error swallowed silently | `executeTaskWithRuntime` throws | Explicit rollback + propagate |
174
+ | 409 on crashed workflow | Workflow "active" with no live tasks | Check live task count before blocking |
175
+ | State update lost | Workflow deleted mid-execution | `updateWorkflowState` throws |
176
+ | All docs every step | No per-step binding in tool | Expose `step.documentIds` |
177
+
178
+ **Acceptance Criteria:**
179
+ - [ ] Step state is NOT written to DB until task creation succeeds
180
+ - [ ] If `executeTaskWithRuntime` throws, step state is rolled back to "failed" with error message
181
+ - [ ] `updateWorkflowState` throws a named error when workflow is missing
182
+ - [ ] Workflows in "active" state with 0 running/queued tasks can be re-executed
183
+ - [ ] Re-execution resets ALL step states to "pending" and cancels orphaned tasks
184
+ - [ ] `create_workflow` accepts per-step `documentIds` arrays
185
+ - [ ] Global `documentIds` and step-scoped `documentIds` coexist correctly
186
+ - [ ] `buildPoolDocumentContext` returns global + step-specific docs when both exist
187
+
188
+ ---
189
+
190
+ ## Phase 2 — Intelligence Stack (P2, after Phase 1 stabilizes)
191
+
192
+ ### Feature 4: Workflow Intelligence & Observability
193
+
194
+ **Dependencies:** Features 1-3 must be stable. Also depends on: `usage-metering-ledger`, `monitoring-dashboard`.
195
+
196
+ #### Sub-capability A: Workflow Optimizer Co-pilot
197
+
198
+ **UX:** DetailPane (right-rail panel on desktop, Sheet on mobile) inside WorkflowFormView. Shows real-time suggestions as user edits workflow definition. 4 suggestion types:
199
+
200
+ 1. **Document Binding** — analyzes step prompts vs document content, recommends per-step binding instead of global. Shows reduction: "6 docs × 3 steps = 18 injections → only 7 needed"
201
+ 2. **Budget Estimate** — progress bar showing projected cost vs cap, per-step breakdown
202
+ 3. **Runtime Recommendation** — based on past success rates per runtime for similar workflows
203
+ 4. **Pattern Insight** — compares pattern options with historical performance data
204
+
205
+ Each suggestion has Apply/Dismiss actions. Apply modifies the form state directly.
206
+
207
+ **Data source:** `workflowExecutionStats` aggregate table (see Sub-capability D).
208
+
209
+ **API:** New endpoint `POST /api/workflows/optimize` — accepts a JSON body with a partial workflow `definition` (required) and an optional `workflowId`, returns `{ suggestions }`.
210
+
211
+ **Changes:**
212
+
213
+ | File | Change |
214
+ |------|--------|
215
+ | `src/components/workflows/workflow-form-view.tsx` | Add optimizer DetailPane panel, wire to suggestions API |
216
+ | `src/app/api/workflows/optimize/route.ts` (new) | Optimization suggestions endpoint |
217
+ | `src/lib/workflows/optimizer.ts` (new) | `generateOptimizationSuggestions(definition, workflowId?)` — queries execution stats, generates suggestions |
218
+
219
+ #### Sub-capability B: Live Execution Dashboard
220
+
221
+ **UX:** Enhanced step cards in WorkflowStatusView during active execution. Running step expands to show:
222
+
223
+ - **4 live metric tiles** (reusing TaskBentoCell pattern): tokens (with rate), cost (with budget bar), current tool (with turn count), elapsed (with estimate)
224
+ - **Streaming partial results** — truncated agent output, auto-scrolling
225
+ - **Step progress indicator** — numbered circles with connecting lines, completed/running/pending states
226
+
227
+ **Data flow:** SSE stream from `/api/logs/stream?workflowId=X` — existing endpoint, filtered to workflow. Agent log events (`tool_start`, `content_block_delta`, `completed`) drive the metric tiles. Usage ledger entries drive the cost counter.
228
+
229
+ **Changes:**
230
+
231
+ | File | Change |
232
+ |------|--------|
233
+ | `src/components/workflows/workflow-status-view.tsx` | Add live metrics grid to running step cards |
234
+ | `src/components/workflows/step-live-metrics.tsx` (new) | 4-tile metric display with SSE subscription |
235
+ | `src/components/workflows/step-progress-bar.tsx` (new) | Step progress indicator with circle+line pattern |
236
+ | `src/lib/workflows/engine.ts` | Emit structured agent_log events for step transitions (step_started, step_completed, step_failed) |
237
+
238
+ #### Sub-capability C: Workflow-Embedded Debug Panel
239
+
240
+ **UX:** Collapsible section below step cards on failed/completed workflows. Contains:
241
+
242
+ 1. **Error summary** — red left-border card with failure description and root cause
243
+ 2. **Error timeline** — vertical timeline with dots (green=success, yellow=warning, red=failure) showing key events from agent_logs
244
+ 3. **Fix suggestions** — tiered: Quick (raise budget), Better (reduce docs), Best (restructure workflow)
245
+ 4. **Actions** — Retry Step, Re-run Workflow, View Full Logs buttons
246
+
247
+ **Data source:** `agent_logs` table filtered to workflow tasks + `_state` from workflow definition.
248
+
249
+ **Root cause analysis:** Pattern matching on error messages:
250
+ - `"Reached maximum budget"` → budget cause, suggest raise/reduce docs
251
+ - `"timeout"` / `"max turns"` → complexity cause, suggest split step
252
+ - `"connection"` / `"rate limit"` → transient cause, suggest retry
253
+
254
+ **Changes:**
255
+
256
+ | File | Change |
257
+ |------|--------|
258
+ | `src/components/workflows/workflow-debug-panel.tsx` (new) | Debug panel with timeline, suggestions, actions |
259
+ | `src/components/workflows/error-timeline.tsx` (new) | Vertical timeline component |
260
+ | `src/lib/workflows/error-analysis.ts` (new) | `analyzeWorkflowFailure(workflowId)` — builds timeline, identifies root cause, generates suggestions |
261
+ | `src/app/api/workflows/[id]/debug/route.ts` (new) | Debug data endpoint — returns timeline + analysis |
262
+
263
+ #### Sub-capability D: Execution-Informed Learning
264
+
265
+ **New table:** `workflowExecutionStats` — materialized rollup updated after each workflow run.
266
+
267
+ ```sql
268
+ CREATE TABLE IF NOT EXISTS workflow_execution_stats (
269
+ id TEXT PRIMARY KEY,
270
+ pattern TEXT NOT NULL, -- sequence, parallel, swarm, etc.
271
+ step_count INTEGER NOT NULL,
272
+ avg_docs_per_step REAL,
273
+ avg_cost_per_step_micros INTEGER,
274
+ avg_duration_per_step_ms INTEGER,
275
+ success_rate REAL, -- 0.0 to 1.0
276
+ common_failures TEXT, -- JSON: {"budget_exceeded": 4, "timeout": 1}
277
+ runtime_breakdown TEXT, -- JSON: {"claude-code": 0.92, "openai-direct": 0.71}
278
+ sample_count INTEGER NOT NULL DEFAULT 0,
279
+ last_updated TEXT NOT NULL,
280
+ created_at TEXT NOT NULL DEFAULT (datetime('now'))
281
+ );
282
+ ```
283
+
284
+ **Aggregation trigger:** After `executeWorkflow` completes or fails, call `updateExecutionStats()`:
285
+ 1. Query `usageLedger` for this workflow's run — per-step cost, tokens, runtime
286
+ 2. Query `agent_logs` for error types, tool usage, duration
287
+ 3. Upsert into `workflowExecutionStats` keyed by `(pattern, step_count)` bucket
288
+ 4. Update running averages and success rate
289
+
290
+ **Query API:** `getWorkflowOptimizationHints(pattern, stepCount, docCount)` returns:
291
+ - `budgetRecommendation` — suggested cap based on historical avg + 1 stddev
292
+ - `docBindingStrategy` — "per-step" if avg docs > 3 per step historically
293
+ - `runtimeRecommendation` — runtime with highest success rate for this pattern
294
+ - `patternComparison` — if alternative pattern has >20% better success rate, suggest it
295
+ - `similarWorkflowStats` — raw stats for display
296
+
297
+ **Changes:**
298
+
299
+ | File | Change |
300
+ |------|--------|
301
+ | `src/lib/db/schema.ts` | Add `workflowExecutionStats` table |
302
+ | `src/lib/db/bootstrap.ts` | Add bootstrap CREATE TABLE |
303
+ | Migration `0023_add_execution_stats.sql` | CREATE TABLE |
304
+ | `src/lib/workflows/execution-stats.ts` (new) | `updateExecutionStats()`, `getWorkflowOptimizationHints()` |
305
+ | `src/lib/workflows/engine.ts` | Call `updateExecutionStats()` in finally block after workflow execution |
306
+ | `src/lib/data/clear.ts` | Add `workflowExecutionStats` to clear order |
307
+
308
+ ---
309
+
310
+ ## What Already Exists (Reuse)
311
+
312
+ | Capability | Location | How We Use It |
313
+ |---|---|---|
314
+ | `WORKFLOW_STEP_MAX_BUDGET_USD = 5.0` | `task-status.ts:58` | Wire into `executeChildTask` |
315
+ | `workflowDocumentInputs.stepId` column | `schema.ts` | Expose in `create_workflow` tool |
316
+ | `buildPoolDocumentContext(wfId, stepId)` | `context-builder.ts:143-152` | Already handles step-scoped docs |
317
+ | `estimateTokens(text)` | `chat/context-builder.ts:16-18` | Reuse for pre-flight estimation |
318
+ | `executeTaskWithRuntime(taskId, runtimeId?)` | `runtime/index.ts:77` | Pass workflow's runtimeId |
319
+ | `resolveAgentRuntime(runtimeId)` | `catalog.ts:131-136` | Fallback chain already exists |
320
+ | `listRuntimeCatalog()` | `catalog.ts:138-140` | Expose via `list_runtimes` tool |
321
+ | SSE streaming | `/api/logs/stream/route.ts` | Reuse for live execution metrics |
322
+ | Sparkline, DonutRing charts | `src/components/charts/` | Reuse for live dashboard |
323
+ | TaskBentoCell metric tiles | `src/components/tasks/` | Pattern for live metric tiles |
324
+ | DetailPane right-rail | `src/components/shared/` | Container for optimizer panel |
325
+ | ErrorState component | `src/components/shared/` | Pattern for debug panel errors |
326
+ | Swarm retry pattern | `swarm-dashboard.tsx:47-68` | Pattern for step-level retry |
327
+
328
+ ## NOT in Scope
329
+
330
+ | Deferred Item | Rationale |
331
+ |---|---|
332
+ | LLM-based document summarization | Complex feature, truncation sufficient for now |
333
+ | Workflow-level budget pooling (shared across steps) | Requires SDK changes |
334
+ | Global model override settings (`openai_direct_model` writable) | Wrong abstraction — per-workflow is the right surface |
335
+ | Automatic model selection based on task complexity | Future ML feature |
336
+ | Real-time cost streaming from SDK | SDK doesn't expose streaming cost data |
337
+ | Parallel execution cost estimation | Sequential is straightforward; parallel adds combinatorial complexity |
338
+ | Cross-workflow failure correlation in Monitor | Monitor stays as global overview; debugging lives in workflow detail |
339
+
340
+ ---
341
+
342
+ ## Dependency Graph
343
+
344
+ ```
345
+ Phase 1 (P1, parallel):
346
+ Feature 1: Workflow Budget Governance
347
+ ├─ depends: spend-budget-guardrails (completed)
348
+ └─ enables: Feature 4 (cost visibility in optimizer)
349
+
350
+ Feature 2: Workflow Runtime & Model Configuration
351
+ ├─ depends: provider-runtime-abstraction (completed)
352
+ └─ enables: Feature 4 (runtime catalog for optimizer)
353
+
354
+ Feature 3: Workflow Execution Resilience
355
+ ├─ depends: workflow-engine (completed)
356
+ └─ enables: Feature 4 (reliable metrics + error timelines)
357
+
358
+ Phase 2 (P2, sequential sub-capabilities):
359
+ Feature 4: Workflow Intelligence & Observability
360
+ ├─ 4D: Execution Learning (table + aggregation — no UI dependency)
361
+ ├─ 4B: Live Execution Dashboard (SSE metrics — needs F3 state fixes)
362
+ ├─ 4C: Embedded Debug Panel (error analysis — needs F1 budget + F3 state)
363
+ └─ 4A: Optimizer Co-pilot (needs 4D stats + F1 budget + F2 runtime catalog)
364
+ ```
365
+
366
+ ## Verification Plan
367
+
368
+ **Feature 1 — Budget:**
369
+ - Create a workflow with >$2 document context → should use $5 cap, not fail
370
+ - Set `budget_max_cost_per_task` to 10 via chat → verify next execution uses $10
371
+ - Pre-flight estimation should report per-step cost breakdown before execution
372
+
373
+ **Feature 2 — Runtime:**
374
+ - Create workflow with `runtime: "openai-direct"` → verify tasks use OpenAI adapter
375
+ - Call `list_runtimes` in chat → should return all 5 runtimes with models
376
+ - `get_settings` response should tag each key with `writable: true/false`
377
+
378
+ **Feature 3 — Resilience:**
379
+ - Force-fail a workflow step → verify step rolls back to "failed" (not stuck "running")
380
+ - Re-execute a failed workflow → all steps should reset to "pending"
381
+ - Re-execute a crashed "active" workflow (no live tasks) → should succeed
382
+ - Create workflow with per-step documentIds → verify each step receives only its docs
383
+
384
+ **Feature 4 — Intelligence:**
385
+ - Run 3+ workflows → check `workflowExecutionStats` table has aggregated data
386
+ - Open WorkflowFormView → optimizer panel should show suggestions based on history
387
+ - During execution → live metrics should update in real-time (tokens, cost, tool)
388
+ - After failure → debug panel should show error timeline and fix suggestions
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "stagent",
3
- "version": "0.9.3",
3
+ "version": "0.9.6",
4
4
  "description": "AI Business Operating System — run your business with AI agents. Local-first, multi-provider, governed.",
5
5
  "keywords": [
6
6
  "ai",
@@ -9,7 +9,8 @@ import type { LicenseTier } from "@/lib/license/tier-limits";
9
9
  * GET /api/license — current license status, feature flags, and limits.
10
10
  */
11
11
  export async function GET() {
12
- const status = licenseManager.getStatus();
12
+ // Read from DB directly — avoids stale singleton cache in Turbopack dev mode
13
+ const status = licenseManager.getStatusFromDb();
13
14
  const tier = status.tier;
14
15
 
15
16
  // Build feature access map
@@ -33,7 +34,7 @@ export async function GET() {
33
34
  expiresAt: status.expiresAt?.toISOString() ?? null,
34
35
  lastValidatedAt: status.lastValidatedAt?.toISOString() ?? null,
35
36
  gracePeriodExpiresAt: status.gracePeriodExpiresAt?.toISOString() ?? null,
36
- isPremium: licenseManager.isPremium(),
37
+ isPremium: tier !== "community",
37
38
  features,
38
39
  limits,
39
40
  });
@@ -0,0 +1,18 @@
1
+ import { NextRequest, NextResponse } from "next/server";
2
+ import { analyzeWorkflowFailure } from "@/lib/workflows/error-analysis";
3
+
4
+ export async function GET(
5
+ _req: NextRequest,
6
+ { params }: { params: Promise<{ id: string }> }
7
+ ) {
8
+ const { id } = await params;
9
+ try {
10
+ const analysis = await analyzeWorkflowFailure(id);
11
+ return NextResponse.json(analysis);
12
+ } catch (error) {
13
+ return NextResponse.json(
14
+ { error: error instanceof Error ? error.message : "Analysis failed" },
15
+ { status: 500 }
16
+ );
17
+ }
18
+ }
@@ -1,7 +1,7 @@
1
1
  import { NextRequest, NextResponse } from "next/server";
2
2
  import { db } from "@/lib/db";
3
- import { workflows } from "@/lib/db/schema";
4
- import { eq, and, sql } from "drizzle-orm";
3
+ import { workflows, tasks } from "@/lib/db/schema";
4
+ import { eq, and, sql, inArray } from "drizzle-orm";
5
5
  import { executeWorkflow } from "@/lib/workflows/engine";
6
6
  import type { WorkflowDefinition } from "@/lib/workflows/types";
7
7
 
@@ -20,16 +20,46 @@ export async function POST(
20
20
  return NextResponse.json({ error: "Workflow not found" }, { status: 404 });
21
21
  }
22
22
 
23
+ // Check if genuinely running vs crashed "active" state
23
24
  if (workflow.status === "active") {
24
- return NextResponse.json(
25
- { error: "Workflow is already running" },
26
- { status: 409 }
27
- );
25
+ const liveTasks = await db
26
+ .select({ id: tasks.id })
27
+ .from(tasks)
28
+ .where(
29
+ and(
30
+ eq(tasks.workflowId, id),
31
+ inArray(tasks.status, ["running", "queued"])
32
+ )
33
+ );
34
+
35
+ if (liveTasks.length > 0) {
36
+ return NextResponse.json(
37
+ { error: "Workflow is already running" },
38
+ { status: 409 }
39
+ );
40
+ }
41
+ // Crashed "active" with 0 live tasks — fall through to re-execution
28
42
  }
29
43
 
30
- // Re-run: reset state for completed/failed workflows
31
- if (workflow.status === "completed" || workflow.status === "failed") {
44
+ // Re-run: comprehensive reset for completed, failed, or crashed-active workflows
45
+ if (
46
+ workflow.status === "completed" ||
47
+ workflow.status === "failed" ||
48
+ workflow.status === "active" // crashed recovery
49
+ ) {
32
50
  try {
51
+ // 1. Cancel orphaned tasks (running/queued from previous execution)
52
+ await db
53
+ .update(tasks)
54
+ .set({ status: "cancelled", updatedAt: new Date() })
55
+ .where(
56
+ and(
57
+ eq(tasks.workflowId, id),
58
+ inArray(tasks.status, ["running", "queued"])
59
+ )
60
+ );
61
+
62
+ // 2. Clear execution state from definition
33
63
  const def = JSON.parse(workflow.definition) as WorkflowDefinition & {
34
64
  _state?: unknown;
35
65
  _loopState?: unknown;
@@ -37,6 +67,7 @@ export async function POST(
37
67
  delete def._state;
38
68
  delete def._loopState;
39
69
 
70
+ // 3. Reset to draft
40
71
  await db
41
72
  .update(workflows)
42
73
  .set({
@@ -0,0 +1,30 @@
1
+ import { NextRequest, NextResponse } from "next/server";
2
+ import { generateOptimizationSuggestions } from "@/lib/workflows/optimizer";
3
+
4
+ export async function POST(req: NextRequest) {
5
+ try {
6
+ const body = await req.json();
7
+ const { definition, workflowId } = body;
8
+
9
+ if (!definition) {
10
+ return NextResponse.json(
11
+ { error: "definition is required" },
12
+ { status: 400 }
13
+ );
14
+ }
15
+
16
+ const suggestions = await generateOptimizationSuggestions(
17
+ definition,
18
+ workflowId
19
+ );
20
+ return NextResponse.json({ suggestions });
21
+ } catch (error) {
22
+ return NextResponse.json(
23
+ {
24
+ error:
25
+ error instanceof Error ? error.message : "Optimization failed",
26
+ },
27
+ { status: 500 }
28
+ );
29
+ }
30
+ }
@@ -1,4 +1,5 @@
1
1
  import type { Metadata } from "next";
2
+ import Script from "next/script";
2
3
  import { Inter, JetBrains_Mono } from "next/font/google";
3
4
  import { SidebarProvider, SidebarInset } from "@/components/ui/sidebar";
4
5
  import { TooltipProvider } from "@/components/ui/tooltip";
@@ -66,9 +67,10 @@ export default function RootLayout({
66
67
  return (
67
68
  <html lang="en" suppressHydrationWarning>
68
69
  <head>
69
- {/* Static CSS/JS — no user input, safe from XSS */}
70
+ {/* Static CSS — no user input, safe from XSS */}
70
71
  <style dangerouslySetInnerHTML={{ __html: CRITICAL_THEME_CSS }} />
71
- <script dangerouslySetInnerHTML={{ __html: THEME_INIT_SCRIPT }} />
72
+ {/* Theme bootstrap — runs before paint to prevent FOUC */}
73
+ <Script id="theme-init" strategy="beforeInteractive">{THEME_INIT_SCRIPT}</Script>
72
74
  </head>
73
75
  <body
74
76
  className={`${inter.variable} ${jetbrainsMono.variable} font-sans antialiased bg-background text-foreground`}