stagent 0.9.3 → 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/dist/cli.js +36 -1
  2. package/docs/superpowers/specs/2026-04-06-workflow-intelligence-stack-design.md +388 -0
  3. package/package.json +1 -1
  4. package/src/app/api/license/route.ts +3 -2
  5. package/src/app/api/workflows/[id]/debug/route.ts +18 -0
  6. package/src/app/api/workflows/[id]/execute/route.ts +39 -8
  7. package/src/app/api/workflows/optimize/route.ts +30 -0
  8. package/src/app/layout.tsx +4 -2
  9. package/src/components/chat/chat-message-markdown.tsx +78 -3
  10. package/src/components/chat/chat-message.tsx +12 -4
  11. package/src/components/settings/cloud-account-section.tsx +14 -12
  12. package/src/components/workflows/error-timeline.tsx +83 -0
  13. package/src/components/workflows/step-live-metrics.tsx +182 -0
  14. package/src/components/workflows/step-progress-bar.tsx +77 -0
  15. package/src/components/workflows/workflow-debug-panel.tsx +192 -0
  16. package/src/components/workflows/workflow-optimizer-panel.tsx +227 -0
  17. package/src/lib/agents/claude-agent.ts +4 -4
  18. package/src/lib/agents/runtime/anthropic-direct.ts +3 -3
  19. package/src/lib/agents/runtime/catalog.ts +30 -1
  20. package/src/lib/agents/runtime/openai-direct.ts +3 -3
  21. package/src/lib/billing/products.ts +6 -6
  22. package/src/lib/book/chapter-mapping.ts +6 -0
  23. package/src/lib/book/content.ts +10 -0
  24. package/src/lib/book/reading-paths.ts +1 -1
  25. package/src/lib/chat/__tests__/engine-stream-helpers.test.ts +57 -0
  26. package/src/lib/chat/engine.ts +68 -7
  27. package/src/lib/chat/stagent-tools.ts +2 -0
  28. package/src/lib/chat/tools/runtime-tools.ts +28 -0
  29. package/src/lib/chat/tools/schedule-tools.ts +44 -1
  30. package/src/lib/chat/tools/settings-tools.ts +40 -10
  31. package/src/lib/chat/tools/workflow-tools.ts +93 -4
  32. package/src/lib/chat/types.ts +21 -0
  33. package/src/lib/data/clear.ts +3 -0
  34. package/src/lib/db/bootstrap.ts +38 -0
  35. package/src/lib/db/migrations/0022_workflow_intelligence_phase1.sql +5 -0
  36. package/src/lib/db/migrations/0023_add_execution_stats.sql +15 -0
  37. package/src/lib/db/schema.ts +41 -1
  38. package/src/lib/license/__tests__/manager.test.ts +64 -0
  39. package/src/lib/license/manager.ts +80 -25
  40. package/src/lib/schedules/__tests__/interval-parser.test.ts +87 -0
  41. package/src/lib/schedules/__tests__/prompt-analyzer.test.ts +51 -0
  42. package/src/lib/schedules/interval-parser.ts +187 -0
  43. package/src/lib/schedules/prompt-analyzer.ts +87 -0
  44. package/src/lib/schedules/scheduler.ts +179 -9
  45. package/src/lib/workflows/cost-estimator.ts +141 -0
  46. package/src/lib/workflows/engine.ts +245 -45
  47. package/src/lib/workflows/error-analysis.ts +249 -0
  48. package/src/lib/workflows/execution-stats.ts +252 -0
  49. package/src/lib/workflows/optimizer.ts +193 -0
  50. package/src/lib/workflows/types.ts +6 -0
@@ -13,9 +13,11 @@
13
13
 
14
14
  import { db } from "@/lib/db";
15
15
  import { schedules, tasks, agentLogs, scheduleDocumentInputs, documents } from "@/lib/db/schema";
16
- import { eq, and, lte, inArray, sql } from "drizzle-orm";
16
+ import { eq, and, lte, inArray, sql, asc } from "drizzle-orm";
17
17
  import { computeNextFireTime } from "./interval-parser";
18
18
  import { executeTaskWithRuntime } from "@/lib/agents/runtime";
19
+ import { getSetting } from "@/lib/settings/helpers";
20
+ import { SETTINGS_KEYS } from "@/lib/constants/settings";
19
21
  import { checkActiveHours } from "./active-hours";
20
22
  import {
21
23
  buildHeartbeatPrompt,
@@ -29,6 +31,158 @@ import { processHandoffs } from "@/lib/agents/handoff/bus";
29
31
  const POLL_INTERVAL_MS = 60_000; // 60 seconds
30
32
 
31
33
  let intervalHandle: ReturnType<typeof setInterval> | null = null;
34
+ let draining = false;
35
+
36
/**
 * Drain queued schedule/heartbeat tasks after a firing completes.
 *
 * Background: schedule firings used to be fire-and-forget. When several
 * schedules collided on the same minute (e.g. three schedules that all fire
 * every 30 minutes), one task would execute and the others would sit in
 * "queued" until a later poll cycle. This hook walks the queue immediately
 * so collisions resolve quickly.
 *
 * Sequential by design: tasks are executed one at a time. The module-level
 * `draining` flag ensures only one drain loop runs even if multiple firings
 * finish in close succession; a concurrent call returns immediately.
 *
 * Each task is executed at most once per drain. If a task is still "queued"
 * after `executeTaskWithRuntime` returns or throws, it is skipped for the
 * rest of this drain instead of being re-selected forever.
 *
 * @returns Resolves once no un-attempted queued schedule/heartbeat task remains.
 */
export async function drainQueue(): Promise<void> {
  if (draining) return;
  draining = true;

  // Ids already executed during this drain. Guards against a hot infinite
  // loop when a task never leaves the "queued" state.
  const attempted = new Set<string>();

  try {
    while (true) {
      // Fetch one more row than we have attempted ids: at most
      // `attempted.size` of the returned rows can be already-attempted, so
      // the oldest un-attempted task (if any) is guaranteed to be included.
      const candidates = await db
        .select({ id: tasks.id, scheduleId: tasks.scheduleId })
        .from(tasks)
        .where(
          and(
            eq(tasks.status, "queued"),
            inArray(tasks.sourceType, ["scheduled", "heartbeat"])
          )
        )
        .orderBy(asc(tasks.createdAt))
        .limit(attempted.size + 1);

      const nextQueued = candidates.find((row) => !attempted.has(row.id));
      if (!nextQueued) return;
      attempted.add(nextQueued.id);

      console.log(`[scheduler] draining queue → executing task ${nextQueued.id}`);
      try {
        await executeTaskWithRuntime(nextQueued.id);
      } catch (err) {
        // Best-effort: one failing task must not stop the rest of the drain.
        console.error(`[scheduler] drain task ${nextQueued.id} failed:`, err);
      }

      // Record health metrics for the schedule that owns this task (if any).
      if (nextQueued.scheduleId) {
        try {
          await recordFiringMetrics(nextQueued.scheduleId, nextQueued.id);
        } catch (err) {
          console.error(`[scheduler] metrics recording failed for ${nextQueued.id}:`, err);
        }
      }
    }
  } finally {
    // Always release the guard so a later firing can start a new drain.
    draining = false;
  }
}
95
+
96
/**
 * Produce the turn-budget preamble that is prepended to the description of
 * schedule-spawned tasks.
 *
 * The limit is read from the `SETTINGS_KEYS.MAX_TURNS` setting; a missing,
 * non-numeric, or zero value falls back to 50. The text also gives batching
 * guidance intended to discourage per-item tool-call loops.
 *
 * @returns The header text, terminated by a blank line.
 */
async function buildTurnBudgetHeader(): Promise<string> {
  const fallbackTurns = 50;
  const stored = await getSetting(SETTINGS_KEYS.MAX_TURNS);

  let maxTurns = fallbackTurns;
  if (stored) {
    const parsed = Number.parseInt(stored, 10);
    // NaN and 0 are both falsy, so either one keeps the fallback.
    if (parsed) {
      maxTurns = parsed;
    }
  }

  const guidance =
    `TURN BUDGET: You have ${maxTurns} turns maximum. Plan accordingly.` + "\n" +
    `IMPORTANT: Batch operations to minimize turns.` + "\n" +
    `- Use ONE web search with multiple keywords instead of per-item searches` + "\n" +
    `- Read multiple tables in a single turn when possible` + "\n" +
    `- Do NOT loop through items with individual tool calls` + "\n";

  // One extra newline leaves a blank line between the header and the prompt.
  return guidance + "\n";
}
115
+
116
/**
 * Classify why a task failed by inspecting its result text.
 *
 * Used by recordFiringMetrics to surface a coarse cause without needing
 * additional columns on the tasks table. Matching is case-insensitive and
 * checks are applied in this order:
 *
 * 1. `turn_limit_exceeded` — mentions "turn"/"turns" as its own token (also
 *    `max_turns` / `maxTurns`) together with "limit" or "max"
 * 2. `timeout` — contains "timeout" or "timed out"
 * 3. `budget_exceeded` — contains "budget"
 * 4. `error` — anything else
 *
 * @param result - The task's result text, or null when none was recorded.
 * @returns One of the reason codes above, or `"unknown"` for a null/empty result.
 */
function detectFailureReason(result: string | null): string {
  if (!result) return "unknown";
  const lower = result.toLowerCase();

  // Match "turn"/"turns" as a token rather than as a substring, so words
  // such as "return" or "returned" (common next to "rate limit") are not
  // mistaken for a turn-limit failure. The `max` alternative keeps the
  // lowercased camelCase form "maxturns" matching.
  const mentionsTurns = /(?:^|[^a-z]|max)turns?(?![a-z])/.test(lower);
  if (mentionsTurns && (lower.includes("limit") || lower.includes("max"))) {
    return "turn_limit_exceeded";
  }
  if (lower.includes("timeout") || lower.includes("timed out")) {
    return "timeout";
  }
  if (lower.includes("budget")) return "budget_exceeded";
  return "error";
}
133
+
134
/**
 * Record per-firing health metrics on a schedule and auto-pause it after
 * 3 consecutive failures.
 *
 * The turn count is taken as the number of `agentLogs` rows for the task.
 * The running average is an exponential moving average (70% previous value,
 * 30% this firing), so recent firings weigh more than old ones; the first
 * firing seeds the average with its own turn count.
 *
 * The schedule's `status` column is written only when auto-pausing. Writing
 * back the status read earlier in this function could overwrite a status
 * change made between that read and the update.
 *
 * @param scheduleId - Schedule whose metrics are updated.
 * @param taskId - Task produced by the firing being recorded.
 * @returns Resolves after the update; does nothing if the task or schedule
 *   row cannot be found.
 */
export async function recordFiringMetrics(
  scheduleId: string,
  taskId: string
): Promise<void> {
  const [task] = await db
    .select({ status: tasks.status, result: tasks.result })
    .from(tasks)
    .where(eq(tasks.id, taskId));
  if (!task) return;

  const [schedule] = await db
    .select()
    .from(schedules)
    .where(eq(schedules.id, scheduleId));
  if (!schedule) return;

  const turnCountResult = await db
    .select({ count: sql<number>`count(*)` })
    .from(agentLogs)
    .where(eq(agentLogs.taskId, taskId));
  const turns = Number(turnCountResult[0]?.count ?? 0);

  // Exponential moving average; a null previous average means this is the
  // first recorded firing, so the average starts at the current turn count.
  const prevAvg = schedule.avgTurnsPerFiring ?? turns;
  const newAvg = Math.round(prevAvg * 0.7 + turns * 0.3);

  const isFailure = task.status === "failed";
  // Any non-failed firing resets the streak.
  const newStreak = isFailure ? (schedule.failureStreak ?? 0) + 1 : 0;
  const shouldAutoPause = isFailure && newStreak >= 3 && schedule.status === "active";

  await db
    .update(schedules)
    .set({
      lastTurnCount: turns,
      avgTurnsPerFiring: newAvg,
      failureStreak: newStreak,
      lastFailureReason: isFailure ? detectFailureReason(task.result) : null,
      // Only touch status when pausing; otherwise leave the column as-is so
      // a concurrent status change is not reverted to the stale value.
      ...(shouldAutoPause ? { status: "paused" as const } : {}),
      updatedAt: new Date(),
    })
    .where(eq(schedules.id, scheduleId));

  if (shouldAutoPause) {
    console.warn(
      `[scheduler] auto-paused "${schedule.name}" after 3 consecutive failures`
    );
  }
}
32
186
 
33
187
  /**
34
188
  * Start the scheduler singleton. Safe to call multiple times — subsequent
@@ -162,13 +316,17 @@ async function fireSchedule(
162
316
  const taskId = crypto.randomUUID();
163
317
  const firingNumber = schedule.firingCount + 1;
164
318
 
319
+ // Prepend turn-budget guidance so the agent can plan batched tool calls
320
+ // instead of per-item loops that exhaust maxTurns mid-task.
321
+ const budgetHeader = await buildTurnBudgetHeader();
322
+
165
323
  await db.insert(tasks).values({
166
324
  id: taskId,
167
325
  projectId: schedule.projectId,
168
326
  workflowId: null,
169
327
  scheduleId: schedule.id,
170
328
  title: `${schedule.name} — firing #${firingNumber}`,
171
- description: schedule.prompt,
329
+ description: budgetHeader + schedule.prompt,
172
330
  status: "queued",
173
331
  assignedAgent: schedule.assignedAgent,
174
332
  agentProfile: schedule.agentProfile,
@@ -220,13 +378,19 @@ async function fireSchedule(
220
378
  })
221
379
  .where(eq(schedules.id, schedule.id));
222
380
 
223
- // Fire-and-forget task execution
224
- executeTaskWithRuntime(taskId).catch((err) => {
225
- console.error(
226
- `[scheduler] task execution failed for schedule ${schedule.id}, task ${taskId}:`,
227
- err
228
- );
229
- });
381
+ // Drain-aware task execution. We still don't await in fireSchedule (the
382
+ // poll loop must keep claiming other due schedules), but on completion we
383
+ // record metrics and trigger drainQueue() so any tasks queued by colliding
384
+ // schedules execute immediately instead of waiting for the next poll.
385
+ executeTaskWithRuntime(taskId)
386
+ .catch((err) => {
387
+ console.error(
388
+ `[scheduler] task execution failed for schedule ${schedule.id}, task ${taskId}:`,
389
+ err
390
+ );
391
+ })
392
+ .then(() => recordFiringMetrics(schedule.id, taskId).catch(() => {}))
393
+ .then(() => drainQueue().catch(() => {}));
230
394
 
231
395
  console.log(
232
396
  `[scheduler] fired schedule "${schedule.name}" → task ${taskId} (firing #${firingNumber})`
@@ -372,6 +536,12 @@ async function fireHeartbeat(
372
536
  console.error(`[scheduler] heartbeat evaluation failed for "${schedule.name}":`, err);
373
537
  }
374
538
 
539
+ // Record health metrics and trigger drain (fire-and-forget — we still need
540
+ // to finish heartbeat post-processing below before returning).
541
+ recordFiringMetrics(schedule.id, evalTaskId)
542
+ .catch(() => {})
543
+ .then(() => drainQueue().catch(() => {}));
544
+
375
545
  // 6. Read the completed task result
376
546
  const [evalTask] = await db
377
547
  .select({ result: tasks.result, status: tasks.status })
@@ -0,0 +1,141 @@
1
+ import { db } from "@/lib/db";
2
+ import { workflows } from "@/lib/db/schema";
3
+ import { eq } from "drizzle-orm";
4
+ import { buildPoolDocumentContext } from "@/lib/documents/context-builder";
5
+ import { getSetting } from "@/lib/settings/helpers";
6
+ import { WORKFLOW_STEP_MAX_BUDGET_USD } from "@/lib/constants/task-status";
7
+ import type { WorkflowDefinition, WorkflowStep } from "./types";
8
+
9
/**
 * Approximate the token count of a piece of text using the rule of thumb of
 * roughly four characters per token, rounded up.
 */
function estimateTokens(text: string): number {
  const charsPerToken = 4;
  const charCount = text.length;
  return Math.ceil(charCount / charsPerToken);
}
13
+
14
/**
 * Approximate USD price per one million input tokens, keyed by provider
 * tier. These are deliberately conservative estimates, not exact pricing.
 */
const COST_PER_MILLION_INPUT_TOKENS: Record<string, number> = {
  // Haiku / GPT-mini tier
  fast: 0.25,
  // Sonnet / GPT-4.1 tier
  balanced: 3.0,
  // Opus / GPT-5.4 tier
  best: 15.0,
  // Conservative middle estimate, used when no tier is specified
  default: 3.0,
};
21
+
22
/** Pre-flight cost estimate for a single workflow step. */
export interface StepCostEstimate {
  /** Id of the step this estimate belongs to. */
  stepId: string;
  /** Display name of the step. */
  name: string;
  /** Estimated input tokens: step prompt plus any injected document context. */
  estimatedInputTokens: number;
  /** Estimated cost in USD, rounded to four decimal places. */
  estimatedCostUsd: number;
  /** Effective budget cap in USD that applies to this step. */
  budgetCapUsd: number;
}
29
+
30
/** Aggregated pre-flight cost estimate for a whole workflow. */
export interface WorkflowCostEstimate {
  /** Per-step estimates, in the order the steps appear in the definition. */
  steps: StepCostEstimate[];
  /** Sum of the per-step estimated costs in USD, rounded to four decimal places. */
  totalEstimatedCostUsd: number;
  /** Sum of the per-step budget caps in USD. */
  totalBudgetCapUsd: number;
  /** True when the total estimated cost exceeds the combined budget cap. */
  overBudget: boolean;
  /** Human-readable advisory messages collected while estimating. */
  warnings: string[];
}
37
+
38
/**
 * Determine the budget cap (USD) that applies to a workflow step.
 *
 * The first source that yields a positive number wins:
 *   1. `step.budgetUsd` — per-step override
 *   2. The `budget_max_cost_per_task` user setting
 *   3. `WORKFLOW_STEP_MAX_BUDGET_USD` — built-in fallback
 *
 * @param step - Optional step whose override should be honoured.
 * @returns The effective budget cap in USD.
 */
export async function resolveStepBudget(step?: WorkflowStep): Promise<number> {
  // 1. Per-step override, ignored when absent, zero, or negative.
  const stepOverride = step?.budgetUsd;
  if (stepOverride && stepOverride > 0) {
    return stepOverride;
  }

  // 2. User-configured cap; non-numeric or non-positive values are ignored.
  const configured = await getSetting("budget_max_cost_per_task");
  if (configured) {
    const configuredUsd = parseFloat(configured);
    // NaN fails this comparison, so no separate NaN check is needed.
    if (configuredUsd > 0) {
      return configuredUsd;
    }
  }

  // 3. Built-in workflow-step cap.
  return WORKFLOW_STEP_MAX_BUDGET_USD;
}
63
+
64
/**
 * Pre-flight cost estimation for a workflow.
 *
 * For every step, estimates input tokens from the step prompt plus the
 * document context that would be injected, prices them at the default rate,
 * and adds roughly 50% on top for output. The result is advisory only and
 * does NOT block execution.
 *
 * A missing workflow, a definition that is not valid JSON, or a definition
 * without a `steps` array does not throw: an empty estimate carrying an
 * explanatory warning is returned instead. Errors from the database or from
 * the document-context builder still propagate.
 *
 * @param workflowId - Id of the workflow to estimate.
 * @returns Per-step and total estimates, plus advisory warnings.
 */
export async function estimateWorkflowCost(
  workflowId: string
): Promise<WorkflowCostEstimate> {
  // Shared shape for every "nothing to estimate" outcome.
  const emptyEstimate = (warning: string): WorkflowCostEstimate => ({
    steps: [],
    totalEstimatedCostUsd: 0,
    totalBudgetCapUsd: 0,
    overBudget: false,
    warnings: [warning],
  });

  const [workflow] = await db
    .select()
    .from(workflows)
    .where(eq(workflows.id, workflowId));

  if (!workflow) {
    return emptyEstimate("Workflow not found");
  }

  // The stored definition is untrusted text: parse defensively so an
  // advisory estimate never fails on malformed data.
  let parsed: unknown;
  try {
    parsed = JSON.parse(workflow.definition);
  } catch {
    return emptyEstimate("Workflow definition is not valid JSON — cost could not be estimated");
  }

  const steps = (parsed as Partial<WorkflowDefinition> | null)?.steps;
  if (!Array.isArray(steps)) {
    return emptyEstimate("Workflow definition has no steps — cost could not be estimated");
  }

  const warnings: string[] = [];
  const stepEstimates: StepCostEstimate[] = [];
  let totalCost = 0;
  let totalBudget = 0;

  // Loop-invariant: price per single input token at the default rate.
  const costPerToken = COST_PER_MILLION_INPUT_TOKENS.default / 1_000_000;

  for (const step of steps) {
    // Document context that would be injected for this step.
    const poolContext = await buildPoolDocumentContext(workflowId, step.id);
    const promptTokens = estimateTokens(step.prompt ?? "");
    const docTokens = poolContext ? estimateTokens(poolContext) : 0;
    const totalInputTokens = promptTokens + docTokens;

    // Input + estimated output (~50% of input).
    const estimatedCost = totalInputTokens * costPerToken * 1.5;

    const budgetCap = await resolveStepBudget(step);

    stepEstimates.push({
      stepId: step.id,
      name: step.name,
      estimatedInputTokens: totalInputTokens,
      estimatedCostUsd: Math.round(estimatedCost * 10000) / 10000,
      budgetCapUsd: budgetCap,
    });

    // Totals accumulate unrounded values; rounding happens once at the end.
    totalCost += estimatedCost;
    totalBudget += budgetCap;

    // Warn once the estimate passes 80% of the step's cap.
    if (estimatedCost > budgetCap * 0.8) {
      warnings.push(
        `Step "${step.name}" estimated at $${estimatedCost.toFixed(4)} — close to or over the $${budgetCap} cap`
      );
    }
  }

  const overBudget = totalCost > totalBudget;
  if (overBudget) {
    warnings.push(
      `Total estimated cost $${totalCost.toFixed(4)} exceeds combined budget cap $${totalBudget.toFixed(2)}`
    );
  }

  return {
    steps: stepEstimates,
    totalEstimatedCostUsd: Math.round(totalCost * 10000) / 10000,
    totalBudgetCapUsd: totalBudget,
    overBudget,
    warnings,
  };
}