npm - stagent - Versions diffs - 0.9.3 → 0.9.6 - Mend

stagent 0.9.3 → 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

package/dist/cli.js +36 -1
package/docs/superpowers/specs/2026-04-06-workflow-intelligence-stack-design.md +388 -0
package/package.json +1 -1
package/src/app/api/license/route.ts +3 -2
package/src/app/api/workflows/[id]/debug/route.ts +18 -0
package/src/app/api/workflows/[id]/execute/route.ts +39 -8
package/src/app/api/workflows/optimize/route.ts +30 -0
package/src/app/layout.tsx +4 -2
package/src/components/chat/chat-message-markdown.tsx +78 -3
package/src/components/chat/chat-message.tsx +12 -4
package/src/components/settings/cloud-account-section.tsx +14 -12
package/src/components/workflows/error-timeline.tsx +83 -0
package/src/components/workflows/step-live-metrics.tsx +182 -0
package/src/components/workflows/step-progress-bar.tsx +77 -0
package/src/components/workflows/workflow-debug-panel.tsx +192 -0
package/src/components/workflows/workflow-optimizer-panel.tsx +227 -0
package/src/lib/agents/claude-agent.ts +4 -4
package/src/lib/agents/runtime/anthropic-direct.ts +3 -3
package/src/lib/agents/runtime/catalog.ts +30 -1
package/src/lib/agents/runtime/openai-direct.ts +3 -3
package/src/lib/billing/products.ts +6 -6
package/src/lib/book/chapter-mapping.ts +6 -0
package/src/lib/book/content.ts +10 -0
package/src/lib/book/reading-paths.ts +1 -1
package/src/lib/chat/__tests__/engine-stream-helpers.test.ts +57 -0
package/src/lib/chat/engine.ts +68 -7
package/src/lib/chat/stagent-tools.ts +2 -0
package/src/lib/chat/tools/runtime-tools.ts +28 -0
package/src/lib/chat/tools/schedule-tools.ts +44 -1
package/src/lib/chat/tools/settings-tools.ts +40 -10
package/src/lib/chat/tools/workflow-tools.ts +93 -4
package/src/lib/chat/types.ts +21 -0
package/src/lib/data/clear.ts +3 -0
package/src/lib/db/bootstrap.ts +38 -0
package/src/lib/db/migrations/0022_workflow_intelligence_phase1.sql +5 -0
package/src/lib/db/migrations/0023_add_execution_stats.sql +15 -0
package/src/lib/db/schema.ts +41 -1
package/src/lib/license/__tests__/manager.test.ts +64 -0
package/src/lib/license/manager.ts +80 -25
package/src/lib/schedules/__tests__/interval-parser.test.ts +87 -0
package/src/lib/schedules/__tests__/prompt-analyzer.test.ts +51 -0
package/src/lib/schedules/interval-parser.ts +187 -0
package/src/lib/schedules/prompt-analyzer.ts +87 -0
package/src/lib/schedules/scheduler.ts +179 -9
package/src/lib/workflows/cost-estimator.ts +141 -0
package/src/lib/workflows/engine.ts +245 -45
package/src/lib/workflows/error-analysis.ts +249 -0
package/src/lib/workflows/execution-stats.ts +252 -0
package/src/lib/workflows/optimizer.ts +193 -0
package/src/lib/workflows/types.ts +6 -0

package/src/lib/schedules/scheduler.ts CHANGED Viewed

@@ -13,9 +13,11 @@
 import { db } from "@/lib/db";
 import { schedules, tasks, agentLogs, scheduleDocumentInputs, documents } from "@/lib/db/schema";
-import { eq, and, lte, inArray, sql } from "drizzle-orm";
+import { eq, and, lte, inArray, sql, asc } from "drizzle-orm";
 import { computeNextFireTime } from "./interval-parser";
 import { executeTaskWithRuntime } from "@/lib/agents/runtime";
+import { getSetting } from "@/lib/settings/helpers";
+import { SETTINGS_KEYS } from "@/lib/constants/settings";
 import { checkActiveHours } from "./active-hours";
 import {
   buildHeartbeatPrompt,
@@ -29,6 +31,158 @@ import { processHandoffs } from "@/lib/agents/handoff/bus";
 const POLL_INTERVAL_MS = 60_000; // 60 seconds
 let intervalHandle: ReturnType<typeof setInterval> | null = null;
+let draining = false;
+/**
+ * Drain queued schedule/heartbeat tasks after a firing completes.
+ *
+ * Background: schedule firings used to be fire-and-forget. When multiple
+ * schedules collided on the same minute (e.g. three every-30-minutes cron
+ * schedules all firing at :00), one task would execute and the others would
+ * sit in "queued" until the next poll cycle 30+ minutes later. This drain hook
+ * walks the queue immediately on completion so collisions resolve in seconds.
+ *
+ * Sequential by design: the executor processes one task at a time to avoid
+ * concurrent agent costs and write conflicts. We use a module-level `draining`
+ * flag to ensure only one drain loop runs even if multiple firings finish in
+ * close succession.
+ *
+ * Termination: each task id is attempted at most once per drain cycle. If the
+ * oldest queued task is one we already attempted (it is still "queued" after
+ * the execution attempt), the cycle stops instead of retrying it forever; the
+ * next firing or drain will pick it up again.
+ *
+ * Execution and metrics failures are caught and logged, never rethrown. A
+ * failure of the queue lookup itself propagates to the caller after the
+ * `draining` flag has been reset.
+ */
+export async function drainQueue(): Promise<void> {
+  if (draining) return;
+  draining = true;
+  // Ids already handed to the executor in this cycle; guards against a task
+  // that never leaves "queued" turning the loop below into a busy spin.
+  const attempted = new Set<string>();
+  try {
+    // Loop until the queue is empty so a single drain cycle clears all
+    // collided tasks rather than only the next one.
+    while (true) {
+      const [nextQueued] = await db
+        .select({ id: tasks.id })
+        .from(tasks)
+        .where(
+          and(
+            eq(tasks.status, "queued"),
+            inArray(tasks.sourceType, ["scheduled", "heartbeat"])
+          )
+        )
+        .orderBy(asc(tasks.createdAt))
+        .limit(1);
+      if (!nextQueued) return;
+      if (attempted.has(nextQueued.id)) {
+        console.warn(
+          `[scheduler] task ${nextQueued.id} is still queued after a drain attempt; stopping drain to avoid a retry loop`
+        );
+        return;
+      }
+      attempted.add(nextQueued.id);
+      console.log(`[scheduler] draining queue → executing task ${nextQueued.id}`);
+      try {
+        await executeTaskWithRuntime(nextQueued.id);
+      } catch (err) {
+        console.error(`[scheduler] drain task ${nextQueued.id} failed:`, err);
+      }
+      // Record health metrics for the schedule that owns this task (if any).
+      try {
+        const [taskRow] = await db
+          .select({ scheduleId: tasks.scheduleId })
+          .from(tasks)
+          .where(eq(tasks.id, nextQueued.id));
+        if (taskRow?.scheduleId) {
+          await recordFiringMetrics(taskRow.scheduleId, nextQueued.id);
+        }
+      } catch (err) {
+        console.error(`[scheduler] metrics recording failed for ${nextQueued.id}:`, err);
+      }
+    }
+  } finally {
+    // Always release the flag so a failed cycle cannot block future drains.
+    draining = false;
+  }
+}
+/**
+ * Compose the turn-budget preamble that is placed in front of the prompt of
+ * every schedule-spawned task.
+ *
+ * The limit comes from the `SETTINGS_KEYS.MAX_TURNS` setting; when the setting
+ * is missing, empty, non-numeric or zero, 50 is used instead. The text tells
+ * the agent its turn ceiling and gives batching guidance so it does not burn
+ * turns on per-item tool calls.
+ *
+ * @returns The header text, terminated by a blank line so the schedule prompt
+ *          can be appended directly after it.
+ */
+async function buildTurnBudgetHeader(): Promise<string> {
+  const fallbackTurns = 50;
+  const setting = await getSetting(SETTINGS_KEYS.MAX_TURNS);
+  let maxTurns = fallbackTurns;
+  if (setting) {
+    const parsed = Number.parseInt(setting, 10);
+    // NaN and 0 are both falsy, so either one keeps the fallback.
+    if (parsed) maxTurns = parsed;
+  }
+  const guidance = [
+    `TURN BUDGET: You have ${maxTurns} turns maximum. Plan accordingly.`,
+    `IMPORTANT: Batch operations to minimize turns.`,
+    `- Use ONE web search with multiple keywords instead of per-item searches`,
+    `- Read multiple tables in a single turn when possible`,
+    `- Do NOT loop through items with individual tool calls`,
+  ];
+  // Two trailing newlines leave one empty line between header and prompt.
+  return `${guidance.join("\n")}\n\n`;
+}
+/**
+ * Classify why a task failed by keyword-matching its result text
+ * (case-insensitive). Lets recordFiringMetrics store a coarse cause without
+ * any extra columns on tasks.
+ *
+ * Checks run in this order, first match wins:
+ *   - no result text                        → "unknown"
+ *   - "turn" together with "limit" or "max" → "turn_limit_exceeded"
+ *   - "timeout" or "timed out"              → "timeout"
+ *   - "budget"                              → "budget_exceeded"
+ *   - anything else                         → "error"
+ *
+ * @param result Result text of the completed task, or null when there is none.
+ * @returns One of the reason codes listed above.
+ */
+function detectFailureReason(result: string | null): string {
+  if (!result) return "unknown";
+  const text = result.toLowerCase();
+  const mentions = (needle: string): boolean => text.includes(needle);
+
+  const hitTurnLimit = mentions("turn") && (mentions("limit") || mentions("max"));
+  if (hitTurnLimit) return "turn_limit_exceeded";
+
+  const timedOut = mentions("timeout") || mentions("timed out");
+  if (timedOut) return "timeout";
+
+  return mentions("budget") ? "budget_exceeded" : "error";
+}
+/**
+ * Record per-firing health metrics on a schedule and auto-pause it after
+ * three consecutive failed firings.
+ *
+ * The turn count is the number of agentLogs rows for the task. The running
+ * average is an exponential moving average (70% previous value, 30% this
+ * firing, rounded to an integer) so it reflects recent behavior more than
+ * ancient firings; the first recorded firing seeds the average.
+ *
+ * A firing counts as a failure only when the task status is "failed"; any
+ * other status resets the failure streak and clears the last failure reason.
+ *
+ * The schedule's status is written only when auto-pausing, so a status change
+ * made elsewhere between the read and the update here is not overwritten with
+ * a stale value.
+ *
+ * @param scheduleId Schedule that owns the firing.
+ * @param taskId     Task spawned by the firing.
+ * @returns Resolves once metrics are stored; resolves without writing when
+ *          either the task or the schedule row cannot be found.
+ */
+export async function recordFiringMetrics(
+  scheduleId: string,
+  taskId: string
+): Promise<void> {
+  // Number of consecutive failed firings that triggers an automatic pause.
+  const autoPauseThreshold = 3;
+  const [task] = await db
+    .select({ status: tasks.status, result: tasks.result })
+    .from(tasks)
+    .where(eq(tasks.id, taskId));
+  if (!task) return;
+  const [schedule] = await db
+    .select()
+    .from(schedules)
+    .where(eq(schedules.id, scheduleId));
+  if (!schedule) return;
+  const turnCountResult = await db
+    .select({ count: sql<number>`count(*)` })
+    .from(agentLogs)
+    .where(eq(agentLogs.taskId, taskId));
+  const turns = Number(turnCountResult[0]?.count ?? 0);
+  // No previous average yet → seed with this firing so the EMA equals `turns`.
+  const prevAvg = schedule.avgTurnsPerFiring ?? turns;
+  const newAvg = Math.round(prevAvg * 0.7 + turns * 0.3);
+  const isFailure = task.status === "failed";
+  const newStreak = isFailure ? (schedule.failureStreak ?? 0) + 1 : 0;
+  // Only schedules that are currently active get paused.
+  const shouldAutoPause =
+    isFailure && newStreak >= autoPauseThreshold && schedule.status === "active";
+  await db
+    .update(schedules)
+    .set({
+      lastTurnCount: turns,
+      avgTurnsPerFiring: newAvg,
+      failureStreak: newStreak,
+      lastFailureReason: isFailure ? detectFailureReason(task.result) : null,
+      // Leave status untouched unless we are pausing: writing back the value
+      // read above could clobber a concurrent status change.
+      ...(shouldAutoPause ? { status: "paused" as const } : {}),
+      updatedAt: new Date(),
+    })
+    .where(eq(schedules.id, scheduleId));
+  if (shouldAutoPause) {
+    console.warn(
+      `[scheduler] auto-paused "${schedule.name}" after ${autoPauseThreshold} consecutive failures`
+    );
+  }
+}
 /**
  * Start the scheduler singleton. Safe to call multiple times — subsequent
@@ -162,13 +316,17 @@ async function fireSchedule(
   const taskId = crypto.randomUUID();
   const firingNumber = schedule.firingCount + 1;
+  // Prepend turn-budget guidance so the agent can plan batched tool calls
+  // instead of per-item loops that exhaust maxTurns mid-task.
+  const budgetHeader = await buildTurnBudgetHeader();
   await db.insert(tasks).values({
     id: taskId,
     projectId: schedule.projectId,
     workflowId: null,
     scheduleId: schedule.id,
     title: `${schedule.name} — firing #${firingNumber}`,
-    description: schedule.prompt,
+    description: budgetHeader + schedule.prompt,
     status: "queued",
     assignedAgent: schedule.assignedAgent,
     agentProfile: schedule.agentProfile,
@@ -220,13 +378,19 @@ async function fireSchedule(
     })
     .where(eq(schedules.id, schedule.id));
-  // Fire-and-forget task execution
-  executeTaskWithRuntime(taskId).catch((err) => {
-    console.error(
-      `[scheduler] task execution failed for schedule ${schedule.id}, task ${taskId}:`,
-      err
-    );
-  });
+  // Drain-aware task execution. We still don't await in fireSchedule (the
+  // poll loop must keep claiming other due schedules), but on completion we
+  // record metrics and trigger drainQueue() so any tasks queued by colliding
+  // schedules execute immediately instead of waiting for the next poll.
+  executeTaskWithRuntime(taskId)
+    .catch((err) => {
+      console.error(
+        `[scheduler] task execution failed for schedule ${schedule.id}, task ${taskId}:`,
+        err
+      );
+    })
+    .then(() => recordFiringMetrics(schedule.id, taskId).catch(() => {}))
+    .then(() => drainQueue().catch(() => {}));
   console.log(
     `[scheduler] fired schedule "${schedule.name}" → task ${taskId} (firing #${firingNumber})`
@@ -372,6 +536,12 @@ async function fireHeartbeat(
     console.error(`[scheduler] heartbeat evaluation failed for "${schedule.name}":`, err);
   }
+  // Record health metrics and trigger drain (fire-and-forget — we still need
+  // to finish heartbeat post-processing below before returning).
+  recordFiringMetrics(schedule.id, evalTaskId)
+    .catch(() => {})
+    .then(() => drainQueue().catch(() => {}));
   // 6. Read the completed task result
   const [evalTask] = await db
     .select({ result: tasks.result, status: tasks.status })

package/src/lib/workflows/cost-estimator.ts ADDED Viewed

@@ -0,0 +1,141 @@
+import { db } from "@/lib/db";
+import { workflows } from "@/lib/db/schema";
+import { eq } from "drizzle-orm";
+import { buildPoolDocumentContext } from "@/lib/documents/context-builder";
+import { getSetting } from "@/lib/settings/helpers";
+import { WORKFLOW_STEP_MAX_BUDGET_USD } from "@/lib/constants/task-status";
+import type { WorkflowDefinition, WorkflowStep } from "./types";
+/**
+ * Approximate the token count of a piece of text using the rule of thumb of
+ * roughly four characters per token, rounded up.
+ */
+function estimateTokens(text: string): number {
+  const charsPerToken = 4;
+  return Math.ceil(text.length / charsPerToken);
+}
+/**
+ * Approximate cost in USD per 1M input tokens, keyed by provider tier
+ * (conservative estimates). The `default` entry is the rate used when no
+ * specific tier is selected.
+ */
+const COST_PER_MILLION_INPUT_TOKENS: Record<string, number> = {
+  fast: 0.25,     // Haiku / GPT-mini tier
+  balanced: 3.0,  // Sonnet / GPT-4.1 tier
+  best: 15.0,     // Opus / GPT-5.4 tier
+  default: 3.0,   // Conservative middle estimate
+};
+export interface StepCostEstimate { // Pre-flight cost estimate for one workflow step.
+  stepId: string; // Id of the step in the workflow definition.
+  name: string; // Step name, also used in warning messages.
+  estimatedInputTokens: number; // Estimated prompt tokens plus injected document-context tokens.
+  estimatedCostUsd: number; // Estimated cost in USD, rounded to 4 decimal places.
+  budgetCapUsd: number; // Effective budget cap for the step, as resolved by resolveStepBudget.
+}
+export interface WorkflowCostEstimate { // Pre-flight cost estimate for a whole workflow.
+  steps: StepCostEstimate[]; // One entry per step, in definition order.
+  totalEstimatedCostUsd: number; // Sum of the per-step estimates, rounded to 4 decimal places.
+  totalBudgetCapUsd: number; // Sum of the per-step budget caps.
+  overBudget: boolean; // True when the total estimated cost exceeds the combined budget cap.
+  warnings: string[]; // Human-readable advisory messages (e.g. a step close to or over its cap).
+}
+/**
+ * Resolve the effective budget cap (USD) for a workflow step.
+ *
+ * Precedence (first match wins):
+ *   1. step.budgetUsd, when set and greater than zero (per-step override)
+ *   2. User setting `budget_max_cost_per_task`, when it parses to a finite
+ *      number greater than zero
+ *   3. WORKFLOW_STEP_MAX_BUDGET_USD
+ *
+ * @param step Step to resolve the cap for; omit to get the cap that applies
+ *             to a step without an override.
+ * @returns The budget cap in USD.
+ */
+export async function resolveStepBudget(step?: WorkflowStep): Promise<number> {
+  // Per-step override
+  if (step?.budgetUsd && step.budgetUsd > 0) {
+    return step.budgetUsd;
+  }
+  // User setting
+  const userBudget = await getSetting("budget_max_cost_per_task");
+  if (userBudget) {
+    const parsed = Number.parseFloat(userBudget);
+    // isFinite rejects NaN and also "Infinity", which would otherwise
+    // disable the cap entirely.
+    if (Number.isFinite(parsed) && parsed > 0) return parsed;
+  }
+  // Workflow step constant (was dead code — now wired)
+  return WORKFLOW_STEP_MAX_BUDGET_USD;
+}
+/**
+ * Pre-flight cost estimation for a workflow.
+ *
+ * For every step, estimates input tokens from the step prompt plus the
+ * document context that would be injected, prices them at the default tier
+ * rate, and adds ~50% on top for output. Each step is compared against its
+ * budget cap (see resolveStepBudget); a warning is added when the estimate
+ * exceeds 80% of the cap, and another when the workflow total exceeds the
+ * combined cap.
+ *
+ * The estimate is advisory — it does NOT block execution. A missing workflow,
+ * a definition that is not valid JSON, or a definition without a steps array
+ * yields an empty estimate (with a warning for the first two cases) instead
+ * of an exception.
+ *
+ * @param workflowId Id of the workflow to estimate.
+ * @returns Per-step and total estimates together with advisory warnings.
+ */
+export async function estimateWorkflowCost(
+  workflowId: string
+): Promise<WorkflowCostEstimate> {
+  // Result used whenever there is nothing that can be estimated.
+  const emptyEstimate = (warning: string): WorkflowCostEstimate => ({
+    steps: [],
+    totalEstimatedCostUsd: 0,
+    totalBudgetCapUsd: 0,
+    overBudget: false,
+    warnings: [warning],
+  });
+  const [workflow] = await db
+    .select()
+    .from(workflows)
+    .where(eq(workflows.id, workflowId));
+  if (!workflow) {
+    return emptyEstimate("Workflow not found");
+  }
+  let definition: WorkflowDefinition;
+  try {
+    definition = JSON.parse(workflow.definition);
+  } catch {
+    // Keep the estimator advisory: report the problem instead of throwing.
+    return emptyEstimate("Workflow definition is not valid JSON");
+  }
+  // JSON.parse can legally yield null or an object without a steps array.
+  const steps = Array.isArray(definition?.steps) ? definition.steps : [];
+  const warnings: string[] = [];
+  const stepEstimates: StepCostEstimate[] = [];
+  let totalCost = 0;
+  let totalBudget = 0;
+  // Estimate cost using the default tier (same rate as balanced); the rate
+  // does not depend on the step, so compute it once.
+  const costPerToken = COST_PER_MILLION_INPUT_TOKENS.default / 1_000_000;
+  for (const step of steps) {
+    // Get document context that would be injected for this step
+    const poolContext = await buildPoolDocumentContext(workflowId, step.id);
+    const promptTokens = estimateTokens(step.prompt);
+    const docTokens = poolContext ? estimateTokens(poolContext) : 0;
+    const totalInputTokens = promptTokens + docTokens;
+    // Input + estimated output (~50% of input)
+    const estimatedCost = totalInputTokens * costPerToken * 1.5;
+    const budgetCap = await resolveStepBudget(step);
+    stepEstimates.push({
+      stepId: step.id,
+      name: step.name,
+      estimatedInputTokens: totalInputTokens,
+      estimatedCostUsd: Math.round(estimatedCost * 10000) / 10000,
+      budgetCapUsd: budgetCap,
+    });
+    totalCost += estimatedCost;
+    totalBudget += budgetCap;
+    // Warn early: anything above 80% of the cap is considered at risk.
+    if (estimatedCost > budgetCap * 0.8) {
+      warnings.push(
+        `Step "${step.name}" estimated at $${estimatedCost.toFixed(4)} — close to or over the $${budgetCap} cap`
+      );
+    }
+  }
+  const overBudget = totalCost > totalBudget;
+  if (overBudget) {
+    warnings.push(
+      `Total estimated cost $${totalCost.toFixed(4)} exceeds combined budget cap $${totalBudget.toFixed(2)}`
+    );
+  }
+  return {
+    steps: stepEstimates,
+    totalEstimatedCostUsd: Math.round(totalCost * 10000) / 10000,
+    totalBudgetCapUsd: totalBudget,
+    overBudget,
+    warnings,
+  };
+}