npm - @desplega.ai/agent-swarm - Versions diffs - 1.79.4 → 1.80.1 - Mend

@desplega.ai/agent-swarm 1.79.4 → 1.80.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (130) hide show

package/openapi.json +496 -32
package/package.json +14 -6
package/src/artifact-sdk/server.ts +2 -1
package/src/be/db.ts +102 -31
package/src/be/migrations/063_cost_context_schema_relax.sql +133 -0
package/src/be/migrations/064_scripts.sql +39 -0
package/src/be/migrations/065_script_embeddings.sql +7 -0
package/src/be/pricing-normalize.ts +81 -0
package/src/be/scripts/db.ts +391 -0
package/src/be/scripts/embeddings.ts +231 -0
package/src/be/scripts/maintenance.ts +9 -0
package/src/be/scripts/typecheck.ts +193 -0
package/src/be/seed-pricing.ts +293 -0
package/src/cli.tsx +22 -5
package/src/commands/artifact.ts +3 -2
package/src/commands/claude-managed-setup.ts +21 -4
package/src/commands/codex-login.ts +5 -3
package/src/commands/onboard.tsx +2 -1
package/src/commands/runner.ts +663 -246
package/src/commands/setup.tsx +5 -3
package/src/hooks/hook.ts +4 -3
package/src/http/context.ts +6 -2
package/src/http/index.ts +126 -68
package/src/http/memory.ts +28 -0
package/src/http/openapi.ts +1 -0
package/src/http/page-proxy.ts +2 -1
package/src/http/route-def.ts +1 -0
package/src/http/schedules.ts +37 -0
package/src/http/scripts.ts +381 -0
package/src/http/session-data.ts +74 -23
package/src/linear/outbound.ts +9 -2
package/src/otel-impl.ts +200 -0
package/src/otel.ts +132 -0
package/src/providers/claude-adapter.ts +52 -6
package/src/providers/claude-managed-adapter.ts +43 -17
package/src/providers/claude-managed-pricing.ts +34 -0
package/src/providers/codex-adapter.ts +38 -27
package/src/providers/codex-models.ts +22 -3
package/src/providers/devin-adapter.ts +11 -0
package/src/providers/opencode-adapter.ts +31 -7
package/src/providers/pi-mono-adapter.ts +39 -7
package/src/providers/pricing-sources.md +52 -0
package/src/providers/swarm-events-shared.ts +8 -4
package/src/providers/types.ts +33 -10
package/src/scripts-runtime/ctx.ts +23 -0
package/src/scripts-runtime/eval-harness.ts +39 -0
package/src/scripts-runtime/executors/native.ts +229 -0
package/src/scripts-runtime/executors/registry.ts +16 -0
package/src/scripts-runtime/executors/types.ts +63 -0
package/src/scripts-runtime/extract-signature.ts +81 -0
package/src/scripts-runtime/import-allowlist.ts +109 -0
package/src/scripts-runtime/loader.ts +96 -0
package/src/scripts-runtime/redacted.ts +48 -0
package/src/scripts-runtime/sdk-allowlist.ts +29 -0
package/src/scripts-runtime/stdlib/fetch.ts +46 -0
package/src/scripts-runtime/stdlib/glob.ts +8 -0
package/src/scripts-runtime/stdlib/grep.ts +34 -0
package/src/scripts-runtime/stdlib/index.ts +16 -0
package/src/scripts-runtime/stdlib/table.ts +17 -0
package/src/scripts-runtime/swarm-config.ts +35 -0
package/src/scripts-runtime/swarm-sdk.ts +197 -0
package/src/scripts-runtime/types/stdlib.d.ts +104 -0
package/src/scripts-runtime/types/swarm-sdk.d.ts +86 -0
package/src/server.ts +18 -0
package/src/tests/api-key.test.ts +33 -0
package/src/tests/claude-managed-adapter.test.ts +17 -3
package/src/tests/claude-managed-setup.test.ts +10 -1
package/src/tests/codex-adapter.test.ts +20 -19
package/src/tests/codex-login.test.ts +1 -1
package/src/tests/context-snapshot.test.ts +2 -2
package/src/tests/context-window.test.ts +65 -1
package/src/tests/devin-adapter.test.ts +2 -0
package/src/tests/http/context-routes.test.ts +161 -0
package/src/tests/linear-outbound-sync.test.ts +109 -0
package/src/tests/mcp-tools.test.ts +69 -0
package/src/tests/migration-063-schema-relax.test.ts +109 -0
package/src/tests/opencode-adapter.test.ts +146 -1
package/src/tests/otel-impl-secret-scrubbing.test.ts +33 -0
package/src/tests/pages-view-count.test.ts +30 -5
package/src/tests/providers/codex-cost.test.ts +18 -0
package/src/tests/providers/opencode-cost.test.ts +74 -0
package/src/tests/providers/pi-cost.test.ts +128 -0
package/src/tests/redacted.test.ts +29 -0
package/src/tests/runner-tool-spans.test.ts +268 -0
package/src/tests/script-executor-conformance.test.ts +142 -0
package/src/tests/script-executor-registry.test.ts +17 -0
package/src/tests/scripts-db.test.ts +329 -0
package/src/tests/scripts-embeddings.test.ts +291 -0
package/src/tests/scripts-extract-signature.test.ts +47 -0
package/src/tests/scripts-http.test.ts +350 -0
package/src/tests/scripts-import-allowlist.test.ts +55 -0
package/src/tests/scripts-mcp-e2e.test.ts +269 -0
package/src/tests/scripts-runtime-secret-egress.test.ts +44 -0
package/src/tests/scripts-runtime.test.ts +289 -0
package/src/tests/sdk-allowlist.test.ts +59 -0
package/src/tests/secret-scrubber.test.ts +54 -1
package/src/tests/session-costs-codex-recompute.test.ts +35 -22
package/src/tests/session-costs-model-key-normalize.test.ts +271 -0
package/src/tests/session-costs-recompute-all-providers.test.ts +170 -0
package/src/tests/store-progress-cost.test.ts +6 -1
package/src/tests/swarm-config.test.ts +38 -0
package/src/tests/tool-annotations.test.ts +2 -2
package/src/tests/tool-call-progress.test.ts +30 -0
package/src/tests/workflow-e2e.test.ts +218 -0
package/src/tests/workflow-executors.test.ts +32 -2
package/src/tests/workflow-input-redaction.test.ts +232 -0
package/src/tests/workflow-swarm-script.test.ts +273 -0
package/src/tools/memory-rate.ts +2 -1
package/src/tools/script-common.ts +88 -0
package/src/tools/script-delete.ts +35 -0
package/src/tools/script-query-types.ts +37 -0
package/src/tools/script-run.ts +43 -0
package/src/tools/script-search.ts +32 -0
package/src/tools/script-upsert.ts +43 -0
package/src/tools/store-progress.ts +16 -60
package/src/tools/tool-config.ts +7 -0
package/src/tools/utils.ts +65 -12
package/src/types.ts +122 -10
package/src/utils/api-key.ts +28 -0
package/src/utils/context-window.ts +104 -4
package/src/utils/page-session.ts +8 -6
package/src/utils/secret-scrubber.ts +29 -1
package/src/workflows/engine.ts +12 -4
package/src/workflows/executors/index.ts +1 -0
package/src/workflows/executors/registry.ts +2 -0
package/src/workflows/executors/script.ts +12 -1
package/src/workflows/executors/swarm-script.ts +170 -0
package/src/workflows/input.ts +65 -0
package/src/workflows/recovery.ts +31 -3
package/src/workflows/resume.ts +43 -5

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@desplega.ai/agent-swarm",
-  "version": "1.79.4",
+  "version": "1.80.1",
   "description": "Multi-agent orchestration for Claude Code, Codex, Gemini CLI, and other AI coding assistants",
   "license": "MIT",
   "author": "desplega.sh <contact@desplega.sh>",
@@ -42,8 +42,10 @@
   },
   "scripts": {
     "build:pi-skills": "bun run plugin/build-pi-skills.ts",
+    "build:script-types": "bun run scripts/bundle-script-types.ts",
     "tsc:check": "bun tsc --noEmit",
     "check:db-boundary": "bash scripts/check-db-boundary.sh",
+    "check:api-key-boundary": "bash scripts/check-api-key-boundary.sh",
     "cli": "bun src/cli.tsx",
     "hook": "bun src/hooks/hook.ts",
     "claude": "bun src/cli.tsx claude",
@@ -73,6 +75,7 @@
     "deploy:docker": "bun deploy/docker-push.ts",
     "e2e:workflows": "bun scripts/e2e-workflow-test.ts",
     "e2e:workflows:docker": "bun scripts/e2e-workflow-test.ts --with-docker",
+    "e2e:otel:jaeger": "bun scripts/e2e-otel-jaeger.ts",
     "docs:mcp": "bun scripts/generate-mcp-docs.ts",
     "docs:openapi": "bun scripts/generate-openapi.ts",
     "docs:business-use": "bun scripts/generate-business-use-docs.ts",
@@ -104,13 +107,18 @@
     "@desplega.ai/localtunnel": "^2.2.0",
     "@inkjs/ui": "^2.0.0",
     "@linear/sdk": "^77.0.0",
-    "@earendil-works/pi-agent-core": "^0.74.0",
-    "@earendil-works/pi-ai": "^0.74.0",
-    "@earendil-works/pi-coding-agent": "^0.74.0",
+    "@earendil-works/pi-agent-core": "^0.75.3",
+    "@earendil-works/pi-ai": "^0.75.3",
+    "@earendil-works/pi-coding-agent": "^0.75.3",
     "@modelcontextprotocol/sdk": "^1.25.1",
-    "@openai/codex-sdk": "^0.128.0",
-    "@opencode-ai/sdk": "^1.14.30",
+    "@openai/codex-sdk": "^0.130.0",
+    "@opencode-ai/sdk": "^1.15.4",
     "@openfort/openfort-node": "^0.9.1",
+    "@opentelemetry/api": "^1.9.1",
+    "@opentelemetry/exporter-trace-otlp-http": "^0.218.0",
+    "@opentelemetry/resources": "^2.7.1",
+    "@opentelemetry/sdk-node": "^0.218.0",
+    "@opentelemetry/semantic-conventions": "^1.41.1",
     "@slack/bolt": "^4.6.0",
     "@types/react": "^19.2.7",
     "@x402/core": "^2.5.0",

package/src/artifact-sdk/server.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 import { Hono } from "hono";
 import { serveStatic } from "hono/bun";
+import { getApiKey } from "../utils/api-key";
 import { BROWSER_SDK_JS } from "./browser-sdk";
 import { getAvailablePort } from "./port";
 import { createTunnel } from "./tunnel";
@@ -23,7 +24,7 @@ export interface ArtifactServer {
 export function createArtifactServer(opts: ArtifactServerOptions): ArtifactServer {
   const agentId = process.env.AGENT_ID || "unknown";
-  const apiKey = process.env.API_KEY || "";
+  const apiKey = getApiKey();
   const mcpBaseUrl = process.env.MCP_BASE_URL || `http://localhost:${process.env.PORT || "3013"}`;
   const app = new Hono();

package/src/be/db.ts CHANGED Viewed

@@ -362,7 +362,7 @@ function ensureAgentProfileColumns(database: Database): void {
   }
 }
-function computeContentHash(content: string): string {
+export function computeContentHash(content: string): string {
   const hasher = new Bun.CryptoHasher("sha256");
   hasher.update(content);
   return hasher.digest("hex");
@@ -980,7 +980,7 @@ type AgentTaskRow = {
   progress: string | null;
   compactionCount: number | null;
   peakContextPercent: number | null;
-  totalContextTokensUsed: number | null;
+  peakContextTokens: number | null;
   contextWindowSize: number | null;
   was_paused: number;
   credentialKeySuffix: string | null;
@@ -1036,7 +1036,7 @@ function rowToAgentTask(row: AgentTaskRow): AgentTask {
     contextKey: row.contextKey ?? undefined,
     compactionCount: row.compactionCount ?? undefined,
     peakContextPercent: row.peakContextPercent ?? undefined,
-    totalContextTokensUsed: row.totalContextTokensUsed ?? undefined,
+    peakContextTokens: row.peakContextTokens ?? undefined,
     contextWindowSize: row.contextWindowSize ?? undefined,
     createdAt: row.createdAt,
     lastUpdatedAt: row.lastUpdatedAt,
@@ -3761,8 +3761,11 @@ type SessionCostRow = {
   outputTokens: number;
   cacheReadTokens: number;
   cacheWriteTokens: number;
+  // Migration 063 additions:
+  reasoningOutputTokens: number;
+  thinkingTokens: number;
   durationMs: number;
-  numTurns: number;
+  numTurns: number | null;
   model: string;
   isError: number;
   costSource: string;
@@ -3780,6 +3783,8 @@ function rowToSessionCost(row: SessionCostRow): SessionCost {
     outputTokens: row.outputTokens,
     cacheReadTokens: row.cacheReadTokens,
     cacheWriteTokens: row.cacheWriteTokens,
+    reasoningOutputTokens: row.reasoningOutputTokens ?? 0,
+    thinkingTokens: row.thinkingTokens ?? 0,
     durationMs: row.durationMs,
     numTurns: row.numTurns,
     model: row.model,
@@ -3803,15 +3808,24 @@ const sessionCostQueries = {
         number,
         number,
         number,
-        number,
-        number,
-        string,
-        number,
-        string,
+        number, // reasoningOutputTokens
+        number, // thinkingTokens
+        number, // durationMs
+        number | null, // numTurns
+        string, // model
+        number, // isError
+        string, // costSource
       ]
     >(
-      `INSERT INTO session_costs (id, sessionId, taskId, agentId, totalCostUsd, inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens, durationMs, numTurns, model, isError, costSource, createdAt)
-       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))`,
+      `INSERT INTO session_costs (
+         id, sessionId, taskId, agentId,
+         totalCostUsd, inputTokens, outputTokens,
+         cacheReadTokens, cacheWriteTokens,
+         reasoningOutputTokens, thinkingTokens,
+         durationMs, numTurns, model, isError,
+         costSource, createdAt
+       )
+       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))`,
     ),
   getByTaskId: () =>
@@ -3839,16 +3853,22 @@ export interface CreateSessionCostInput {
   outputTokens?: number;
   cacheReadTokens?: number;
   cacheWriteTokens?: number;
+  // Migration 063 additions — adapters that have these numbers should pass
+  // them; defaulting to 0 preserves the old write shape for callers that don't.
+  reasoningOutputTokens?: number;
+  thinkingTokens?: number;
   durationMs: number;
-  numTurns: number;
+  // Nullable: some adapters (claude when num_turns is absent) can't honestly
+  // report a turn count; we prefer null over a faked 1.
+  numTurns: number | null;
   model: string;
   isError?: boolean;
   /**
-   * Phase 6: where the recorded `totalCostUsd` came from.
+   * Phase 6 (migration 063 added 'unpriced'): where `totalCostUsd` came from.
    *  - 'harness'        — value reported by the harness as-is (default).
-   *  - 'pricing-table'  — value recomputed by the API from `pricing` rows
-   *                       (Codex when DB pricing rows exist for all three
-   *                       token classes).
+   *  - 'pricing-table'  — value recomputed by the API from `pricing` rows.
+   *  - 'unpriced'       — recompute attempted but no matching pricing rows;
+   *                       `totalCostUsd` is whatever the worker submitted.
    */
   costSource?: SessionCostSource;
 }
@@ -3856,6 +3876,8 @@ export interface CreateSessionCostInput {
 export function createSessionCost(input: CreateSessionCostInput): SessionCost {
   const id = crypto.randomUUID();
   const costSource: SessionCostSource = input.costSource ?? "harness";
+  const reasoningOutputTokens = input.reasoningOutputTokens ?? 0;
+  const thinkingTokens = input.thinkingTokens ?? 0;
   sessionCostQueries
     .insert()
     .run(
@@ -3868,6 +3890,8 @@ export function createSessionCost(input: CreateSessionCostInput): SessionCost {
       input.outputTokens ?? 0,
       input.cacheReadTokens ?? 0,
       input.cacheWriteTokens ?? 0,
+      reasoningOutputTokens,
+      thinkingTokens,
       input.durationMs,
       input.numTurns,
       input.model,
@@ -3885,6 +3909,8 @@ export function createSessionCost(input: CreateSessionCostInput): SessionCost {
     outputTokens: input.outputTokens ?? 0,
     cacheReadTokens: input.cacheReadTokens ?? 0,
     cacheWriteTokens: input.cacheWriteTokens ?? 0,
+    reasoningOutputTokens,
+    thinkingTokens,
     durationMs: input.durationMs,
     numTurns: input.numTurns,
     model: input.model,
@@ -4110,16 +4136,33 @@ export interface DashboardCostSummary {
 }
 export function getDashboardCostSummary(): DashboardCostSummary {
+  // Phase 13: compute the date boundaries in TS and pass them as ISO 8601
+  // strings. `session_costs.createdAt` is a TEXT ISO 8601 column; lexicographic
+  // comparison on ISO 8601 sorts correctly, so the comparison works as long
+  // as both sides are the same shape. The old code compared an ISO string
+  // (`2026-05-15T03:45:12.123Z`) against `date('now')` (which returns the
+  // string `2026-05-15`). That happened to work, because any ISO timestamp
+  // that starts with the bare date sorts after it, but it relied on a
+  // shorter prefix comparing lower rather than on two values of the same
+  // format. Use an explicit ISO-millisecond UTC boundary for both bounds so
+  // the comparison is like-for-like.
+  const now = new Date();
+  const startOfDayUtc = new Date(
+    Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate()),
+  ).toISOString();
+  const startOfMonthUtc = new Date(
+    Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), 1),
+  ).toISOString();
   type CostRow = { costToday: number; costMtd: number };
   const row = getDb()
-    .prepare<CostRow, []>(
+    .prepare<CostRow, [string, string]>(
       `SELECT
-        COALESCE(SUM(CASE WHEN createdAt >= date('now') THEN totalCostUsd ELSE 0 END), 0) as costToday,
+        COALESCE(SUM(CASE WHEN createdAt >= ? THEN totalCostUsd ELSE 0 END), 0) as costToday,
         COALESCE(SUM(totalCostUsd), 0) as costMtd
       FROM session_costs
-      WHERE createdAt >= date('now', 'start of month')`,
+      WHERE createdAt >= ?`,
     )
-    .get();
+    .get(startOfDayUtc, startOfMonthUtc);
   return row ?? { costToday: 0, costMtd: 0 };
 }
@@ -8245,6 +8288,8 @@ type ContextSnapshotRow = {
   preCompactTokens: number | null;
   cumulativeInputTokens: number;
   cumulativeOutputTokens: number;
+  // Migration 063 — see ContextFormulaSchema in src/types.ts for the value set.
+  contextFormula: string | null;
   createdAt: string;
 };
@@ -8258,10 +8303,11 @@ function rowToContextSnapshot(row: ContextSnapshotRow): ContextSnapshot {
     contextTotalTokens: row.contextTotalTokens ?? undefined,
     contextPercent: row.contextPercent ?? undefined,
     eventType: row.eventType,
-    compactTrigger: (row.compactTrigger as "auto" | "manual" | null) ?? undefined,
+    compactTrigger: (row.compactTrigger as "auto" | "manual" | "auto-inferred" | null) ?? undefined,
     preCompactTokens: row.preCompactTokens ?? undefined,
     cumulativeInputTokens: row.cumulativeInputTokens,
     cumulativeOutputTokens: row.cumulativeOutputTokens,
+    contextFormula: (row.contextFormula as ContextSnapshot["contextFormula"]) ?? undefined,
     createdAt: row.createdAt,
   };
 }
@@ -8283,11 +8329,12 @@ const contextSnapshotQueries = {
         number | null,
         number,
         number,
+        string | null, // contextFormula (migration 063)
         string,
       ]
     >(
-      `INSERT INTO task_context_snapshots (id, taskId, agentId, sessionId, contextUsedTokens, contextTotalTokens, contextPercent, eventType, compactTrigger, preCompactTokens, cumulativeInputTokens, cumulativeOutputTokens, createdAt)
-       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
+      `INSERT INTO task_context_snapshots (id, taskId, agentId, sessionId, contextUsedTokens, contextTotalTokens, contextPercent, eventType, compactTrigger, preCompactTokens, cumulativeInputTokens, cumulativeOutputTokens, contextFormula, createdAt)
+       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
     ),
   getByTaskId: () =>
@@ -8309,10 +8356,12 @@ export interface CreateContextSnapshotInput {
   contextTotalTokens?: number;
   contextPercent?: number;
   eventType: ContextSnapshotEventType;
-  compactTrigger?: "auto" | "manual";
+  compactTrigger?: "auto" | "manual" | "auto-inferred";
   preCompactTokens?: number;
   cumulativeInputTokens?: number;
   cumulativeOutputTokens?: number;
+  // Migration 063 — adapter-supplied formula tag.
+  contextFormula?: ContextSnapshot["contextFormula"];
 }
 export function createContextSnapshot(input: CreateContextSnapshotInput): ContextSnapshot {
@@ -8334,6 +8383,7 @@ export function createContextSnapshot(input: CreateContextSnapshotInput): Contex
       input.preCompactTokens ?? null,
       input.cumulativeInputTokens ?? 0,
       input.cumulativeOutputTokens ?? 0,
+      input.contextFormula ?? null,
       now,
     );
@@ -8347,10 +8397,15 @@ export function createContextSnapshot(input: CreateContextSnapshotInput): Contex
       .run(input.contextPercent, input.taskId);
   }
-  // Keep totalContextTokensUsed up to date with the latest known value
+  // Migration 063: peakContextTokens is monotonic-max across snapshots, not a
+  // rolling latest. Mirrors Claude Code's status-line "peak context" semantic.
   if (input.contextUsedTokens != null) {
     getDb()
-      .prepare("UPDATE agent_tasks SET totalContextTokensUsed = ? WHERE id = ?")
+      .prepare(
+        `UPDATE agent_tasks
+         SET peakContextTokens = MAX(COALESCE(peakContextTokens, 0), ?)
+         WHERE id = ?`,
+      )
       .run(input.contextUsedTokens, input.taskId);
   }
@@ -8362,9 +8417,17 @@ export function createContextSnapshot(input: CreateContextSnapshotInput): Contex
       .run(input.taskId);
   }
-  if (input.eventType === "completion" && input.contextTotalTokens != null) {
+  // Phase 10: set contextWindowSize on the FIRST snapshot that carries one
+  // (was previously gated on eventType === 'completion', meaning the UI saw
+  // NULL throughout running tasks). Subsequent snapshots leave it alone — the
+  // window doesn't change mid-session.
+  if (input.contextTotalTokens != null) {
     getDb()
-      .prepare("UPDATE agent_tasks SET contextWindowSize = ? WHERE id = ?")
+      .prepare(
+        `UPDATE agent_tasks
+         SET contextWindowSize = ?
+         WHERE id = ? AND contextWindowSize IS NULL`,
+      )
       .run(input.contextTotalTokens, input.taskId);
   }
@@ -8381,6 +8444,7 @@ export function createContextSnapshot(input: CreateContextSnapshotInput): Contex
     preCompactTokens: input.preCompactTokens,
     cumulativeInputTokens: input.cumulativeInputTokens ?? 0,
     cumulativeOutputTokens: input.cumulativeOutputTokens ?? 0,
+    contextFormula: input.contextFormula,
     createdAt: now,
   };
 }
@@ -8396,7 +8460,8 @@ export function getContextSnapshotsBySessionId(sessionId: string, limit = 500):
 export interface ContextSummary {
   compactionCount: number;
   peakContextPercent: number | null;
-  totalContextTokensUsed: number | null;
+  // Migration 063: renamed from totalContextTokensUsed.
+  peakContextTokens: number | null;
   contextWindowSize: number | null;
   snapshotCount: number;
 }
@@ -8412,7 +8477,7 @@ export function getContextSummaryByTaskId(taskId: string): ContextSummary {
   return {
     compactionCount: task?.compactionCount ?? 0,
     peakContextPercent: task?.peakContextPercent ?? null,
-    totalContextTokensUsed: task?.totalContextTokensUsed ?? null,
+    peakContextTokens: task?.peakContextTokens ?? null,
     contextWindowSize: task?.contextWindowSize ?? null,
     snapshotCount: countRow?.cnt ?? 0,
   };
@@ -8635,6 +8700,12 @@ export function getKeyCostSummary(keyType?: string): KeyCostSummary[] {
   }
   const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
+  // Phase 13: INNER JOIN -> LEFT JOIN. The `WHERE t.credentialKeySuffix IS NOT NULL`
+  // still filters out rows whose taskId doesn't link to a task with credentials,
+  // but switching to LEFT JOIN means a future change that drops the WHERE
+  // (or a debugging query that wants orphan rows visible) doesn't silently
+  // drop them. Equivalent for the current `WHERE … IS NOT NULL` filter;
+  // makes the query's intent (cost rows owned by a credential) explicit.
   return db
     .prepare<KeyCostSummary, string[]>(
       `SELECT
@@ -8645,7 +8716,7 @@ export function getKeyCostSummary(keyType?: string): KeyCostSummary[] {
         COALESCE(SUM(sc.outputTokens), 0) as totalOutputTokens,
         COUNT(DISTINCT sc.taskId) as taskCount
       FROM session_costs sc
-      JOIN agent_tasks t ON sc.taskId = t.id
+      LEFT JOIN agent_tasks t ON sc.taskId = t.id
       ${where}
       GROUP BY t.credentialKeyType, t.credentialKeySuffix`,
     )

package/src/be/migrations/063_cost_context_schema_relax.sql ADDED Viewed

@@ -0,0 +1,133 @@
+-- 063_cost_context_schema_relax.sql
+-- Phase 1 of the context & cost tracking fixes plan (2026-05-15).
+--
+-- This migration unblocks every downstream phase by:
+--   * Dropping the brittle CHECK constraints on `pricing.provider` and
+--     `pricing.token_class` so we can seed rows for all 7 providers
+--     (claude, claude-managed, codex, pi, opencode, devin, gemini) and the
+--     extra token classes (`cache_write`, `runtime_hour`, `acu`). Zod
+--     validation at the application boundary (`PricingProviderSchema`,
+--     `PricingTokenClassSchema` in `src/types.ts`) keeps the actual safety
+--     guarantee — the CHECKs added drift risk for no real benefit.
+--   * Renaming the misleading `agent_tasks.totalContextTokensUsed` column
+--     to `peakContextTokens` to match its new monotonic-max semantic
+--     (mirrors Claude Code's status-line "peak context" idea).
+--   * Recording the `contextFormula` used by the adapter that emitted a
+--     given snapshot so we can tell apples from oranges across providers.
+--   * Adding `reasoningOutputTokens` (codex reasoning models) and
+--     `thinkingTokens` (claude extended thinking) columns to `session_costs`
+--     so we stop dropping those numbers on the floor.
+--
+-- SQLite CHECK constraints can't be modified in place, so the `pricing` and
+-- `task_context_snapshots` shape changes use the standard
+-- create-new / copy / drop / rename dance. Existing rows are preserved.
+--
+-- Forward-only — no down migration. If you need to revert, write a new
+-- migration that walks the schema forward to the desired state.
+-- ---------------------------------------------------------------------------
+-- 1. Relax `pricing` CHECK constraints (drop them entirely; Zod validates).
+-- ---------------------------------------------------------------------------
+CREATE TABLE pricing_new (
+  provider TEXT NOT NULL,
+  model TEXT NOT NULL,
+  token_class TEXT NOT NULL,
+  effective_from INTEGER NOT NULL,
+  price_per_million_usd REAL NOT NULL,
+  createdAt INTEGER NOT NULL,
+  lastUpdatedAt INTEGER NOT NULL,
+  PRIMARY KEY (provider, model, token_class, effective_from)
+);
+INSERT INTO pricing_new (provider, model, token_class, effective_from, price_per_million_usd, createdAt, lastUpdatedAt)
+SELECT provider, model, token_class, effective_from, price_per_million_usd, createdAt, lastUpdatedAt
+FROM pricing;
+DROP TABLE pricing;
+ALTER TABLE pricing_new RENAME TO pricing;
+-- Re-create the index the original `pricing` table had (matches 046:54-55).
+CREATE INDEX IF NOT EXISTS idx_pricing_lookup
+  ON pricing (provider, model, token_class, effective_from DESC);
+-- ---------------------------------------------------------------------------
+-- 2. Rename agent_tasks.totalContextTokensUsed -> peakContextTokens.
+--    SQLite >= 3.25 supports RENAME COLUMN; bun:sqlite is well past that.
+-- ---------------------------------------------------------------------------
+ALTER TABLE agent_tasks RENAME COLUMN totalContextTokensUsed TO peakContextTokens;
+-- ---------------------------------------------------------------------------
+-- 3. Add contextFormula column to task_context_snapshots.
+--    Using a plain TEXT column (no CHECK) so the adapter side can add new
+--    formulas without an accompanying migration; Zod enum validates writes.
+--    Values today:
+--      'input-cache-output'    — unified formula (post-Phase 9)
+--      'input-cache-no-output' — pre-unification claude formula
+--      'input-output-no-cache' — pre-unification claude-managed formula
+--      'peak-proxy'            — pre-unification codex formula
+--      'pi-delegated'          — context numbers come from the pi-ai SDK
+--      'harness-reported'      — context numbers come from a harness API (devin)
+--      'unknown'               — pre-migration backfill or adapter didn't tag
+-- ---------------------------------------------------------------------------
+ALTER TABLE task_context_snapshots ADD COLUMN contextFormula TEXT;
+UPDATE task_context_snapshots SET contextFormula = 'unknown' WHERE contextFormula IS NULL;
+-- ---------------------------------------------------------------------------
+-- 4. Rewrite session_costs to:
+--    a) drop the costSource CHECK (we need 'unpriced' as a third value);
+--    b) add reasoningOutputTokens + thinkingTokens columns we previously
+--       dropped on the floor.
+--    SQLite can't relax a CHECK in-place — table-rewrite dance, same pattern
+--    as the pricing table above. FKs and indexes are restored after rename.
+-- ---------------------------------------------------------------------------
+CREATE TABLE session_costs_new (
+    id TEXT PRIMARY KEY,
+    sessionId TEXT NOT NULL,
+    taskId TEXT,
+    agentId TEXT NOT NULL,
+    totalCostUsd REAL NOT NULL,
+    inputTokens INTEGER NOT NULL DEFAULT 0,
+    outputTokens INTEGER NOT NULL DEFAULT 0,
+    cacheReadTokens INTEGER NOT NULL DEFAULT 0,
+    -- Migration 063: nullable, so a writer may store an explicit NULL when cache
+    -- writes are unknown (Codex SDK). An omitted value still falls back to DEFAULT 0.
+    cacheWriteTokens INTEGER DEFAULT 0,
+    durationMs INTEGER NOT NULL,
+    -- Migration 063: nullable. Claude when `num_turns` is absent can't honestly
+    -- report a turn count; null is preferred over a faked 1.
+    numTurns INTEGER,
+    model TEXT NOT NULL,
+    isError INTEGER NOT NULL DEFAULT 0,
+    costSource TEXT NOT NULL DEFAULT 'harness',
+    reasoningOutputTokens INTEGER NOT NULL DEFAULT 0,
+    thinkingTokens INTEGER NOT NULL DEFAULT 0,
+    createdAt TEXT NOT NULL,
+    FOREIGN KEY (agentId) REFERENCES agents(id) ON DELETE CASCADE,
+    FOREIGN KEY (taskId) REFERENCES agent_tasks(id) ON DELETE SET NULL
+);
+INSERT INTO session_costs_new (
+    id, sessionId, taskId, agentId, totalCostUsd,
+    inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens,
+    durationMs, numTurns, model, isError, costSource,
+    reasoningOutputTokens, thinkingTokens, createdAt
+)
+SELECT
+    id, sessionId, taskId, agentId, totalCostUsd,
+    inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens,
+    durationMs, numTurns, model, isError, costSource,
+    0, 0, createdAt
+FROM session_costs;
+DROP TABLE session_costs;
+ALTER TABLE session_costs_new RENAME TO session_costs;
+-- Recreate indexes (mirrors 001_initial.sql:360-363).
+CREATE INDEX IF NOT EXISTS idx_session_costs_createdAt ON session_costs(createdAt);
+CREATE INDEX IF NOT EXISTS idx_session_costs_taskId ON session_costs(taskId);
+CREATE INDEX IF NOT EXISTS idx_session_costs_agentId ON session_costs(agentId);
+CREATE INDEX IF NOT EXISTS idx_session_costs_agent_createdAt ON session_costs(agentId, createdAt);

package/src/be/migrations/064_scripts.sql ADDED Viewed

@@ -0,0 +1,39 @@
+CREATE TABLE scripts (
+  id TEXT PRIMARY KEY,
+  name TEXT NOT NULL,
+  scope TEXT NOT NULL CHECK(scope IN ('global', 'agent')),
+  scopeId TEXT,
+  source TEXT NOT NULL,
+  description TEXT NOT NULL,
+  intent TEXT NOT NULL,
+  signatureJson TEXT NOT NULL,
+  contentHash TEXT NOT NULL,
+  version INTEGER NOT NULL DEFAULT 1,
+  isScratch INTEGER NOT NULL DEFAULT 0,
+  typeChecked INTEGER NOT NULL DEFAULT 0,
+  fsMode TEXT NOT NULL DEFAULT 'none' CHECK(fsMode IN ('none', 'workspace-rw')),
+  createdByAgentId TEXT,
+  createdAt TEXT NOT NULL DEFAULT (datetime('now')),
+  updatedAt TEXT NOT NULL DEFAULT (datetime('now'))
+);
+CREATE UNIQUE INDEX idx_scripts_name_scope ON scripts(name, scope, COALESCE(scopeId, ''));
+CREATE INDEX idx_scripts_scope ON scripts(scope, scopeId);
+CREATE INDEX idx_scripts_scratch ON scripts(isScratch, createdAt);
+CREATE TABLE script_versions (
+  id TEXT PRIMARY KEY,
+  scriptId TEXT NOT NULL REFERENCES scripts(id) ON DELETE CASCADE,
+  version INTEGER NOT NULL,
+  source TEXT NOT NULL,
+  description TEXT NOT NULL,
+  intent TEXT NOT NULL,
+  signatureJson TEXT NOT NULL,
+  contentHash TEXT NOT NULL,
+  changedByAgentId TEXT,
+  changedAt TEXT NOT NULL DEFAULT (datetime('now')),
+  changeReason TEXT,
+  UNIQUE(scriptId, version)
+);
+CREATE INDEX idx_script_versions_hash ON script_versions(contentHash);

package/src/be/migrations/065_script_embeddings.sql ADDED Viewed

@@ -0,0 +1,7 @@
+CREATE TABLE script_embeddings (
+  scriptId TEXT PRIMARY KEY REFERENCES scripts(id) ON DELETE CASCADE,
+  embedding BLOB NOT NULL,
+  embeddingModel TEXT NOT NULL,
+  embeddedText TEXT NOT NULL,
+  embeddedAt TEXT NOT NULL DEFAULT (datetime('now'))
+);

package/src/be/pricing-normalize.ts ADDED Viewed

@@ -0,0 +1,81 @@
+/**
+ * Phase 2 fix — normalize provider model ids before pricing-table lookup.
+ *
+ * Different harnesses report the same underlying model under different keys:
+ *
+ *   - claude-adapter      → `claude-opus-4-7`               (bare)
+ *   - codex-adapter       → `gpt-5.4`                       (bare, dotted)
+ *   - opencode-adapter    → `openrouter/anthropic/claude-sonnet-4.5`
+ *   - pi-mono-adapter     → `github-copilot/gpt-5.4` or
+ *                            `openrouter/anthropic/claude-sonnet-4.5`
+ *
+ * The pricing seed in `src/be/seed-pricing.ts` keys by what models.dev calls
+ * the model (e.g. `anthropic/claude-sonnet-4.5` for openrouter rows,
+ * `gpt-5.4` for openai rows). That means harness-emitted ids with extra
+ * routing prefixes (`openrouter/`, `github-copilot/`, …) fall through to
+ * `costSource='unpriced'` even when we have a perfectly good rate row.
+ *
+ * Rather than rewriting the adapter outputs (which are the harness's source
+ * of truth and useful for debugging), we normalize at the *lookup boundary*:
+ * strip noisy routing prefixes so the seeded canonical key resolves.
+ *
+ * Apply this helper symmetrically: once when seeding rows (so seed keys are
+ * canonical) and once when querying (so adapter-emitted keys collapse onto
+ * the same canonical form).
+ */
+import type { PricingProvider } from "../types";
+/**
+ * Routing prefixes that a harness may prepend to the underlying model id but
+ * that have no pricing semantics. Stripping these collapses
+ * `openrouter/anthropic/claude-sonnet-4.5` → `anthropic/claude-sonnet-4.5`
+ * which is the key models.dev/openrouter uses.
+ *
+ * Order matters: we only ever strip the *first* matching prefix so we don't
+ * accidentally chew through a model id like `openai/openai-test-model`.
+ */
+const ROUTING_PREFIXES_BY_PROVIDER: Record<PricingProvider, readonly string[]> = {
+  // opencode routes via opencode-server which proxies to openrouter, anthropic,
+  // openai, … — strip whichever proxy prefix the user picked.
+  opencode: ["openrouter/", "github-copilot/"],
+  // pi-mono can hit openrouter mirrors, the github-copilot proxy, or native
+  // anthropic/openai/google providers.
+  pi: ["openrouter/", "github-copilot/"],
+  // codex normally reports a bare id, but a user may set MODEL_OVERRIDE to a
+  // prefixed form. Be forgiving on the lookup side.
+  codex: ["openai/", "github-copilot/"],
+  // claude / claude-managed / devin / gemini emit bare ids today. The empty
+  // list keeps the helper a no-op for them but the entry-per-provider shape
+  // means a future provider can opt in without changing call-sites.
+  claude: [],
+  "claude-managed": [],
+  devin: [],
+  gemini: [],
+};
+/**
+ * Canonical model key for a `(provider, model)` pair. Idempotent for ids with
+ * at most one routing prefix; stacked prefixes are stripped one per call.
+ *
+ * Rules:
+ *  1. Lowercase the input. Adapters sometimes pass mixed case (codex calls
+ *     `.toLowerCase()` itself; opencode/pi don't always).
+ *  2. Strip the first matching routing prefix for this provider, if any.
+ *
+ * We deliberately do NOT touch dotted-vs-dashed minor versions
+ * (`gpt-5.4` vs `gpt-5-4`) — both harness output and models.dev use dotted
+ * for openai and dashed for anthropic, so there's no real drift there.
+ */
+export function normalizeModelKey(provider: PricingProvider, model: string): string {
+  if (!model) return model;
+  let key = model.toLowerCase();
+  const prefixes = ROUTING_PREFIXES_BY_PROVIDER[provider] ?? [];
+  for (const prefix of prefixes) {
+    if (key.startsWith(prefix)) {
+      key = key.slice(prefix.length);
+      break;
+    }
+  }
+  return key;
+}