npm - @desplega.ai/agent-swarm - Versions diffs - 1.79.4 → 1.80.0 - Mend

@desplega.ai/agent-swarm 1.79.4 → 1.80.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

package/openapi.json +98 -19
package/package.json +12 -6
package/src/be/db.ts +101 -30
package/src/be/migrations/063_cost_context_schema_relax.sql +133 -0
package/src/be/pricing-normalize.ts +81 -0
package/src/be/seed-pricing.ts +293 -0
package/src/commands/claude-managed-setup.ts +19 -3
package/src/commands/runner.ts +592 -237
package/src/http/context.ts +6 -2
package/src/http/index.ts +115 -68
package/src/http/session-data.ts +74 -23
package/src/otel-impl.ts +200 -0
package/src/otel.ts +127 -0
package/src/providers/claude-adapter.ts +30 -5
package/src/providers/claude-managed-adapter.ts +43 -17
package/src/providers/claude-managed-pricing.ts +34 -0
package/src/providers/codex-adapter.ts +38 -27
package/src/providers/codex-models.ts +22 -3
package/src/providers/devin-adapter.ts +11 -0
package/src/providers/opencode-adapter.ts +31 -7
package/src/providers/pi-mono-adapter.ts +39 -7
package/src/providers/pricing-sources.md +52 -0
package/src/providers/swarm-events-shared.ts +8 -4
package/src/providers/types.ts +33 -10
package/src/server.ts +6 -0
package/src/tests/claude-managed-adapter.test.ts +17 -3
package/src/tests/claude-managed-setup.test.ts +10 -1
package/src/tests/codex-adapter.test.ts +20 -19
package/src/tests/context-snapshot.test.ts +2 -2
package/src/tests/context-window.test.ts +65 -1
package/src/tests/devin-adapter.test.ts +2 -0
package/src/tests/http/context-routes.test.ts +161 -0
package/src/tests/migration-063-schema-relax.test.ts +109 -0
package/src/tests/opencode-adapter.test.ts +146 -1
package/src/tests/otel-impl-secret-scrubbing.test.ts +33 -0
package/src/tests/pages-view-count.test.ts +30 -5
package/src/tests/providers/codex-cost.test.ts +18 -0
package/src/tests/providers/opencode-cost.test.ts +74 -0
package/src/tests/providers/pi-cost.test.ts +128 -0
package/src/tests/secret-scrubber.test.ts +19 -0
package/src/tests/session-costs-codex-recompute.test.ts +35 -22
package/src/tests/session-costs-model-key-normalize.test.ts +271 -0
package/src/tests/session-costs-recompute-all-providers.test.ts +170 -0
package/src/tests/store-progress-cost.test.ts +6 -1
package/src/tools/store-progress.ts +16 -60
package/src/tools/utils.ts +65 -12
package/src/types.ts +62 -9
package/src/utils/context-window.ts +104 -4
package/src/utils/secret-scrubber.ts +7 -0

package/src/providers/pi-mono-adapter.ts CHANGED Viewed

@@ -264,7 +264,7 @@ function cleanupAgentsMdSymlink(cwd: string): void {
   }
 }
-class PiMonoSession implements ProviderSession {
+export class PiMonoSession implements ProviderSession {
   private listeners: Array<(event: ProviderEvent) => void> = [];
   private eventQueue: ProviderEvent[] = [];
   private _sessionId: string | undefined;
@@ -275,6 +275,14 @@ class PiMonoSession implements ProviderSession {
   private logFileHandle: ReturnType<ReturnType<typeof Bun.file>["writer"]>;
   /** Track last emitted message text to avoid duplicates across turns */
   private lastEmittedMessage = "";
+  /** Phase 7: wallclock start so we can populate `durationMs` on the cost row. */
+  private sessionStartedAt: number = Date.now();
+  /**
+   * Phase 7: previous output-token total — used to derive per-turn delta for
+   * `context_usage.outputTokens` since pi-ai's `getContextUsage()` doesn't
+   * surface it directly.
+   */
+  private prevOutputTokens = 0;
   constructor(agentSession: AgentSession, config: ProviderSessionConfig, createdSymlink: boolean) {
     this.agentSession = agentSession;
@@ -282,6 +290,7 @@ class PiMonoSession implements ProviderSession {
     this.createdSymlink = createdSymlink;
     this.logFileHandle = Bun.file(config.logFile).writer();
     this._sessionId = agentSession.sessionId;
+    this.sessionStartedAt = Date.now();
     // Emit session_init immediately
     this.emit({ type: "session_init", sessionId: this._sessionId, provider: "pi" });
@@ -293,6 +302,18 @@ class PiMonoSession implements ProviderSession {
     this.completionPromise = this.runSession();
   }
+  /**
+   * Canonical model slug for downstream reporting (latestModel, raw_log envelopes).
+   * Composes `${provider}/${id}` from the resolved pi-ai model so the UI snapshot
+   * lookup matches (e.g. `openrouter/deepseek/deepseek-v4-flash`). Falls back to
+   * the configured model string if the session didn't resolve one.
+   */
+  private reportedModel(): string {
+    const m = this.agentSession.model;
+    if (m) return `${m.provider}/${m.id}`;
+    return this.config.model;
+  }
   private emit(event: ProviderEvent): void {
     // Scrub secrets from raw_log / raw_stderr content before egress (log file
     // write, listener dispatch, downstream session-logs push + pretty-print).
@@ -329,7 +350,7 @@ class PiMonoSession implements ProviderSession {
                 .trim()
             : String(msg.content || "").trim();
           if (text && text !== this.lastEmittedMessage) {
-            const model = this.agentSession.model?.name ?? this.config.model;
+            const model = this.reportedModel();
             this.emit({
               type: "raw_log",
               content: JSON.stringify({
@@ -344,21 +365,30 @@ class PiMonoSession implements ProviderSession {
             this.lastEmittedMessage = text;
           }
         }
-        // Emit context_usage for dashboard tracking
+        // Emit context_usage for dashboard tracking.
+        // Phase 7: derive `outputTokens` from `SessionStats` delta (pi-ai's
+        // `getContextUsage()` doesn't expose per-turn output tokens, but the
+        // session-stats counter is monotonic so a delta is correct).
         const usage = this.agentSession.getContextUsage();
         if (usage && usage.tokens != null) {
+          const stats = this.agentSession.getSessionStats();
+          const currOutput = stats?.tokens?.output ?? 0;
+          const outputDelta = Math.max(0, currOutput - this.prevOutputTokens);
+          this.prevOutputTokens = currOutput;
           this.emit({
             type: "context_usage",
             contextUsedTokens: usage.tokens,
             contextTotalTokens: usage.contextWindow,
             contextPercent: usage.percent ?? 0,
-            outputTokens: 0,
+            outputTokens: outputDelta,
+            // Phase 9: pi-ai owns the formula — we just relay its number.
+            contextFormula: "pi-delegated",
           });
         }
         break;
       }
       case "tool_execution_start": {
-        const model = this.agentSession.model?.name ?? this.config.model;
+        const model = this.reportedModel();
         this.emit({
           type: "raw_log",
           content: JSON.stringify({
@@ -489,9 +519,11 @@ class PiMonoSession implements ProviderSession {
       outputTokens: stats.tokens.output,
       cacheReadTokens: stats.tokens.cacheRead,
       cacheWriteTokens: stats.tokens.cacheWrite,
-      durationMs: 0, // Not directly available from SessionStats
+      // Phase 7: real wallclock duration; pi-ai SessionStats doesn't carry
+      // one so we track it on this adapter instance.
+      durationMs: Date.now() - this.sessionStartedAt,
       numTurns: stats.userMessages + stats.assistantMessages,
-      model: this.agentSession.model?.name ?? this.config.model,
+      model: this.reportedModel(),
       isError: false,
       provider: "pi",
     };

package/src/providers/pricing-sources.md ADDED Viewed

@@ -0,0 +1,52 @@
+# Pricing sources
+This page lists the sources that feed the `pricing` table at server boot.
+Operators bumping a rate by hand should also update this file.
+## Primary: vendored models.dev snapshot
+- **Path**: `ui/src/lib/modelsdev-cache.json`
+- **Loaded by**: `src/be/seed-pricing.ts` → `seedPricingFromModelsDev()`,
+  called from `src/server.ts` after `initDb`.
+- **Projection rules** (see the same module for code-level detail):
+  - Anthropic models → rows under `provider='claude'` AND `provider='claude-managed'`.
+    Shortnames (`opus`, `sonnet`, `haiku`) ALSO get rows keyed by the current
+    default full id (e.g. `opus → claude-opus-4-7`). Pi-mono uses the same
+    shortname forms, so they're projected under `provider='pi'` as well.
+  - OpenAI models → rows under `provider='codex'`.
+  - OpenRouter models → rows under `provider='opencode'`. Any `google/...`
+    row additionally gets projected under `provider='gemini'` (both the
+    stripped name and the full `google/...` id) so internal-ai callers find
+    a hit either way.
+- **Refresh procedure** (the only place to update the snapshot):
+  - Run `bun run scripts/refresh-modelsdev-pricing.ts` (the script is added
+    in Phase 2). It fetches the latest snapshot from models.dev, diffs it
+    against the vendored copy, prints a summary, and writes the new file.
+  - Commit the regenerated `modelsdev-cache.json` together with a bump
+    note in the PR description.
+## Manual overrides
+Two cost components that models.dev doesn't carry are encoded in
+`MANUAL_PRICING_OVERRIDES` inside `src/be/seed-pricing.ts`:
+| Provider         | Model | Token class    | Rate         | Source                                                                         | Verified   |
+|------------------|-------|----------------|--------------|---------------------------------------------------------------------------------|------------|
+| `claude-managed` | `*`   | `runtime_hour` | $0.08 / hour | <https://docs.claude.com/en/api/agent-sdk/managed-runtime#pricing>             | 2026-04-28 |
+| `devin`          | `*`   | `acu`          | $2.25 / ACU  | <https://devin.ai/pricing>                                                      | 2026-04-28 |
+The `pricePerMillionUsd` column carries these as `rate * 1_000_000` so the
+same schema fits — the adapter scales by the underlying unit (hours / ACUs),
+not by tokens. The unit convention is specific to those `token_class` values.
+## When a model is missing
+If `POST /api/session-costs` arrives with a `(provider, model)` pair that has
+no input/output pricing rows at lookup time, the row is persisted with
+`costSource='unpriced'` (rather than `'harness'`). The UI surfaces this as a
+yellow badge.
+To fix: either add the model to `modelsdev-cache.json` (preferred — the
+upstream snapshot probably needs refreshing) or add a manual override row via
+the existing admin route `POST /api/pricing`.

package/src/providers/swarm-events-shared.ts CHANGED Viewed

@@ -167,8 +167,10 @@ export function createSwarmEventHandler(
   const progressContextUsage = (event: {
     contextUsedTokens: number;
-    contextTotalTokens: number;
-    contextPercent: number;
+    // Migration 063: nullable for adapters that can't resolve a window.
+    contextTotalTokens: number | null;
+    contextPercent: number | null;
+    contextFormula?: string;
   }): void => {
     if (opts.taskId && shouldRun("context-progress", CONTEXT_THROTTLE_MS)) {
       fireAndForget(`${opts.apiUrl}/api/tasks/${encodeURIComponent(opts.taskId)}/context`, {
@@ -178,8 +180,9 @@ export function createSwarmEventHandler(
           eventType: "progress",
           sessionId: sessionId ?? `${opts.sessionIdFallbackPrefix ?? "session"}-${opts.taskId}`,
           contextUsedTokens: event.contextUsedTokens,
-          contextTotalTokens: event.contextTotalTokens,
-          contextPercent: event.contextPercent,
+          contextTotalTokens: event.contextTotalTokens ?? undefined,
+          contextPercent: event.contextPercent ?? undefined,
+          contextFormula: event.contextFormula,
         }),
       });
     }
@@ -239,6 +242,7 @@ export function createSwarmEventHandler(
             contextUsedTokens: event.contextUsedTokens,
             contextTotalTokens: event.contextTotalTokens,
             contextPercent: event.contextPercent,
+            contextFormula: event.contextFormula,
           });
           break;
         }

package/src/providers/types.ts CHANGED Viewed

@@ -7,18 +7,30 @@ export interface CostData {
   inputTokens?: number;
   outputTokens?: number;
   cacheReadTokens?: number;
+  /**
+   * Migration 063: undefined means "the harness can't report this" (e.g. the
+   * Codex SDK has no cache-write field). Zero is reserved for "really zero".
+   */
   cacheWriteTokens?: number;
+  /** Migration 063: codex reasoning_output_tokens (and similar) for reasoning models. */
+  reasoningOutputTokens?: number;
+  /** Migration 063: claude extended-thinking tokens from CLI's `usage.thinking_input_tokens`. */
+  thinkingTokens?: number;
   durationMs: number;
-  numTurns: number;
+  /**
+   * Migration 063: nullable — some adapters (claude when `num_turns` is absent)
+   * can't honestly report a turn count; null is preferred over a faked 1.
+   */
+  numTurns: number | null;
   model: string;
   isError: boolean;
   /**
-   * Phase 6: tells the API which recompute path to use on
-   * `POST /api/session-costs`. Codex triggers the pricing-table recompute
-   * (when DB pricing rows exist for all three token classes); Claude / pi
-   * always trust the harness-reported `totalCostUsd` as-is.
+   * Phase 6 (extended migration 063): tells the API which recompute path to
+   * use on `POST /api/session-costs`. After Phase 2 the recompute path runs
+   * for every provider with seeded pricing rows, so every adapter should
+   * populate this field.
    */
-  provider?: "claude" | "codex" | "pi" | "opencode";
+  provider?: "claude" | "claude-managed" | "codex" | "pi" | "opencode" | "devin";
 }
 import type { ProviderName } from "../types";
@@ -43,14 +55,25 @@ export type ProviderEvent =
   | {
       type: "context_usage";
       contextUsedTokens: number;
-      contextTotalTokens: number;
-      contextPercent: number;
-      outputTokens: number;
+      // Migration 063: nullable so adapters (e.g. devin without a context API)
+      // can emit a snapshot that records cumulative tokens without faking a window.
+      contextTotalTokens: number | null;
+      // Migration 063: null if contextTotalTokens is missing (no divide-by-zero).
+      contextPercent: number | null;
+      // Migration 063: null when the adapter can't honestly report output tokens.
+      outputTokens: number | null;
+      /**
+       * Migration 063 — the formula the adapter used to compute
+       * contextUsedTokens. See `ContextFormulaSchema` in `src/types.ts` for the
+       * canonical value list. Adapters should always populate this going
+       * forward; it powers cross-provider apples-to-apples comparison.
+       */
+      contextFormula?: string;
     }
   | {
       type: "compaction";
       preCompactTokens: number;
-      compactTrigger: "auto" | "manual";
+      compactTrigger: "auto" | "manual" | "auto-inferred";
       contextTotalTokens: number;
     };

package/src/server.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
 import pkg from "../package.json";
 import { initDb } from "./be/db";
+import { seedPricingFromModelsDev } from "./be/seed-pricing";
 import { registerCancelTaskTool } from "./tools/cancel-task";
 import { registerContextDiffTool } from "./tools/context-diff";
 import { registerContextHistoryTool } from "./tools/context-history";
@@ -147,6 +148,11 @@ export function createServer() {
   // Initialize database with WAL mode
   // Uses DATABASE_PATH env var for Docker volume compatibility (WAL needs .sqlite, .sqlite-wal, .sqlite-shm on same filesystem)
   initDb(process.env.DATABASE_PATH);
+  // Phase 2: project the vendored models.dev snapshot into the pricing table.
+  // Idempotent (INSERT OR IGNORE keyed on PK with effective_from=0); safe to
+  // call on every boot. See src/be/seed-pricing.ts for the projection logic
+  // and the manual-override constants for runtime-fee / ACU pricing.
+  seedPricingFromModelsDev();
   const server = new McpServer(
     {

package/src/tests/claude-managed-adapter.test.ts CHANGED Viewed

@@ -319,11 +319,14 @@ describe("ClaudeManagedAdapter (Phase 3) — session lifecycle", () => {
     }
     // context_usage emitted on span.model_request_end.
+    // Phase 5 / Phase 9 unified formula = input + cache_read + cache_write + output.
     const ctx = emitted.find((e) => e.type === "context_usage");
     expect(ctx).toBeDefined();
     if (ctx && ctx.type === "context_usage") {
-      expect(ctx.contextUsedTokens).toBe(150); // 100 input + 50 output
+      expect(ctx.contextUsedTokens).toBe(165); // 100 + 10 + 5 + 50
       expect(ctx.outputTokens).toBe(50);
+      // Phase 9: every snapshot carries the formula tag.
+      expect(ctx.contextFormula).toBe("input-cache-output");
     }
     // result emitted with accumulated cost. Phase 3 leaves totalCostUsd at 0
@@ -345,6 +348,8 @@ describe("ClaudeManagedAdapter (Phase 3) — session lifecycle", () => {
       expect(resultEvent.cost.totalCostUsd).toBeGreaterThanOrEqual(0);
       expect(Number.isFinite(resultEvent.cost.totalCostUsd)).toBe(true);
       expect(resultEvent.output).toBe("Hello from managed agent");
+      // Phase 3 — provider tag is required so the API recompute path engages.
+      expect(resultEvent.cost.provider).toBe("claude-managed");
     }
     // ProviderResult.
@@ -644,17 +649,24 @@ describe("ClaudeManagedAdapter (Phase 4) — repo provisioning + cost data", ()
     process.env.ANTHROPIC_API_KEY = "sk-test";
     process.env.MANAGED_AGENT_ID = "agent_x";
     process.env.MANAGED_ENVIRONMENT_ID = "env_x";
+    // Defensive: vault env vars may leak in from the host .env (Bun auto-loads
+    // it); each vault-related test sets exactly what it asserts on.
+    delete process.env.MANAGED_GITHUB_TOKEN;
+    delete process.env.MANAGED_GITHUB_VAULT_ID;
+    delete process.env.MANAGED_MCP_VAULT_ID;
   });
   afterAll(() => {
     rmSync(tmpLogDir, { recursive: true, force: true });
     delete process.env.MANAGED_GITHUB_TOKEN;
     delete process.env.MANAGED_GITHUB_VAULT_ID;
+    delete process.env.MANAGED_MCP_VAULT_ID;
   });
   afterEach(() => {
     delete process.env.MANAGED_GITHUB_TOKEN;
     delete process.env.MANAGED_GITHUB_VAULT_ID;
+    delete process.env.MANAGED_MCP_VAULT_ID;
   });
   test("normalizeRepoUrl: passes through https URLs and expands owner/repo shorthand", () => {
@@ -1266,9 +1278,11 @@ describe("ClaudeManagedAdapter (Phase 6) — full happy-path integration", () =>
     const ctxUsage = emitted.find((e) => e.type === "context_usage");
     expect(ctxUsage?.type).toBe("context_usage");
     if (ctxUsage?.type === "context_usage") {
-      // 1M input + 200k output = 1.2M used; output = 200k.
-      expect(ctxUsage.contextUsedTokens).toBe(1_200_000);
+      // Phase 5 / Phase 9 unified: input + cache_read + cache_write + output.
+      // 1M + 50k + 25k + 200k = 1,275,000.
+      expect(ctxUsage.contextUsedTokens).toBe(1_275_000);
       expect(ctxUsage.outputTokens).toBe(200_000);
+      expect(ctxUsage.contextFormula).toBe("input-cache-output");
     }
     // The terminal `result` ProviderEvent — the contract Phase 4 hardened —

package/src/tests/claude-managed-setup.test.ts CHANGED Viewed

@@ -88,13 +88,22 @@ describe("runClaudeManagedSetupFlow — happy path", () => {
     const agentCallArgs = agentsCreate.mock.calls[0]?.[0] as {
       name: string;
       model: string;
-      tools: Array<{ type: string }>;
+      tools: Array<{
+        type: string;
+        default_config?: { permission_policy?: { type: string } };
+      }>;
       skills: Array<{ type: string; skill_id: string }>;
       mcp_servers: Array<{ name: string; type: string; url: string }>;
     };
     expect(agentCallArgs.name).toBe("swarm-worker");
     expect(agentCallArgs.model).toBe("claude-sonnet-4-6");
     expect(agentCallArgs.tools[0]?.type).toBe("agent_toolset_20260401");
+    // Headless workers can't approve interactively — both toolsets must be
+    // configured with `always_allow` so the sandbox executes tool calls
+    // without parking them in `awaiting approval`.
+    for (const tool of agentCallArgs.tools) {
+      expect(tool.default_config?.permission_policy?.type).toBe("always_allow");
+    }
     expect(agentCallArgs.skills.map((s) => s.skill_id)).toEqual([
       "skill_work-on-task",
       "skill_create-pr",

package/src/tests/codex-adapter.test.ts CHANGED Viewed

@@ -191,16 +191,16 @@ describe("CodexSession event mapping", () => {
       expect(messages[0].content).toBe("Hello from codex");
     }
-    // context_usage event fired with the *uncached + output* peak proxy
-    // (input=100, cached=25, output=50 → uncached=75 → peak=125)
-    // contextPercent is on a 0-100 scale (claude/pi convention).
+    // Phase 9: unified `input + output` formula (Codex `input_tokens` already
+    // includes cached input, so we don't add cache_read separately).
+    // input=100 + output=50 → contextUsed=150.
     const contextUsage = emitted.find((e) => e.type === "context_usage");
     expect(contextUsage).toBeDefined();
     if (contextUsage && contextUsage.type === "context_usage") {
-      expect(contextUsage.contextUsedTokens).toBe(125);
+      expect(contextUsage.contextUsedTokens).toBe(150);
       expect(contextUsage.contextTotalTokens).toBe(200_000);
-      // 125 / 200_000 × 100 = 0.0625
-      expect(contextUsage.contextPercent).toBeCloseTo((125 / 200_000) * 100, 6);
+      expect(contextUsage.contextPercent).toBeCloseTo((150 / 200_000) * 100, 6);
+      expect(contextUsage.contextFormula).toBe("input-cache-output");
     }
     // result event is final and non-error, with cost computed from token counts
@@ -225,14 +225,15 @@ describe("CodexSession event mapping", () => {
     expect(result.sessionId).toBe("thread-abc");
   });
-  test("chatty turn: peakContextPercent uses uncached + output, not raw input_tokens", async () => {
-    // Reproduces the verify-plan finding: a chatty turn where the SDK reports
-    // input_tokens far in excess of the model's context window because the
-    // total represents the SUM of every prompt across all model invocations
-    // in the turn (with cache reuses billed at every roundtrip). Without the
-    // peak-proxy fix this would clamp `contextPercent` to 1.0 even though no
-    // single model call hit the limit. Use realistic numbers from the actual
-    // E2E lead transcript captured during verification.
+  test("Phase 9: chatty turn clamps contextPercent to 100% under the unified formula", async () => {
+    // Phase 9 deliberately swapped Codex's per-adapter peak-proxy formula
+    // (`(input - cached) + output`) for the unified `input + output` formula
+    // shared with every other provider. The trade-off: a chatty Codex turn
+    // — where `input_tokens` is the SUM across every model call in the turn
+    // — over-reports compared to the peak-proxy variant. The clamp at 100%
+    // keeps the gauge sensible; downstream consumers reading the new
+    // `contextFormula='input-cache-output'` tag know it's apples-to-apples
+    // across providers. Numbers below are from the verify-plan transcript.
     const agentMsg: AgentMessageItem = {
       id: "msg-1",
       type: "agent_message",
@@ -262,12 +263,12 @@ describe("CodexSession event mapping", () => {
     const contextUsage = emitted.find((e) => e.type === "context_usage");
     expect(contextUsage).toBeDefined();
     if (contextUsage && contextUsage.type === "context_usage") {
-      // peak proxy = (357142 - 278912) + 2156 = 78230 + 2156 = 80386
-      expect(contextUsage.contextUsedTokens).toBe(80386);
+      // Phase 9 unified: input + output = 357142 + 2156 = 359298 (above 200k).
+      expect(contextUsage.contextUsedTokens).toBe(359298);
       expect(contextUsage.contextTotalTokens).toBe(200_000);
-      // 80386 / 200000 × 100 = 40.193 — on the 0-100 scale, NOT clamped to 100
-      expect(contextUsage.contextPercent).toBeCloseTo(40.193, 2);
-      expect(contextUsage.contextPercent).toBeLessThan(100);
+      // Above 100% raw → clamped to exactly 100.
+      expect(contextUsage.contextPercent).toBe(100);
+      expect(contextUsage.contextFormula).toBe("input-cache-output");
     }
     // Cost still uses the full input_tokens — billing semantics are

package/src/tests/context-snapshot.test.ts CHANGED Viewed

@@ -82,7 +82,7 @@ describe("Context Snapshots", () => {
     // The summary should preserve the last known context usage, not null/0
     const summary = getContextSummaryByTaskId(taskId);
-    expect(summary.totalContextTokensUsed).toBe(80000);
+    expect(summary.peakContextTokens).toBe(80000);
     expect(summary.contextWindowSize).toBe(200000);
     expect(summary.peakContextPercent).toBe(40);
   });
@@ -113,7 +113,7 @@ describe("Context Snapshots", () => {
     });
     const summary = getContextSummaryByTaskId(task2.id);
-    expect(summary.totalContextTokensUsed).toBe(60000);
+    expect(summary.peakContextTokens).toBe(60000);
     expect(summary.contextWindowSize).toBe(200000);
   });

package/src/tests/context-window.test.ts CHANGED Viewed

@@ -1,8 +1,15 @@
 import { describe, expect, test } from "bun:test";
-import { computeContextUsed, getContextWindowSize } from "../utils/context-window";
+import {
+  CONTEXT_FORMULA,
+  clampContextPercent,
+  computeContextUsed,
+  computeContextUsedUnified,
+  getContextWindowSize,
+} from "../utils/context-window";
 describe("getContextWindowSize", () => {
   test("returns 1M for opus models", () => {
+    expect(getContextWindowSize("claude-opus-4-7")).toBe(1_000_000);
     expect(getContextWindowSize("claude-opus-4-6")).toBe(1_000_000);
     expect(getContextWindowSize("opus")).toBe(1_000_000);
   });
@@ -26,6 +33,20 @@ describe("getContextWindowSize", () => {
   test("returns default entry value", () => {
     expect(getContextWindowSize("default")).toBe(200_000);
   });
+  test("Phase 4: dated full ids resolve via date-suffix stripping", () => {
+    // The regression this fixes: pre-Phase 4 these all fell to the 200k
+    // default, wildly understating opus/sonnet 4.x context.
+    expect(getContextWindowSize("claude-sonnet-4-6-20251004")).toBe(1_000_000);
+    expect(getContextWindowSize("claude-opus-4-7-20251201")).toBe(1_000_000);
+    expect(getContextWindowSize("claude-haiku-4-5-20251001")).toBe(200_000);
+  });
+  test("Phase 4: legacy 3.x family ids resolve", () => {
+    expect(getContextWindowSize("claude-3-5-sonnet")).toBe(200_000);
+    expect(getContextWindowSize("claude-3-5-sonnet-20241022")).toBe(200_000);
+    expect(getContextWindowSize("claude-3-opus")).toBe(200_000);
+  });
 });
 describe("computeContextUsed", () => {
@@ -64,3 +85,46 @@ describe("computeContextUsed", () => {
     ).toBe(5000);
   });
 });
+describe("computeContextUsedUnified (Phase 9 unified formula)", () => {
+  test("sums input + cache_read + cache_create + output", () => {
+    expect(
+      computeContextUsedUnified({
+        inputTokens: 1000,
+        cacheReadTokens: 200,
+        cacheCreateTokens: 300,
+        outputTokens: 500,
+      }),
+    ).toBe(2000);
+  });
+  test("treats missing/null fields as zero", () => {
+    expect(computeContextUsedUnified({})).toBe(0);
+    expect(computeContextUsedUnified({ inputTokens: 100, outputTokens: null })).toBe(100);
+  });
+});
+describe("clampContextPercent (Phase 9)", () => {
+  test("returns the clamped percent for valid inputs", () => {
+    expect(clampContextPercent(50_000, 200_000)).toBe(25);
+    expect(clampContextPercent(0, 200_000)).toBe(0);
+  });
+  test("clamps to [0, 100]", () => {
+    expect(clampContextPercent(500_000, 200_000)).toBe(100);
+    expect(clampContextPercent(-10, 200_000)).toBe(0);
+  });
+  test("returns null for missing/zero/negative total (no divide-by-zero NaN)", () => {
+    expect(clampContextPercent(100, 0)).toBeNull();
+    expect(clampContextPercent(100, null)).toBeNull();
+    expect(clampContextPercent(100, undefined)).toBeNull();
+    expect(clampContextPercent(100, -1)).toBeNull();
+  });
+});
+describe("CONTEXT_FORMULA constant", () => {
+  test("is 'input-cache-output' so adapters stamp the same value on snapshots", () => {
+    expect(CONTEXT_FORMULA).toBe("input-cache-output");
+  });
+});

package/src/tests/devin-adapter.test.ts CHANGED Viewed

@@ -598,6 +598,8 @@ describe("CostData mapping", () => {
       expect(resultEvent.cost.model).toBe("devin");
       expect(resultEvent.cost.inputTokens).toBe(0);
       expect(resultEvent.cost.outputTokens).toBe(0);
+      // Phase 3 — provider tag is required so the API recompute path engages.
+      expect(resultEvent.cost.provider).toBe("devin");
     }
   });