npm - @desplega.ai/agent-swarm - Versions diffs - 1.79.4 → 1.80.0 - Mend

@desplega.ai/agent-swarm 1.79.4 → 1.80.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

package/openapi.json +98 -19
package/package.json +12 -6
package/src/be/db.ts +101 -30
package/src/be/migrations/063_cost_context_schema_relax.sql +133 -0
package/src/be/pricing-normalize.ts +81 -0
package/src/be/seed-pricing.ts +293 -0
package/src/commands/claude-managed-setup.ts +19 -3
package/src/commands/runner.ts +592 -237
package/src/http/context.ts +6 -2
package/src/http/index.ts +115 -68
package/src/http/session-data.ts +74 -23
package/src/otel-impl.ts +200 -0
package/src/otel.ts +127 -0
package/src/providers/claude-adapter.ts +30 -5
package/src/providers/claude-managed-adapter.ts +43 -17
package/src/providers/claude-managed-pricing.ts +34 -0
package/src/providers/codex-adapter.ts +38 -27
package/src/providers/codex-models.ts +22 -3
package/src/providers/devin-adapter.ts +11 -0
package/src/providers/opencode-adapter.ts +31 -7
package/src/providers/pi-mono-adapter.ts +39 -7
package/src/providers/pricing-sources.md +52 -0
package/src/providers/swarm-events-shared.ts +8 -4
package/src/providers/types.ts +33 -10
package/src/server.ts +6 -0
package/src/tests/claude-managed-adapter.test.ts +17 -3
package/src/tests/claude-managed-setup.test.ts +10 -1
package/src/tests/codex-adapter.test.ts +20 -19
package/src/tests/context-snapshot.test.ts +2 -2
package/src/tests/context-window.test.ts +65 -1
package/src/tests/devin-adapter.test.ts +2 -0
package/src/tests/http/context-routes.test.ts +161 -0
package/src/tests/migration-063-schema-relax.test.ts +109 -0
package/src/tests/opencode-adapter.test.ts +146 -1
package/src/tests/otel-impl-secret-scrubbing.test.ts +33 -0
package/src/tests/pages-view-count.test.ts +30 -5
package/src/tests/providers/codex-cost.test.ts +18 -0
package/src/tests/providers/opencode-cost.test.ts +74 -0
package/src/tests/providers/pi-cost.test.ts +128 -0
package/src/tests/secret-scrubber.test.ts +19 -0
package/src/tests/session-costs-codex-recompute.test.ts +35 -22
package/src/tests/session-costs-model-key-normalize.test.ts +271 -0
package/src/tests/session-costs-recompute-all-providers.test.ts +170 -0
package/src/tests/store-progress-cost.test.ts +6 -1
package/src/tools/store-progress.ts +16 -60
package/src/tools/utils.ts +65 -12
package/src/types.ts +62 -9
package/src/utils/context-window.ts +104 -4
package/src/utils/secret-scrubber.ts +7 -0

package/src/otel.ts ADDED Viewed

@@ -0,0 +1,127 @@
+/** One span attribute value: a primitive, or an array holding a single primitive kind. */
+export type AttributeValue =
+  | string
+  | number
+  | boolean
+  | string[]
+  | number[]
+  | boolean[];
+/** Attribute bag keyed by attribute name; a key may map to `undefined`. */
+export type Attributes = Record<string, AttributeValue | undefined>;
+/** Payload accepted by `SwarmSpan.setStatus`: a numeric code and an optional message. */
+type SpanStatus = { code: number; message?: string };
+/**
+ * Span surface handed to callers of this module.
+ *
+ * `setAttribute`, `setAttributes`, `addEvent` and `setStatus` return a
+ * `SwarmSpan`, so calls can be chained; `recordException` and `end` return
+ * nothing.
+ */
+export type SwarmSpan = {
+  setAttribute: (key: string, value: AttributeValue) => SwarmSpan;
+  setAttributes: (attributes: Attributes) => SwarmSpan;
+  addEvent: (name: string, attributes?: Attributes) => SwarmSpan;
+  recordException: (error: unknown) => void;
+  setStatus: (status: SpanStatus) => SwarmSpan;
+  end: () => void;
+};
+/**
+ * Evaluated once when this module loads: true when
+ * `OTEL_EXPORTER_OTLP_ENDPOINT` is set to a non-empty string.
+ */
+const enabled = !!process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
+/**
+ * Do-nothing span returned whenever tracing is off or not yet initialised.
+ * The chainable methods hand back this same object; the others are empty.
+ */
+const NOOP_SPAN: SwarmSpan = {
+  setAttribute() {
+    return NOOP_SPAN;
+  },
+  setAttributes() {
+    return NOOP_SPAN;
+  },
+  addEvent() {
+    return NOOP_SPAN;
+  },
+  recordException() {},
+  setStatus() {
+    return NOOP_SPAN;
+  },
+  end() {},
+};
+// Signatures of the implementation functions that `initOtel` binds from
+// `./otel-impl`. Each mirrors the public wrapper of the same name below.
+type WithSpanFn = <T>(
+  name: string,
+  fn: (span: SwarmSpan) => Promise<T> | T,
+  attributes?: Attributes,
+) => Promise<T>;
+type StartSpanFn = (name: string, attributes?: Attributes) => SwarmSpan;
+type WithRemoteContextFn = <T>(
+  carrier: Record<string, unknown>,
+  fn: () => Promise<T> | T,
+) => Promise<T>;
+type WithSpanContextFn = <T>(span: SwarmSpan, fn: () => T) => T;
+type InjectTraceContextFn = (headers: Record<string, string>) => Record<string, string>;
+type ShutdownFn = () => Promise<void>;
+// Set to true by the first `initOtel` call that gets past its guard.
+let initialized = false;
+// Implementation bindings; each stays `undefined` until `initOtel` assigns it.
+let realWithSpan: WithSpanFn | undefined;
+let realStartSpan: StartSpanFn | undefined;
+let realWithRemoteContext: WithRemoteContextFn | undefined;
+let realWithSpanContext: WithSpanContextFn | undefined;
+let realInjectTraceContext: InjectTraceContextFn | undefined;
+let realShutdown: ShutdownFn | undefined;
+/**
+ * Reports the module-load-time `enabled` flag (derived from
+ * `OTEL_EXPORTER_OTLP_ENDPOINT`). It does not reflect whether `initOtel`
+ * has run or succeeded.
+ */
+export function isOtelEnabled(): boolean {
+  return enabled;
+}
+/**
+ * Loads `./otel-impl`, boots it, and binds its functions to the module-level
+ * `real*` slots used by the wrappers in this file.
+ *
+ * Returns immediately when tracing is disabled or a previous call already
+ * passed this guard. `initialized` is set before the dynamic import, so a
+ * failed attempt is not retried by later calls; the failure is logged as a
+ * warning and the wrappers keep their no-op behaviour.
+ *
+ * @param serviceRole Role passed to `impl.boot`; defaults to `AGENT_ROLE`,
+ *   or `"api"` when that is unset or empty.
+ */
+export async function initOtel(serviceRole = process.env.AGENT_ROLE || "api"): Promise<void> {
+  if (initialized || !enabled) return;
+  initialized = true;
+  try {
+    const impl = await import("./otel-impl");
+    await impl.boot(serviceRole);
+    // Bind only after boot succeeded, so a failed boot leaves every slot unset.
+    ({
+      withSpan: realWithSpan,
+      startSpan: realStartSpan,
+      withRemoteContext: realWithRemoteContext,
+      withSpanContext: realWithSpanContext,
+      injectTraceContext: realInjectTraceContext,
+      shutdown: realShutdown,
+    } = impl);
+    const serviceName = process.env.OTEL_SERVICE_NAME ?? "agent-swarm";
+    console.log(`[OTel] enabled for ${serviceName} (${serviceRole})`);
+  } catch (error) {
+    console.warn(`[OTel] disabled after initialization failure: ${error}`);
+  }
+}
+/**
+ * Runs `fn` inside a span named `name`.
+ *
+ * Delegates to the bound implementation when tracing is enabled and
+ * initialised; otherwise calls `fn` directly with the no-op span.
+ *
+ * @param name Span name forwarded to the implementation.
+ * @param fn Callback receiving the span; may be sync or async.
+ * @param attributes Optional attributes forwarded to the implementation.
+ * @returns A promise of whatever `fn` returns.
+ */
+export async function withSpan<T>(
+  name: string,
+  fn: (span: SwarmSpan) => Promise<T> | T,
+  attributes?: Attributes,
+): Promise<T> {
+  const impl = realWithSpan;
+  if (enabled && impl) {
+    return impl(name, fn, attributes);
+  }
+  return fn(NOOP_SPAN);
+}
+/**
+ * Starts a span through the bound implementation, or returns the no-op span
+ * when tracing is disabled or not initialised.
+ *
+ * @param name Span name forwarded to the implementation.
+ * @param attributes Optional attributes forwarded to the implementation.
+ */
+export function startSpan(name: string, attributes?: Attributes): SwarmSpan {
+  const impl = realStartSpan;
+  return enabled && impl ? impl(name, attributes) : NOOP_SPAN;
+}
+/**
+ * Runs `fn` through the bound implementation together with `span`, or calls
+ * `fn` directly when tracing is disabled or not initialised.
+ *
+ * @returns Whatever `fn` returns.
+ */
+export function withSpanContext<T>(span: SwarmSpan, fn: () => T): T {
+  const impl = realWithSpanContext;
+  return enabled && impl ? impl(span, fn) : fn();
+}
+/**
+ * Runs `fn` through the bound implementation together with `carrier`, or
+ * calls `fn` directly when tracing is disabled or not initialised.
+ *
+ * @param carrier Key/value bag forwarded to the implementation.
+ * @param fn Callback to run; may be sync or async.
+ * @returns A promise of whatever `fn` returns.
+ */
+export async function withRemoteContext<T>(
+  carrier: Record<string, unknown>,
+  fn: () => Promise<T> | T,
+): Promise<T> {
+  const impl = realWithRemoteContext;
+  if (enabled && impl) {
+    return impl(carrier, fn);
+  }
+  return fn();
+}
+/**
+ * Passes `headers` through the bound implementation and returns its result.
+ * When tracing is disabled or not initialised, the same `headers` object is
+ * returned untouched.
+ */
+export function injectTraceContext(headers: Record<string, string>): Record<string, string> {
+  const impl = realInjectTraceContext;
+  return enabled && impl ? impl(headers) : headers;
+}
+/**
+ * Awaits the implementation's `shutdown` if one was bound by `initOtel`;
+ * resolves immediately otherwise.
+ */
+export async function shutdownOtel(): Promise<void> {
+  const shutdown = realShutdown;
+  if (shutdown === undefined) return;
+  await shutdown();
+}
+/**
+ * Test helper: clears every implementation binding and the `initialized`
+ * flag so `initOtel` can run again. The module-load-time `enabled` flag is
+ * not touched.
+ */
+export function _resetOtelForTests() {
+  realShutdown = undefined;
+  realInjectTraceContext = undefined;
+  realWithSpanContext = undefined;
+  realWithRemoteContext = undefined;
+  realStartSpan = undefined;
+  realWithSpan = undefined;
+  initialized = false;
+}

package/src/providers/claude-adapter.ts CHANGED Viewed

@@ -1,7 +1,12 @@
 import { readFile, unlink, writeFile } from "node:fs/promises";
 import { homedir } from "node:os";
 import { dirname, join } from "node:path";
-import { computeContextUsed, getContextWindowSize } from "../utils/context-window";
+import {
+  CONTEXT_FORMULA,
+  clampContextPercent,
+  computeContextUsedUnified,
+  getContextWindowSize,
+} from "../utils/context-window";
 import { validateClaudeCredentials } from "../utils/credentials";
 import {
   parseStderrForErrors,
@@ -465,6 +470,10 @@ class ClaudeSession implements ProviderSession {
         this._sessionId = json.session_id;
         this.emit({ type: "session_init", sessionId: json.session_id, provider: "claude" });
         if (json.model) {
+          // Phase 4: the CLI's `init.model` reflects the actual model after any
+          // backoff/fallback. Update `this.model` so subsequent CostData rows
+          // (and the pricing lookup the API runs) use the right rate.
+          this.model = json.model;
           this.contextWindowSize = getContextWindowSize(json.model);
         }
       }
@@ -487,6 +496,10 @@ class ClaudeSession implements ProviderSession {
               output_tokens?: number;
               cache_read_input_tokens?: number;
               cache_creation_input_tokens?: number;
+              // Phase 4: claude extended-thinking flows surface this — the
+              // CLI emits `thinking_input_tokens` when the model produced
+              // thinking content during the turn.
+              thinking_input_tokens?: number;
             }
           | undefined;
@@ -499,8 +512,12 @@ class ClaudeSession implements ProviderSession {
           outputTokens: usage?.output_tokens ?? 0,
           cacheReadTokens: usage?.cache_read_input_tokens ?? 0,
           cacheWriteTokens: usage?.cache_creation_input_tokens ?? 0,
+          // Phase 4: surface thinking tokens; previously dropped on the floor.
+          thinkingTokens: usage?.thinking_input_tokens ?? 0,
           durationMs: json.duration_ms || 0,
-          numTurns: json.num_turns || 1,
+          // Phase 4: honest null when the CLI omits num_turns instead of a
+          // faked `1` (would have under-counted in dashboards).
+          numTurns: json.num_turns ?? null,
           model: this.model,
           isError: json.is_error || false,
           provider: "claude",
@@ -539,18 +556,26 @@ class ClaudeSession implements ProviderSession {
           }
         }
-        // Context usage extraction from assistant message usage
+        // Context usage extraction from assistant message usage.
+        // Phase 9: unified `input + cache + output` formula across every
+        // provider so cross-provider percent comparisons are meaningful.
         if (json.message.usage) {
           const usage = json.message.usage;
-          const contextUsed = computeContextUsed(usage);
+          const contextUsed = computeContextUsedUnified({
+            inputTokens: usage.input_tokens,
+            cacheReadTokens: usage.cache_read_input_tokens,
+            cacheCreateTokens: usage.cache_creation_input_tokens,
+            outputTokens: usage.output_tokens,
+          });
           const contextTotal = this.contextWindowSize;
           this.emit({
             type: "context_usage",
             contextUsedTokens: contextUsed,
             contextTotalTokens: contextTotal,
-            contextPercent: contextTotal > 0 ? (contextUsed / contextTotal) * 100 : 0,
+            contextPercent: clampContextPercent(contextUsed, contextTotal) ?? 0,
             outputTokens: usage.output_tokens ?? 0,
+            contextFormula: CONTEXT_FORMULA,
           });
         }
       }

package/src/providers/claude-managed-adapter.ts CHANGED Viewed

@@ -59,8 +59,15 @@ import type {
 import type { SkillCreateResponse as Skill } from "@anthropic-ai/sdk/resources/beta/skills";
 import { checkToolLoop } from "../hooks/tool-loop-detection";
+import {
+  CONTEXT_FORMULA,
+  clampContextPercent,
+  computeContextUsedUnified,
+  getContextWindowSize,
+} from "../utils/context-window";
 import { scrubSecrets } from "../utils/secret-scrubber";
 import { computeClaudeManagedCostUsd } from "./claude-managed-models";
+import { getRuntimeFeePerHour } from "./claude-managed-pricing";
 import { createClaudeManagedSwarmEventHandler } from "./claude-managed-swarm-events";
 import type {
   CostData,
@@ -113,13 +120,10 @@ const REQUIRED_ENV_VARS = [
   "MANAGED_ENVIRONMENT_ID",
 ] as const;
-/**
- * Default context window for managed Claude sessions when we don't have a
- * model-specific override. Sized to match Sonnet 4.x (1M extended-context
- * variant). The Phase 4 pricing-table commit will replace this with a
- * per-model lookup.
- */
-const DEFAULT_CONTEXT_TOTAL_TOKENS = 1_000_000;
+// Phase 5: removed the hardcoded `DEFAULT_CONTEXT_TOTAL_TOKENS = 1_000_000`.
+// The adapter now calls `getContextWindowSize(this.model)` from
+// `src/utils/context-window.ts`, which resolves shortnames + dated full ids
+// so haiku-4-5 sessions don't pretend to have a 1M window.
 /**
  * Compose the per-session user-message content blocks. Returns two blocks:
@@ -187,6 +191,8 @@ function emptyCost(config: ProviderSessionConfig, model: string): CostData {
     numTurns: 0,
     model,
     isError: false,
+    // Phase 3 — tag every emitted CostData so the API's recompute path engages.
+    provider: "claude-managed",
   };
 }
@@ -374,6 +380,11 @@ class ClaudeManagedSession implements ProviderSession {
    * 2. Anthropic's $0.08/session-hour runtime fee — billed continuously by
    *    Anthropic regardless of model usage, so we add it here to surface in
    *    the swarm's per-session cost UI.
+   *
+   * Phase 5: the harness-local USD is still computed here, but the server-side
+   * recompute path (`POST /api/session-costs` after Phase 2) will reprice the
+   * row against the seeded pricing-table values and tag `costSource='pricing-table'`.
+   * The runtime fee comes from the same table now (`token_class='runtime_hour'`).
    */
   private snapshotCost(isError: boolean): CostData {
     const durationMs = Date.now() - this.startedAt;
@@ -384,9 +395,11 @@ class ClaudeManagedSession implements ProviderSession {
       this.cost.cacheReadTokens ?? 0,
       this.cost.cacheWriteTokens ?? 0,
     );
-    // $0.08 / session-hour. Sandbox runtime is billed by wallclock, so we
-    // amortize linearly across the session's `durationMs`.
-    const runtimeFeeUsd = (durationMs / 3_600_000) * 0.08;
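+    // Phase 5: the hourly rate now comes from `getRuntimeFeePerHour()`
+    // (`claude-managed-pricing.ts`) instead of an inline literal. That helper
+    // returns the `CLAUDE_MANAGED_RUNTIME_FEE_USD_PER_HOUR` env override when
+    // it is a finite, non-negative number, and the historical $0.08/hr
+    // otherwise. It does not read the pricing table from the worker. The fee
+    // is still amortized linearly across the session's `durationMs`.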
+    const runtimeFeeUsd = (durationMs / 3_600_000) * getRuntimeFeePerHour();
     return {
       ...this.cost,
       durationMs,
@@ -506,12 +519,15 @@ class ClaudeManagedSession implements ProviderSession {
         // this event. Emit a `compaction` ProviderEvent with the values we
         // *do* know; consumers that need richer data can subscribe to
         // `raw_log` for the original payload.
+        // Phase 5 — pre-compact tokens are an inferred proxy (running input
+        // total); flag the compactTrigger as 'auto-inferred' so downstream
+        // dashboards can distinguish a real trigger value from our guess.
         const _cc = event as BetaManagedAgentsAgentThreadContextCompactedEvent;
         this.emit({
           type: "compaction",
           preCompactTokens: this.cost.inputTokens ?? 0,
-          compactTrigger: "auto",
-          contextTotalTokens: DEFAULT_CONTEXT_TOTAL_TOKENS,
+          compactTrigger: "auto-inferred",
+          contextTotalTokens: getContextWindowSize(this.cost.model),
         });
         return { terminal: false, isError: false };
       }
@@ -524,16 +540,26 @@ class ClaudeManagedSession implements ProviderSession {
           (this.cost.cacheReadTokens ?? 0) + usage.cache_read_input_tokens;
         this.cost.cacheWriteTokens =
           (this.cost.cacheWriteTokens ?? 0) + usage.cache_creation_input_tokens;
-        this.cost.numTurns += 1;
-        const used = (this.cost.inputTokens ?? 0) + (this.cost.outputTokens ?? 0);
-        const total = DEFAULT_CONTEXT_TOTAL_TOKENS;
+        this.cost.numTurns = (this.cost.numTurns ?? 0) + 1;
+        // Phase 5 + Phase 9: unified `input + cache + output` formula AND a
+        // per-model window via `getContextWindowSize`. Previously this used
+        // a hardcoded 1M window and ignored cache — fine for sonnet/opus,
+        // wrong for haiku and any future smaller-window model.
+        const used = computeContextUsedUnified({
+          inputTokens: this.cost.inputTokens,
+          cacheReadTokens: this.cost.cacheReadTokens,
+          cacheCreateTokens: this.cost.cacheWriteTokens,
+          outputTokens: this.cost.outputTokens,
+        });
+        const total = getContextWindowSize(this.cost.model);
         this.emit({
           type: "context_usage",
           contextUsedTokens: used,
           contextTotalTokens: total,
-          contextPercent: Math.min(100, (used / total) * 100),
+          contextPercent: clampContextPercent(used, total),
           outputTokens: this.cost.outputTokens ?? 0,
+          contextFormula: CONTEXT_FORMULA,
         });
         return { terminal: false, isError: false };
       }

package/src/providers/claude-managed-pricing.ts ADDED Viewed

@@ -0,0 +1,34 @@
+/**
+ * Phase 5 — small adapter-side pricing constants for claude-managed.
+ *
+ * The API server's pricing table is the canonical store (seeded by
+ * `src/be/seed-pricing.ts`). Workers can't touch the DB directly (DB
+ * boundary), so the adapter keeps a local constant for the runtime fee
+ * and lets the API-side recompute path (Phase 2) override the resulting
+ * `totalCostUsd` with the canonical figure. The constant here is what
+ * shows up in the worker's local logs before the row hits the server.
+ *
+ * If/when we plumb pricing through the worker bootstrap (HTTP fetch of
+ * `/api/pricing` at session start), this module is the place to swap.
+ */
+/**
+ * Parses a raw env override for the managed-runtime hourly fee.
+ *
+ * The value is trimmed before the emptiness check: `Number("   ")` is `0`,
+ * so without trimming a whitespace-only variable would silently zero the
+ * fee instead of falling back to the default.
+ *
+ * @param raw Raw environment value, or `undefined` when the variable is unset.
+ * @returns The parsed fee when it is finite and non-negative; `0.08` otherwise.
+ */
+function parseRuntimeFeeUsdPerHour(raw: string | undefined): number {
+  const fallback = 0.08;
+  const trimmed = raw?.trim();
+  if (!trimmed) return fallback;
+  const parsed = Number(trimmed);
+  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
+}
+/**
+ * USD per session-hour for managed claude runtime. Source:
+ * https://docs.claude.com/en/api/agent-sdk/managed-runtime#pricing
+ * (verified 2026-04-28). Override at runtime via the
+ * `CLAUDE_MANAGED_RUNTIME_FEE_USD_PER_HOUR` env var for ops bumps without a
+ * redeploy. The variable is read once, when this module is evaluated.
+ */
+export const RUNTIME_FEE_USD_PER_HOUR = parseRuntimeFeeUsdPerHour(
+  process.env.CLAUDE_MANAGED_RUNTIME_FEE_USD_PER_HOUR,
+);
+/**
+ * Adapter-facing accessor for the hourly runtime fee.
+ *
+ * @returns `RUNTIME_FEE_USD_PER_HOUR` as resolved at module load; it is
+ *   always a finite number, so the cost snapshot never has to guard it.
+ */
+export function getRuntimeFeePerHour(): number {
+  return RUNTIME_FEE_USD_PER_HOUR;
+}

package/src/providers/codex-adapter.ts CHANGED Viewed

@@ -66,6 +66,11 @@ import {
   type WebSearchItem,
 } from "@openai/codex-sdk";
 import { buildRatingsFromLlm, fetchRetrievalsForTask, postRatings } from "../be/memory/raters/llm";
+import {
+  CONTEXT_FORMULA,
+  clampContextPercent,
+  computeContextUsedUnified,
+} from "../utils/context-window";
 import { summarizeSession as runSummarize } from "../utils/internal-ai";
 import { scrubSecrets } from "../utils/secret-scrubber";
 import { type CodexAgentsMdHandle, writeCodexAgentsMd } from "./codex-agents-md";
@@ -523,6 +528,11 @@ class CodexSession implements ProviderSession {
     const inputTokens = usage?.input_tokens ?? 0;
     const cachedInputTokens = usage?.cached_input_tokens ?? 0;
     const outputTokens = usage?.output_tokens ?? 0;
+    // Phase 6: Codex SDK surfaces `reasoning_output_tokens` separately from
+    // `output_tokens` for reasoning models (gpt-5.3-codex, gpt-5.4 thinking).
+    // Pre-fix this number was read into `lastUsage` but never reached
+    // `CostData`, so reasoning-heavy sessions silently under-billed.
+    const reasoningOutputTokens = usage?.reasoning_output_tokens ?? 0;
     return {
       // Runner overrides with its own session id.
       sessionId: "",
@@ -540,9 +550,12 @@ class CodexSession implements ProviderSession {
       ),
       inputTokens,
       outputTokens,
+      reasoningOutputTokens,
       cacheReadTokens: cachedInputTokens,
-      // Codex does not distinguish cache writes in its Usage payload.
-      cacheWriteTokens: 0,
+      // Phase 6: undefined (NOT 0). Codex SDK can't honestly report cache
+      // writes; leaving it undefined preserves that distinction in the DB
+      // instead of mixing genuine zeros with "unknown".
+      cacheWriteTokens: undefined,
       durationMs: Date.now() - this.startedAt,
       numTurns: this.numTurns,
       model: this.resolvedModel,
@@ -760,36 +773,34 @@ class CodexSession implements ProviderSession {
       case "turn.completed": {
         this.lastUsage = event.usage;
         if (event.usage) {
-          // The Codex SDK reports `input_tokens` as the SUM of every prompt
-          // sent to the model across the entire turn (one `codex exec` call
-          // can fan out to dozens of model invocations as MCP tools roundtrip
-          // back and forth). For chatty turns this number routinely exceeds
-          // the model's context window, even though no single model call did.
+          // Phase 9: switch from the codex-specific "peak proxy" formula
+          // (`uncached_input + output`) to the unified
+          // `input + cache_read + cache_create + output` so cross-provider
+          // percent comparisons are meaningful.
           //
-          // For peak-context reporting we want a proxy for "the largest
-          // single-call prompt". We approximate it as the uncached portion
-          // (cached tokens are reused across calls so they count once toward
-          // the actual peak), plus the output. This isn't perfect — the SDK
-          // would have to expose per-call stats for that — but it's far more
-          // representative than `(input + output) / window` which clamps to
-          // 1.0 the moment a turn makes any meaningful tool history.
-          const uncachedInput = Math.max(
-            0,
-            event.usage.input_tokens - event.usage.cached_input_tokens,
-          );
-          const peakProxy = uncachedInput + event.usage.output_tokens;
-          // `contextPercent` is on a 0-100 scale across all providers — claude
-          // emits `(used / total) * 100`, pi-mono passes through `usage.percent`
-          // which is already 0-100. The dashboard at
-          // ui/src/pages/tasks/[id]/page.tsx renders it via `.toFixed(0)`
-          // expecting an integer percent, so a 0-1 fraction would render as
-          // "0%" instead of e.g. "40%".
+          // Note: Codex's `input_tokens` already includes cached_input_tokens
+          // (it's the TOTAL across the turn — see the longer comment that
+          // used to live here, preserved in git history). We therefore pass
+          // `cacheReadTokens: 0` to avoid double-counting the cached portion.
+          // The trade-off the old comment flagged is still real — a chatty
+          // turn can over-report because `input_tokens` is the SUM across
+          // every model call in the turn — but having the SAME formula
+          // everywhere wins over the local optimum. Clamp catches the
+          // chatty-turn overshoot at 100%. Old rows tagged 'peak-proxy'
+          // remain in `task_context_snapshots`; the UI surfaces both.
+          const contextUsed = computeContextUsedUnified({
+            inputTokens: event.usage.input_tokens,
+            cacheReadTokens: 0,
+            cacheCreateTokens: 0,
+            outputTokens: event.usage.output_tokens,
+          });
           this.emit({
             type: "context_usage",
-            contextUsedTokens: peakProxy,
+            contextUsedTokens: contextUsed,
             contextTotalTokens: this.contextWindow,
-            contextPercent: Math.min(100, (peakProxy / this.contextWindow) * 100),
+            contextPercent: clampContextPercent(contextUsed, this.contextWindow) ?? 0,
             outputTokens: event.usage.output_tokens,
+            contextFormula: CONTEXT_FORMULA,
           });
         }
         break;

package/src/providers/codex-models.ts CHANGED Viewed

@@ -126,12 +126,22 @@ export const CODEX_MODEL_PRICING: Record<CodexModel, CodexModelPricing> = {
   },
 };
+/**
+ * Phase 6 — one-warning-per-process tracking so unknown models log once
+ * instead of spamming the worker log on every turn.
+ */
+const _warnedUnknownCodexModels = new Set<string>();
 /**
  * Compute USD cost from a Codex `Usage` payload. The Codex SDK reports
  * `input_tokens` as the TOTAL input fed to the model across the turn (cached
  * + uncached), so we subtract `cached_input_tokens` before billing the
- * uncached portion at the full rate. Returns 0 for unknown models so we never
- * inflate cost on a typo.
+ * uncached portion at the full rate.
+ *
+ * Phase 6: returns 0 for unknown models AND logs a one-time warning, so an
+ * operator running `MODEL_OVERRIDE=gpt-future-2027` notices that the worker
+ * is silently dropping cost. The server-side recompute path (Phase 2) tags
+ * such rows `costSource='unpriced'`, which surfaces as a yellow UI badge.
  */
 export function computeCodexCostUsd(
   model: string,
@@ -140,7 +150,16 @@ export function computeCodexCostUsd(
   outputTokens: number,
 ): number {
   const pricing = CODEX_MODEL_PRICING[model as CodexModel];
-  if (!pricing) return 0;
+  if (!pricing) {
+    if (!_warnedUnknownCodexModels.has(model)) {
+      _warnedUnknownCodexModels.add(model);
+      console.warn(
+        `[codex] unpriced model ${JSON.stringify(model)} — adapter cost will report $0; ` +
+          "server-side recompute will tag costSource='unpriced' if the pricing table has no rows.",
+      );
+    }
+    return 0;
+  }
   const uncachedInput = Math.max(0, inputTokens - cachedInputTokens);
   const inputCost = (uncachedInput / 1_000_000) * pricing.inputPerMillion;
   const cachedCost = (cachedInputTokens / 1_000_000) * pricing.cachedInputPerMillion;

package/src/providers/devin-adapter.ts CHANGED Viewed

@@ -279,6 +279,12 @@ class DevinSession implements ProviderSession {
     if (this.settled || this.aborted) return;
     this.pollCount += 1;
+    // Phase 8: Devin's session API does NOT report per-poll context-window
+    // info (the model is fully managed by Devin). We deliberately don't emit
+    // a synthetic `context_usage` event here — faking one with `contextUsedTokens=0`
+    // would be misleading. `peakContextTokens` stays null for devin tasks,
+    // which the UI surfaces as "not available" rather than "0".
     let response: DevinSessionResponse;
     try {
       response = await getSession(this.orgId, this.devinApiKey, this._sessionId!);
@@ -788,6 +794,11 @@ class DevinSession implements ProviderSession {
       numTurns: this.pollCount,
       model: "devin",
       isError,
+      // Phase 3 — tag CostData so the API recompute path engages. Devin's
+      // pricing is ACU-based (one row under `provider='devin', model='*',
+      // token_class='acu'`); the harness USD value above is already correct,
+      // but tagging the row exposes its source to the UI badge.
+      provider: "devin",
     };
   }
 }

package/src/providers/opencode-adapter.ts CHANGED Viewed

@@ -12,7 +12,11 @@ import { existsSync, mkdirSync } from "node:fs";
 import { join } from "node:path";
 import type { AssistantMessage, Config, Event as OpencodeEvent } from "@opencode-ai/sdk";
 import { createOpencode } from "@opencode-ai/sdk";
-import { getContextWindowSize } from "../utils/context-window";
+import {
+  CONTEXT_FORMULA,
+  clampContextPercent,
+  getContextWindowSize,
+} from "../utils/context-window";
 import { validateOpencodeCredentials } from "../utils/credentials";
 import { fetchInstalledMcpServers } from "../utils/mcp-server-fetcher";
 import { scrubSecrets } from "../utils/secret-scrubber";
@@ -104,7 +108,7 @@ function resolvePluginPath(): string {
   return join(import.meta.dir, "../../plugin/opencode-plugins/agent-swarm.ts");
 }
-class OpencodeSession implements ProviderSession {
+export class OpencodeSession implements ProviderSession {
   private _sessionId: string;
   private listeners: Array<(event: ProviderEvent) => void> = [];
   // Buffer for events emitted before any listener is attached.
@@ -115,6 +119,7 @@ class OpencodeSession implements ProviderSession {
   // leaving agent_tasks.provider/.model NULL. Buffer + flush on first attach.
   private pendingEvents: ProviderEvent[] = [];
   private completionResolve!: (result: ProviderResult) => void;
+  // biome-ignore lint/correctness/noUnusedPrivateClassMembers: reserved for future error-propagation paths; symmetric with completionResolve.
   private completionReject!: (err: Error) => void;
   private completionPromise: Promise<ProviderResult>;
   private server: { url: string; close(): void };
@@ -237,6 +242,15 @@ class OpencodeSession implements ProviderSession {
       case "message.updated": {
         const msg = ev.properties.info;
         if (!isAssistantMessage(msg) || msg.sessionID !== this._sessionId) break;
+        // Phase 9 fix: opencode fires `message.updated` repeatedly during a single
+        // assistant turn (streaming text deltas, tool transitions, etc.) and only
+        // populates `tokens`/`cost` on the FINAL update once `time.completed` is
+        // set. Accumulating on every event would either no-op (zero tokens) or —
+        // if opencode ever back-fills intermediate snapshots — multi-count. Gate
+        // the accumulator AND the context emit on the finalized signal so both
+        // paths see the same canonical "this turn is done" moment.
+        const messageFinalized = msg.time?.completed != null;
+        if (!messageFinalized) break;
         // Accumulate cost from each completed assistant message ("step")
         this.totalCostUsd += msg.cost;
         this.inputTokens += msg.tokens?.input ?? 0;
@@ -247,21 +261,31 @@ class OpencodeSession implements ProviderSession {
         if (!this.model && msg.modelID) this.model = msg.modelID;
         // Emit context_usage so the runner can POST /api/tasks/:id/context
-        // (drives the dashboard's context-usage progress bar) and the
-        // dashboard's activity timeline shows per-turn progress.
+        // (drives the dashboard's context-usage progress bar). The runner-side
+        // throttle (CONTEXT_THROTTLE_MS = 30s) means the FIRST emit wins for any
+        // short task — so this MUST carry real numbers, not the zero-tokens
+        // placeholder opencode sends on intermediate streaming updates. The
+        // `time.completed` gate above (in the accumulator block) guarantees we
+        // only land here for finalized messages.
         const turnInput = msg.tokens?.input ?? 0;
         const turnOutput = msg.tokens?.output ?? 0;
         const turnCacheRead = msg.tokens?.cache?.read ?? 0;
         const turnCacheWrite = msg.tokens?.cache?.write ?? 0;
-        const contextUsed = turnInput + turnCacheRead + turnCacheWrite;
+        // Phase 8 + Phase 9: unified `input + cache + output` formula instead
+        // of the previous `input + cache_read + cache_write` (which omitted
+        // output and slightly mis-counted vs every other adapter).
+        const contextUsed = turnInput + turnCacheRead + turnCacheWrite + turnOutput;
         const contextTotal = getContextWindowSize(this.model || msg.modelID || "default");
-        if (contextTotal > 0) {
+        if (contextTotal > 0 && contextUsed > 0) {
           this.emit({
             type: "context_usage",
             contextUsedTokens: contextUsed,
             contextTotalTokens: contextTotal,
-            contextPercent: (contextUsed / contextTotal) * 100,
+            // Phase 8: clamp so a turn that briefly overshoots (e.g. due to
+            // a stale total) doesn't render as a 130% gauge in the UI.
+            contextPercent: clampContextPercent(contextUsed, contextTotal) ?? 0,
             outputTokens: turnOutput,
+            contextFormula: CONTEXT_FORMULA,
           });
         }
         break;