npm - @desplega.ai/agent-swarm - Versions diffs - 1.92.1 → 1.92.2 - Mend

@desplega.ai/agent-swarm 1.92.1 → 1.92.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

package/openapi.json +1 -1
package/package.json +1 -1
package/src/be/db.ts +89 -0
package/src/be/memory/boot-reembed.ts +85 -0
package/src/be/memory/constants.ts +42 -1
package/src/be/memory/providers/openai-embedding.ts +13 -0
package/src/be/memory/providers/sqlite-store.ts +33 -1
package/src/be/memory/reranker.ts +35 -17
package/src/be/memory/types.ts +8 -0
package/src/be/modelsdev-cache.json +5308 -2165
package/src/be/seed-scripts/catalog/compound-insights.ts +371 -0
package/src/http/index.ts +9 -0
package/src/http/memory.ts +4 -0
package/src/tasks/worker-follow-up.ts +12 -0
package/src/tests/memory-e2e.test.ts +6 -6
package/src/tests/memory-rater-e2e.test.ts +4 -5
package/src/tests/memory-reranker.test.ts +135 -124
package/src/tests/memory.test.ts +13 -12
package/src/tests/seed-scripts.test.ts +205 -0
package/src/tests/task-cascade-fail.test.ts +304 -0
package/templates/workflows/llm-safe-release-context/config.json +13 -0
package/templates/workflows/llm-safe-release-context/content.md +69 -0

package/src/tests/memory-reranker.test.ts CHANGED Viewed

@@ -1,5 +1,12 @@
 import { afterEach, beforeEach, describe, expect, test } from "bun:test";
-import { accessBoost, computeScore, recencyDecay, rerank, usefulness } from "../be/memory/reranker";
+import {
+  accessBoost,
+  computeScore,
+  recencyDecay,
+  rerank,
+  sourceQuality,
+  usefulness,
+} from "../be/memory/reranker";
 import type { MemoryCandidate } from "../be/memory/types";
 function makeCandidate(
@@ -37,21 +44,33 @@ describe("recencyDecay", () => {
     expect(decay).toBeCloseTo(1.0, 5);
   });
-  test("memory at half-life (14d) → ~0.5", () => {
+  test("task_completion at half-life (14d) → ~0.5", () => {
     const created = new Date(now.getTime() - 14 * 86400000).toISOString();
-    const decay = recencyDecay(created, now);
+    const decay = recencyDecay(created, now, "task_completion");
     expect(decay).toBeCloseTo(0.5, 2);
   });
-  test("memory at 2× half-life (28d) → ~0.25", () => {
-    const created = new Date(now.getTime() - 28 * 86400000).toISOString();
-    const decay = recencyDecay(created, now);
-    expect(decay).toBeCloseTo(0.25, 2);
+  test("session_summary at 7d → ~0.5 (7d half-life)", () => {
+    const created = new Date(now.getTime() - 7 * 86400000).toISOString();
+    const decay = recencyDecay(created, now, "session_summary");
+    expect(decay).toBeCloseTo(0.5, 2);
+  });
+  test("file_index at 180d → ~0.5 (180d half-life)", () => {
+    const created = new Date(now.getTime() - 180 * 86400000).toISOString();
+    const decay = recencyDecay(created, now, "file_index");
+    expect(decay).toBeCloseTo(0.5, 2);
   });
-  test("very old memory (365d) → near 0", () => {
+  test("manual memory at any age → 1.0 (no decay)", () => {
     const created = new Date(now.getTime() - 365 * 86400000).toISOString();
-    const decay = recencyDecay(created, now);
+    const decay = recencyDecay(created, now, "manual");
+    expect(decay).toBe(1.0);
+  });
+  test("very old task_completion (365d) → near 0", () => {
+    const created = new Date(now.getTime() - 365 * 86400000).toISOString();
+    const decay = recencyDecay(created, now, "task_completion");
     expect(decay).toBeLessThan(0.001);
   });
@@ -60,6 +79,12 @@ describe("recencyDecay", () => {
     const decay = recencyDecay(created, now);
     expect(decay).toBe(1.0);
   });
+  test("no source provided → falls back to task_completion half-life", () => {
+    const created = new Date(now.getTime() - 14 * 86400000).toISOString();
+    const decay = recencyDecay(created, now);
+    expect(decay).toBeCloseTo(0.5, 2);
+  });
 });
 describe("accessBoost", () => {
@@ -93,31 +118,71 @@ describe("accessBoost", () => {
   });
 });
+describe("sourceQuality", () => {
+  test("manual → 1.5", () => {
+    expect(sourceQuality("manual")).toBe(1.5);
+  });
+  test("file_index → 1.0", () => {
+    expect(sourceQuality("file_index")).toBe(1.0);
+  });
+  test("task_completion → 0.7", () => {
+    expect(sourceQuality("task_completion")).toBe(0.7);
+  });
+  test("session_summary → 0.5", () => {
+    expect(sourceQuality("session_summary")).toBe(0.5);
+  });
+});
 describe("computeScore", () => {
   const now = new Date("2026-04-12T12:00:00Z");
-  test("multiplies similarity × decay × boost", () => {
+  test("manual: similarity × 1.0 (no decay) × source(1.5) × boost × usefulness", () => {
     const candidate = makeCandidate({
       similarity: 0.8,
+      source: "manual",
       createdAt: now.toISOString(),
       accessedAt: now.toISOString(),
       accessCount: 0,
     });
     const score = computeScore(candidate, now);
-    // 0.8 * 1.0 * 1.0 = 0.8
-    expect(score).toBeCloseTo(0.8, 5);
+    // 0.8 * 1.0 (no decay for manual) * 1.0 (no boost) * 1.5 (source) * 1.0 (usefulness) = 1.2
+    expect(score).toBeCloseTo(1.2, 5);
   });
-  test("old memory with no access gets penalized", () => {
+  test("task_completion at 14d → penalized by decay AND source multiplier", () => {
     const candidate = makeCandidate({
       similarity: 0.8,
+      source: "task_completion",
       createdAt: new Date(now.getTime() - 14 * 86400000).toISOString(),
       accessedAt: new Date(now.getTime() - 14 * 86400000).toISOString(),
       accessCount: 0,
     });
     const score = computeScore(candidate, now);
-    // 0.8 * 0.5 * 1.0 = 0.4
-    expect(score).toBeCloseTo(0.4, 2);
+    // 0.8 * 0.5 (14d decay) * 1.0 (no boost) * 0.7 (source) * 1.0 (usefulness) = 0.28
+    expect(score).toBeCloseTo(0.28, 2);
+  });
+  test("old manual vs fresh task_completion: manual wins on relevance", () => {
+    const oldManual = makeCandidate({
+      similarity: 0.8,
+      source: "manual",
+      createdAt: new Date(now.getTime() - 76 * 86400000).toISOString(),
+      accessedAt: new Date(now.getTime() - 76 * 86400000).toISOString(),
+      accessCount: 0,
+    });
+    const freshTC = makeCandidate({
+      similarity: 0.05,
+      source: "task_completion",
+      createdAt: new Date(now.getTime() - 1 * 86400000).toISOString(),
+      accessedAt: new Date(now.getTime() - 1 * 86400000).toISOString(),
+      accessCount: 0,
+    });
+    // This is THE bug we're fixing: with the old flat 14d decay, the old manual
+    // memory scored lower than fresh noise. Now manual has no decay.
+    expect(computeScore(oldManual, now)).toBeGreaterThan(computeScore(freshTC, now));
   });
 });
@@ -166,36 +231,51 @@ describe("rerank", () => {
     expect(result[0]!.similarity).toBeGreaterThan(result[1]!.similarity);
   });
-  test("recency boosts newer memory over older with same raw similarity", () => {
+  test("recency boosts newer task_completion over older with same raw similarity", () => {
     const candidates = [
       makeCandidate({
         similarity: 0.8,
-        createdAt: new Date(now.getTime() - 14 * 86400000).toISOString(), // 14d old
+        source: "task_completion",
+        createdAt: new Date(now.getTime() - 14 * 86400000).toISOString(),
       }),
       makeCandidate({
         similarity: 0.8,
-        createdAt: now.toISOString(), // fresh
+        source: "task_completion",
+        createdAt: now.toISOString(),
       }),
     ];
     const result = rerank(candidates, { limit: 2, now });
-    // Fresh memory should rank higher due to recency decay
     expect(result[0]!.createdAt).toBe(now.toISOString());
   });
   test("now parameter enables deterministic testing", () => {
     const candidate = makeCandidate({
       similarity: 0.8,
+      source: "task_completion",
       createdAt: new Date(now.getTime() - 7 * 86400000).toISOString(),
     });
     const result1 = rerank([candidate], { limit: 1, now });
     const result2 = rerank([candidate], { limit: 1, now });
     expect(result1[0]!.similarity).toBe(result2[0]!.similarity);
   });
+  test("preserves rawSimilarity and compositeScore", () => {
+    const candidate = makeCandidate({
+      similarity: 0.8,
+      source: "manual",
+      createdAt: now.toISOString(),
+    });
+    const result = rerank([candidate], { limit: 1, now });
+    expect(result[0]!.rawSimilarity).toBe(0.8);
+    expect(result[0]!.compositeScore).toBeDefined();
+    // For a fresh manual memory: 0.8 * 1.0 (no decay) * 1.0 (no boost) * 1.5 (source) * 1.0 (usefulness)
+    expect(result[0]!.compositeScore).toBeCloseTo(1.2, 5);
+    // similarity field = compositeScore
+    expect(result[0]!.similarity).toBe(result[0]!.compositeScore);
+  });
 });
 describe("usefulness", () => {
-  // The default-floor cases assume MEMORY_DEMOTION_FLOOR is unset/empty.
-  // The override case sets and restores the env var.
   let originalFloor: string | undefined;
   beforeEach(() => {
     originalFloor = process.env.MEMORY_DEMOTION_FLOOR;
@@ -224,10 +304,6 @@ describe("usefulness", () => {
   });
   test("Beta(50,1) → 2 * 50/51 ≈ 1.961 (approaches ceiling, never above 2.0)", () => {
-    // NB: the clamp `Math.min(2.0, 2 * mean)` is a defensive ceiling — the
-    // formula 2 * α/(α+β) is bounded above by 2 for any finite β > 0, so the
-    // clamp only fires on degenerate inputs (β = 0). The plan's "===2.0"
-    // expectation was a numerical slip; the asymptote is what we ship.
     expect(usefulness(50, 1)).toBeCloseTo((2 * 50) / 51, 10);
     expect(usefulness(50, 1)).toBeLessThan(2.0);
   });
@@ -242,110 +318,45 @@ describe("usefulness", () => {
   });
 });
-describe("backward-compat: MEMORY_RATERS unset → reranker is a no-op", () => {
-  // Litmus for step-1: with default Beta(1,1) priors and the default
-  // MEMORY_DEMOTION_FLOOR=1.0, computeScore must return EXACTLY the same value
-  // as a pre-rater build (similarity * recencyDecay * accessBoost).
-  const now = new Date("2026-04-12T12:00:00Z");
-  let originalFloor: string | undefined;
-  beforeEach(() => {
-    originalFloor = process.env.MEMORY_DEMOTION_FLOOR;
-    delete process.env.MEMORY_DEMOTION_FLOOR;
-  });
-  afterEach(() => {
-    if (originalFloor === undefined) {
-      delete process.env.MEMORY_DEMOTION_FLOOR;
-    } else {
-      process.env.MEMORY_DEMOTION_FLOOR = originalFloor;
-    }
-  });
-  test("computeScore equals similarity * recencyDecay * accessBoost (no usefulness drift)", () => {
-    const cases: MemoryCandidate[] = [
-      makeCandidate({
-        similarity: 0.8,
-        createdAt: now.toISOString(),
-        accessedAt: now.toISOString(),
-        accessCount: 0,
-      }),
-      makeCandidate({
-        similarity: 0.5,
-        createdAt: new Date(now.getTime() - 14 * 86400000).toISOString(),
-        accessedAt: new Date(now.getTime() - 24 * 3600000).toISOString(),
-        accessCount: 5,
-      }),
-      makeCandidate({
-        similarity: 0.99,
-        createdAt: new Date(now.getTime() - 28 * 86400000).toISOString(),
-        accessedAt: new Date(now.getTime() - 72 * 3600000).toISOString(),
-        accessCount: 12,
-      }),
-    ];
-    for (const c of cases) {
-      const expected =
-        c.similarity *
-        recencyDecay(c.createdAt, now) *
-        accessBoost(c.accessedAt, c.accessCount, now);
-      expect(computeScore(c, now)).toBe(expected);
-    }
-  });
+describe("source-aware scoring: manual memories survive age penalty", () => {
+  const now = new Date("2026-06-08T12:00:00Z");
-  test("snapshot order + scores match a hard-coded pre-rater baseline", () => {
-    // Baseline computed from main (pre-step-1): similarity * recencyDecay * accessBoost.
-    // With alpha=beta=1 + default floor, the new code must produce identical numbers.
-    const candidates = [
-      makeCandidate({
-        similarity: 0.9,
-        createdAt: now.toISOString(),
-        accessedAt: now.toISOString(),
-        accessCount: 0,
-      }),
-      makeCandidate({
-        similarity: 0.6,
-        createdAt: new Date(now.getTime() - 7 * 86400000).toISOString(),
-        accessedAt: now.toISOString(),
-        accessCount: 0,
-      }),
-      makeCandidate({
-        similarity: 0.3,
-        createdAt: new Date(now.getTime() - 28 * 86400000).toISOString(),
-        accessedAt: now.toISOString(),
-        accessCount: 0,
-      }),
-    ];
-    const result = rerank(candidates, { limit: 3, now });
+  test("76-day-old manual memory scores higher than 1-day-old noise task_completion", () => {
+    // The root-cause scenario from Taras's report: a 76-day-old manual memory
+    // with raw similarity 0.8 was being outscored by a 1-day-old noise result
+    // with raw similarity 0.05. The old reranker gave the noise result a HIGHER
+    // composite score because the flat 14d half-life crushed the old manual
+    // memory by 2^(-76/14) = 0.023. Now manual has no decay.
+    const oldManual = makeCandidate({
+      similarity: 0.8,
+      source: "manual",
+      createdAt: new Date(now.getTime() - 76 * 86400000).toISOString(),
+      accessedAt: new Date(now.getTime() - 76 * 86400000).toISOString(),
+      accessCount: 0,
+    });
+    const freshNoise = makeCandidate({
+      similarity: 0.05,
+      source: "task_completion",
+      createdAt: new Date(now.getTime() - 1 * 86400000).toISOString(),
+      accessedAt: new Date(now.getTime() - 1 * 86400000).toISOString(),
+      accessCount: 0,
+    });
-    // Expected scores: similarity * 2^(-ageDays/14) (no access boost, alpha=beta=1).
-    // 0.9 * 1.0      = 0.9
-    // 0.6 * 2^(-0.5) ≈ 0.4242640687
-    // 0.3 * 2^(-2)   = 0.075
-    expect(result[0]!.similarity).toBeCloseTo(0.9, 10);
-    expect(result[1]!.similarity).toBeCloseTo(0.6 * 2 ** -0.5, 10);
-    expect(result[2]!.similarity).toBeCloseTo(0.075, 10);
+    const ranked = rerank([freshNoise, oldManual], { limit: 2, now });
+    expect(ranked[0]!.source).toBe("manual");
+    expect(ranked[0]!.rawSimilarity).toBe(0.8);
   });
-  test("usefulness multiplies into score when posteriors move", () => {
-    // Sanity: a memory with α=10, β=1 should score ~1.818× higher than the same
-    // memory at α=β=1, holding everything else constant. Other rows unchanged.
-    const proven = makeCandidate({
-      similarity: 0.5,
-      createdAt: now.toISOString(),
-      accessedAt: now.toISOString(),
-      accessCount: 0,
-      alpha: 10,
-      beta: 1,
-    });
-    const baseline = makeCandidate({
-      similarity: 0.5,
-      createdAt: now.toISOString(),
-      accessedAt: now.toISOString(),
+  test("session_summary decays fast (7d half-life)", () => {
+    const oldSummary = makeCandidate({
+      similarity: 0.8,
+      source: "session_summary",
+      createdAt: new Date(now.getTime() - 14 * 86400000).toISOString(),
+      accessedAt: new Date(now.getTime() - 14 * 86400000).toISOString(),
       accessCount: 0,
     });
-    expect(computeScore(proven, now) / computeScore(baseline, now)).toBeCloseTo(
-      usefulness(10, 1),
-      10,
-    );
+    // At 14d with 7d half-life: decay = 2^(-14/7) = 0.25
+    // Score: 0.8 * 0.25 * 0.5 (source) = 0.1
+    expect(computeScore(oldSummary, now)).toBeCloseTo(0.1, 2);
   });
 });

package/src/tests/memory.test.ts CHANGED Viewed

@@ -342,8 +342,9 @@ describe("Memory System", () => {
         status: "idle",
       });
-      // Create memories with known embeddings
-      // Memory 1: agent scope for searchAgentId, embedding [1,0,0]
+      // Create memories with known embeddings (all share a baseline component
+      // so pairwise cosine similarity stays above the MIN_SIMILARITY floor).
+      // Memory 1: agent scope for searchAgentId
       const m1 = store.store({
         agentId: searchAgentId,
         scope: "agent",
@@ -351,9 +352,9 @@ describe("Memory System", () => {
         content: "Agent-scoped content",
         source: "manual",
       });
-      store.updateEmbedding(m1.id, new Float32Array([1, 0, 0]), "test-model");
+      store.updateEmbedding(m1.id, new Float32Array([1, 0.3, 0.3]), "test-model");
-      // Memory 2: swarm scope, embedding [0,1,0]
+      // Memory 2: swarm scope
       const m2 = store.store({
         agentId: searchAgentId,
         scope: "swarm",
@@ -361,9 +362,9 @@ describe("Memory System", () => {
         content: "Swarm-scoped content",
         source: "file_index",
       });
-      store.updateEmbedding(m2.id, new Float32Array([0, 1, 0]), "test-model");
+      store.updateEmbedding(m2.id, new Float32Array([0.3, 1, 0.3]), "test-model");
-      // Memory 3: agent scope for OTHER agent, embedding [0,0,1]
+      // Memory 3: agent scope for OTHER agent
       const m3 = store.store({
         agentId: searchAgentId2,
         scope: "agent",
@@ -371,11 +372,11 @@ describe("Memory System", () => {
         content: "Other agent's private memory",
         source: "manual",
       });
-      store.updateEmbedding(m3.id, new Float32Array([0, 0, 1]), "test-model");
+      store.updateEmbedding(m3.id, new Float32Array([0.3, 0.3, 1]), "test-model");
     });
     test("worker sees own agent-scoped + swarm memories", () => {
-      const query = new Float32Array([1, 0, 0]); // closest to Memory 1
+      const query = new Float32Array([1, 0.3, 0.3]); // closest to Memory 1
       const results = store.search(query, searchAgentId, { isLead: false });
       const names = results.map((r) => r.name);
@@ -385,7 +386,7 @@ describe("Memory System", () => {
     });
     test("worker does not see other agent's agent-scoped memories", () => {
-      const query = new Float32Array([0, 0, 1]); // closest to Memory 3
+      const query = new Float32Array([0.3, 0.3, 1]); // closest to Memory 3
       const results = store.search(query, searchAgentId, { isLead: false });
       const names = results.map((r) => r.name);
@@ -393,7 +394,7 @@ describe("Memory System", () => {
     });
     test("lead sees ALL memories across agents", () => {
-      const query = new Float32Array([0, 0, 1]); // closest to Memory 3
+      const query = new Float32Array([0.3, 0.3, 1]); // closest to Memory 3
       const results = store.search(query, searchAgentId, { isLead: true });
       const names = results.map((r) => r.name);
@@ -403,12 +404,12 @@ describe("Memory System", () => {
     });
     test("results sorted by similarity (highest first)", () => {
-      const query = new Float32Array([1, 0, 0]); // identical to Memory 1's embedding
+      const query = new Float32Array([1, 0.3, 0.3]); // closest to Memory 1's embedding
       const results = store.search(query, searchAgentId, { isLead: true });
       expect(results.length).toBeGreaterThan(0);
       expect(results[0].name).toBe("Agent Memory 1");
-      expect(results[0].similarity).toBeCloseTo(1.0, 3);
+      expect(results[0].similarity).toBeGreaterThan(0.9);
       // Each subsequent result should have lower or equal similarity
       for (let i = 1; i < results.length; i++) {

package/src/tests/seed-scripts.test.ts CHANGED Viewed

@@ -208,6 +208,211 @@ describe("seed-scripts catalog", () => {
     ).toBeGreaterThan(0.99);
   });
+  test("compound-insights reports script usage and cost honesty rails", async () => {
+    const queries: string[] = [];
+    const ctx = {
+      swarm: {
+        async db_query({ sql }: { sql: string }) {
+          queries.push(sql);
+          if (sql.includes("FROM script_runs sr")) {
+            return {
+              columns: ["scriptName", "kind", "status", "startedAt", "finishedAt", "durationMs"],
+              rows: [
+                [
+                  "compound-insights",
+                  "inline",
+                  "completed",
+                  "2026-06-08T00:00:00.000Z",
+                  "2026-06-08T00:00:01.000Z",
+                  1000,
+                ],
+                [
+                  "daily-dashboard",
+                  "workflow",
+                  "failed",
+                  "2026-06-08T01:00:00.000Z",
+                  "2026-06-08T01:00:03.000Z",
+                  3000,
+                ],
+              ],
+            };
+          }
+          if (sql.includes("FROM scripts") && sql.includes("GROUP BY scope, isScratch")) {
+            return {
+              columns: ["scope", "isScratch", "count"],
+              rows: [
+                ["global", 0, 2],
+                ["agent", 1, 1],
+              ],
+            };
+          }
+          if (sql.includes("FROM script_versions sv")) {
+            return {
+              columns: ["scope", "count"],
+              rows: [["global", 3]],
+            };
+          }
+          if (sql.includes("FROM session_logs") && sql.includes("%script-run%")) {
+            return {
+              columns: ["tool", "calls"],
+              rows: [["mcp__agent_swarm__script-run", 5]],
+            };
+          }
+          if (sql.includes("FROM session_costs sc")) {
+            return {
+              columns: [
+                "taskId",
+                "agentId",
+                "agentName",
+                "provider",
+                "totalCostUsd",
+                "inputTokens",
+                "outputTokens",
+                "cacheReadTokens",
+                "cacheWriteTokens",
+                "reasoningOutputTokens",
+                "thinkingTokens",
+                "numTurns",
+                "model",
+                "costSource",
+              ],
+              rows: [
+                [
+                  "task-a",
+                  "agent-a",
+                  "Picateclas",
+                  "codex",
+                  0.3,
+                  100,
+                  20,
+                  10,
+                  null,
+                  3,
+                  4,
+                  null,
+                  "gpt-5.5",
+                  "harness",
+                ],
+                [
+                  "task-b",
+                  "agent-a",
+                  "Picateclas",
+                  "codex",
+                  0.5,
+                  200,
+                  40,
+                  20,
+                  2,
+                  0,
+                  0,
+                  2,
+                  "gpt-5.5",
+                  "pricing-table",
+                ],
+                [
+                  "task-c",
+                  "agent-b",
+                  "Worker",
+                  "claude",
+                  9.9,
+                  300,
+                  60,
+                  30,
+                  3,
+                  0,
+                  0,
+                  3,
+                  "unknown",
+                  "unpriced",
+                ],
+                [
+                  null,
+                  "agent-a",
+                  "Picateclas",
+                  "codex",
+                  0.2,
+                  50,
+                  10,
+                  5,
+                  null,
+                  1,
+                  1,
+                  null,
+                  "gpt-5.5",
+                  "harness",
+                ],
+              ],
+            };
+          }
+          return { columns: [], rows: [] };
+        },
+      },
+    };
+    const result = await compoundInsights(
+      {
+        days: 7,
+        includeToolUsage: false,
+        includeScheduleHealth: false,
+        includeMemoryHealth: false,
+        includeScriptCandidates: false,
+        includeByAgent: false,
+        publishPage: false,
+      },
+      ctx,
+    );
+    expect(queries.some((sql) => sql.includes("FROM script_runs sr"))).toBe(true);
+    expect(queries.some((sql) => sql.includes("FROM session_costs sc"))).toBe(true);
+    expect(result.scriptUsage.runs).toMatchObject({
+      total: 2,
+      inline: 1,
+      workflow: 1,
+      completed: 1,
+      failed: 1,
+      successRate: 50,
+      durationP50Ms: 1000,
+      durationP95Ms: 3000,
+    });
+    expect(result.scriptUsage.creations).toMatchObject({
+      totalNonScratch: 2,
+      scratch: 1,
+      byScope: { global: 2 },
+    });
+    expect(result.scriptUsage.edits).toMatchObject({
+      total: 3,
+      byScope: { global: 3 },
+    });
+    expect(result.scriptUsage.mcpToolCalls).toEqual([
+      { tool: "mcp__agent_swarm__script-run", calls: 5 },
+    ]);
+    expect(result.costAndTokens).toMatchObject({
+      rows: 4,
+      taskCountForHeadlineAvg: 2,
+      avgCostPerTaskUsd: 0.4,
+      totalSpendUsd: 10.9,
+      trustedSpendUsd: 1,
+      trustedRows: 3,
+      trustedRowPercent: 75,
+      unpricedRows: 1,
+      unpricedSpendUsd: 9.9,
+      nonTaskSessionRows: 1,
+      nonTaskSessionSpendUsd: 0.2,
+      unknownCounts: {
+        cacheWriteTokens: 2,
+        numTurns: 2,
+      },
+    });
+    expect(result.costAndTokens.tokenTotals).toMatchObject({
+      inputTokens: 650,
+      outputTokens: 130,
+      cacheReadTokens: 65,
+      cacheWriteTokens: 5,
+      reasoningOutputTokens: 4,
+      thinkingTokens: 5,
+    });
+  });
   test("ops-catalog-audit clusters schedule, workflow, and prompt findings by goal", async () => {
     const queries: string[] = [];
     const result = await opsCatalogAudit(