@desplega.ai/agent-swarm 1.92.1 → 1.92.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,12 @@
1
1
  import { afterEach, beforeEach, describe, expect, test } from "bun:test";
2
- import { accessBoost, computeScore, recencyDecay, rerank, usefulness } from "../be/memory/reranker";
2
+ import {
3
+ accessBoost,
4
+ computeScore,
5
+ recencyDecay,
6
+ rerank,
7
+ sourceQuality,
8
+ usefulness,
9
+ } from "../be/memory/reranker";
3
10
  import type { MemoryCandidate } from "../be/memory/types";
4
11
 
5
12
  function makeCandidate(
@@ -37,21 +44,33 @@ describe("recencyDecay", () => {
37
44
  expect(decay).toBeCloseTo(1.0, 5);
38
45
  });
39
46
 
40
- test("memory at half-life (14d) → ~0.5", () => {
47
+ test("task_completion at half-life (14d) → ~0.5", () => {
41
48
  const created = new Date(now.getTime() - 14 * 86400000).toISOString();
42
- const decay = recencyDecay(created, now);
49
+ const decay = recencyDecay(created, now, "task_completion");
43
50
  expect(decay).toBeCloseTo(0.5, 2);
44
51
  });
45
52
 
46
- test("memory at half-life (28d) → ~0.25", () => {
47
- const created = new Date(now.getTime() - 28 * 86400000).toISOString();
48
- const decay = recencyDecay(created, now);
49
- expect(decay).toBeCloseTo(0.25, 2);
53
+ test("session_summary at 7d → ~0.5 (7d half-life)", () => {
54
+ const created = new Date(now.getTime() - 7 * 86400000).toISOString();
55
+ const decay = recencyDecay(created, now, "session_summary");
56
+ expect(decay).toBeCloseTo(0.5, 2);
57
+ });
58
+
59
+ test("file_index at 180d → ~0.5 (180d half-life)", () => {
60
+ const created = new Date(now.getTime() - 180 * 86400000).toISOString();
61
+ const decay = recencyDecay(created, now, "file_index");
62
+ expect(decay).toBeCloseTo(0.5, 2);
50
63
  });
51
64
 
52
- test("very old memory (365d) → near 0", () => {
65
+ test("manual memory at any age → 1.0 (no decay)", () => {
53
66
  const created = new Date(now.getTime() - 365 * 86400000).toISOString();
54
- const decay = recencyDecay(created, now);
67
+ const decay = recencyDecay(created, now, "manual");
68
+ expect(decay).toBe(1.0);
69
+ });
70
+
71
+ test("very old task_completion (365d) → near 0", () => {
72
+ const created = new Date(now.getTime() - 365 * 86400000).toISOString();
73
+ const decay = recencyDecay(created, now, "task_completion");
55
74
  expect(decay).toBeLessThan(0.001);
56
75
  });
57
76
 
@@ -60,6 +79,12 @@ describe("recencyDecay", () => {
60
79
  const decay = recencyDecay(created, now);
61
80
  expect(decay).toBe(1.0);
62
81
  });
82
+
83
+ test("no source provided → falls back to task_completion half-life", () => {
84
+ const created = new Date(now.getTime() - 14 * 86400000).toISOString();
85
+ const decay = recencyDecay(created, now);
86
+ expect(decay).toBeCloseTo(0.5, 2);
87
+ });
63
88
  });
64
89
 
65
90
  describe("accessBoost", () => {
@@ -93,31 +118,71 @@ describe("accessBoost", () => {
93
118
  });
94
119
  });
95
120
 
121
+ describe("sourceQuality", () => {
122
+ test("manual → 1.5", () => {
123
+ expect(sourceQuality("manual")).toBe(1.5);
124
+ });
125
+
126
+ test("file_index → 1.0", () => {
127
+ expect(sourceQuality("file_index")).toBe(1.0);
128
+ });
129
+
130
+ test("task_completion → 0.7", () => {
131
+ expect(sourceQuality("task_completion")).toBe(0.7);
132
+ });
133
+
134
+ test("session_summary → 0.5", () => {
135
+ expect(sourceQuality("session_summary")).toBe(0.5);
136
+ });
137
+ });
138
+
96
139
  describe("computeScore", () => {
97
140
  const now = new Date("2026-04-12T12:00:00Z");
98
141
 
99
- test("multiplies similarity × decay × boost", () => {
142
+ test("manual: similarity × 1.0 (no decay) × source(1.5) × boost × usefulness", () => {
100
143
  const candidate = makeCandidate({
101
144
  similarity: 0.8,
145
+ source: "manual",
102
146
  createdAt: now.toISOString(),
103
147
  accessedAt: now.toISOString(),
104
148
  accessCount: 0,
105
149
  });
106
150
  const score = computeScore(candidate, now);
107
- // 0.8 * 1.0 * 1.0 = 0.8
108
- expect(score).toBeCloseTo(0.8, 5);
151
+ // 0.8 * 1.0 (no decay for manual) * 1.0 (no boost) * 1.5 (source) * 1.0 (usefulness) = 1.2
152
+ expect(score).toBeCloseTo(1.2, 5);
109
153
  });
110
154
 
111
- test("old memory with no access gets penalized", () => {
155
+ test("task_completion at 14d penalized by decay AND source multiplier", () => {
112
156
  const candidate = makeCandidate({
113
157
  similarity: 0.8,
158
+ source: "task_completion",
114
159
  createdAt: new Date(now.getTime() - 14 * 86400000).toISOString(),
115
160
  accessedAt: new Date(now.getTime() - 14 * 86400000).toISOString(),
116
161
  accessCount: 0,
117
162
  });
118
163
  const score = computeScore(candidate, now);
119
- // 0.8 * 0.5 * 1.0 = 0.4
120
- expect(score).toBeCloseTo(0.4, 2);
164
+ // 0.8 * 0.5 (14d decay) * 1.0 (no boost) * 0.7 (source) * 1.0 (usefulness) = 0.28
165
+ expect(score).toBeCloseTo(0.28, 2);
166
+ });
167
+
168
+ test("old manual vs fresh task_completion: manual wins on relevance", () => {
169
+ const oldManual = makeCandidate({
170
+ similarity: 0.8,
171
+ source: "manual",
172
+ createdAt: new Date(now.getTime() - 76 * 86400000).toISOString(),
173
+ accessedAt: new Date(now.getTime() - 76 * 86400000).toISOString(),
174
+ accessCount: 0,
175
+ });
176
+ const freshTC = makeCandidate({
177
+ similarity: 0.05,
178
+ source: "task_completion",
179
+ createdAt: new Date(now.getTime() - 1 * 86400000).toISOString(),
180
+ accessedAt: new Date(now.getTime() - 1 * 86400000).toISOString(),
181
+ accessCount: 0,
182
+ });
183
+ // This is THE bug we're fixing: with the old flat 14d decay, the old manual
184
+ // memory scored lower than fresh noise. Now manual has no decay.
185
+ expect(computeScore(oldManual, now)).toBeGreaterThan(computeScore(freshTC, now));
121
186
  });
122
187
  });
123
188
 
@@ -166,36 +231,51 @@ describe("rerank", () => {
166
231
  expect(result[0]!.similarity).toBeGreaterThan(result[1]!.similarity);
167
232
  });
168
233
 
169
- test("recency boosts newer memory over older with same raw similarity", () => {
234
+ test("recency boosts newer task_completion over older with same raw similarity", () => {
170
235
  const candidates = [
171
236
  makeCandidate({
172
237
  similarity: 0.8,
173
- createdAt: new Date(now.getTime() - 14 * 86400000).toISOString(), // 14d old
238
+ source: "task_completion",
239
+ createdAt: new Date(now.getTime() - 14 * 86400000).toISOString(),
174
240
  }),
175
241
  makeCandidate({
176
242
  similarity: 0.8,
177
- createdAt: now.toISOString(), // fresh
243
+ source: "task_completion",
244
+ createdAt: now.toISOString(),
178
245
  }),
179
246
  ];
180
247
  const result = rerank(candidates, { limit: 2, now });
181
- // Fresh memory should rank higher due to recency decay
182
248
  expect(result[0]!.createdAt).toBe(now.toISOString());
183
249
  });
184
250
 
185
251
  test("now parameter enables deterministic testing", () => {
186
252
  const candidate = makeCandidate({
187
253
  similarity: 0.8,
254
+ source: "task_completion",
188
255
  createdAt: new Date(now.getTime() - 7 * 86400000).toISOString(),
189
256
  });
190
257
  const result1 = rerank([candidate], { limit: 1, now });
191
258
  const result2 = rerank([candidate], { limit: 1, now });
192
259
  expect(result1[0]!.similarity).toBe(result2[0]!.similarity);
193
260
  });
261
+
262
+ test("preserves rawSimilarity and compositeScore", () => {
263
+ const candidate = makeCandidate({
264
+ similarity: 0.8,
265
+ source: "manual",
266
+ createdAt: now.toISOString(),
267
+ });
268
+ const result = rerank([candidate], { limit: 1, now });
269
+ expect(result[0]!.rawSimilarity).toBe(0.8);
270
+ expect(result[0]!.compositeScore).toBeDefined();
271
+ // For a fresh manual memory: 0.8 * 1.0 (no decay) * 1.0 (no boost) * 1.5 (source) * 1.0 (usefulness)
272
+ expect(result[0]!.compositeScore).toBeCloseTo(1.2, 5);
273
+ // similarity field = compositeScore
274
+ expect(result[0]!.similarity).toBe(result[0]!.compositeScore);
275
+ });
194
276
  });
195
277
 
196
278
  describe("usefulness", () => {
197
- // The default-floor cases assume MEMORY_DEMOTION_FLOOR is unset/empty.
198
- // The override case sets and restores the env var.
199
279
  let originalFloor: string | undefined;
200
280
  beforeEach(() => {
201
281
  originalFloor = process.env.MEMORY_DEMOTION_FLOOR;
@@ -224,10 +304,6 @@ describe("usefulness", () => {
224
304
  });
225
305
 
226
306
  test("Beta(50,1) → 2 * 50/51 ≈ 1.961 (approaches ceiling, never above 2.0)", () => {
227
- // NB: the clamp `Math.min(2.0, 2 * mean)` is a defensive ceiling — the
228
- // formula 2 * α/(α+β) is bounded above by 2 for any finite β > 0, so the
229
- // clamp only fires on degenerate inputs (β = 0). The plan's "===2.0"
230
- // expectation was a numerical slip; the asymptote is what we ship.
231
307
  expect(usefulness(50, 1)).toBeCloseTo((2 * 50) / 51, 10);
232
308
  expect(usefulness(50, 1)).toBeLessThan(2.0);
233
309
  });
@@ -242,110 +318,45 @@ describe("usefulness", () => {
242
318
  });
243
319
  });
244
320
 
245
- describe("backward-compat: MEMORY_RATERS unset → reranker is a no-op", () => {
246
- // Litmus for step-1: with default Beta(1,1) priors and the default
247
- // MEMORY_DEMOTION_FLOOR=1.0, computeScore must return EXACTLY the same value
248
- // as a pre-rater build (similarity * recencyDecay * accessBoost).
249
- const now = new Date("2026-04-12T12:00:00Z");
250
-
251
- let originalFloor: string | undefined;
252
- beforeEach(() => {
253
- originalFloor = process.env.MEMORY_DEMOTION_FLOOR;
254
- delete process.env.MEMORY_DEMOTION_FLOOR;
255
- });
256
- afterEach(() => {
257
- if (originalFloor === undefined) {
258
- delete process.env.MEMORY_DEMOTION_FLOOR;
259
- } else {
260
- process.env.MEMORY_DEMOTION_FLOOR = originalFloor;
261
- }
262
- });
263
-
264
- test("computeScore equals similarity * recencyDecay * accessBoost (no usefulness drift)", () => {
265
- const cases: MemoryCandidate[] = [
266
- makeCandidate({
267
- similarity: 0.8,
268
- createdAt: now.toISOString(),
269
- accessedAt: now.toISOString(),
270
- accessCount: 0,
271
- }),
272
- makeCandidate({
273
- similarity: 0.5,
274
- createdAt: new Date(now.getTime() - 14 * 86400000).toISOString(),
275
- accessedAt: new Date(now.getTime() - 24 * 3600000).toISOString(),
276
- accessCount: 5,
277
- }),
278
- makeCandidate({
279
- similarity: 0.99,
280
- createdAt: new Date(now.getTime() - 28 * 86400000).toISOString(),
281
- accessedAt: new Date(now.getTime() - 72 * 3600000).toISOString(),
282
- accessCount: 12,
283
- }),
284
- ];
285
-
286
- for (const c of cases) {
287
- const expected =
288
- c.similarity *
289
- recencyDecay(c.createdAt, now) *
290
- accessBoost(c.accessedAt, c.accessCount, now);
291
- expect(computeScore(c, now)).toBe(expected);
292
- }
293
- });
321
+ describe("source-aware scoring: manual memories survive age penalty", () => {
322
+ const now = new Date("2026-06-08T12:00:00Z");
294
323
 
295
- test("snapshot order + scores match a hard-coded pre-rater baseline", () => {
296
- // Baseline computed from main (pre-step-1): similarity * recencyDecay * accessBoost.
297
- // With alpha=beta=1 + default floor, the new code must produce identical numbers.
298
- const candidates = [
299
- makeCandidate({
300
- similarity: 0.9,
301
- createdAt: now.toISOString(),
302
- accessedAt: now.toISOString(),
303
- accessCount: 0,
304
- }),
305
- makeCandidate({
306
- similarity: 0.6,
307
- createdAt: new Date(now.getTime() - 7 * 86400000).toISOString(),
308
- accessedAt: now.toISOString(),
309
- accessCount: 0,
310
- }),
311
- makeCandidate({
312
- similarity: 0.3,
313
- createdAt: new Date(now.getTime() - 28 * 86400000).toISOString(),
314
- accessedAt: now.toISOString(),
315
- accessCount: 0,
316
- }),
317
- ];
318
- const result = rerank(candidates, { limit: 3, now });
324
+ test("76-day-old manual memory scores higher than 1-day-old noise task_completion", () => {
325
+ // The root-cause scenario from Taras's report: a 76-day-old manual memory
326
+ // with raw similarity 0.8 was being outscored by a 1-day-old noise result
327
+ // with raw similarity 0.05. The old reranker gave the noise result a HIGHER
328
+ // composite score because the flat 14d half-life crushed the old manual
329
+ // memory by 2^(-76/14) = 0.023. Now manual has no decay.
330
+ const oldManual = makeCandidate({
331
+ similarity: 0.8,
332
+ source: "manual",
333
+ createdAt: new Date(now.getTime() - 76 * 86400000).toISOString(),
334
+ accessedAt: new Date(now.getTime() - 76 * 86400000).toISOString(),
335
+ accessCount: 0,
336
+ });
337
+ const freshNoise = makeCandidate({
338
+ similarity: 0.05,
339
+ source: "task_completion",
340
+ createdAt: new Date(now.getTime() - 1 * 86400000).toISOString(),
341
+ accessedAt: new Date(now.getTime() - 1 * 86400000).toISOString(),
342
+ accessCount: 0,
343
+ });
319
344
 
320
- // Expected scores: similarity * 2^(-ageDays/14) (no access boost, alpha=beta=1).
321
- // 0.9 * 1.0 = 0.9
322
- // 0.6 * 2^(-0.5) ≈ 0.4242640687
323
- // 0.3 * 2^(-2) = 0.075
324
- expect(result[0]!.similarity).toBeCloseTo(0.9, 10);
325
- expect(result[1]!.similarity).toBeCloseTo(0.6 * 2 ** -0.5, 10);
326
- expect(result[2]!.similarity).toBeCloseTo(0.075, 10);
345
+ const ranked = rerank([freshNoise, oldManual], { limit: 2, now });
346
+ expect(ranked[0]!.source).toBe("manual");
347
+ expect(ranked[0]!.rawSimilarity).toBe(0.8);
327
348
  });
328
349
 
329
- test("usefulness multiplies into score when posteriors move", () => {
330
- // Sanity: a memory with α=10, β=1 should score ~1.818× higher than the same
331
- // memory at α=β=1, holding everything else constant. Other rows unchanged.
332
- const proven = makeCandidate({
333
- similarity: 0.5,
334
- createdAt: now.toISOString(),
335
- accessedAt: now.toISOString(),
336
- accessCount: 0,
337
- alpha: 10,
338
- beta: 1,
339
- });
340
- const baseline = makeCandidate({
341
- similarity: 0.5,
342
- createdAt: now.toISOString(),
343
- accessedAt: now.toISOString(),
350
+ test("session_summary decays fast (7d half-life)", () => {
351
+ const oldSummary = makeCandidate({
352
+ similarity: 0.8,
353
+ source: "session_summary",
354
+ createdAt: new Date(now.getTime() - 14 * 86400000).toISOString(),
355
+ accessedAt: new Date(now.getTime() - 14 * 86400000).toISOString(),
344
356
  accessCount: 0,
345
357
  });
346
- expect(computeScore(proven, now) / computeScore(baseline, now)).toBeCloseTo(
347
- usefulness(10, 1),
348
- 10,
349
- );
358
+ // At 14d with 7d half-life: decay = 2^(-14/7) = 0.25
359
+ // Score: 0.8 * 0.25 * 0.5 (source) = 0.1
360
+ expect(computeScore(oldSummary, now)).toBeCloseTo(0.1, 2);
350
361
  });
351
362
  });
@@ -342,8 +342,9 @@ describe("Memory System", () => {
342
342
  status: "idle",
343
343
  });
344
344
 
345
- // Create memories with known embeddings
346
- // Memory 1: agent scope for searchAgentId, embedding [1,0,0]
345
+ // Create memories with known embeddings (all share a baseline component
346
+ // so pairwise cosine similarity stays above the MIN_SIMILARITY floor).
347
+ // Memory 1: agent scope for searchAgentId
347
348
  const m1 = store.store({
348
349
  agentId: searchAgentId,
349
350
  scope: "agent",
@@ -351,9 +352,9 @@ describe("Memory System", () => {
351
352
  content: "Agent-scoped content",
352
353
  source: "manual",
353
354
  });
354
- store.updateEmbedding(m1.id, new Float32Array([1, 0, 0]), "test-model");
355
+ store.updateEmbedding(m1.id, new Float32Array([1, 0.3, 0.3]), "test-model");
355
356
 
356
- // Memory 2: swarm scope, embedding [0,1,0]
357
+ // Memory 2: swarm scope
357
358
  const m2 = store.store({
358
359
  agentId: searchAgentId,
359
360
  scope: "swarm",
@@ -361,9 +362,9 @@ describe("Memory System", () => {
361
362
  content: "Swarm-scoped content",
362
363
  source: "file_index",
363
364
  });
364
- store.updateEmbedding(m2.id, new Float32Array([0, 1, 0]), "test-model");
365
+ store.updateEmbedding(m2.id, new Float32Array([0.3, 1, 0.3]), "test-model");
365
366
 
366
- // Memory 3: agent scope for OTHER agent, embedding [0,0,1]
367
+ // Memory 3: agent scope for OTHER agent
367
368
  const m3 = store.store({
368
369
  agentId: searchAgentId2,
369
370
  scope: "agent",
@@ -371,11 +372,11 @@ describe("Memory System", () => {
371
372
  content: "Other agent's private memory",
372
373
  source: "manual",
373
374
  });
374
- store.updateEmbedding(m3.id, new Float32Array([0, 0, 1]), "test-model");
375
+ store.updateEmbedding(m3.id, new Float32Array([0.3, 0.3, 1]), "test-model");
375
376
  });
376
377
 
377
378
  test("worker sees own agent-scoped + swarm memories", () => {
378
- const query = new Float32Array([1, 0, 0]); // closest to Memory 1
379
+ const query = new Float32Array([1, 0.3, 0.3]); // closest to Memory 1
379
380
  const results = store.search(query, searchAgentId, { isLead: false });
380
381
  const names = results.map((r) => r.name);
381
382
 
@@ -385,7 +386,7 @@ describe("Memory System", () => {
385
386
  });
386
387
 
387
388
  test("worker does not see other agent's agent-scoped memories", () => {
388
- const query = new Float32Array([0, 0, 1]); // closest to Memory 3
389
+ const query = new Float32Array([0.3, 0.3, 1]); // closest to Memory 3
389
390
  const results = store.search(query, searchAgentId, { isLead: false });
390
391
  const names = results.map((r) => r.name);
391
392
 
@@ -393,7 +394,7 @@ describe("Memory System", () => {
393
394
  });
394
395
 
395
396
  test("lead sees ALL memories across agents", () => {
396
- const query = new Float32Array([0, 0, 1]); // closest to Memory 3
397
+ const query = new Float32Array([0.3, 0.3, 1]); // closest to Memory 3
397
398
  const results = store.search(query, searchAgentId, { isLead: true });
398
399
  const names = results.map((r) => r.name);
399
400
 
@@ -403,12 +404,12 @@ describe("Memory System", () => {
403
404
  });
404
405
 
405
406
  test("results sorted by similarity (highest first)", () => {
406
- const query = new Float32Array([1, 0, 0]); // identical to Memory 1's embedding
407
+ const query = new Float32Array([1, 0.3, 0.3]); // closest to Memory 1's embedding
407
408
  const results = store.search(query, searchAgentId, { isLead: true });
408
409
 
409
410
  expect(results.length).toBeGreaterThan(0);
410
411
  expect(results[0].name).toBe("Agent Memory 1");
411
- expect(results[0].similarity).toBeCloseTo(1.0, 3);
412
+ expect(results[0].similarity).toBeGreaterThan(0.9);
412
413
 
413
414
  // Each subsequent result should have lower or equal similarity
414
415
  for (let i = 1; i < results.length; i++) {
@@ -208,6 +208,211 @@ describe("seed-scripts catalog", () => {
208
208
  ).toBeGreaterThan(0.99);
209
209
  });
210
210
 
211
+ test("compound-insights reports script usage and cost honesty rails", async () => {
212
+ const queries: string[] = [];
213
+ const ctx = {
214
+ swarm: {
215
+ async db_query({ sql }: { sql: string }) {
216
+ queries.push(sql);
217
+ if (sql.includes("FROM script_runs sr")) {
218
+ return {
219
+ columns: ["scriptName", "kind", "status", "startedAt", "finishedAt", "durationMs"],
220
+ rows: [
221
+ [
222
+ "compound-insights",
223
+ "inline",
224
+ "completed",
225
+ "2026-06-08T00:00:00.000Z",
226
+ "2026-06-08T00:00:01.000Z",
227
+ 1000,
228
+ ],
229
+ [
230
+ "daily-dashboard",
231
+ "workflow",
232
+ "failed",
233
+ "2026-06-08T01:00:00.000Z",
234
+ "2026-06-08T01:00:03.000Z",
235
+ 3000,
236
+ ],
237
+ ],
238
+ };
239
+ }
240
+ if (sql.includes("FROM scripts") && sql.includes("GROUP BY scope, isScratch")) {
241
+ return {
242
+ columns: ["scope", "isScratch", "count"],
243
+ rows: [
244
+ ["global", 0, 2],
245
+ ["agent", 1, 1],
246
+ ],
247
+ };
248
+ }
249
+ if (sql.includes("FROM script_versions sv")) {
250
+ return {
251
+ columns: ["scope", "count"],
252
+ rows: [["global", 3]],
253
+ };
254
+ }
255
+ if (sql.includes("FROM session_logs") && sql.includes("%script-run%")) {
256
+ return {
257
+ columns: ["tool", "calls"],
258
+ rows: [["mcp__agent_swarm__script-run", 5]],
259
+ };
260
+ }
261
+ if (sql.includes("FROM session_costs sc")) {
262
+ return {
263
+ columns: [
264
+ "taskId",
265
+ "agentId",
266
+ "agentName",
267
+ "provider",
268
+ "totalCostUsd",
269
+ "inputTokens",
270
+ "outputTokens",
271
+ "cacheReadTokens",
272
+ "cacheWriteTokens",
273
+ "reasoningOutputTokens",
274
+ "thinkingTokens",
275
+ "numTurns",
276
+ "model",
277
+ "costSource",
278
+ ],
279
+ rows: [
280
+ [
281
+ "task-a",
282
+ "agent-a",
283
+ "Picateclas",
284
+ "codex",
285
+ 0.3,
286
+ 100,
287
+ 20,
288
+ 10,
289
+ null,
290
+ 3,
291
+ 4,
292
+ null,
293
+ "gpt-5.5",
294
+ "harness",
295
+ ],
296
+ [
297
+ "task-b",
298
+ "agent-a",
299
+ "Picateclas",
300
+ "codex",
301
+ 0.5,
302
+ 200,
303
+ 40,
304
+ 20,
305
+ 2,
306
+ 0,
307
+ 0,
308
+ 2,
309
+ "gpt-5.5",
310
+ "pricing-table",
311
+ ],
312
+ [
313
+ "task-c",
314
+ "agent-b",
315
+ "Worker",
316
+ "claude",
317
+ 9.9,
318
+ 300,
319
+ 60,
320
+ 30,
321
+ 3,
322
+ 0,
323
+ 0,
324
+ 3,
325
+ "unknown",
326
+ "unpriced",
327
+ ],
328
+ [
329
+ null,
330
+ "agent-a",
331
+ "Picateclas",
332
+ "codex",
333
+ 0.2,
334
+ 50,
335
+ 10,
336
+ 5,
337
+ null,
338
+ 1,
339
+ 1,
340
+ null,
341
+ "gpt-5.5",
342
+ "harness",
343
+ ],
344
+ ],
345
+ };
346
+ }
347
+ return { columns: [], rows: [] };
348
+ },
349
+ },
350
+ };
351
+
352
+ const result = await compoundInsights(
353
+ {
354
+ days: 7,
355
+ includeToolUsage: false,
356
+ includeScheduleHealth: false,
357
+ includeMemoryHealth: false,
358
+ includeScriptCandidates: false,
359
+ includeByAgent: false,
360
+ publishPage: false,
361
+ },
362
+ ctx,
363
+ );
364
+
365
+ expect(queries.some((sql) => sql.includes("FROM script_runs sr"))).toBe(true);
366
+ expect(queries.some((sql) => sql.includes("FROM session_costs sc"))).toBe(true);
367
+ expect(result.scriptUsage.runs).toMatchObject({
368
+ total: 2,
369
+ inline: 1,
370
+ workflow: 1,
371
+ completed: 1,
372
+ failed: 1,
373
+ successRate: 50,
374
+ durationP50Ms: 1000,
375
+ durationP95Ms: 3000,
376
+ });
377
+ expect(result.scriptUsage.creations).toMatchObject({
378
+ totalNonScratch: 2,
379
+ scratch: 1,
380
+ byScope: { global: 2 },
381
+ });
382
+ expect(result.scriptUsage.edits).toMatchObject({
383
+ total: 3,
384
+ byScope: { global: 3 },
385
+ });
386
+ expect(result.scriptUsage.mcpToolCalls).toEqual([
387
+ { tool: "mcp__agent_swarm__script-run", calls: 5 },
388
+ ]);
389
+ expect(result.costAndTokens).toMatchObject({
390
+ rows: 4,
391
+ taskCountForHeadlineAvg: 2,
392
+ avgCostPerTaskUsd: 0.4,
393
+ totalSpendUsd: 10.9,
394
+ trustedSpendUsd: 1,
395
+ trustedRows: 3,
396
+ trustedRowPercent: 75,
397
+ unpricedRows: 1,
398
+ unpricedSpendUsd: 9.9,
399
+ nonTaskSessionRows: 1,
400
+ nonTaskSessionSpendUsd: 0.2,
401
+ unknownCounts: {
402
+ cacheWriteTokens: 2,
403
+ numTurns: 2,
404
+ },
405
+ });
406
+ expect(result.costAndTokens.tokenTotals).toMatchObject({
407
+ inputTokens: 650,
408
+ outputTokens: 130,
409
+ cacheReadTokens: 65,
410
+ cacheWriteTokens: 5,
411
+ reasoningOutputTokens: 4,
412
+ thinkingTokens: 5,
413
+ });
414
+ });
415
+
211
416
  test("ops-catalog-audit clusters schedule, workflow, and prompt findings by goal", async () => {
212
417
  const queries: string[] = [];
213
418
  const result = await opsCatalogAudit(