@desplega.ai/agent-swarm 1.92.1 → 1.92.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openapi.json +1 -1
- package/package.json +1 -1
- package/src/be/db.ts +89 -0
- package/src/be/memory/boot-reembed.ts +85 -0
- package/src/be/memory/constants.ts +42 -1
- package/src/be/memory/providers/openai-embedding.ts +13 -0
- package/src/be/memory/providers/sqlite-store.ts +33 -1
- package/src/be/memory/reranker.ts +35 -17
- package/src/be/memory/types.ts +8 -0
- package/src/be/modelsdev-cache.json +5308 -2165
- package/src/be/seed-scripts/catalog/compound-insights.ts +371 -0
- package/src/http/index.ts +9 -0
- package/src/http/memory.ts +4 -0
- package/src/tasks/worker-follow-up.ts +12 -0
- package/src/tests/memory-e2e.test.ts +6 -6
- package/src/tests/memory-rater-e2e.test.ts +4 -5
- package/src/tests/memory-reranker.test.ts +135 -124
- package/src/tests/memory.test.ts +13 -12
- package/src/tests/seed-scripts.test.ts +205 -0
- package/src/tests/task-cascade-fail.test.ts +304 -0
- package/templates/workflows/llm-safe-release-context/config.json +13 -0
- package/templates/workflows/llm-safe-release-context/content.md +69 -0
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
2
|
-
import {
|
|
2
|
+
import {
|
|
3
|
+
accessBoost,
|
|
4
|
+
computeScore,
|
|
5
|
+
recencyDecay,
|
|
6
|
+
rerank,
|
|
7
|
+
sourceQuality,
|
|
8
|
+
usefulness,
|
|
9
|
+
} from "../be/memory/reranker";
|
|
3
10
|
import type { MemoryCandidate } from "../be/memory/types";
|
|
4
11
|
|
|
5
12
|
function makeCandidate(
|
|
@@ -37,21 +44,33 @@ describe("recencyDecay", () => {
|
|
|
37
44
|
expect(decay).toBeCloseTo(1.0, 5);
|
|
38
45
|
});
|
|
39
46
|
|
|
40
|
-
test("
|
|
47
|
+
test("task_completion at half-life (14d) → ~0.5", () => {
|
|
41
48
|
const created = new Date(now.getTime() - 14 * 86400000).toISOString();
|
|
42
|
-
const decay = recencyDecay(created, now);
|
|
49
|
+
const decay = recencyDecay(created, now, "task_completion");
|
|
43
50
|
expect(decay).toBeCloseTo(0.5, 2);
|
|
44
51
|
});
|
|
45
52
|
|
|
46
|
-
test("
|
|
47
|
-
const created = new Date(now.getTime() -
|
|
48
|
-
const decay = recencyDecay(created, now);
|
|
49
|
-
expect(decay).toBeCloseTo(0.
|
|
53
|
+
test("session_summary at 7d → ~0.5 (7d half-life)", () => {
|
|
54
|
+
const created = new Date(now.getTime() - 7 * 86400000).toISOString();
|
|
55
|
+
const decay = recencyDecay(created, now, "session_summary");
|
|
56
|
+
expect(decay).toBeCloseTo(0.5, 2);
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
test("file_index at 180d → ~0.5 (180d half-life)", () => {
|
|
60
|
+
const created = new Date(now.getTime() - 180 * 86400000).toISOString();
|
|
61
|
+
const decay = recencyDecay(created, now, "file_index");
|
|
62
|
+
expect(decay).toBeCloseTo(0.5, 2);
|
|
50
63
|
});
|
|
51
64
|
|
|
52
|
-
test("
|
|
65
|
+
test("manual memory at any age → 1.0 (no decay)", () => {
|
|
53
66
|
const created = new Date(now.getTime() - 365 * 86400000).toISOString();
|
|
54
|
-
const decay = recencyDecay(created, now);
|
|
67
|
+
const decay = recencyDecay(created, now, "manual");
|
|
68
|
+
expect(decay).toBe(1.0);
|
|
69
|
+
});
|
|
70
|
+
|
|
71
|
+
test("very old task_completion (365d) → near 0", () => {
|
|
72
|
+
const created = new Date(now.getTime() - 365 * 86400000).toISOString();
|
|
73
|
+
const decay = recencyDecay(created, now, "task_completion");
|
|
55
74
|
expect(decay).toBeLessThan(0.001);
|
|
56
75
|
});
|
|
57
76
|
|
|
@@ -60,6 +79,12 @@ describe("recencyDecay", () => {
|
|
|
60
79
|
const decay = recencyDecay(created, now);
|
|
61
80
|
expect(decay).toBe(1.0);
|
|
62
81
|
});
|
|
82
|
+
|
|
83
|
+
test("no source provided → falls back to task_completion half-life", () => {
|
|
84
|
+
const created = new Date(now.getTime() - 14 * 86400000).toISOString();
|
|
85
|
+
const decay = recencyDecay(created, now);
|
|
86
|
+
expect(decay).toBeCloseTo(0.5, 2);
|
|
87
|
+
});
|
|
63
88
|
});
|
|
64
89
|
|
|
65
90
|
describe("accessBoost", () => {
|
|
@@ -93,31 +118,71 @@ describe("accessBoost", () => {
|
|
|
93
118
|
});
|
|
94
119
|
});
|
|
95
120
|
|
|
121
|
+
describe("sourceQuality", () => {
|
|
122
|
+
test("manual → 1.5", () => {
|
|
123
|
+
expect(sourceQuality("manual")).toBe(1.5);
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
test("file_index → 1.0", () => {
|
|
127
|
+
expect(sourceQuality("file_index")).toBe(1.0);
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
test("task_completion → 0.7", () => {
|
|
131
|
+
expect(sourceQuality("task_completion")).toBe(0.7);
|
|
132
|
+
});
|
|
133
|
+
|
|
134
|
+
test("session_summary → 0.5", () => {
|
|
135
|
+
expect(sourceQuality("session_summary")).toBe(0.5);
|
|
136
|
+
});
|
|
137
|
+
});
|
|
138
|
+
|
|
96
139
|
describe("computeScore", () => {
|
|
97
140
|
const now = new Date("2026-04-12T12:00:00Z");
|
|
98
141
|
|
|
99
|
-
test("
|
|
142
|
+
test("manual: similarity × 1.0 (no decay) × source(1.5) × boost × usefulness", () => {
|
|
100
143
|
const candidate = makeCandidate({
|
|
101
144
|
similarity: 0.8,
|
|
145
|
+
source: "manual",
|
|
102
146
|
createdAt: now.toISOString(),
|
|
103
147
|
accessedAt: now.toISOString(),
|
|
104
148
|
accessCount: 0,
|
|
105
149
|
});
|
|
106
150
|
const score = computeScore(candidate, now);
|
|
107
|
-
// 0.8 * 1.0 * 1.0 =
|
|
108
|
-
expect(score).toBeCloseTo(
|
|
151
|
+
// 0.8 * 1.0 (no decay for manual) * 1.0 (no boost) * 1.5 (source) * 1.0 (usefulness) = 1.2
|
|
152
|
+
expect(score).toBeCloseTo(1.2, 5);
|
|
109
153
|
});
|
|
110
154
|
|
|
111
|
-
test("
|
|
155
|
+
test("task_completion at 14d → penalized by decay AND source multiplier", () => {
|
|
112
156
|
const candidate = makeCandidate({
|
|
113
157
|
similarity: 0.8,
|
|
158
|
+
source: "task_completion",
|
|
114
159
|
createdAt: new Date(now.getTime() - 14 * 86400000).toISOString(),
|
|
115
160
|
accessedAt: new Date(now.getTime() - 14 * 86400000).toISOString(),
|
|
116
161
|
accessCount: 0,
|
|
117
162
|
});
|
|
118
163
|
const score = computeScore(candidate, now);
|
|
119
|
-
// 0.8 * 0.5 * 1.0 = 0.
|
|
120
|
-
expect(score).toBeCloseTo(0.
|
|
164
|
+
// 0.8 * 0.5 (14d decay) * 1.0 (no boost) * 0.7 (source) * 1.0 (usefulness) = 0.28
|
|
165
|
+
expect(score).toBeCloseTo(0.28, 2);
|
|
166
|
+
});
|
|
167
|
+
|
|
168
|
+
test("old manual vs fresh task_completion: manual wins on relevance", () => {
|
|
169
|
+
const oldManual = makeCandidate({
|
|
170
|
+
similarity: 0.8,
|
|
171
|
+
source: "manual",
|
|
172
|
+
createdAt: new Date(now.getTime() - 76 * 86400000).toISOString(),
|
|
173
|
+
accessedAt: new Date(now.getTime() - 76 * 86400000).toISOString(),
|
|
174
|
+
accessCount: 0,
|
|
175
|
+
});
|
|
176
|
+
const freshTC = makeCandidate({
|
|
177
|
+
similarity: 0.05,
|
|
178
|
+
source: "task_completion",
|
|
179
|
+
createdAt: new Date(now.getTime() - 1 * 86400000).toISOString(),
|
|
180
|
+
accessedAt: new Date(now.getTime() - 1 * 86400000).toISOString(),
|
|
181
|
+
accessCount: 0,
|
|
182
|
+
});
|
|
183
|
+
// This is THE bug we're fixing: with the old flat 14d decay, the old manual
|
|
184
|
+
// memory scored lower than fresh noise. Now manual has no decay.
|
|
185
|
+
expect(computeScore(oldManual, now)).toBeGreaterThan(computeScore(freshTC, now));
|
|
121
186
|
});
|
|
122
187
|
});
|
|
123
188
|
|
|
@@ -166,36 +231,51 @@ describe("rerank", () => {
|
|
|
166
231
|
expect(result[0]!.similarity).toBeGreaterThan(result[1]!.similarity);
|
|
167
232
|
});
|
|
168
233
|
|
|
169
|
-
test("recency boosts newer
|
|
234
|
+
test("recency boosts newer task_completion over older with same raw similarity", () => {
|
|
170
235
|
const candidates = [
|
|
171
236
|
makeCandidate({
|
|
172
237
|
similarity: 0.8,
|
|
173
|
-
|
|
238
|
+
source: "task_completion",
|
|
239
|
+
createdAt: new Date(now.getTime() - 14 * 86400000).toISOString(),
|
|
174
240
|
}),
|
|
175
241
|
makeCandidate({
|
|
176
242
|
similarity: 0.8,
|
|
177
|
-
|
|
243
|
+
source: "task_completion",
|
|
244
|
+
createdAt: now.toISOString(),
|
|
178
245
|
}),
|
|
179
246
|
];
|
|
180
247
|
const result = rerank(candidates, { limit: 2, now });
|
|
181
|
-
// Fresh memory should rank higher due to recency decay
|
|
182
248
|
expect(result[0]!.createdAt).toBe(now.toISOString());
|
|
183
249
|
});
|
|
184
250
|
|
|
185
251
|
test("now parameter enables deterministic testing", () => {
|
|
186
252
|
const candidate = makeCandidate({
|
|
187
253
|
similarity: 0.8,
|
|
254
|
+
source: "task_completion",
|
|
188
255
|
createdAt: new Date(now.getTime() - 7 * 86400000).toISOString(),
|
|
189
256
|
});
|
|
190
257
|
const result1 = rerank([candidate], { limit: 1, now });
|
|
191
258
|
const result2 = rerank([candidate], { limit: 1, now });
|
|
192
259
|
expect(result1[0]!.similarity).toBe(result2[0]!.similarity);
|
|
193
260
|
});
|
|
261
|
+
|
|
262
|
+
test("preserves rawSimilarity and compositeScore", () => {
|
|
263
|
+
const candidate = makeCandidate({
|
|
264
|
+
similarity: 0.8,
|
|
265
|
+
source: "manual",
|
|
266
|
+
createdAt: now.toISOString(),
|
|
267
|
+
});
|
|
268
|
+
const result = rerank([candidate], { limit: 1, now });
|
|
269
|
+
expect(result[0]!.rawSimilarity).toBe(0.8);
|
|
270
|
+
expect(result[0]!.compositeScore).toBeDefined();
|
|
271
|
+
// For a fresh manual memory: 0.8 * 1.0 (no decay) * 1.0 (no boost) * 1.5 (source) * 1.0 (usefulness)
|
|
272
|
+
expect(result[0]!.compositeScore).toBeCloseTo(1.2, 5);
|
|
273
|
+
// similarity field = compositeScore
|
|
274
|
+
expect(result[0]!.similarity).toBe(result[0]!.compositeScore);
|
|
275
|
+
});
|
|
194
276
|
});
|
|
195
277
|
|
|
196
278
|
describe("usefulness", () => {
|
|
197
|
-
// The default-floor cases assume MEMORY_DEMOTION_FLOOR is unset/empty.
|
|
198
|
-
// The override case sets and restores the env var.
|
|
199
279
|
let originalFloor: string | undefined;
|
|
200
280
|
beforeEach(() => {
|
|
201
281
|
originalFloor = process.env.MEMORY_DEMOTION_FLOOR;
|
|
@@ -224,10 +304,6 @@ describe("usefulness", () => {
|
|
|
224
304
|
});
|
|
225
305
|
|
|
226
306
|
test("Beta(50,1) → 2 * 50/51 ≈ 1.961 (approaches ceiling, never above 2.0)", () => {
|
|
227
|
-
// NB: the clamp `Math.min(2.0, 2 * mean)` is a defensive ceiling — the
|
|
228
|
-
// formula 2 * α/(α+β) is bounded above by 2 for any finite β > 0, so the
|
|
229
|
-
// clamp only fires on degenerate inputs (β = 0). The plan's "===2.0"
|
|
230
|
-
// expectation was a numerical slip; the asymptote is what we ship.
|
|
231
307
|
expect(usefulness(50, 1)).toBeCloseTo((2 * 50) / 51, 10);
|
|
232
308
|
expect(usefulness(50, 1)).toBeLessThan(2.0);
|
|
233
309
|
});
|
|
@@ -242,110 +318,45 @@ describe("usefulness", () => {
|
|
|
242
318
|
});
|
|
243
319
|
});
|
|
244
320
|
|
|
245
|
-
describe("
|
|
246
|
-
|
|
247
|
-
// MEMORY_DEMOTION_FLOOR=1.0, computeScore must return EXACTLY the same value
|
|
248
|
-
// as a pre-rater build (similarity * recencyDecay * accessBoost).
|
|
249
|
-
const now = new Date("2026-04-12T12:00:00Z");
|
|
250
|
-
|
|
251
|
-
let originalFloor: string | undefined;
|
|
252
|
-
beforeEach(() => {
|
|
253
|
-
originalFloor = process.env.MEMORY_DEMOTION_FLOOR;
|
|
254
|
-
delete process.env.MEMORY_DEMOTION_FLOOR;
|
|
255
|
-
});
|
|
256
|
-
afterEach(() => {
|
|
257
|
-
if (originalFloor === undefined) {
|
|
258
|
-
delete process.env.MEMORY_DEMOTION_FLOOR;
|
|
259
|
-
} else {
|
|
260
|
-
process.env.MEMORY_DEMOTION_FLOOR = originalFloor;
|
|
261
|
-
}
|
|
262
|
-
});
|
|
263
|
-
|
|
264
|
-
test("computeScore equals similarity * recencyDecay * accessBoost (no usefulness drift)", () => {
|
|
265
|
-
const cases: MemoryCandidate[] = [
|
|
266
|
-
makeCandidate({
|
|
267
|
-
similarity: 0.8,
|
|
268
|
-
createdAt: now.toISOString(),
|
|
269
|
-
accessedAt: now.toISOString(),
|
|
270
|
-
accessCount: 0,
|
|
271
|
-
}),
|
|
272
|
-
makeCandidate({
|
|
273
|
-
similarity: 0.5,
|
|
274
|
-
createdAt: new Date(now.getTime() - 14 * 86400000).toISOString(),
|
|
275
|
-
accessedAt: new Date(now.getTime() - 24 * 3600000).toISOString(),
|
|
276
|
-
accessCount: 5,
|
|
277
|
-
}),
|
|
278
|
-
makeCandidate({
|
|
279
|
-
similarity: 0.99,
|
|
280
|
-
createdAt: new Date(now.getTime() - 28 * 86400000).toISOString(),
|
|
281
|
-
accessedAt: new Date(now.getTime() - 72 * 3600000).toISOString(),
|
|
282
|
-
accessCount: 12,
|
|
283
|
-
}),
|
|
284
|
-
];
|
|
285
|
-
|
|
286
|
-
for (const c of cases) {
|
|
287
|
-
const expected =
|
|
288
|
-
c.similarity *
|
|
289
|
-
recencyDecay(c.createdAt, now) *
|
|
290
|
-
accessBoost(c.accessedAt, c.accessCount, now);
|
|
291
|
-
expect(computeScore(c, now)).toBe(expected);
|
|
292
|
-
}
|
|
293
|
-
});
|
|
321
|
+
describe("source-aware scoring: manual memories survive age penalty", () => {
|
|
322
|
+
const now = new Date("2026-06-08T12:00:00Z");
|
|
294
323
|
|
|
295
|
-
test("
|
|
296
|
-
//
|
|
297
|
-
//
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
accessCount: 0,
|
|
316
|
-
}),
|
|
317
|
-
];
|
|
318
|
-
const result = rerank(candidates, { limit: 3, now });
|
|
324
|
+
test("76-day-old manual memory scores higher than 1-day-old noise task_completion", () => {
|
|
325
|
+
// The root-cause scenario from Taras's report: a 76-day-old manual memory
|
|
326
|
+
// with raw similarity 0.8 was being outscored by a 1-day-old noise result
|
|
327
|
+
// with raw similarity 0.05. The old reranker gave the noise result a HIGHER
|
|
328
|
+
// composite score because the flat 14d half-life crushed the old manual
|
|
329
|
+
// memory by 2^(-76/14) = 0.023. Now manual has no decay.
|
|
330
|
+
const oldManual = makeCandidate({
|
|
331
|
+
similarity: 0.8,
|
|
332
|
+
source: "manual",
|
|
333
|
+
createdAt: new Date(now.getTime() - 76 * 86400000).toISOString(),
|
|
334
|
+
accessedAt: new Date(now.getTime() - 76 * 86400000).toISOString(),
|
|
335
|
+
accessCount: 0,
|
|
336
|
+
});
|
|
337
|
+
const freshNoise = makeCandidate({
|
|
338
|
+
similarity: 0.05,
|
|
339
|
+
source: "task_completion",
|
|
340
|
+
createdAt: new Date(now.getTime() - 1 * 86400000).toISOString(),
|
|
341
|
+
accessedAt: new Date(now.getTime() - 1 * 86400000).toISOString(),
|
|
342
|
+
accessCount: 0,
|
|
343
|
+
});
|
|
319
344
|
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
// 0.3 * 2^(-2) = 0.075
|
|
324
|
-
expect(result[0]!.similarity).toBeCloseTo(0.9, 10);
|
|
325
|
-
expect(result[1]!.similarity).toBeCloseTo(0.6 * 2 ** -0.5, 10);
|
|
326
|
-
expect(result[2]!.similarity).toBeCloseTo(0.075, 10);
|
|
345
|
+
const ranked = rerank([freshNoise, oldManual], { limit: 2, now });
|
|
346
|
+
expect(ranked[0]!.source).toBe("manual");
|
|
347
|
+
expect(ranked[0]!.rawSimilarity).toBe(0.8);
|
|
327
348
|
});
|
|
328
349
|
|
|
329
|
-
test("
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
accessedAt: now.toISOString(),
|
|
336
|
-
accessCount: 0,
|
|
337
|
-
alpha: 10,
|
|
338
|
-
beta: 1,
|
|
339
|
-
});
|
|
340
|
-
const baseline = makeCandidate({
|
|
341
|
-
similarity: 0.5,
|
|
342
|
-
createdAt: now.toISOString(),
|
|
343
|
-
accessedAt: now.toISOString(),
|
|
350
|
+
test("session_summary decays fast (7d half-life)", () => {
|
|
351
|
+
const oldSummary = makeCandidate({
|
|
352
|
+
similarity: 0.8,
|
|
353
|
+
source: "session_summary",
|
|
354
|
+
createdAt: new Date(now.getTime() - 14 * 86400000).toISOString(),
|
|
355
|
+
accessedAt: new Date(now.getTime() - 14 * 86400000).toISOString(),
|
|
344
356
|
accessCount: 0,
|
|
345
357
|
});
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
);
|
|
358
|
+
// At 14d with 7d half-life: decay = 2^(-14/7) = 0.25
|
|
359
|
+
// Score: 0.8 * 0.25 * 0.5 (source) = 0.1
|
|
360
|
+
expect(computeScore(oldSummary, now)).toBeCloseTo(0.1, 2);
|
|
350
361
|
});
|
|
351
362
|
});
|
package/src/tests/memory.test.ts
CHANGED
|
@@ -342,8 +342,9 @@ describe("Memory System", () => {
|
|
|
342
342
|
status: "idle",
|
|
343
343
|
});
|
|
344
344
|
|
|
345
|
-
// Create memories with known embeddings
|
|
346
|
-
//
|
|
345
|
+
// Create memories with known embeddings (all share a baseline component
|
|
346
|
+
// so pairwise cosine similarity stays above the MIN_SIMILARITY floor).
|
|
347
|
+
// Memory 1: agent scope for searchAgentId
|
|
347
348
|
const m1 = store.store({
|
|
348
349
|
agentId: searchAgentId,
|
|
349
350
|
scope: "agent",
|
|
@@ -351,9 +352,9 @@ describe("Memory System", () => {
|
|
|
351
352
|
content: "Agent-scoped content",
|
|
352
353
|
source: "manual",
|
|
353
354
|
});
|
|
354
|
-
store.updateEmbedding(m1.id, new Float32Array([1, 0, 0]), "test-model");
|
|
355
|
+
store.updateEmbedding(m1.id, new Float32Array([1, 0.3, 0.3]), "test-model");
|
|
355
356
|
|
|
356
|
-
// Memory 2: swarm scope
|
|
357
|
+
// Memory 2: swarm scope
|
|
357
358
|
const m2 = store.store({
|
|
358
359
|
agentId: searchAgentId,
|
|
359
360
|
scope: "swarm",
|
|
@@ -361,9 +362,9 @@ describe("Memory System", () => {
|
|
|
361
362
|
content: "Swarm-scoped content",
|
|
362
363
|
source: "file_index",
|
|
363
364
|
});
|
|
364
|
-
store.updateEmbedding(m2.id, new Float32Array([0, 1, 0]), "test-model");
|
|
365
|
+
store.updateEmbedding(m2.id, new Float32Array([0.3, 1, 0.3]), "test-model");
|
|
365
366
|
|
|
366
|
-
// Memory 3: agent scope for OTHER agent
|
|
367
|
+
// Memory 3: agent scope for OTHER agent
|
|
367
368
|
const m3 = store.store({
|
|
368
369
|
agentId: searchAgentId2,
|
|
369
370
|
scope: "agent",
|
|
@@ -371,11 +372,11 @@ describe("Memory System", () => {
|
|
|
371
372
|
content: "Other agent's private memory",
|
|
372
373
|
source: "manual",
|
|
373
374
|
});
|
|
374
|
-
store.updateEmbedding(m3.id, new Float32Array([0, 0, 1]), "test-model");
|
|
375
|
+
store.updateEmbedding(m3.id, new Float32Array([0.3, 0.3, 1]), "test-model");
|
|
375
376
|
});
|
|
376
377
|
|
|
377
378
|
test("worker sees own agent-scoped + swarm memories", () => {
|
|
378
|
-
const query = new Float32Array([1, 0, 0]); // closest to Memory 1
|
|
379
|
+
const query = new Float32Array([1, 0.3, 0.3]); // closest to Memory 1
|
|
379
380
|
const results = store.search(query, searchAgentId, { isLead: false });
|
|
380
381
|
const names = results.map((r) => r.name);
|
|
381
382
|
|
|
@@ -385,7 +386,7 @@ describe("Memory System", () => {
|
|
|
385
386
|
});
|
|
386
387
|
|
|
387
388
|
test("worker does not see other agent's agent-scoped memories", () => {
|
|
388
|
-
const query = new Float32Array([0, 0, 1]); // closest to Memory 3
|
|
389
|
+
const query = new Float32Array([0.3, 0.3, 1]); // closest to Memory 3
|
|
389
390
|
const results = store.search(query, searchAgentId, { isLead: false });
|
|
390
391
|
const names = results.map((r) => r.name);
|
|
391
392
|
|
|
@@ -393,7 +394,7 @@ describe("Memory System", () => {
|
|
|
393
394
|
});
|
|
394
395
|
|
|
395
396
|
test("lead sees ALL memories across agents", () => {
|
|
396
|
-
const query = new Float32Array([0, 0, 1]); // closest to Memory 3
|
|
397
|
+
const query = new Float32Array([0.3, 0.3, 1]); // closest to Memory 3
|
|
397
398
|
const results = store.search(query, searchAgentId, { isLead: true });
|
|
398
399
|
const names = results.map((r) => r.name);
|
|
399
400
|
|
|
@@ -403,12 +404,12 @@ describe("Memory System", () => {
|
|
|
403
404
|
});
|
|
404
405
|
|
|
405
406
|
test("results sorted by similarity (highest first)", () => {
|
|
406
|
-
const query = new Float32Array([1, 0, 0]); //
|
|
407
|
+
const query = new Float32Array([1, 0.3, 0.3]); // closest to Memory 1's embedding
|
|
407
408
|
const results = store.search(query, searchAgentId, { isLead: true });
|
|
408
409
|
|
|
409
410
|
expect(results.length).toBeGreaterThan(0);
|
|
410
411
|
expect(results[0].name).toBe("Agent Memory 1");
|
|
411
|
-
expect(results[0].similarity).
|
|
412
|
+
expect(results[0].similarity).toBeGreaterThan(0.9);
|
|
412
413
|
|
|
413
414
|
// Each subsequent result should have lower or equal similarity
|
|
414
415
|
for (let i = 1; i < results.length; i++) {
|
|
@@ -208,6 +208,211 @@ describe("seed-scripts catalog", () => {
|
|
|
208
208
|
).toBeGreaterThan(0.99);
|
|
209
209
|
});
|
|
210
210
|
|
|
211
|
+
test("compound-insights reports script usage and cost honesty rails", async () => {
|
|
212
|
+
const queries: string[] = [];
|
|
213
|
+
const ctx = {
|
|
214
|
+
swarm: {
|
|
215
|
+
async db_query({ sql }: { sql: string }) {
|
|
216
|
+
queries.push(sql);
|
|
217
|
+
if (sql.includes("FROM script_runs sr")) {
|
|
218
|
+
return {
|
|
219
|
+
columns: ["scriptName", "kind", "status", "startedAt", "finishedAt", "durationMs"],
|
|
220
|
+
rows: [
|
|
221
|
+
[
|
|
222
|
+
"compound-insights",
|
|
223
|
+
"inline",
|
|
224
|
+
"completed",
|
|
225
|
+
"2026-06-08T00:00:00.000Z",
|
|
226
|
+
"2026-06-08T00:00:01.000Z",
|
|
227
|
+
1000,
|
|
228
|
+
],
|
|
229
|
+
[
|
|
230
|
+
"daily-dashboard",
|
|
231
|
+
"workflow",
|
|
232
|
+
"failed",
|
|
233
|
+
"2026-06-08T01:00:00.000Z",
|
|
234
|
+
"2026-06-08T01:00:03.000Z",
|
|
235
|
+
3000,
|
|
236
|
+
],
|
|
237
|
+
],
|
|
238
|
+
};
|
|
239
|
+
}
|
|
240
|
+
if (sql.includes("FROM scripts") && sql.includes("GROUP BY scope, isScratch")) {
|
|
241
|
+
return {
|
|
242
|
+
columns: ["scope", "isScratch", "count"],
|
|
243
|
+
rows: [
|
|
244
|
+
["global", 0, 2],
|
|
245
|
+
["agent", 1, 1],
|
|
246
|
+
],
|
|
247
|
+
};
|
|
248
|
+
}
|
|
249
|
+
if (sql.includes("FROM script_versions sv")) {
|
|
250
|
+
return {
|
|
251
|
+
columns: ["scope", "count"],
|
|
252
|
+
rows: [["global", 3]],
|
|
253
|
+
};
|
|
254
|
+
}
|
|
255
|
+
if (sql.includes("FROM session_logs") && sql.includes("%script-run%")) {
|
|
256
|
+
return {
|
|
257
|
+
columns: ["tool", "calls"],
|
|
258
|
+
rows: [["mcp__agent_swarm__script-run", 5]],
|
|
259
|
+
};
|
|
260
|
+
}
|
|
261
|
+
if (sql.includes("FROM session_costs sc")) {
|
|
262
|
+
return {
|
|
263
|
+
columns: [
|
|
264
|
+
"taskId",
|
|
265
|
+
"agentId",
|
|
266
|
+
"agentName",
|
|
267
|
+
"provider",
|
|
268
|
+
"totalCostUsd",
|
|
269
|
+
"inputTokens",
|
|
270
|
+
"outputTokens",
|
|
271
|
+
"cacheReadTokens",
|
|
272
|
+
"cacheWriteTokens",
|
|
273
|
+
"reasoningOutputTokens",
|
|
274
|
+
"thinkingTokens",
|
|
275
|
+
"numTurns",
|
|
276
|
+
"model",
|
|
277
|
+
"costSource",
|
|
278
|
+
],
|
|
279
|
+
rows: [
|
|
280
|
+
[
|
|
281
|
+
"task-a",
|
|
282
|
+
"agent-a",
|
|
283
|
+
"Picateclas",
|
|
284
|
+
"codex",
|
|
285
|
+
0.3,
|
|
286
|
+
100,
|
|
287
|
+
20,
|
|
288
|
+
10,
|
|
289
|
+
null,
|
|
290
|
+
3,
|
|
291
|
+
4,
|
|
292
|
+
null,
|
|
293
|
+
"gpt-5.5",
|
|
294
|
+
"harness",
|
|
295
|
+
],
|
|
296
|
+
[
|
|
297
|
+
"task-b",
|
|
298
|
+
"agent-a",
|
|
299
|
+
"Picateclas",
|
|
300
|
+
"codex",
|
|
301
|
+
0.5,
|
|
302
|
+
200,
|
|
303
|
+
40,
|
|
304
|
+
20,
|
|
305
|
+
2,
|
|
306
|
+
0,
|
|
307
|
+
0,
|
|
308
|
+
2,
|
|
309
|
+
"gpt-5.5",
|
|
310
|
+
"pricing-table",
|
|
311
|
+
],
|
|
312
|
+
[
|
|
313
|
+
"task-c",
|
|
314
|
+
"agent-b",
|
|
315
|
+
"Worker",
|
|
316
|
+
"claude",
|
|
317
|
+
9.9,
|
|
318
|
+
300,
|
|
319
|
+
60,
|
|
320
|
+
30,
|
|
321
|
+
3,
|
|
322
|
+
0,
|
|
323
|
+
0,
|
|
324
|
+
3,
|
|
325
|
+
"unknown",
|
|
326
|
+
"unpriced",
|
|
327
|
+
],
|
|
328
|
+
[
|
|
329
|
+
null,
|
|
330
|
+
"agent-a",
|
|
331
|
+
"Picateclas",
|
|
332
|
+
"codex",
|
|
333
|
+
0.2,
|
|
334
|
+
50,
|
|
335
|
+
10,
|
|
336
|
+
5,
|
|
337
|
+
null,
|
|
338
|
+
1,
|
|
339
|
+
1,
|
|
340
|
+
null,
|
|
341
|
+
"gpt-5.5",
|
|
342
|
+
"harness",
|
|
343
|
+
],
|
|
344
|
+
],
|
|
345
|
+
};
|
|
346
|
+
}
|
|
347
|
+
return { columns: [], rows: [] };
|
|
348
|
+
},
|
|
349
|
+
},
|
|
350
|
+
};
|
|
351
|
+
|
|
352
|
+
const result = await compoundInsights(
|
|
353
|
+
{
|
|
354
|
+
days: 7,
|
|
355
|
+
includeToolUsage: false,
|
|
356
|
+
includeScheduleHealth: false,
|
|
357
|
+
includeMemoryHealth: false,
|
|
358
|
+
includeScriptCandidates: false,
|
|
359
|
+
includeByAgent: false,
|
|
360
|
+
publishPage: false,
|
|
361
|
+
},
|
|
362
|
+
ctx,
|
|
363
|
+
);
|
|
364
|
+
|
|
365
|
+
expect(queries.some((sql) => sql.includes("FROM script_runs sr"))).toBe(true);
|
|
366
|
+
expect(queries.some((sql) => sql.includes("FROM session_costs sc"))).toBe(true);
|
|
367
|
+
expect(result.scriptUsage.runs).toMatchObject({
|
|
368
|
+
total: 2,
|
|
369
|
+
inline: 1,
|
|
370
|
+
workflow: 1,
|
|
371
|
+
completed: 1,
|
|
372
|
+
failed: 1,
|
|
373
|
+
successRate: 50,
|
|
374
|
+
durationP50Ms: 1000,
|
|
375
|
+
durationP95Ms: 3000,
|
|
376
|
+
});
|
|
377
|
+
expect(result.scriptUsage.creations).toMatchObject({
|
|
378
|
+
totalNonScratch: 2,
|
|
379
|
+
scratch: 1,
|
|
380
|
+
byScope: { global: 2 },
|
|
381
|
+
});
|
|
382
|
+
expect(result.scriptUsage.edits).toMatchObject({
|
|
383
|
+
total: 3,
|
|
384
|
+
byScope: { global: 3 },
|
|
385
|
+
});
|
|
386
|
+
expect(result.scriptUsage.mcpToolCalls).toEqual([
|
|
387
|
+
{ tool: "mcp__agent_swarm__script-run", calls: 5 },
|
|
388
|
+
]);
|
|
389
|
+
expect(result.costAndTokens).toMatchObject({
|
|
390
|
+
rows: 4,
|
|
391
|
+
taskCountForHeadlineAvg: 2,
|
|
392
|
+
avgCostPerTaskUsd: 0.4,
|
|
393
|
+
totalSpendUsd: 10.9,
|
|
394
|
+
trustedSpendUsd: 1,
|
|
395
|
+
trustedRows: 3,
|
|
396
|
+
trustedRowPercent: 75,
|
|
397
|
+
unpricedRows: 1,
|
|
398
|
+
unpricedSpendUsd: 9.9,
|
|
399
|
+
nonTaskSessionRows: 1,
|
|
400
|
+
nonTaskSessionSpendUsd: 0.2,
|
|
401
|
+
unknownCounts: {
|
|
402
|
+
cacheWriteTokens: 2,
|
|
403
|
+
numTurns: 2,
|
|
404
|
+
},
|
|
405
|
+
});
|
|
406
|
+
expect(result.costAndTokens.tokenTotals).toMatchObject({
|
|
407
|
+
inputTokens: 650,
|
|
408
|
+
outputTokens: 130,
|
|
409
|
+
cacheReadTokens: 65,
|
|
410
|
+
cacheWriteTokens: 5,
|
|
411
|
+
reasoningOutputTokens: 4,
|
|
412
|
+
thinkingTokens: 5,
|
|
413
|
+
});
|
|
414
|
+
});
|
|
415
|
+
|
|
211
416
|
test("ops-catalog-audit clusters schedule, workflow, and prompt findings by goal", async () => {
|
|
212
417
|
const queries: string[] = [];
|
|
213
418
|
const result = await opsCatalogAudit(
|