@desplega.ai/agent-swarm 1.74.4 → 1.76.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/openapi.json +1264 -46
- package/package.json +2 -2
- package/src/be/db.ts +563 -9
- package/src/be/memory/edges-store.ts +69 -0
- package/src/be/memory/providers/sqlite-store.ts +4 -0
- package/src/be/memory/raters/explicit-self.ts +22 -0
- package/src/be/memory/raters/implicit-citation.ts +44 -0
- package/src/be/memory/raters/llm-client.ts +172 -0
- package/src/be/memory/raters/llm-summarizer.ts +218 -0
- package/src/be/memory/raters/llm.ts +375 -0
- package/src/be/memory/raters/noop.ts +14 -0
- package/src/be/memory/raters/registry.ts +86 -0
- package/src/be/memory/raters/retrieval.ts +88 -0
- package/src/be/memory/raters/run-server-raters.ts +97 -0
- package/src/be/memory/raters/store.ts +228 -0
- package/src/be/memory/raters/types.ts +101 -0
- package/src/be/memory/reranker.ts +32 -2
- package/src/be/memory/retrieval-store.ts +116 -0
- package/src/be/memory/types.ts +3 -0
- package/src/be/migrations/051_memory_posteriors_and_retrieval.sql +67 -0
- package/src/be/migrations/052_memory_edges.sql +36 -0
- package/src/be/migrations/053_agent_waiting_for_credentials_status.sql +61 -0
- package/src/be/migrations/054_agent_harness_provider.sql +21 -0
- package/src/be/migrations/055_agent_cred_status.sql +15 -0
- package/src/be/migrations/056_drop_agent_tasks_source_check.sql +139 -0
- package/src/be/migrations/057_inbox_item_state.sql +27 -0
- package/src/be/migrations/058_task_templates.sql +31 -0
- package/src/be/swarm-config-guard.ts +24 -0
- package/src/commands/credential-wait.ts +186 -0
- package/src/commands/provider-credentials.ts +434 -0
- package/src/commands/runner.ts +253 -21
- package/src/hooks/hook.ts +143 -66
- package/src/http/agents.ts +191 -1
- package/src/http/config.ts +11 -1
- package/src/http/core.ts +5 -0
- package/src/http/inbox-state.ts +89 -0
- package/src/http/index.ts +10 -0
- package/src/http/memory.ts +230 -1
- package/src/http/sessions.ts +86 -0
- package/src/http/status.ts +665 -0
- package/src/http/task-templates.ts +51 -0
- package/src/http/tasks.ts +85 -5
- package/src/http/users.ts +134 -0
- package/src/prompts/memories.ts +62 -0
- package/src/providers/claude-adapter.ts +22 -0
- package/src/providers/claude-managed-adapter.ts +24 -0
- package/src/providers/codex-adapter.ts +43 -1
- package/src/providers/devin-adapter.ts +18 -0
- package/src/providers/index.ts +7 -0
- package/src/providers/opencode-adapter.ts +60 -0
- package/src/providers/pi-mono-adapter.ts +71 -0
- package/src/providers/types.ts +34 -0
- package/src/server.ts +2 -0
- package/src/slack/handlers.ts +0 -1
- package/src/tests/agents-harness-provider.test.ts +333 -0
- package/src/tests/credential-check.test.ts +367 -0
- package/src/tests/credential-status-api.test.ts +223 -0
- package/src/tests/credential-status-routing.test.ts +150 -0
- package/src/tests/credential-wait.test.ts +282 -0
- package/src/tests/harness-provider-resolution.test.ts +242 -0
- package/src/tests/jira-sync.test.ts +1 -1
- package/src/tests/memory-edges.test.ts +722 -0
- package/src/tests/memory-rate-endpoint.test.ts +330 -0
- package/src/tests/memory-rate-tool.test.ts +252 -0
- package/src/tests/memory-rater-e2e.test.ts +578 -0
- package/src/tests/memory-rater-implicit-citation.test.ts +304 -0
- package/src/tests/memory-rater-llm-summarizer.test.ts +317 -0
- package/src/tests/memory-rater-llm.test.ts +964 -0
- package/src/tests/memory-rater-store.test.ts +249 -0
- package/src/tests/memory-reranker.test.ts +161 -2
- package/src/tests/migration-runner-regressions.test.ts +17 -2
- package/src/tests/mocks/mock-llm-rater-client.ts +35 -0
- package/src/tests/run-server-raters.test.ts +291 -0
- package/src/tests/sessions.test.ts +141 -0
- package/src/tests/status.test.ts +843 -0
- package/src/tests/stop-hook-task-resolution.test.ts +98 -0
- package/src/tests/template-recommendations.test.ts +148 -0
- package/src/tests/tool-annotations.test.ts +2 -2
- package/src/tests/use-dismissible-card.test.ts +140 -0
- package/src/tools/memory-rate.ts +166 -0
- package/src/tools/memory-search.ts +18 -0
- package/src/tools/store-progress.ts +37 -0
- package/src/tools/swarm-config/set-config.ts +17 -1
- package/src/tools/tool-config.ts +1 -0
- package/src/types.ts +122 -1
- package/src/utils/harness-provider.ts +32 -0
- package/tsconfig.json +0 -2
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTTP integration tests for the memory-rater v1.5 worker-facing endpoints:
|
|
3
|
+
* POST /api/memory/rate
|
|
4
|
+
* GET /api/memory/retrievals
|
|
5
|
+
*
|
|
6
|
+
* Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-3.md
|
|
7
|
+
*
|
|
8
|
+
* Spawns the API server against an isolated SQLite file, then opens the same
|
|
9
|
+
* file from the test process for state setup/verification (WAL mode allows
|
|
10
|
+
* concurrent readers + writers).
|
|
11
|
+
*/
|
|
12
|
+
import { afterAll, beforeAll, beforeEach, describe, expect, test } from "bun:test";
|
|
13
|
+
import { randomUUID } from "node:crypto";
|
|
14
|
+
import { unlink } from "node:fs/promises";
|
|
15
|
+
import type { Subprocess } from "bun";
|
|
16
|
+
import { closeDb, createAgent, getDb, initDb } from "../be/db";
|
|
17
|
+
import { SqliteMemoryStore } from "../be/memory/providers/sqlite-store";
|
|
18
|
+
|
|
19
|
+
// --- Spawned API server coordinates -----------------------------------------
// Fixed port for the child server; the DB file name is timestamped so
// successive runs do not reuse each other's SQLite file.
const TEST_PORT = 19111;
const TEST_DB_PATH = `/tmp/test-memory-rate-${Date.now()}.sqlite`;
const BASE = `http://localhost:${TEST_PORT}`;
const API_KEY = "test-key";

// Handle to the spawned server process (assigned in beforeAll).
let serverProc: Subprocess;

// --- Fixture identities ------------------------------------------------------
// Two agents and two tasks, freshly generated per run.
const agentA = randomUUID();
const agentB = randomUUID();
const taskA = randomUUID();
const taskB = randomUUID();

// Memory store bound to the shared file-backed DB (assigned in beforeAll).
let store: SqliteMemoryStore;
|
|
30
|
+
|
|
31
|
+
/**
 * View of `globalThis` exposing the migration-template slots used by this suite.
 *
 * preload.ts stores an in-memory migrated DB template that initDb's fast path
 * restores from. That is fine for single-process tests, but this suite needs
 * the test process to share a real file-backed DB with the spawned API server,
 * so the template is hidden for the duration of the suite and put back in
 * afterAll for downstream suites.
 */
type MigrationTemplateGlobals = typeof globalThis & {
  /** Template slot read by initDb's fast path. */
  __testMigrationTemplate?: Uint8Array;
  /** Parking spot for the template while this suite runs. */
  __savedRateTemplate?: Uint8Array;
};

const testTemplateGlobals = globalThis as MigrationTemplateGlobals;
|
|
39
|
+
|
|
40
|
+
/**
 * Issue an authenticated JSON request against the spawned API server.
 *
 * @param method HTTP verb.
 * @param path   Path (and query string) appended to `BASE`.
 * @param opts   Optional JSON `body` and an `agentId` sent as `x-agent-id`.
 * @returns The response status plus the body, JSON-decoded when possible and
 *          left as raw text otherwise.
 */
async function api(
  method: string,
  path: string,
  opts: { body?: unknown; agentId?: string } = {},
  // biome-ignore lint/suspicious/noExplicitAny: test helper
): Promise<{ status: number; body: any }> {
  const requestHeaders: Record<string, string> = {
    "Content-Type": "application/json",
    Authorization: `Bearer ${API_KEY}`,
  };
  if (opts.agentId) {
    requestHeaders["x-agent-id"] = opts.agentId;
  }

  const payload = opts.body === undefined ? undefined : JSON.stringify(opts.body);
  const response = await fetch(`${BASE}${path}`, {
    method,
    headers: requestHeaders,
    body: payload,
  });

  const raw = await response.text();
  // biome-ignore lint/suspicious/noExplicitAny: body may be JSON or text
  let decoded: any = raw;
  try {
    decoded = JSON.parse(raw);
  } catch {
    // Not JSON — hand back the raw text unchanged.
  }
  return { status: response.status, body: decoded };
}
|
|
66
|
+
|
|
67
|
+
/**
 * Poll `url` every 50ms until it answers with a 2xx status.
 *
 * @param url       Endpoint to probe.
 * @param timeoutMs Overall budget in milliseconds (default 15000).
 * @throws Error when no successful response arrives within the budget.
 */
async function waitForServer(url: string, timeoutMs = 15000): Promise<void> {
  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    // Connection errors simply mean the server is not listening yet.
    const ready = await fetch(url).then(
      (response) => response.ok,
      () => false,
    );
    if (ready) return;
    await Bun.sleep(50);
  }
  throw new Error(`Server did not start within ${timeoutMs}ms`);
}
|
|
80
|
+
|
|
81
|
+
/**
 * Persist an agent-scoped, manually-sourced memory through the shared store.
 *
 * @param name    Memory name; the content is derived as `"<name> content"`.
 * @param agentId Owning agent (defaults to `agentA`).
 * @returns Whatever the store returns for the new row (at least its `id`).
 */
function makeMemory(name: string, agentId = agentA): { id: string } {
  const content = `${name} content`;
  return store.store({ agentId, scope: "agent", name, content, source: "manual" });
}
|
|
90
|
+
|
|
91
|
+
/**
 * Insert a `memory_retrieval` row directly into the shared DB, recording that
 * `memoryId` was surfaced to `agentId` during `taskId`.
 *
 * The row gets a random id, a NULL sessionId, a fixed similarity of 0.85 and
 * the current time as `retrievedAt`.
 */
function insertRetrieval(taskId: string, agentId: string, memoryId: string): void {
  const statement = getDb().prepare(
    `INSERT INTO memory_retrieval (id, taskId, agentId, sessionId, memoryId, similarity, retrievedAt)
       VALUES (?, ?, ?, NULL, ?, 0.85, ?)`,
  );
  const rowId = randomUUID();
  const retrievedAt = new Date().toISOString();
  statement.run(rowId, taskId, agentId, memoryId, retrievedAt);
}
|
|
99
|
+
|
|
100
|
+
/**
 * Read the `alpha` / `beta` columns of an `agent_memory` row.
 *
 * @param id Memory id to look up.
 * @throws Error when no row with that id exists.
 */
function readPosterior(id: string): { alpha: number; beta: number } {
  const query = getDb().prepare<{ alpha: number; beta: number }, [string]>(
    "SELECT alpha, beta FROM agent_memory WHERE id = ?",
  );
  const hit = query.get(id);
  if (hit) {
    const { alpha, beta } = hit;
    return { alpha, beta };
  }
  throw new Error(`memory ${id} not found`);
}
|
|
109
|
+
|
|
110
|
+
// Suite setup: start the API server on a fresh SQLite file, then attach the
// test process to that same file and seed the agents/tasks the tests rely on.
beforeAll(async () => {
  // Remove any stale DB artefacts; a missing file is not an error.
  for (const suffix of ["", "-wal", "-shm"]) {
    await unlink(TEST_DB_PATH + suffix).catch(() => {});
  }

  // Environment for the child server: inherit ours, then override the port,
  // DB location and API key and apply the integration/telemetry flags below.
  const serverEnv = {
    ...process.env,
    PORT: String(TEST_PORT),
    DATABASE_PATH: TEST_DB_PATH,
    API_KEY,
    CAPABILITIES: "core",
    SLACK_BOT_TOKEN: "",
    LINEAR_DISABLE: "true",
    JIRA_DISABLE: "true",
    GITHUB_DISABLE: "true",
    SLACK_DISABLE: "true",
    HEARTBEAT_DISABLE: "true",
    OAUTH_KEEPALIVE_DISABLE: "true",
    ANONYMIZED_TELEMETRY: "false",
  };
  serverProc = Bun.spawn(["bun", "src/http.ts"], {
    cwd: `${import.meta.dir}/../..`,
    env: serverEnv,
    stdout: "ignore",
    stderr: "ignore",
  });

  await waitForServer(`${BASE}/health`);

  // Park preload.ts's in-memory template so initDb opens the real file (which
  // the server has already migrated), giving cross-process WAL visibility.
  testTemplateGlobals.__savedRateTemplate = testTemplateGlobals.__testMigrationTemplate;
  testTemplateGlobals.__testMigrationTemplate = undefined;

  // A previous suite in the same Bun worker may have left a DB open. initDb is
  // a no-op when a DB is already set, so close first — otherwise this process
  // could keep writing to the old template-restored DB while the spawned
  // server reads from TEST_DB_PATH.
  closeDb();
  initDb(TEST_DB_PATH);

  const agents = [
    { id: agentA, name: "Agent A" },
    { id: agentB, name: "Agent B" },
  ];
  for (const { id, name } of agents) {
    createAgent({ id, name, isLead: false, status: "idle" });
  }

  // Both tasks belong to agentA.
  const insertTask = getDb().prepare(
    `INSERT INTO agent_tasks (id, agentId, task, status, source, createdAt, lastUpdatedAt)
       VALUES (?, ?, ?, 'in_progress', 'mcp', ?, ?)`,
  );
  const now = new Date().toISOString();
  const tasks = [
    { id: taskA, label: "task A" },
    { id: taskB, label: "task B" },
  ];
  for (const { id, label } of tasks) {
    insertTask.run(id, agentA, label, now, now);
  }

  store = new SqliteMemoryStore();
}, 20000);
|
|
164
|
+
|
|
165
|
+
// Suite teardown: detach from the DB, restore the preload template, stop the
// spawned server and delete the SQLite files.
afterAll(async () => {
  try {
    closeDb();
  } finally {
    // Everything below must run even if closeDb() throws, otherwise the child
    // server and the temp DB files would be leaked.

    // Restore preload.ts's template for any subsequent suites — but only when
    // beforeAll actually parked one. If beforeAll failed before saving (e.g.
    // the server never came up), the live template was never hidden and an
    // unconditional restore would overwrite it with `undefined`.
    const parkedTemplate = testTemplateGlobals.__savedRateTemplate;
    if (parkedTemplate !== undefined) {
      testTemplateGlobals.__testMigrationTemplate = parkedTemplate;
      testTemplateGlobals.__savedRateTemplate = undefined;
    }

    if (serverProc) {
      serverProc.kill();
      try {
        await serverProc.exited;
      } catch {
        // Best-effort: the process is gone either way.
      }
    }

    // Short pause before removing the DB files after the server has exited.
    await Bun.sleep(50);
    for (const suffix of ["", "-wal", "-shm"]) {
      try {
        await unlink(TEST_DB_PATH + suffix);
      } catch {
        // File may not exist (e.g. no WAL was created) — ignore.
      }
    }
  }
});
|
|
183
|
+
|
|
184
|
+
// Per-test reset. agent_memory rows are kept across tests; only their
// alpha/beta are rewound so every test starts from the Beta(1,1) prior, and
// all rating / retrieval rows are cleared.
beforeEach(() => {
  const resetStatements = [
    "DELETE FROM memory_rating",
    "DELETE FROM memory_retrieval",
    "UPDATE agent_memory SET alpha = 1.0, beta = 1.0",
  ];
  for (const sql of resetStatements) {
    getDb().run(sql);
  }
});
|
|
191
|
+
|
|
192
|
+
// Endpoint contract for POST /api/memory/rate, exercised over real HTTP
// against the spawned server.
describe("POST /api/memory/rate", () => {
  type RateEvent = {
    memoryId: string;
    signal: number;
    weight: number;
    source: string;
    taskId?: string | undefined;
  };

  /** Positive, full-weight rating event; `taskId` is omitted from the JSON when undefined. */
  const positiveEvent = (memoryId: string, source: string, taskId?: string): RateEvent => ({
    memoryId,
    signal: 1,
    weight: 1,
    source,
    taskId,
  });

  /** POST a batch of events, sending `x-agent-id` only when `agentId` is given. */
  const postRate = (events: RateEvent[], agentId?: string) => {
    const body = { events };
    return api("POST", "/api/memory/rate", agentId === undefined ? { body } : { agentId, body });
  };

  test("happy path: source=llm with valid memoryId → 200, applied=1, alpha bumped", async () => {
    const memory = makeMemory("rate-llm-1");
    const { status, body } = await postRate([positiveEvent(memory.id, "llm", taskA)], agentA);
    expect(status).toBe(200);
    expect(body.applied).toBe(1);
    expect(body.rejected).toEqual([]);
    expect(readPosterior(memory.id).alpha).toBeCloseTo(2, 5);
  });

  test("source=explicit-self with no retrieval row → 400 (R6 spam guard)", async () => {
    const memory = makeMemory("explicit-no-retr");
    const { status, body } = await postRate(
      [positiveEvent(memory.id, "explicit-self", taskA)],
      agentA,
    );
    expect(status).toBe(400);
    expect(String(body.error)).toMatch(/not present in memory_retrieval/);
  });

  test("source=explicit-self without taskId → 400", async () => {
    const memory = makeMemory("explicit-no-task");
    const { status, body } = await postRate([positiveEvent(memory.id, "explicit-self")], agentA);
    expect(status).toBe(400);
    expect(String(body.error)).toMatch(/requires taskId/);
  });

  test("source=explicit-self with retrieval row → 200, applied=1", async () => {
    const memory = makeMemory("explicit-ok");
    insertRetrieval(taskA, agentA, memory.id);
    const { status, body } = await postRate(
      [positiveEvent(memory.id, "explicit-self", taskA)],
      agentA,
    );
    expect(status).toBe(200);
    expect(body.applied).toBe(1);
  });

  test("duplicate explicit-self for same (taskId, memoryId) → 409", async () => {
    const memory = makeMemory("explicit-dup");
    insertRetrieval(taskA, agentA, memory.id);
    const event = positiveEvent(memory.id, "explicit-self", taskA);

    const first = await postRate([event], agentA);
    expect(first.status).toBe(200);

    // Re-sending the identical event must be refused.
    const second = await postRate([event], agentA);
    expect(second.status).toBe(409);
    expect(String(second.body.error)).toMatch(/Duplicate explicit-self/);
  });

  test("51 events → 400 (cap enforced)", async () => {
    const memory = makeMemory("cap");
    const events: RateEvent[] = Array.from({ length: 51 }, () => ({
      memoryId: memory.id,
      signal: 1,
      weight: 0.01,
      source: "llm",
    }));
    const { status } = await postRate(events, agentA);
    expect(status).toBe(400);
  });

  test("source=implicit-citation rejected at HTTP boundary → 400", async () => {
    const memory = makeMemory("impl-cit-spoof");
    const { status } = await postRate([positiveEvent(memory.id, "implicit-citation")], agentA);
    expect(status).toBe(400);
  });

  test("missing X-Agent-ID → 400", async () => {
    const memory = makeMemory("no-agent");
    const { status } = await postRate([positiveEvent(memory.id, "llm")]);
    expect(status).toBe(400);
  });
});
|
|
301
|
+
|
|
302
|
+
// Endpoint contract for GET /api/memory/retrievals.
describe("GET /api/memory/retrievals", () => {
  test("requires taskId or sessionId → 400", async () => {
    const { status } = await api("GET", "/api/memory/retrievals", { agentId: agentA });
    expect(status).toBe(400);
  });

  test("returns rows for the requesting agent only (defence-in-depth)", async () => {
    const own = [makeMemory("retr-1"), makeMemory("retr-2")];
    const foreign = makeMemory("retr-other");
    for (const memory of own) {
      insertRetrieval(taskA, agentA, memory.id);
    }
    // Same task, different agent — this row must NOT leak into agentA's view.
    insertRetrieval(taskA, agentB, foreign.id);

    const { status, body } = await api("GET", `/api/memory/retrievals?taskId=${taskA}`, {
      agentId: agentA,
    });
    expect(status).toBe(200);
    expect(Array.isArray(body.results)).toBe(true);
    expect(body.results).toHaveLength(2);

    const rows = body.results as { id: string; name: string; content: string }[];
    const returnedIds = rows.map((row) => row.id).sort();
    const expectedIds = own.map((memory) => memory.id).sort();
    expect(returnedIds).toEqual(expectedIds);

    // Content snippet capped at 500 chars
    for (const { content, name } of rows) {
      expect(content.length).toBeLessThanOrEqual(500);
      expect(name).toMatch(/^retr-/);
    }
  });
});
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit + integration tests for the `memory_rate` MCP tool and the
|
|
3
|
+
* conditional system-prompt addendum from `src/prompts/memories.ts`.
|
|
4
|
+
*
|
|
5
|
+
* Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-5.md §7
|
|
6
|
+
*
|
|
7
|
+
* Strategy:
|
|
8
|
+
* - Tool tests stub `globalThis.fetch` to assert the request payload and
|
|
9
|
+
* simulate the 200/400/409 responses the step-3 endpoint emits. No
|
|
10
|
+
* network or server boot needed.
|
|
11
|
+
* - Prompt tests flip `MEMORY_RATERS` and assert the addendum is gated
|
|
12
|
+
* on `explicit-self` being present.
|
|
13
|
+
* - MCP handshake test registers the tool against a fresh `McpServer` and
|
|
14
|
+
* pulls the entry out of the SDK's registry to confirm the wiring.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
18
|
+
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
19
|
+
import { renderMemoriesPrompt } from "../prompts/memories";
|
|
20
|
+
import { registerMemoryRateTool } from "../tools/memory-rate";
|
|
21
|
+
|
|
22
|
+
/** Second argument of the global `fetch`. */
type FetchInit = Parameters<typeof fetch>[1];
/** One recorded invocation of the stubbed `fetch`. */
type CallRecord = { url: string; init: FetchInit };

// Captured once at module load so tests can put the real fetch back.
const originalFetch = globalThis.fetch;

/**
 * Replace `globalThis.fetch` with a recording stub.
 *
 * Every call is appended to the returned `calls` array (URL as a string plus
 * the init object, `{}` when omitted) and then delegated to `responder`, whose
 * result — or thrown error, surfaced as a rejected promise — becomes the fetch
 * outcome. Callers are responsible for restoring `globalThis.fetch` afterwards.
 *
 * @param responder Produces the response for a given URL / init pair.
 * @returns The live array of recorded calls.
 */
function installFetchStub(
  responder: (url: string, init: FetchInit) => Response | Promise<Response>,
): { calls: CallRecord[] } {
  const calls: CallRecord[] = [];
  globalThis.fetch = (async (input: Parameters<typeof fetch>[0], init?: FetchInit) => {
    // fetch accepts a string, a URL or a Request. Previously a Request was
    // recorded as "" which hid the real target from URL assertions.
    let url: string;
    if (typeof input === "string") {
      url = input;
    } else if (input instanceof URL) {
      url = input.toString();
    } else {
      url = input.url;
    }
    const effectiveInit = init ?? {};
    calls.push({ url, init: effectiveInit });
    return responder(url, effectiveInit);
  }) as typeof fetch;
  return { calls };
}
|
|
38
|
+
|
|
39
|
+
/**
 * Create a fresh `McpServer`, register the `memory_rate` tool on it and dig the
 * registered entry out of the SDK's private `_registeredTools` map.
 *
 * @returns The server, the `memory_rate` entry and the whole registry.
 * @throws Error when registration did not produce a `memory_rate` entry.
 */
function buildServer() {
  type RegisteredTool = {
    handler: (args: unknown, extra: unknown) => Promise<unknown>;
  };
  // The registry is not part of the SDK's public typings, hence the cast below.
  type ServerInternals = { _registeredTools: Record<string, RegisteredTool> };

  const server = new McpServer({ name: "memory-rate-test", version: "1.0.0" });
  registerMemoryRateTool(server);

  const { _registeredTools: registered } = server as unknown as ServerInternals;
  const tool = registered.memory_rate;
  if (!tool) {
    throw new Error("memory_rate tool not registered");
  }
  return { server, tool, registered };
}
|
|
51
|
+
|
|
52
|
+
// Default `extra` argument handed to the tool handler: a session id plus the
// request headers carrying the calling agent and its source task.
const fakeMeta = {
  sessionId: "session-123",
  requestInfo: {
    headers: {
      "x-agent-id": "agent-abc",
      "x-source-task-id": "11111111-1111-4111-8111-111111111111",
    },
  },
};

// Memory id used as the rating target throughout the tool tests.
const memoryId = "22222222-2222-4222-8222-222222222222";
|
|
63
|
+
|
|
64
|
+
// Behaviour of the `memory_rate` MCP tool with `fetch` stubbed out: request
// shape on success, and the structured failures for 409 / 400 / network errors
// / missing task id.
describe("memory_rate MCP tool", () => {
  /** Shape of the handler result the assertions care about. */
  type ToolResult = { structuredContent: { success: boolean; message: string } };
  type Tool = ReturnType<typeof buildServer>["tool"];

  // Env vars this suite overrides. Their pre-test values are captured and put
  // back afterwards so the suite does not wipe configuration that other suites
  // in the same process may rely on.
  const OVERRIDDEN_ENV = ["MCP_BASE_URL", "API_KEY"];
  const previousEnv: Record<string, string | undefined> = {};

  /** Invoke the tool handler and view the result as a `ToolResult`. */
  const invoke = async (tool: Tool, args: unknown, meta: unknown = fakeMeta): Promise<ToolResult> =>
    (await tool.handler(args, meta)) as ToolResult;

  beforeEach(() => {
    for (const key of OVERRIDDEN_ENV) {
      previousEnv[key] = process.env[key];
    }
    process.env.MCP_BASE_URL = "http://test-host:9999";
    process.env.API_KEY = "test-key";
  });

  afterEach(() => {
    globalThis.fetch = originalFetch;
    for (const key of OVERRIDDEN_ENV) {
      const previous = previousEnv[key];
      if (previous === undefined) {
        delete process.env[key];
      } else {
        process.env[key] = previous;
      }
    }
  });

  test("success path POSTs the canonical event shape and returns success=true", async () => {
    const { tool } = buildServer();
    const { calls } = installFetchStub(() => new Response("{}", { status: 200 }));

    const result = await invoke(tool, { id: memoryId, useful: true });

    expect(result.structuredContent.success).toBe(true);
    expect(result.structuredContent.message).toContain("useful");

    expect(calls).toHaveLength(1);
    const call = calls[0]!;
    expect(call.url).toBe("http://test-host:9999/api/memory/rate");
    expect(call.init?.method).toBe("POST");
    const headers = call.init?.headers as Record<string, string>;
    expect(headers["X-Agent-ID"]).toBe("agent-abc");
    expect(headers.Authorization).toBe("Bearer test-key");
    const body = JSON.parse(call.init?.body as string);
    expect(body).toEqual({
      events: [
        {
          memoryId,
          signal: 1,
          weight: 1.0,
          source: "explicit-self",
          reasoning: "",
          taskId: "11111111-1111-4111-8111-111111111111",
        },
      ],
    });
  });

  test("useful=false flips signal to -1 and forwards the note as reasoning", async () => {
    const { tool } = buildServer();
    const { calls } = installFetchStub(() => new Response("{}", { status: 200 }));

    await tool.handler({ id: memoryId, useful: false, note: "actually misleading" }, fakeMeta);

    const [event] = JSON.parse(calls[0]!.init?.body as string).events;
    expect(event.signal).toBe(-1);
    expect(event.reasoning).toBe("actually misleading");
  });

  test("409 → success=false with the canned duplicate message", async () => {
    const { tool } = buildServer();
    installFetchStub(
      () =>
        new Response(JSON.stringify({ error: "Duplicate explicit-self" }), {
          status: 409,
        }),
    );

    const { structuredContent } = await invoke(tool, { id: memoryId, useful: true });

    expect(structuredContent.success).toBe(false);
    expect(structuredContent.message).toBe(
      "Memory already rated for this task. Use a follow-up memory_rerate tool (coming soon) to override.",
    );
  });

  test("400 → success=false with a clear message and the server error detail", async () => {
    const { tool } = buildServer();
    const serverError = {
      error: `explicit-self rating rejected: memoryId=${memoryId} not present in memory_retrieval for task=t`,
    };
    installFetchStub(() => new Response(JSON.stringify(serverError), { status: 400 }));

    const { structuredContent } = await invoke(tool, { id: memoryId, useful: true });

    expect(structuredContent.success).toBe(false);
    expect(structuredContent.message).toMatch(/not present in memory_retrieval/);
    expect(structuredContent.message).toMatch(/must have been retrieved/);
  });

  test("network failure does NOT throw — surfaces a structured error", async () => {
    const { tool } = buildServer();
    installFetchStub(() => {
      throw new Error("connect ECONNREFUSED");
    });

    const { structuredContent } = await invoke(tool, { id: memoryId, useful: true });

    expect(structuredContent.success).toBe(false);
    expect(structuredContent.message).toMatch(/ECONNREFUSED/);
  });

  test("missing sourceTaskId → tool returns a clear failure without POSTing", async () => {
    const { tool } = buildServer();
    const { calls } = installFetchStub(() => new Response("{}", { status: 200 }));

    // Same as fakeMeta but without the x-source-task-id header.
    const noTaskMeta = {
      sessionId: "session-123",
      requestInfo: { headers: { "x-agent-id": "agent-abc" } },
    };
    const { structuredContent } = await invoke(tool, { id: memoryId, useful: true }, noTaskMeta);

    expect(structuredContent.success).toBe(false);
    expect(structuredContent.message).toMatch(/no source task ID/);
    expect(calls).toHaveLength(0);
  });

  test("MCP handshake — memory_rate is registered with the expected name", () => {
    const { registered } = buildServer();
    expect(Object.keys(registered)).toContain("memory_rate");
  });
});
|
|
195
|
+
|
|
196
|
+
// `renderMemoriesPrompt` must append the memory_rate hint only when the
// MEMORY_RATERS env var lists `explicit-self`.
describe("renderMemoriesPrompt — conditional rate-tool hint", () => {
  // One memory with high similarity and one with low similarity; the snapshot
  // below shows only the high-similarity one being rendered.
  const sampleMemories = [
    { id: "m-1", name: "Foo bug fix", content: "use Bun.serve not express", similarity: 0.9 },
    { id: "m-2", name: "Low signal", content: "trivial", similarity: 0.1 },
  ];

  // Value MEMORY_RATERS had before each test, restored afterwards so the suite
  // does not erase configuration other suites in the same process may rely on.
  let previousRaters: string | undefined;

  beforeEach(() => {
    previousRaters = process.env.MEMORY_RATERS;
    // Every test starts with the variable unset.
    delete process.env.MEMORY_RATERS;
  });

  afterEach(() => {
    if (previousRaters === undefined) {
      delete process.env.MEMORY_RATERS;
    } else {
      process.env.MEMORY_RATERS = previousRaters;
    }
  });

  test("no memories above threshold → returns null", () => {
    const result = renderMemoriesPrompt([{ id: "x", name: "x", content: "x", similarity: 0.1 }]);
    expect(result).toBeNull();
  });

  test("MEMORY_RATERS unset → no rate-tool hint (byte-identical to pre-step-5)", () => {
    const result = renderMemoriesPrompt(sampleMemories);
    expect(result).not.toBeNull();
    expect(result).not.toContain("memory_rate");
    expect(result).toContain("### Relevant Past Knowledge");
    expect(result).toContain("- **Foo bug fix** (id: m-1):");
    // Snapshot — exact byte parity with main's runner.ts:1579 block.
    expect(result).toBe(
      `\n\n### Relevant Past Knowledge\n\nThese memories from your previous sessions may be useful. Use \`memory-get\` with the memory ID to retrieve full details.\n\n- **Foo bug fix** (id: m-1): use Bun.serve not express\n`,
    );
  });

  test("MEMORY_RATERS empty string → no hint", () => {
    process.env.MEMORY_RATERS = "";
    const result = renderMemoriesPrompt(sampleMemories);
    expect(result).not.toContain("memory_rate");
  });

  test("MEMORY_RATERS=noop → no hint (gate is on explicit-self only)", () => {
    process.env.MEMORY_RATERS = "noop";
    const result = renderMemoriesPrompt(sampleMemories);
    expect(result).not.toContain("memory_rate");
  });

  test("MEMORY_RATERS=explicit-self → hint appended verbatim", () => {
    process.env.MEMORY_RATERS = "explicit-self";
    const result = renderMemoriesPrompt(sampleMemories);
    expect(result).toContain("memory_rate");
    expect(result).toContain("trains the swarm to surface better memories");
    expect(result).toContain("2-5 ratings per task is plenty.");
  });

  test("MEMORY_RATERS includes explicit-self alongside others → hint appended", () => {
    // Mixed spacing around the commas is deliberate.
    process.env.MEMORY_RATERS = "implicit-citation, explicit-self ,llm";
    const result = renderMemoriesPrompt(sampleMemories);
    expect(result).toContain("memory_rate");
  });
});
|