@desplega.ai/agent-swarm 1.74.4 → 1.76.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/openapi.json +1264 -46
- package/package.json +2 -2
- package/src/be/db.ts +563 -9
- package/src/be/memory/edges-store.ts +69 -0
- package/src/be/memory/providers/sqlite-store.ts +4 -0
- package/src/be/memory/raters/explicit-self.ts +22 -0
- package/src/be/memory/raters/implicit-citation.ts +44 -0
- package/src/be/memory/raters/llm-client.ts +172 -0
- package/src/be/memory/raters/llm-summarizer.ts +218 -0
- package/src/be/memory/raters/llm.ts +375 -0
- package/src/be/memory/raters/noop.ts +14 -0
- package/src/be/memory/raters/registry.ts +86 -0
- package/src/be/memory/raters/retrieval.ts +88 -0
- package/src/be/memory/raters/run-server-raters.ts +97 -0
- package/src/be/memory/raters/store.ts +228 -0
- package/src/be/memory/raters/types.ts +101 -0
- package/src/be/memory/reranker.ts +32 -2
- package/src/be/memory/retrieval-store.ts +116 -0
- package/src/be/memory/types.ts +3 -0
- package/src/be/migrations/051_memory_posteriors_and_retrieval.sql +67 -0
- package/src/be/migrations/052_memory_edges.sql +36 -0
- package/src/be/migrations/053_agent_waiting_for_credentials_status.sql +61 -0
- package/src/be/migrations/054_agent_harness_provider.sql +21 -0
- package/src/be/migrations/055_agent_cred_status.sql +15 -0
- package/src/be/migrations/056_drop_agent_tasks_source_check.sql +139 -0
- package/src/be/migrations/057_inbox_item_state.sql +27 -0
- package/src/be/migrations/058_task_templates.sql +31 -0
- package/src/be/swarm-config-guard.ts +24 -0
- package/src/commands/credential-wait.ts +186 -0
- package/src/commands/provider-credentials.ts +434 -0
- package/src/commands/runner.ts +253 -21
- package/src/hooks/hook.ts +143 -66
- package/src/http/agents.ts +191 -1
- package/src/http/config.ts +11 -1
- package/src/http/core.ts +5 -0
- package/src/http/inbox-state.ts +89 -0
- package/src/http/index.ts +10 -0
- package/src/http/memory.ts +230 -1
- package/src/http/sessions.ts +86 -0
- package/src/http/status.ts +665 -0
- package/src/http/task-templates.ts +51 -0
- package/src/http/tasks.ts +85 -5
- package/src/http/users.ts +134 -0
- package/src/prompts/memories.ts +62 -0
- package/src/providers/claude-adapter.ts +22 -0
- package/src/providers/claude-managed-adapter.ts +24 -0
- package/src/providers/codex-adapter.ts +43 -1
- package/src/providers/devin-adapter.ts +18 -0
- package/src/providers/index.ts +7 -0
- package/src/providers/opencode-adapter.ts +60 -0
- package/src/providers/pi-mono-adapter.ts +71 -0
- package/src/providers/types.ts +34 -0
- package/src/server.ts +2 -0
- package/src/slack/handlers.ts +0 -1
- package/src/tests/agents-harness-provider.test.ts +333 -0
- package/src/tests/credential-check.test.ts +367 -0
- package/src/tests/credential-status-api.test.ts +223 -0
- package/src/tests/credential-status-routing.test.ts +150 -0
- package/src/tests/credential-wait.test.ts +282 -0
- package/src/tests/harness-provider-resolution.test.ts +242 -0
- package/src/tests/jira-sync.test.ts +1 -1
- package/src/tests/memory-edges.test.ts +722 -0
- package/src/tests/memory-rate-endpoint.test.ts +330 -0
- package/src/tests/memory-rate-tool.test.ts +252 -0
- package/src/tests/memory-rater-e2e.test.ts +578 -0
- package/src/tests/memory-rater-implicit-citation.test.ts +304 -0
- package/src/tests/memory-rater-llm-summarizer.test.ts +317 -0
- package/src/tests/memory-rater-llm.test.ts +964 -0
- package/src/tests/memory-rater-store.test.ts +249 -0
- package/src/tests/memory-reranker.test.ts +161 -2
- package/src/tests/migration-runner-regressions.test.ts +17 -2
- package/src/tests/mocks/mock-llm-rater-client.ts +35 -0
- package/src/tests/run-server-raters.test.ts +291 -0
- package/src/tests/sessions.test.ts +141 -0
- package/src/tests/status.test.ts +843 -0
- package/src/tests/stop-hook-task-resolution.test.ts +98 -0
- package/src/tests/template-recommendations.test.ts +148 -0
- package/src/tests/tool-annotations.test.ts +2 -2
- package/src/tests/use-dismissible-card.test.ts +140 -0
- package/src/tools/memory-rate.ts +166 -0
- package/src/tools/memory-search.ts +18 -0
- package/src/tools/store-progress.ts +37 -0
- package/src/tools/swarm-config/set-config.ts +17 -1
- package/src/tools/tool-config.ts +1 -0
- package/src/types.ts +122 -1
- package/src/utils/harness-provider.ts +32 -0
- package/tsconfig.json +0 -2
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
import { afterAll, beforeAll, beforeEach, describe, expect, test } from "bun:test";
|
|
2
|
+
import { unlink } from "node:fs/promises";
|
|
3
|
+
import { closeDb, createAgent, getDb, initDb } from "../be/db";
|
|
4
|
+
import { SqliteMemoryStore } from "../be/memory/providers/sqlite-store";
|
|
5
|
+
import { applyRating, ExplicitSelfDuplicateError } from "../be/memory/raters/store";
|
|
6
|
+
import type { RatingEvent } from "../be/memory/raters/types";
|
|
7
|
+
|
|
8
|
+
const TEST_DB_PATH = "./test-memory-rater-store.sqlite";
|
|
9
|
+
|
|
10
|
+
/**
 * Integration tests for `applyRating`, run against a real SQLite file at
 * `TEST_DB_PATH`.
 *
 * Each test creates its own memory, applies one or more `RatingEvent`s and then
 * inspects two things directly in the database:
 *   - the `alpha` / `beta` columns of `agent_memory` (the Beta posterior), and
 *   - the rows written to `memory_rating` (the audit trail).
 */
describe("applyRating", () => {
  const agentA = "aaaa0000-0000-4000-8000-000000000001";
  const taskId = "00000000-0000-4000-8000-000000001234";
  const taskIdAlt = "00000000-0000-4000-8000-000000abcdef";
  let store: SqliteMemoryStore;

  /**
   * Best-effort removal of the test database and its `-wal` / `-shm`
   * companion files. Shared by `beforeAll` and `afterAll`.
   */
  async function removeDbFiles(): Promise<void> {
    for (const suffix of ["", "-wal", "-shm"]) {
      try {
        await unlink(TEST_DB_PATH + suffix);
      } catch {
        // Deliberately ignored: the file usually does not exist, and a failed
        // cleanup must not fail the suite.
      }
    }
  }

  beforeAll(async () => {
    await removeDbFiles();
    initDb(TEST_DB_PATH);
    createAgent({ id: agentA, name: "Test Agent A", isLead: false, status: "idle" });
    // Real agent_tasks rows so the memory_rating.taskId FK passes.
    const insertTask = getDb().prepare(
      `INSERT INTO agent_tasks (id, agentId, task, status, source, createdAt, lastUpdatedAt)
       VALUES (?, ?, ?, 'in_progress', 'mcp', ?, ?)`,
    );
    const nowIso = new Date().toISOString();
    insertTask.run(taskId, agentA, "test task", nowIso, nowIso);
    insertTask.run(taskIdAlt, agentA, "test task alt", nowIso, nowIso);
    store = new SqliteMemoryStore();
  });

  afterAll(async () => {
    // Close the connection before deleting the files it has open.
    closeDb();
    await removeDbFiles();
  });

  beforeEach(() => {
    // Reset memory_rating between tests so the partial unique index for
    // explicit-self doesn't leak between cases.
    getDb().run("DELETE FROM memory_rating");
    // Reset Beta posteriors to (1,1) so each test starts from the prior.
    getDb().run("UPDATE agent_memory SET alpha = 1.0, beta = 1.0");
  });

  /** Stores a fresh agent-scoped memory owned by `agentA` and returns its id. */
  function makeMemory(name: string): { id: string } {
    const memory = store.store({
      agentId: agentA,
      scope: "agent",
      name,
      content: `${name} content`,
      source: "manual",
    });
    return { id: memory.id };
  }

  /**
   * Reads the current `alpha` / `beta` of a memory straight from `agent_memory`.
   * Throws when the memory row does not exist.
   */
  function readPosterior(id: string): { alpha: number; beta: number } {
    const row = getDb()
      .prepare<{ alpha: number; beta: number }, [string]>(
        "SELECT alpha, beta FROM agent_memory WHERE id = ?",
      )
      .get(id);
    if (!row) throw new Error(`memory ${id} not found`);
    return { alpha: row.alpha, beta: row.beta };
  }

  /** Counts the `memory_rating` rows recorded for a memory (0 when none). */
  function countRatings(memoryId: string): number {
    const row = getDb()
      .prepare<{ n: number }, [string]>(
        "SELECT COUNT(*) AS n FROM memory_rating WHERE memoryId = ?",
      )
      .get(memoryId);
    return row?.n ?? 0;
  }

  test("signal=+1, weight=1 → alpha += 1, beta += 0; audit row written", () => {
    const m = makeMemory("positive");
    const events: RatingEvent[] = [{ memoryId: m.id, signal: 1, weight: 1, source: "test" }];
    const result = applyRating(events);
    expect(result.applied).toBe(1);
    expect(result.rejected).toEqual([]);
    expect(readPosterior(m.id)).toEqual({ alpha: 2, beta: 1 });
    expect(countRatings(m.id)).toBe(1);
  });

  test("signal=-1, weight=0.5 → alpha += 0, beta += 0.5", () => {
    const m = makeMemory("negative");
    const result = applyRating([{ memoryId: m.id, signal: -1, weight: 0.5, source: "test" }]);
    expect(result.applied).toBe(1);
    expect(readPosterior(m.id)).toEqual({ alpha: 1, beta: 1.5 });
  });

  test("signal=0 → no posterior movement, audit row still written", () => {
    const m = makeMemory("neutral");
    const result = applyRating([{ memoryId: m.id, signal: 0, weight: 1, source: "test" }]);
    expect(result.applied).toBe(1);
    expect(readPosterior(m.id)).toEqual({ alpha: 1, beta: 1 });
    expect(countRatings(m.id)).toBe(1);
  });

  test("batch of mixed signals applies in one transaction", () => {
    const a = makeMemory("a");
    const b = makeMemory("b");
    const result = applyRating([
      { memoryId: a.id, signal: 1, weight: 1, source: "rater-x" },
      { memoryId: b.id, signal: -0.5, weight: 1, source: "rater-x" },
    ]);
    expect(result.applied).toBe(2);
    expect(readPosterior(a.id)).toEqual({ alpha: 2, beta: 1 });
    expect(readPosterior(b.id)).toEqual({ alpha: 1, beta: 1.5 });
  });

  test("commutativity: parallel applies sum to deterministic posterior", async () => {
    const m = makeMemory("hot");
    const events: RatingEvent[] = Array.from({ length: 20 }, () => ({
      memoryId: m.id,
      signal: 1,
      weight: 0.1,
      source: "rater-x",
    }));
    // NOTE: applyRating returns its result synchronously (the other tests read
    // `.applied` directly), so each call completes inside `map()` before the
    // next one starts. The Promise wrapper only mimics the call shape of
    // concurrent callers; what is verified is that 20 single-event applies
    // accumulate to the same posterior as their sum.
    await Promise.all(events.map((e) => Promise.resolve(applyRating([e]))));
    const post = readPosterior(m.id);
    // 20 additions of 0.1 accumulate floating-point error, hence toBeCloseTo.
    expect(post.alpha).toBeCloseTo(1 + 20 * 0.1, 5);
    expect(post.beta).toBe(1);
    expect(countRatings(m.id)).toBe(20);
  });

  test("out-of-range signal=2 → returned in rejected[], no DB write", () => {
    const m = makeMemory("oor-signal");
    const result = applyRating([{ memoryId: m.id, signal: 2, weight: 1, source: "test" }]);
    expect(result.applied).toBe(0);
    expect(result.rejected).toHaveLength(1);
    expect(result.rejected[0]?.reason).toMatch(/signal/);
    expect(readPosterior(m.id)).toEqual({ alpha: 1, beta: 1 });
    expect(countRatings(m.id)).toBe(0);
  });

  test("out-of-range weight=-1 → returned in rejected[], no DB write", () => {
    const m = makeMemory("oor-weight");
    const result = applyRating([{ memoryId: m.id, signal: 1, weight: -1, source: "test" }]);
    expect(result.applied).toBe(0);
    expect(result.rejected).toHaveLength(1);
    expect(result.rejected[0]?.reason).toMatch(/weight/);
    // "no DB write" covers both the posterior and the audit table.
    expect(readPosterior(m.id)).toEqual({ alpha: 1, beta: 1 });
    expect(countRatings(m.id)).toBe(0);
  });

  test("missing memoryId → returned in rejected[], no DB write", () => {
    // A well-formed id that no test ever stores.
    const missingId = "00000000-0000-4000-8000-deadbeefdead";
    const result = applyRating([
      {
        memoryId: missingId,
        signal: 1,
        weight: 1,
        source: "test",
      },
    ]);
    expect(result.applied).toBe(0);
    expect(result.rejected).toHaveLength(1);
    expect(result.rejected[0]?.reason).toMatch(/not found/i);
    expect(countRatings(missingId)).toBe(0);
  });

  test("missing source → returned in rejected[]", () => {
    const m = makeMemory("no-source");
    const result = applyRating([{ memoryId: m.id, signal: 1, weight: 1, source: "" }]);
    expect(result.applied).toBe(0);
    expect(result.rejected).toHaveLength(1);
    expect(result.rejected[0]?.reason).toMatch(/source/);
  });

  test("partial batch: invalid events rejected, valid ones applied", () => {
    const a = makeMemory("a-part");
    const b = makeMemory("b-part");
    const result = applyRating([
      { memoryId: a.id, signal: 1, weight: 1, source: "test" },
      { memoryId: b.id, signal: 5, weight: 1, source: "test" }, // out of range
      { memoryId: a.id, signal: -0.5, weight: 0.5, source: "test" },
    ]);
    expect(result.applied).toBe(2);
    expect(result.rejected).toHaveLength(1);
    // a: +1 from the first event, then 0.5 * 0.5 = 0.25 added to beta.
    expect(readPosterior(a.id)).toEqual({ alpha: 2, beta: 1.25 });
    expect(readPosterior(b.id)).toEqual({ alpha: 1, beta: 1 });
  });

  test("explicit-self duplicate raises ExplicitSelfDuplicateError", () => {
    const m = makeMemory("explicit");
    const event: RatingEvent = {
      memoryId: m.id,
      signal: 1,
      weight: 1,
      source: "explicit-self",
    };

    // First write succeeds.
    expect(applyRating([event], { taskId }).applied).toBe(1);

    // Second write hits the partial unique index.
    expect(() => applyRating([event], { taskId })).toThrow(ExplicitSelfDuplicateError);

    // Posterior moved exactly once, and only the first write left an audit row.
    expect(readPosterior(m.id)).toEqual({ alpha: 2, beta: 1 });
    expect(countRatings(m.id)).toBe(1);
  });

  test("empty batch → applied=0, no DB calls, no error", () => {
    const result = applyRating([]);
    expect(result).toEqual({ applied: 0, rejected: [] });
  });

  test("audit row carries source, signal, weight, reasoning, taskId", () => {
    const m = makeMemory("audit");
    applyRating(
      [
        {
          memoryId: m.id,
          signal: 0.7,
          weight: 0.4,
          source: "test-rater",
          reasoning: "because reasons",
        },
      ],
      { taskId: taskIdAlt },
    );
    const row = getDb()
      .prepare<
        {
          memoryId: string;
          taskId: string | null;
          source: string;
          signal: number;
          weight: number;
          reasoning: string | null;
        },
        [string]
      >(
        "SELECT memoryId, taskId, source, signal, weight, reasoning FROM memory_rating WHERE memoryId = ?",
      )
      .get(m.id);
    // Guard instead of `not.toBeNull()` + `row!`: this also fails cleanly if the
    // driver reports a missing row as `undefined` rather than `null`.
    if (!row) throw new Error(`no memory_rating row written for memory ${m.id}`);
    expect(row.taskId).toBe(taskIdAlt);
    expect(row.source).toBe("test-rater");
    expect(row.signal).toBe(0.7);
    expect(row.weight).toBe(0.4);
    expect(row.reasoning).toBe("because reasons");
  });
});
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { describe, expect, test } from "bun:test";
|
|
2
|
-
import { accessBoost, computeScore, recencyDecay, rerank } from "../be/memory/reranker";
|
|
1
|
+
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
2
|
+
import { accessBoost, computeScore, recencyDecay, rerank, usefulness } from "../be/memory/reranker";
|
|
3
3
|
import type { MemoryCandidate } from "../be/memory/types";
|
|
4
4
|
|
|
5
5
|
function makeCandidate(
|
|
@@ -23,6 +23,8 @@ function makeCandidate(
|
|
|
23
23
|
accessCount: 0,
|
|
24
24
|
expiresAt: null,
|
|
25
25
|
embeddingModel: null,
|
|
26
|
+
alpha: 1.0,
|
|
27
|
+
beta: 1.0,
|
|
26
28
|
...overrides,
|
|
27
29
|
};
|
|
28
30
|
}
|
|
@@ -190,3 +192,160 @@ describe("rerank", () => {
|
|
|
190
192
|
expect(result1[0]!.similarity).toBe(result2[0]!.similarity);
|
|
191
193
|
});
|
|
192
194
|
});
|
|
195
|
+
|
|
196
|
+
/**
 * Unit tests for `usefulness(alpha, beta)`.
 *
 * All cases but the last run with MEMORY_DEMOTION_FLOOR removed from the
 * environment; the last one sets it explicitly. The hooks below stash the
 * caller's value before each test and put it back afterwards.
 */
describe("usefulness", () => {
  let savedFloor: string | undefined;

  beforeEach(() => {
    savedFloor = process.env.MEMORY_DEMOTION_FLOOR;
    delete process.env.MEMORY_DEMOTION_FLOOR;
  });

  afterEach(() => {
    if (savedFloor !== undefined) {
      process.env.MEMORY_DEMOTION_FLOOR = savedFloor;
      return;
    }
    delete process.env.MEMORY_DEMOTION_FLOOR;
  });

  test("Beta(1,1) → exactly 1.0 (default prior is a no-op)", () => {
    expect(usefulness(1, 1)).toBe(1.0);
  });

  test("Beta(10,1) → clamp(2 * 10/11, 1, 2) ≈ 1.818", () => {
    // Twice the posterior mean, clamped into [1, 2].
    const doubledMean = (2 * 10) / 11;
    const expected = Math.max(1.0, Math.min(2.0, doubledMean));
    expect(usefulness(10, 1)).toBeCloseTo(expected, 5);
    expect(usefulness(10, 1)).toBeCloseTo(1.8181818, 5);
  });

  test("Beta(1,10) → 1.0 (floored at default MEMORY_DEMOTION_FLOOR=1.0)", () => {
    expect(usefulness(1, 10)).toBe(1.0);
  });

  test("Beta(50,1) → 2 * 50/51 ≈ 1.961 (approaches ceiling, never above 2.0)", () => {
    // 2 * α/(α+β) stays strictly below 2 for any finite β > 0, so the upper
    // clamp is purely defensive and only matters for degenerate input (β = 0).
    // An earlier plan expected exactly 2.0 here; that was a numerical slip —
    // the value only approaches 2.0 asymptotically.
    const asymptotic = (2 * 50) / 51;
    expect(usefulness(50, 1)).toBeCloseTo(asymptotic, 10);
    expect(usefulness(50, 1)).toBeLessThan(2.0);
  });

  test("ceiling clamp fires on degenerate β=0 (defensive)", () => {
    expect(usefulness(10, 0)).toBe(2.0);
  });

  test("MEMORY_DEMOTION_FLOOR=0.5 lowers the floor and enables demotion", () => {
    // The afterEach hook restores the variable once this test is done.
    process.env.MEMORY_DEMOTION_FLOOR = "0.5";
    expect(usefulness(1, 10)).toBe(0.5);
  });
});
|
|
244
|
+
|
|
245
|
+
/**
 * Backward-compatibility guard for the reranker.
 *
 * With the default Beta(1,1) prior and MEMORY_DEMOTION_FLOOR unset, the score
 * must be exactly `similarity * recencyDecay * accessBoost` — i.e. identical to
 * a build without raters. The last test checks that the usefulness factor does
 * kick in once a posterior has moved.
 */
describe("backward-compat: MEMORY_RATERS unset → reranker is a no-op", () => {
  const now = new Date("2026-04-12T12:00:00Z");
  const nowIso = now.toISOString();
  const DAY_MS = 86400000;
  const HOUR_MS = 3600000;

  /** ISO timestamp lying `ms` milliseconds before `now`. */
  const isoBefore = (ms: number): string => new Date(now.getTime() - ms).toISOString();

  let savedFloor: string | undefined;

  beforeEach(() => {
    savedFloor = process.env.MEMORY_DEMOTION_FLOOR;
    delete process.env.MEMORY_DEMOTION_FLOOR;
  });

  afterEach(() => {
    if (savedFloor !== undefined) {
      process.env.MEMORY_DEMOTION_FLOOR = savedFloor;
      return;
    }
    delete process.env.MEMORY_DEMOTION_FLOOR;
  });

  test("computeScore equals similarity * recencyDecay * accessBoost (no usefulness drift)", () => {
    const cases: MemoryCandidate[] = [
      makeCandidate({
        similarity: 0.8,
        createdAt: nowIso,
        accessedAt: nowIso,
        accessCount: 0,
      }),
      makeCandidate({
        similarity: 0.5,
        createdAt: isoBefore(14 * DAY_MS),
        accessedAt: isoBefore(24 * HOUR_MS),
        accessCount: 5,
      }),
      makeCandidate({
        similarity: 0.99,
        createdAt: isoBefore(28 * DAY_MS),
        accessedAt: isoBefore(72 * HOUR_MS),
        accessCount: 12,
      }),
    ];

    cases.forEach((candidate) => {
      const decay = recencyDecay(candidate.createdAt, now);
      const boost = accessBoost(candidate.accessedAt, candidate.accessCount, now);
      // Exact equality on purpose: any usefulness factor other than 1 is drift.
      expect(computeScore(candidate, now)).toBe(candidate.similarity * decay * boost);
    });
  });

  test("snapshot order + scores match a hard-coded pre-rater baseline", () => {
    // The baseline is the pre-rater formula: similarity * recencyDecay * accessBoost.
    // Candidates keep alpha = beta = 1 and the default floor, so the numbers
    // produced now must be the same.
    const fresh = makeCandidate({
      similarity: 0.9,
      createdAt: nowIso,
      accessedAt: nowIso,
      accessCount: 0,
    });
    const weekOld = makeCandidate({
      similarity: 0.6,
      createdAt: isoBefore(7 * DAY_MS),
      accessedAt: nowIso,
      accessCount: 0,
    });
    const monthOld = makeCandidate({
      similarity: 0.3,
      createdAt: isoBefore(28 * DAY_MS),
      accessedAt: nowIso,
      accessCount: 0,
    });
    const result = rerank([fresh, weekOld, monthOld], { limit: 3, now });

    // Expected: similarity * 2^(-ageDays/14), no access boost, alpha = beta = 1.
    //   0.9 * 1.0       = 0.9
    //   0.6 * 2^(-0.5)  ≈ 0.4242640687
    //   0.3 * 2^(-2)    = 0.075
    expect(result[0]!.similarity).toBeCloseTo(0.9, 10);
    expect(result[1]!.similarity).toBeCloseTo(0.6 * 2 ** -0.5, 10);
    expect(result[2]!.similarity).toBeCloseTo(0.075, 10);
  });

  test("usefulness multiplies into score when posteriors move", () => {
    // Two candidates that differ only in their posterior: α=10, β=1 versus the
    // default α=β=1. Their score ratio must be usefulness(10, 1) (≈ 1.818).
    const shared = {
      similarity: 0.5,
      createdAt: nowIso,
      accessedAt: nowIso,
      accessCount: 0,
    };
    const proven = makeCandidate({ ...shared, alpha: 10, beta: 1 });
    const baseline = makeCandidate({ ...shared });

    const ratio = computeScore(proven, now) / computeScore(baseline, now);
    expect(ratio).toBeCloseTo(usefulness(10, 1), 10);
  });
});
|
|
@@ -71,7 +71,12 @@ describe("migration regressions", () => {
|
|
|
71
71
|
expect(columns).toContain("setupScript");
|
|
72
72
|
});
|
|
73
73
|
|
|
74
|
-
test("fresh DB
|
|
74
|
+
test("fresh DB drops source CHECK constraint on agent_tasks (Zod is the gate)", () => {
|
|
75
|
+
// Migration 056 removes the SQL CHECK on agent_tasks.source — the Zod
|
|
76
|
+
// `AgentTaskSourceSchema` in src/types.ts is now the single source of
|
|
77
|
+
// truth for the allowed enum, and is enforced at the HTTP/MCP ingress.
|
|
78
|
+
// Direct SQL inserts no longer fail on unknown sources by design;
|
|
79
|
+
// adding a new source no longer requires a forward-only migration.
|
|
75
80
|
const database = initDb(FRESH_DB_PATH);
|
|
76
81
|
const now = new Date().toISOString();
|
|
77
82
|
|
|
@@ -81,6 +86,16 @@ describe("migration regressions", () => {
|
|
|
81
86
|
VALUES (?, ?, ?, ?, ?, ?)`,
|
|
82
87
|
[crypto.randomUUID(), "invalid source", "pending", "not-valid", now, now],
|
|
83
88
|
);
|
|
84
|
-
}).toThrow();
|
|
89
|
+
}).not.toThrow();
|
|
90
|
+
|
|
91
|
+
// The requestedByUserId FK survives the table-rebuild in migration 056.
|
|
92
|
+
const fkList = database
|
|
93
|
+
.prepare<{ table: string; from: string; to: string }, []>(
|
|
94
|
+
'SELECT "table" as "table", "from", "to" FROM pragma_foreign_key_list(\'agent_tasks\')',
|
|
95
|
+
)
|
|
96
|
+
.all();
|
|
97
|
+
const requestedByFk = fkList.find((fk) => fk.from === "requestedByUserId");
|
|
98
|
+
expect(requestedByFk?.table).toBe("users");
|
|
99
|
+
expect(requestedByFk?.to).toBe("id");
|
|
85
100
|
});
|
|
86
101
|
});
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deterministic in-memory `LlmRaterClient` for tests.
|
|
3
|
+
*
|
|
4
|
+
* Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-4.md §5
|
|
5
|
+
*
|
|
6
|
+
* Used by `memory-rater-llm.test.ts` (this step) and reused by the cross-
|
|
7
|
+
* cutting e2e in step-7. Keep it dependency-free and side-effect-free.
|
|
8
|
+
*/
|
|
9
|
+
import type {
|
|
10
|
+
LlmRaterClient,
|
|
11
|
+
LlmRaterInput,
|
|
12
|
+
LlmRaterResult,
|
|
13
|
+
} from "../../be/memory/raters/llm-client";
|
|
14
|
+
|
|
15
|
+
export type MockResultMap = Record<string, LlmRaterResult | null>;
|
|
16
|
+
|
|
17
|
+
/**
 * Test double for `LlmRaterClient` that answers from a fixed lookup table
 * instead of calling a model.
 *
 * Every input passed to `rate` is appended to `calls`, so tests can assert on
 * what was requested and in which order.
 */
export class MockLlmRaterClient implements LlmRaterClient {
  /** Inputs received, in call order — for assertions. */
  public readonly calls: LlmRaterInput[] = [];

  /**
   * @param map memoryId → fixed result. Missing keys → fallback.
   * @param fallback result returned when a memoryId is not in the map.
   *   `null` simulates an LLM parse-failure (skip rating).
   */
  constructor(
    private readonly map: MockResultMap,
    private readonly fallback: LlmRaterResult | null = null,
  ) {}

  /**
   * Records `input` and resolves with the canned result for `input.memory.id`.
   *
   * @param input rater input; only `input.memory.id` is used for the lookup.
   * @returns the mapped result (which may itself be `null`) when the id is an
   *   own key of the map, otherwise the configured fallback.
   */
  async rate(input: LlmRaterInput): Promise<LlmRaterResult | null> {
    this.calls.push(input);

    const memoryId = input.memory.id;
    // Own-property check so an id such as "toString" or "constructor" can never
    // resolve to something inherited from Object.prototype.
    if (!Object.hasOwn(this.map, memoryId)) {
      return this.fallback;
    }
    // Indexed access is typed `T | undefined` under `noUncheckedIndexedAccess`,
    // and a caller could store an explicit `undefined`; normalise to `null` so
    // the declared return type always holds.
    return this.map[memoryId] ?? null;
  }
}
|