@desplega.ai/agent-swarm 1.74.4 → 1.76.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. package/README.md +1 -1
  2. package/openapi.json +1264 -46
  3. package/package.json +2 -2
  4. package/src/be/db.ts +563 -9
  5. package/src/be/memory/edges-store.ts +69 -0
  6. package/src/be/memory/providers/sqlite-store.ts +4 -0
  7. package/src/be/memory/raters/explicit-self.ts +22 -0
  8. package/src/be/memory/raters/implicit-citation.ts +44 -0
  9. package/src/be/memory/raters/llm-client.ts +172 -0
  10. package/src/be/memory/raters/llm-summarizer.ts +218 -0
  11. package/src/be/memory/raters/llm.ts +375 -0
  12. package/src/be/memory/raters/noop.ts +14 -0
  13. package/src/be/memory/raters/registry.ts +86 -0
  14. package/src/be/memory/raters/retrieval.ts +88 -0
  15. package/src/be/memory/raters/run-server-raters.ts +97 -0
  16. package/src/be/memory/raters/store.ts +228 -0
  17. package/src/be/memory/raters/types.ts +101 -0
  18. package/src/be/memory/reranker.ts +32 -2
  19. package/src/be/memory/retrieval-store.ts +116 -0
  20. package/src/be/memory/types.ts +3 -0
  21. package/src/be/migrations/051_memory_posteriors_and_retrieval.sql +67 -0
  22. package/src/be/migrations/052_memory_edges.sql +36 -0
  23. package/src/be/migrations/053_agent_waiting_for_credentials_status.sql +61 -0
  24. package/src/be/migrations/054_agent_harness_provider.sql +21 -0
  25. package/src/be/migrations/055_agent_cred_status.sql +15 -0
  26. package/src/be/migrations/056_drop_agent_tasks_source_check.sql +139 -0
  27. package/src/be/migrations/057_inbox_item_state.sql +27 -0
  28. package/src/be/migrations/058_task_templates.sql +31 -0
  29. package/src/be/swarm-config-guard.ts +24 -0
  30. package/src/commands/credential-wait.ts +186 -0
  31. package/src/commands/provider-credentials.ts +434 -0
  32. package/src/commands/runner.ts +253 -21
  33. package/src/hooks/hook.ts +143 -66
  34. package/src/http/agents.ts +191 -1
  35. package/src/http/config.ts +11 -1
  36. package/src/http/core.ts +5 -0
  37. package/src/http/inbox-state.ts +89 -0
  38. package/src/http/index.ts +10 -0
  39. package/src/http/memory.ts +230 -1
  40. package/src/http/sessions.ts +86 -0
  41. package/src/http/status.ts +665 -0
  42. package/src/http/task-templates.ts +51 -0
  43. package/src/http/tasks.ts +85 -5
  44. package/src/http/users.ts +134 -0
  45. package/src/prompts/memories.ts +62 -0
  46. package/src/providers/claude-adapter.ts +22 -0
  47. package/src/providers/claude-managed-adapter.ts +24 -0
  48. package/src/providers/codex-adapter.ts +43 -1
  49. package/src/providers/devin-adapter.ts +18 -0
  50. package/src/providers/index.ts +7 -0
  51. package/src/providers/opencode-adapter.ts +60 -0
  52. package/src/providers/pi-mono-adapter.ts +71 -0
  53. package/src/providers/types.ts +34 -0
  54. package/src/server.ts +2 -0
  55. package/src/slack/handlers.ts +0 -1
  56. package/src/tests/agents-harness-provider.test.ts +333 -0
  57. package/src/tests/credential-check.test.ts +367 -0
  58. package/src/tests/credential-status-api.test.ts +223 -0
  59. package/src/tests/credential-status-routing.test.ts +150 -0
  60. package/src/tests/credential-wait.test.ts +282 -0
  61. package/src/tests/harness-provider-resolution.test.ts +242 -0
  62. package/src/tests/jira-sync.test.ts +1 -1
  63. package/src/tests/memory-edges.test.ts +722 -0
  64. package/src/tests/memory-rate-endpoint.test.ts +330 -0
  65. package/src/tests/memory-rate-tool.test.ts +252 -0
  66. package/src/tests/memory-rater-e2e.test.ts +578 -0
  67. package/src/tests/memory-rater-implicit-citation.test.ts +304 -0
  68. package/src/tests/memory-rater-llm-summarizer.test.ts +317 -0
  69. package/src/tests/memory-rater-llm.test.ts +964 -0
  70. package/src/tests/memory-rater-store.test.ts +249 -0
  71. package/src/tests/memory-reranker.test.ts +161 -2
  72. package/src/tests/migration-runner-regressions.test.ts +17 -2
  73. package/src/tests/mocks/mock-llm-rater-client.ts +35 -0
  74. package/src/tests/run-server-raters.test.ts +291 -0
  75. package/src/tests/sessions.test.ts +141 -0
  76. package/src/tests/status.test.ts +843 -0
  77. package/src/tests/stop-hook-task-resolution.test.ts +98 -0
  78. package/src/tests/template-recommendations.test.ts +148 -0
  79. package/src/tests/tool-annotations.test.ts +2 -2
  80. package/src/tests/use-dismissible-card.test.ts +140 -0
  81. package/src/tools/memory-rate.ts +166 -0
  82. package/src/tools/memory-search.ts +18 -0
  83. package/src/tools/store-progress.ts +37 -0
  84. package/src/tools/swarm-config/set-config.ts +17 -1
  85. package/src/tools/tool-config.ts +1 -0
  86. package/src/types.ts +122 -1
  87. package/src/utils/harness-provider.ts +32 -0
  88. package/tsconfig.json +0 -2
@@ -0,0 +1,304 @@
1
+ import { afterAll, beforeAll, beforeEach, describe, expect, test } from "bun:test";
2
+ import { unlink } from "node:fs/promises";
3
+ import { closeDb, createAgent, createSessionLogs, getDb, initDb } from "../be/db";
4
+ import { SqliteMemoryStore } from "../be/memory/providers/sqlite-store";
5
+ import { ImplicitCitationRater } from "../be/memory/raters/implicit-citation";
6
+ import {
7
+ getRaterWeightMultiplier,
8
+ getRegisteredRaters,
9
+ SERVER_RATERS,
10
+ } from "../be/memory/raters/registry";
11
+ import { getRetrievalsForTask, recordRetrievals } from "../be/memory/raters/retrieval";
12
+ import { applyRating } from "../be/memory/raters/store";
13
+ import type { RatingEvent } from "../be/memory/raters/types";
14
+
15
+ const TEST_DB_PATH = "./test-memory-rater-implicit-citation.sqlite";
16
+
17
+ // ─────────────────────────────────────────────────────────────────────────────
18
+ // Pure-function unit tests — no DB required for these. Plan §7.
19
+ // ─────────────────────────────────────────────────────────────────────────────
20
+
21
+ describe("ImplicitCitationRater (pure)", () => {
22
+ const rater = new ImplicitCitationRater();
23
+
24
+ test("name is 'implicit-citation'", () => {
25
+ expect(rater.name).toBe("implicit-citation");
26
+ });
27
+
28
+ test("hit + miss: emits +1 weight=0.5 for cited memory, -1 weight=0.25 for uncited", async () => {
29
+ const events = await rater.rate({
30
+ agentId: "agent-x",
31
+ taskId: "task-y",
32
+ retrievedMemoryIds: ["mem-A", "mem-B"],
33
+ evidence: "I used mem-A here when solving the task.",
34
+ });
35
+ expect(events).toHaveLength(2);
36
+ const a = events.find((e) => e.memoryId === "mem-A")!;
37
+ const b = events.find((e) => e.memoryId === "mem-B")!;
38
+ expect(a).toEqual({ memoryId: "mem-A", signal: 1, weight: 0.5, source: "" });
39
+ expect(b).toEqual({ memoryId: "mem-B", signal: -1, weight: 0.25, source: "" });
40
+ });
41
+
42
+ test("rater leaves source empty — framework stamps it (anti-spoof)", async () => {
43
+ const events = await rater.rate({
44
+ agentId: "agent-x",
45
+ retrievedMemoryIds: ["mem-X"],
46
+ evidence: "mem-X cited",
47
+ });
48
+ expect(events[0]!.source).toBe("");
49
+ });
50
+
51
+ test("empty evidence → all misses (negative)", async () => {
52
+ const events = await rater.rate({
53
+ agentId: "agent-x",
54
+ retrievedMemoryIds: ["mem-A", "mem-B", "mem-C"],
55
+ evidence: "",
56
+ });
57
+ expect(events).toHaveLength(3);
58
+ for (const e of events) {
59
+ expect(e.signal).toBe(-1);
60
+ expect(e.weight).toBe(0.25);
61
+ }
62
+ });
63
+
64
+ test("null evidence → all misses (negative)", async () => {
65
+ const events = await rater.rate({
66
+ agentId: "agent-x",
67
+ retrievedMemoryIds: ["mem-A"],
68
+ evidence: null,
69
+ });
70
+ expect(events).toHaveLength(1);
71
+ expect(events[0]!.signal).toBe(-1);
72
+ });
73
+
74
+ test("empty retrievedMemoryIds → no events", async () => {
75
+ const events = await rater.rate({
76
+ agentId: "agent-x",
77
+ retrievedMemoryIds: [],
78
+ evidence: "anything",
79
+ });
80
+ expect(events).toEqual([]);
81
+ });
82
+
83
+ test("substring-prefix collision: citing 'mem-AB' counts as a hit for 'mem-A'", async () => {
84
+ // Documented behaviour: literal substring match. UUID call sites never
85
+ // collide; this test locks the rule for synthetic IDs so a future change
86
+ // (e.g. word-boundary regex) is intentional.
87
+ const events = await rater.rate({
88
+ agentId: "agent-x",
89
+ retrievedMemoryIds: ["mem-A", "mem-AB"],
90
+ evidence: "mem-AB only",
91
+ });
92
+ const a = events.find((e) => e.memoryId === "mem-A")!;
93
+ const ab = events.find((e) => e.memoryId === "mem-AB")!;
94
+ expect(a.signal).toBe(1);
95
+ expect(ab.signal).toBe(1);
96
+ });
97
+ });
98
+
99
+ // ─────────────────────────────────────────────────────────────────────────────
100
+ // Integration tests — DB-backed end-to-end through the same path as
101
+ // store-progress.ts §5 fires.
102
+ // ─────────────────────────────────────────────────────────────────────────────
103
+
104
+ describe("retrieval → ImplicitCitationRater → posterior shift", () => {
105
+ const agentId = "aaaa0000-0000-4000-8000-000000000ic1";
106
+ const taskId = "00000000-0000-4000-8000-0000000ic001";
107
+ const taskIdMiss = "00000000-0000-4000-8000-0000000ic002";
108
+ let store: SqliteMemoryStore;
109
+
110
+ beforeAll(async () => {
111
+ for (const suffix of ["", "-wal", "-shm"]) {
112
+ try {
113
+ await unlink(TEST_DB_PATH + suffix);
114
+ } catch {}
115
+ }
116
+ initDb(TEST_DB_PATH);
117
+ createAgent({ id: agentId, name: "Citation Test Agent", isLead: false, status: "idle" });
118
+ const insertTask = getDb().prepare(
119
+ `INSERT INTO agent_tasks (id, agentId, task, status, source, createdAt, lastUpdatedAt)
120
+ VALUES (?, ?, ?, 'in_progress', 'mcp', ?, ?)`,
121
+ );
122
+ const nowIso = new Date().toISOString();
123
+ insertTask.run(taskId, agentId, "test task with citation", nowIso, nowIso);
124
+ insertTask.run(taskIdMiss, agentId, "test task without citation", nowIso, nowIso);
125
+ store = new SqliteMemoryStore();
126
+ });
127
+
128
+ afterAll(async () => {
129
+ closeDb();
130
+ for (const suffix of ["", "-wal", "-shm"]) {
131
+ try {
132
+ await unlink(TEST_DB_PATH + suffix);
133
+ } catch {}
134
+ }
135
+ });
136
+
137
+ beforeEach(() => {
138
+ getDb().run("DELETE FROM memory_rating");
139
+ getDb().run("DELETE FROM memory_retrieval");
140
+ getDb().run("DELETE FROM session_logs");
141
+ getDb().run("UPDATE agent_memory SET alpha = 1.0, beta = 1.0");
142
+ });
143
+
144
/**
 * Stores a fresh agent-scoped memory for the suite's test agent and hands back
 * only its id, which is all the tests need to reference it afterwards.
 *
 * @param name - Memory name; the stored content is derived as `"<name> content"`.
 * @returns An object carrying the id of the newly stored memory.
 */
function makeMemory(name: string): { id: string } {
  const content = `${name} content`;
  const { id } = store.store({
    agentId,
    scope: "agent",
    name,
    content,
    source: "manual",
  });
  return { id };
}
154
+
155
/**
 * Reads the current `alpha` / `beta` columns of one `agent_memory` row.
 *
 * @param id - Id of the memory row to look up.
 * @returns A plain `{ alpha, beta }` pair copied from the row.
 * @throws Error when no row with that id exists.
 */
function readPosterior(id: string): { alpha: number; beta: number } {
  const statement = getDb().prepare<{ alpha: number; beta: number }, [string]>(
    "SELECT alpha, beta FROM agent_memory WHERE id = ?",
  );
  const found = statement.get(id);
  if (found) {
    // Copy just the two columns so callers can compare with `toEqual`.
    const { alpha, beta } = found;
    return { alpha, beta };
  }
  throw new Error(`memory ${id} not found`);
}
164
+
165
/**
 * Fetches the `memory_rating` audit rows recorded for a task.
 *
 * @param taskId - Task whose rating rows should be returned.
 * @returns Every matching row, projected to memoryId / source / signal / weight.
 */
function getRatings(taskId: string) {
  type RatingRow = {
    memoryId: string;
    source: string;
    signal: number;
    weight: number;
  };
  const query = getDb().prepare<RatingRow, [string]>(
    "SELECT memoryId, source, signal, weight FROM memory_rating WHERE taskId = ?",
  );
  return query.all(taskId);
}
178
+
179
+ test("recordRetrievals writes one row per result for the task", () => {
180
+ const m1 = makeMemory("retrieval-target-1");
181
+ const m2 = makeMemory("retrieval-target-2");
182
+ recordRetrievals(taskId, agentId, [
183
+ { memoryId: m1.id, similarity: 0.9 },
184
+ { memoryId: m2.id, similarity: 0.7 },
185
+ ]);
186
+ const rows = getRetrievalsForTask(taskId);
187
+ expect(rows).toHaveLength(2);
188
+ expect(rows.map((r) => r.memoryId).sort()).toEqual([m1.id, m2.id].sort());
189
+ });
190
+
191
+ test("recordRetrievals is a no-op when taskId is undefined", () => {
192
+ const m = makeMemory("no-task");
193
+ recordRetrievals(undefined, agentId, [{ memoryId: m.id, similarity: 0.9 }]);
194
+ const rows = getDb().prepare("SELECT COUNT(*) as n FROM memory_retrieval").get() as {
195
+ n: number;
196
+ };
197
+ expect(rows.n).toBe(0);
198
+ });
199
+
200
+ test("recordRetrievals is a no-op when results is empty", () => {
201
+ recordRetrievals(taskId, agentId, []);
202
+ const rows = getDb().prepare("SELECT COUNT(*) as n FROM memory_retrieval").get() as {
203
+ n: number;
204
+ };
205
+ expect(rows.n).toBe(0);
206
+ });
207
+
208
+ test("end-to-end: cited memory shifts alpha by 0.5; uncited memory shifts beta by 0.25", async () => {
209
+ const cited = makeMemory("cited");
210
+ const uncited = makeMemory("uncited");
211
+
212
+ // 1. Search-time: log the retrievals.
213
+ recordRetrievals(taskId, agentId, [
214
+ { memoryId: cited.id, similarity: 0.9 },
215
+ { memoryId: uncited.id, similarity: 0.85 },
216
+ ]);
217
+
218
+ // 2. During the task: session_logs accumulate text mentioning ONE of them.
219
+ createSessionLogs({
220
+ taskId,
221
+ sessionId: "session-1",
222
+ iteration: 1,
223
+ cli: "claude",
224
+ lines: [`Looking up memory ${cited.id} for context.`, "Doing the work."],
225
+ });
226
+
227
+ // 3. Task completion: simulate the store-progress server-rater fire.
228
+ const retrievals = getRetrievalsForTask(taskId);
229
+ const retrievedMemoryIds = retrievals.map((r) => r.memoryId);
230
+ const evidence = getDb()
231
+ .prepare<{ content: string }, [string]>(
232
+ "SELECT content FROM session_logs WHERE taskId = ? ORDER BY iteration, lineNumber",
233
+ )
234
+ .all(taskId)
235
+ .map((row) => row.content)
236
+ .join("\n");
237
+
238
+ const rater = new ImplicitCitationRater();
239
+ const events = await rater.rate({
240
+ taskId,
241
+ agentId,
242
+ retrievedMemoryIds,
243
+ evidence,
244
+ });
245
+ const stamped: RatingEvent[] = events.map((e) => ({ ...e, source: rater.name }));
246
+ const result = applyRating(stamped, { taskId });
247
+ expect(result.applied).toBe(2);
248
+
249
+ // 4. Posteriors moved as documented.
250
+ expect(readPosterior(cited.id)).toEqual({ alpha: 1.5, beta: 1.0 });
251
+ expect(readPosterior(uncited.id)).toEqual({ alpha: 1.0, beta: 1.25 });
252
+
253
+ // 5. Audit rows written with `source = 'implicit-citation'`.
254
+ const ratings = getRatings(taskId);
255
+ expect(ratings).toHaveLength(2);
256
+ for (const r of ratings) {
257
+ expect(r.source).toBe("implicit-citation");
258
+ }
259
+ const citedRow = ratings.find((r) => r.memoryId === cited.id)!;
260
+ const uncitedRow = ratings.find((r) => r.memoryId === uncited.id)!;
261
+ expect(citedRow).toMatchObject({ signal: 1, weight: 0.5 });
262
+ expect(uncitedRow).toMatchObject({ signal: -1, weight: 0.25 });
263
+ });
264
+
265
+ test("negative path: no citation in session_logs → only beta moves", async () => {
266
+ const m = makeMemory("never-cited");
267
+ recordRetrievals(taskIdMiss, agentId, [{ memoryId: m.id, similarity: 0.9 }]);
268
+ createSessionLogs({
269
+ taskId: taskIdMiss,
270
+ sessionId: "session-2",
271
+ iteration: 1,
272
+ cli: "claude",
273
+ lines: ["completely unrelated content"],
274
+ });
275
+
276
+ const rater = new ImplicitCitationRater();
277
+ const events = await rater.rate({
278
+ taskId: taskIdMiss,
279
+ agentId,
280
+ retrievedMemoryIds: [m.id],
281
+ evidence: "completely unrelated content",
282
+ });
283
+ const stamped: RatingEvent[] = events.map((e) => ({ ...e, source: rater.name }));
284
+ applyRating(stamped, { taskId: taskIdMiss });
285
+
286
+ expect(readPosterior(m.id)).toEqual({ alpha: 1.0, beta: 1.25 });
287
+ });
288
+
289
+ test("registry: implicit-citation is in SERVER_RATERS and instantiable via MEMORY_RATERS", () => {
290
+ expect(SERVER_RATERS.has("implicit-citation")).toBe(true);
291
+
292
+ const previous = process.env.MEMORY_RATERS;
293
+ process.env.MEMORY_RATERS = "implicit-citation";
294
+ try {
295
+ const raters = getRegisteredRaters();
296
+ expect(raters.map((r) => r.name)).toContain("implicit-citation");
297
+ // Multiplier defaults to 1.0 when MEMORY_RATER_WEIGHTS is unset.
298
+ expect(getRaterWeightMultiplier("implicit-citation")).toBe(1.0);
299
+ } finally {
300
+ if (previous === undefined) delete process.env.MEMORY_RATERS;
301
+ else process.env.MEMORY_RATERS = previous;
302
+ }
303
+ });
304
+ });
@@ -0,0 +1,317 @@
1
+ /**
2
+ * Unit tests for the `runMemoryRater` helper extracted from `src/hooks/hook.ts`
3
+ * in the PR #450 review-feedback amendment. The helper owns the OpenRouter
4
+ * direct-HTTP path: request shape (model, strict `response_format: json_schema`,
5
+ * Authorization header), tolerant JSON parse on the assistant content, schema
6
+ * validation, and the env-driven model override.
7
+ *
8
+ * All tests stub `fetch` — no network calls.
9
+ */
10
+ import { describe, expect, test } from "bun:test";
11
+ import {
12
+ DEFAULT_MEMORY_RATER_MODEL,
13
+ getMemoryRaterModel,
14
+ MEMORY_RATER_JSON_SCHEMA,
15
+ MEMORY_RATER_SCHEMA_NAME,
16
+ runMemoryRater,
17
+ tryParseLooseJson,
18
+ } from "../be/memory/raters/llm-summarizer";
19
+
20
/**
 * Builds a stubbed OpenRouter chat-completions `Response` whose single choice
 * is an assistant message carrying `content`.
 *
 * @param content - Text placed in `choices[0].message.content`.
 * @param init - Optional `ResponseInit` overrides (status, statusText, headers).
 *   Caller-supplied headers are merged with the default rather than replacing
 *   it: `Content-Type: application/json` is kept unless the caller explicitly
 *   sets its own `Content-Type`.
 * @returns A `Response` with a JSON body; status defaults to 200.
 */
function makeOpenRouterResponse(content: string, init: ResponseInit = { status: 200 }): Response {
  // Merge instead of letting `...init` clobber the header block: a caller that
  // passes e.g. a tracing header must still get a JSON-typed response.
  const headers = new Headers(init.headers);
  if (!headers.has("Content-Type")) {
    headers.set("Content-Type", "application/json");
  }

  const payload = JSON.stringify({
    choices: [{ message: { role: "assistant", content } }],
  });

  return new Response(payload, { status: 200, ...init, headers });
}
32
+
33
+ describe("getMemoryRaterModel", () => {
34
+ test("returns the default when MEMORY_RATER_MODEL is unset", () => {
35
+ expect(getMemoryRaterModel({})).toBe(DEFAULT_MEMORY_RATER_MODEL);
36
+ expect(DEFAULT_MEMORY_RATER_MODEL).toBe("google/gemini-3-flash-preview");
37
+ });
38
+
39
+ test("returns the env override when set", () => {
40
+ expect(getMemoryRaterModel({ MEMORY_RATER_MODEL: "anthropic/claude-haiku-4.5" })).toBe(
41
+ "anthropic/claude-haiku-4.5",
42
+ );
43
+ });
44
+
45
+ test("trims whitespace in the env override", () => {
46
+ expect(getMemoryRaterModel({ MEMORY_RATER_MODEL: " openai/gpt-5-mini " })).toBe(
47
+ "openai/gpt-5-mini",
48
+ );
49
+ });
50
+
51
+ test("falls back to the default when env var is empty / whitespace-only", () => {
52
+ expect(getMemoryRaterModel({ MEMORY_RATER_MODEL: "" })).toBe(DEFAULT_MEMORY_RATER_MODEL);
53
+ expect(getMemoryRaterModel({ MEMORY_RATER_MODEL: " " })).toBe(DEFAULT_MEMORY_RATER_MODEL);
54
+ });
55
+
56
+ test("respects process.env when no env arg is provided", () => {
57
+ const prev = process.env.MEMORY_RATER_MODEL;
58
+ process.env.MEMORY_RATER_MODEL = "fake/model-from-process-env";
59
+ try {
60
+ expect(getMemoryRaterModel()).toBe("fake/model-from-process-env");
61
+ } finally {
62
+ if (prev === undefined) delete process.env.MEMORY_RATER_MODEL;
63
+ else process.env.MEMORY_RATER_MODEL = prev;
64
+ }
65
+ });
66
+ });
67
+
68
+ describe("tryParseLooseJson", () => {
69
+ test("strict JSON parses unchanged", () => {
70
+ expect(tryParseLooseJson('{"a":1}')).toEqual({ a: 1 });
71
+ });
72
+
73
+ test("strips ```json fences", () => {
74
+ expect(tryParseLooseJson('```json\n{"a":1}\n```')).toEqual({ a: 1 });
75
+ });
76
+
77
+ test("strips plain ``` fences", () => {
78
+ expect(tryParseLooseJson('```\n{"a":1}\n```')).toEqual({ a: 1 });
79
+ });
80
+
81
+ test("recovers from prose preamble via brace-slice", () => {
82
+ expect(tryParseLooseJson('Here you go: {"a":1}')).toEqual({ a: 1 });
83
+ });
84
+
85
+ test("recovers from preamble + trailing chatter via brace-slice", () => {
86
+ expect(tryParseLooseJson('preamble\n{"a":1}\nthanks')).toEqual({ a: 1 });
87
+ });
88
+
89
+ test("returns null on genuine garbage", () => {
90
+ expect(tryParseLooseJson("not json at all")).toBeNull();
91
+ });
92
+
93
+ test("returns null on broken JSON inside fences", () => {
94
+ expect(tryParseLooseJson("```json\n{broken,}\n```")).toBeNull();
95
+ });
96
+
97
+ test("never throws — even on adversarial input", () => {
98
+ expect(() => tryParseLooseJson("{[}}}}")).not.toThrow();
99
+ expect(() => tryParseLooseJson("```")).not.toThrow();
100
+ expect(() => tryParseLooseJson("")).not.toThrow();
101
+ });
102
+ });
103
+
104
+ describe("runMemoryRater — request shape", () => {
105
+ test("POSTs to OpenRouter chat-completions with the right model, strict json_schema response_format, and Authorization header", async () => {
106
+ let capturedUrl: string | URL | Request | undefined;
107
+ let capturedInit: RequestInit | undefined;
108
+ const fakeFetch: typeof fetch = async (url, init) => {
109
+ capturedUrl = url;
110
+ capturedInit = init;
111
+ return makeOpenRouterResponse(JSON.stringify({ summary: "ok", ratings: [] }));
112
+ };
113
+
114
+ const result = await runMemoryRater({
115
+ prompt: "test prompt",
116
+ apiKey: "test-api-key-123",
117
+ fetchImpl: fakeFetch,
118
+ });
119
+
120
+ expect(result.ok).toBe(true);
121
+ if (!result.ok) return;
122
+
123
+ expect(String(capturedUrl)).toBe("https://openrouter.ai/api/v1/chat/completions");
124
+ expect(capturedInit?.method).toBe("POST");
125
+
126
+ const headers = capturedInit?.headers as Record<string, string>;
127
+ expect(headers["Content-Type"]).toBe("application/json");
128
+ expect(headers.Authorization).toBe("Bearer test-api-key-123");
129
+
130
+ const body = JSON.parse(String(capturedInit?.body));
131
+ expect(body.model).toBe(DEFAULT_MEMORY_RATER_MODEL);
132
+ expect(body.messages).toEqual([{ role: "user", content: "test prompt" }]);
133
+
134
+ // OpenRouter strict json_schema mode — assert the wrapper shape.
135
+ expect(body.response_format.type).toBe("json_schema");
136
+ expect(body.response_format.json_schema.name).toBe(MEMORY_RATER_SCHEMA_NAME);
137
+ expect(body.response_format.json_schema.strict).toBe(true);
138
+ // Schema is the canonical one derived from SummaryWithRatingsSchema.
139
+ expect(body.response_format.json_schema.schema).toEqual(MEMORY_RATER_JSON_SCHEMA);
140
+ });
141
+
142
+ test("MEMORY_RATER_JSON_SCHEMA reflects SummaryWithRatingsSchema (key shape only)", () => {
143
+ // Don't assert exact JSON Schema bytes — Zod's emitter can change with
144
+ // version bumps. Lock down the contract that matters for the OpenRouter
145
+ // call: top-level keys, required fields, additionalProperties: false,
146
+ // and the per-rating shape.
147
+ expect(MEMORY_RATER_JSON_SCHEMA.type).toBe("object");
148
+ expect(MEMORY_RATER_JSON_SCHEMA.additionalProperties).toBe(false);
149
+ expect(Array.isArray(MEMORY_RATER_JSON_SCHEMA.required)).toBe(true);
150
+ expect(MEMORY_RATER_JSON_SCHEMA.required as string[]).toEqual(
151
+ expect.arrayContaining(["summary", "ratings"]),
152
+ );
153
+ const props = MEMORY_RATER_JSON_SCHEMA.properties as Record<string, Record<string, unknown>>;
154
+ expect(props.summary.type).toBe("string");
155
+ expect(props.ratings.type).toBe("array");
156
+ const items = props.ratings.items as Record<string, unknown>;
157
+ expect(items.type).toBe("object");
158
+ expect(items.additionalProperties).toBe(false);
159
+ const itemProps = items.properties as Record<string, Record<string, unknown>>;
160
+ expect(itemProps.id.type).toBe("string");
161
+ expect(itemProps.score.type).toBe("number");
162
+ expect(itemProps.score.minimum).toBe(0);
163
+ expect(itemProps.score.maximum).toBe(1);
164
+ expect(itemProps.reasoning.type).toBe("string");
165
+ // referencesSource is optional → present in properties but not required.
166
+ expect(itemProps.referencesSource.type).toBe("string");
167
+ expect(items.required as string[]).toEqual(
168
+ expect.arrayContaining(["id", "score", "reasoning"]),
169
+ );
170
+ expect((items.required as string[]).includes("referencesSource")).toBe(false);
171
+ });
172
+
173
+ test("schema does NOT carry a $schema metadata key (OpenRouter rejects extras at the root)", () => {
174
+ expect("$schema" in MEMORY_RATER_JSON_SCHEMA).toBe(false);
175
+ });
176
+
177
+ test("explicit `model` opt overrides the env default", async () => {
178
+ let capturedBody: { model?: string } = {};
179
+ const fakeFetch: typeof fetch = async (_url, init) => {
180
+ capturedBody = JSON.parse(String(init?.body));
181
+ return makeOpenRouterResponse(JSON.stringify({ summary: "x", ratings: [] }));
182
+ };
183
+
184
+ const result = await runMemoryRater({
185
+ prompt: "p",
186
+ apiKey: "k",
187
+ model: "anthropic/claude-haiku-4.5",
188
+ fetchImpl: fakeFetch,
189
+ });
190
+ expect(result.ok).toBe(true);
191
+ if (result.ok) expect(result.model).toBe("anthropic/claude-haiku-4.5");
192
+ expect(capturedBody.model).toBe("anthropic/claude-haiku-4.5");
193
+ });
194
+
195
+ test("MEMORY_RATER_MODEL env var changes the model when no opt is passed", async () => {
196
+ const prev = process.env.MEMORY_RATER_MODEL;
197
+ process.env.MEMORY_RATER_MODEL = "openai/gpt-5-mini";
198
+ try {
199
+ let capturedBody: { model?: string } = {};
200
+ const fakeFetch: typeof fetch = async (_url, init) => {
201
+ capturedBody = JSON.parse(String(init?.body));
202
+ return makeOpenRouterResponse(JSON.stringify({ summary: "x", ratings: [] }));
203
+ };
204
+ const result = await runMemoryRater({
205
+ prompt: "p",
206
+ apiKey: "k",
207
+ fetchImpl: fakeFetch,
208
+ });
209
+ expect(result.ok).toBe(true);
210
+ if (result.ok) expect(result.model).toBe("openai/gpt-5-mini");
211
+ expect(capturedBody.model).toBe("openai/gpt-5-mini");
212
+ } finally {
213
+ if (prev === undefined) delete process.env.MEMORY_RATER_MODEL;
214
+ else process.env.MEMORY_RATER_MODEL = prev;
215
+ }
216
+ });
217
+ });
218
+
219
+ describe("runMemoryRater — response handling", () => {
220
+ test("happy path — strict JSON content parses + validates", async () => {
221
+ const fakeFetch: typeof fetch = async () =>
222
+ makeOpenRouterResponse(
223
+ JSON.stringify({
224
+ summary: "found two patterns",
225
+ ratings: [{ id: "mem-A", score: 0.9, reasoning: "directly answered" }],
226
+ }),
227
+ );
228
+
229
+ const result = await runMemoryRater({ prompt: "p", apiKey: "k", fetchImpl: fakeFetch });
230
+ expect(result.ok).toBe(true);
231
+ if (!result.ok) return;
232
+ expect(result.data.summary).toBe("found two patterns");
233
+ expect(result.data.ratings).toHaveLength(1);
234
+ expect(result.data.ratings[0]!.score).toBeCloseTo(0.9, 6);
235
+ });
236
+
237
+ test("tolerant parser recovers from ```json fences (PR #447 regression)", async () => {
238
+ const inner = JSON.stringify({
239
+ summary: "fenced summary",
240
+ ratings: [{ id: "m", score: 0.7, reasoning: "useful" }],
241
+ });
242
+ const fakeFetch: typeof fetch = async () =>
243
+ makeOpenRouterResponse(`\`\`\`json\n${inner}\n\`\`\``);
244
+
245
+ const result = await runMemoryRater({ prompt: "p", apiKey: "k", fetchImpl: fakeFetch });
246
+ expect(result.ok).toBe(true);
247
+ if (!result.ok) return;
248
+ expect(result.data.summary).toBe("fenced summary");
249
+ expect(result.data.ratings[0]!.score).toBeCloseTo(0.7, 6);
250
+ });
251
+
252
+ test("tolerant parser recovers from prose preamble (PR #447 regression)", async () => {
253
+ const inner = JSON.stringify({
254
+ summary: "preambled summary",
255
+ ratings: [{ id: "m", score: 0, reasoning: "irrelevant" }],
256
+ });
257
+ const fakeFetch: typeof fetch = async () =>
258
+ makeOpenRouterResponse(`Here is the JSON:\n\n${inner}`);
259
+
260
+ const result = await runMemoryRater({ prompt: "p", apiKey: "k", fetchImpl: fakeFetch });
261
+ expect(result.ok).toBe(true);
262
+ if (!result.ok) return;
263
+ expect(result.data.summary).toBe("preambled summary");
264
+ });
265
+
266
+ test("schema-invalid content returns ok:false / reason:'schema'", async () => {
267
+ // score = 5 violates the [0, 1] range in SummaryWithRatingsSchema.
268
+ const fakeFetch: typeof fetch = async () =>
269
+ makeOpenRouterResponse(
270
+ JSON.stringify({
271
+ summary: "x",
272
+ ratings: [{ id: "m", score: 5, reasoning: "bogus" }],
273
+ }),
274
+ );
275
+ const result = await runMemoryRater({ prompt: "p", apiKey: "k", fetchImpl: fakeFetch });
276
+ expect(result.ok).toBe(false);
277
+ if (!result.ok) expect(result.reason).toBe("schema");
278
+ });
279
+
280
+ test("genuinely garbage content returns ok:false / reason:'parse'", async () => {
281
+ const fakeFetch: typeof fetch = async () =>
282
+ makeOpenRouterResponse("totally not JSON at all, just words");
283
+ const result = await runMemoryRater({ prompt: "p", apiKey: "k", fetchImpl: fakeFetch });
284
+ expect(result.ok).toBe(false);
285
+ if (!result.ok) expect(result.reason).toBe("parse");
286
+ });
287
+
288
+ test("HTTP 5xx returns ok:false / reason:'http_error' with status", async () => {
289
+ const fakeFetch: typeof fetch = async () => new Response("upstream blew up", { status: 502 });
290
+ const result = await runMemoryRater({ prompt: "p", apiKey: "k", fetchImpl: fakeFetch });
291
+ expect(result.ok).toBe(false);
292
+ if (!result.ok) {
293
+ expect(result.reason).toBe("http_error");
294
+ expect(result.status).toBe(502);
295
+ }
296
+ });
297
+
298
+ test("transport failure returns ok:false / reason:'transport'", async () => {
299
+ const fakeFetch: typeof fetch = async () => {
300
+ throw new Error("ECONNREFUSED");
301
+ };
302
+ const result = await runMemoryRater({ prompt: "p", apiKey: "k", fetchImpl: fakeFetch });
303
+ expect(result.ok).toBe(false);
304
+ if (!result.ok) expect(result.reason).toBe("transport");
305
+ });
306
+
307
+ test("missing choices[0].message.content returns ok:false / reason:'empty_content'", async () => {
308
+ const fakeFetch: typeof fetch = async () =>
309
+ new Response(JSON.stringify({ choices: [] }), {
310
+ status: 200,
311
+ headers: { "Content-Type": "application/json" },
312
+ });
313
+ const result = await runMemoryRater({ prompt: "p", apiKey: "k", fetchImpl: fakeFetch });
314
+ expect(result.ok).toBe(false);
315
+ if (!result.ok) expect(result.reason).toBe("empty_content");
316
+ });
317
+ });