@desplega.ai/agent-swarm 1.75.0 → 1.76.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/README.md +1 -1
  2. package/openapi.json +973 -36
  3. package/package.json +2 -2
  4. package/src/be/db.ts +527 -9
  5. package/src/be/memory/raters/llm-summarizer.ts +218 -0
  6. package/src/be/memory/raters/llm.ts +56 -75
  7. package/src/be/memory/retrieval-store.ts +21 -0
  8. package/src/be/migrations/054_agent_harness_provider.sql +21 -0
  9. package/src/be/migrations/055_agent_cred_status.sql +15 -0
  10. package/src/be/migrations/056_drop_agent_tasks_source_check.sql +139 -0
  11. package/src/be/migrations/057_inbox_item_state.sql +27 -0
  12. package/src/be/migrations/058_task_templates.sql +31 -0
  13. package/src/be/swarm-config-guard.ts +24 -0
  14. package/src/commands/credential-wait.ts +1 -1
  15. package/src/commands/provider-credentials.ts +434 -0
  16. package/src/commands/runner.ts +229 -42
  17. package/src/hooks/hook.ts +115 -95
  18. package/src/http/agents.ts +82 -2
  19. package/src/http/config.ts +11 -1
  20. package/src/http/inbox-state.ts +89 -0
  21. package/src/http/index.ts +10 -0
  22. package/src/http/sessions.ts +86 -0
  23. package/src/http/status.ts +665 -0
  24. package/src/http/task-templates.ts +51 -0
  25. package/src/http/tasks.ts +85 -5
  26. package/src/http/users.ts +134 -0
  27. package/src/providers/claude-adapter.ts +5 -0
  28. package/src/providers/codex-adapter.ts +1 -1
  29. package/src/providers/index.ts +1 -1
  30. package/src/slack/handlers.ts +0 -1
  31. package/src/tests/agents-harness-provider.test.ts +333 -0
  32. package/src/tests/credential-check.test.ts +32 -1
  33. package/src/tests/credential-status-api.test.ts +42 -0
  34. package/src/tests/harness-provider-resolution.test.ts +242 -0
  35. package/src/tests/jira-sync.test.ts +1 -1
  36. package/src/tests/memory-rater-llm-summarizer.test.ts +317 -0
  37. package/src/tests/memory-rater-llm.test.ts +265 -107
  38. package/src/tests/migration-runner-regressions.test.ts +17 -2
  39. package/src/tests/sessions.test.ts +141 -0
  40. package/src/tests/status.test.ts +843 -0
  41. package/src/tests/stop-hook-task-resolution.test.ts +98 -0
  42. package/src/tests/template-recommendations.test.ts +148 -0
  43. package/src/tests/use-dismissible-card.test.ts +140 -0
  44. package/src/tools/swarm-config/set-config.ts +17 -1
  45. package/src/types.ts +117 -0
  46. package/src/utils/harness-provider.ts +32 -0
  47. package/tsconfig.json +0 -2
  48. package/src/providers/credentials.ts +0 -74
@@ -8,9 +8,10 @@
8
8
  * mapping, prompt construction.
9
9
  * 2. `LlmRater.rate(ctx)` per-memory path with `MockLlmRaterClient`.
10
10
  * 3. HTTP integration: spawn the API server against an isolated SQLite
11
- * file, simulate the hook's piggyback flow (mock `claude -p` by feeding
12
- * stdout directly into `parseSummaryWithRatings`), and assert
13
- * `agent_memory.alpha/beta` move + `memory_rating` rows are written.
11
+ * file, simulate the hook's piggyback flow (`generateObject` is mocked
12
+ * by feeding the parsed object directly into `buildRatingsFromLlm`),
13
+ * and assert `agent_memory.alpha/beta` move + `memory_rating` rows are
14
+ * written.
14
15
  * 4. Negative path: `MEMORY_RATERS` unset → no `/api/memory/rate` call.
15
16
  */
16
17
  import { afterAll, beforeAll, beforeEach, describe, expect, test } from "bun:test";
@@ -22,13 +23,13 @@ import { SqliteMemoryStore } from "../be/memory/providers/sqlite-store";
22
23
  import {
23
24
  buildRatingsFromLlm,
24
25
  buildSummaryWithRatingsPrompt,
25
- extractSummaryFromClaudeStdout,
26
+ dedupeRetrievalsForRater,
26
27
  fetchRetrievalsForTask,
27
28
  isLlmRaterEnabled,
28
29
  LLM_RATER_WEIGHT,
29
30
  LlmRater,
30
- parseSummaryWithRatings,
31
31
  postRatings,
32
+ type RetrievalRow,
32
33
  SummaryWithRatingsSchema,
33
34
  } from "../be/memory/raters/llm";
34
35
  import { getRegisteredRaters, SERVER_RATERS } from "../be/memory/raters/registry";
@@ -210,108 +211,153 @@ describe("buildSummaryWithRatingsPrompt", () => {
210
211
  });
211
212
  });
212
213
 
213
- describe("parseSummaryWithRatings", () => {
214
- test("parses a well-formed claude -p envelope (inner JSON as string)", () => {
215
- const inner = JSON.stringify({
216
- summary: "S",
217
- ratings: [{ id: "m", score: 0.5, reasoning: "ok" }],
218
- });
219
- const envelope = JSON.stringify({ result: inner });
220
- const out = parseSummaryWithRatings(envelope);
221
- expect(out).not.toBeNull();
222
- expect(out?.summary).toBe("S");
223
- expect(out?.ratings).toHaveLength(1);
224
- });
214
+ describe("dedupeRetrievalsForRater", () => {
215
+ // Regression: the LLM rater audit (post-PR #450) found scheduled-task self-
216
+ // similarity inflated alpha posteriors 5x in one rater pass — the Claude
217
+ // Code Changelog Monitor cron surfaced 5 memories from prior hourly runs
218
+ // and got each rated +1.0. Dedup keys on `scheduleId` so only memories
219
+ // from the same scheduled job collapse; distinct one-shot tasks pass
220
+ // through even when their truncated 80-char names collide.
221
+
222
+ test("happy path: 5 cron memories sharing scheduleId + 1 distinct → 2 rows", () => {
223
+ const cronName = "Task: Claude Code Changelog Monitor — check for new entries";
224
+ const cronScheduleId = "sched-claude-code-changelog";
225
+ const rows: RetrievalRow[] = [
226
+ // Newest cron run first (API returns DESC by retrievedAt).
227
+ {
228
+ id: "cron-5",
229
+ name: cronName,
230
+ content: "run 5",
231
+ scheduleId: cronScheduleId,
232
+ retrievedAt: "2026-05-08T05:00:00Z",
233
+ },
234
+ {
235
+ id: "cron-4",
236
+ name: cronName,
237
+ content: "run 4",
238
+ scheduleId: cronScheduleId,
239
+ retrievedAt: "2026-05-08T04:00:00Z",
240
+ },
241
+ {
242
+ id: "cron-3",
243
+ name: cronName,
244
+ content: "run 3",
245
+ scheduleId: cronScheduleId,
246
+ retrievedAt: "2026-05-08T03:00:00Z",
247
+ },
248
+ {
249
+ id: "cron-2",
250
+ name: cronName,
251
+ content: "run 2",
252
+ scheduleId: cronScheduleId,
253
+ retrievedAt: "2026-05-08T02:00:00Z",
254
+ },
255
+ {
256
+ id: "cron-1",
257
+ name: cronName,
258
+ content: "run 1",
259
+ scheduleId: cronScheduleId,
260
+ retrievedAt: "2026-05-08T01:00:00Z",
261
+ },
262
+ // Different one-shot task — null scheduleId, must pass through.
263
+ {
264
+ id: "distinct",
265
+ name: "Task: Refactor MCP tool list",
266
+ content: "x",
267
+ scheduleId: null,
268
+ retrievedAt: "2026-05-07T12:00:00Z",
269
+ },
270
+ ];
225
271
 
226
- test("parses an envelope where `result` is an object (not stringified)", () => {
227
- const envelope = JSON.stringify({
228
- result: { summary: "S", ratings: [{ id: "m", score: 1, reasoning: "yes" }] },
229
- });
230
- const out = parseSummaryWithRatings(envelope);
231
- expect(out).not.toBeNull();
232
- if (!out) return;
233
- expect(out.ratings[0]!.score).toBe(1);
234
- });
272
+ const out = dedupeRetrievalsForRater(rows);
235
273
 
236
- test("returns null when envelope is not JSON", () => {
237
- expect(parseSummaryWithRatings("not json")).toBeNull();
274
+ expect(out).toHaveLength(2);
275
+ // First-seen wins → freshest cron run is the representative.
276
+ expect(out.map((r) => r.id)).toEqual(["cron-5", "distinct"]);
238
277
  });
239
278
 
240
- test("returns null when inner is not JSON", () => {
241
- const envelope = JSON.stringify({ result: "this is not json either" });
242
- expect(parseSummaryWithRatings(envelope)).toBeNull();
279
+ test("two distinct one-shot tasks sharing the truncated 80-char name prefix → both kept", () => {
280
+ // Reviewer's flagged false-positive: keying on the `Task: ${task.task.slice(0, 80)}` name
281
+ // collapses two distinct tasks whose first 80 chars happen to match. With
282
+ // scheduleId-keyed dedup, both have `null` scheduleId and pass through.
283
+ const sharedPrefix = `Task: ${"x".repeat(80)}`;
284
+ const rows: RetrievalRow[] = [
285
+ {
286
+ id: "task-a",
287
+ name: sharedPrefix,
288
+ content: `Task: ${"x".repeat(80)} unique-suffix-A\n\nOutput:\n…`,
289
+ scheduleId: null,
290
+ retrievedAt: "2026-05-08T05:00:00Z",
291
+ },
292
+ {
293
+ id: "task-b",
294
+ name: sharedPrefix,
295
+ content: `Task: ${"x".repeat(80)} unique-suffix-B\n\nOutput:\n…`,
296
+ scheduleId: null,
297
+ retrievedAt: "2026-05-08T04:00:00Z",
298
+ },
299
+ ];
300
+
301
+ const out = dedupeRetrievalsForRater(rows);
302
+
303
+ expect(out).toHaveLength(2);
304
+ expect(out.map((r) => r.id)).toEqual(["task-a", "task-b"]);
243
305
  });
244
306
 
245
- test("returns null when inner fails schema (out-of-range score)", () => {
246
- const inner = JSON.stringify({
247
- summary: "S",
248
- ratings: [{ id: "m", score: 5, reasoning: "bogus" }],
249
- });
250
- const envelope = JSON.stringify({ result: inner });
251
- expect(parseSummaryWithRatings(envelope)).toBeNull();
307
+ test("Task: vs Session: with the same prefix → both kept (different memory types)", () => {
308
+ // Both names carry identical text after their type prefix; both have
309
+ // null scheduleId (one-shot work). Must pass through.
310
+ const sharedSuffix = "Refactor MCP tool list to use deferred discovery";
311
+ const rows: RetrievalRow[] = [
312
+ {
313
+ id: "task",
314
+ name: `Task: ${sharedSuffix}`,
315
+ content: "task body",
316
+ source: "task_completion",
317
+ scheduleId: null,
318
+ retrievedAt: "2026-05-08T05:00:00Z",
319
+ },
320
+ {
321
+ id: "session",
322
+ name: `Session: ${sharedSuffix}`,
323
+ content: "session summary",
324
+ source: "session_summary",
325
+ scheduleId: null,
326
+ retrievedAt: "2026-05-08T04:00:00Z",
327
+ },
328
+ ];
329
+
330
+ const out = dedupeRetrievalsForRater(rows);
331
+
332
+ expect(out).toHaveLength(2);
333
+ expect(out.map((r) => r.id)).toEqual(["task", "session"]);
252
334
  });
253
- });
254
335
 
255
- describe("extractSummaryFromClaudeStdout (hook fallback path)", () => {
256
- // Regression: PR #429 review feedback. When the structured-output piggyback
257
- // returns a valid envelope but the inner ratings fail SummaryWithRatingsSchema,
258
- // the hook MUST index the human-readable `summary` text, not the raw inner
259
- // JSON blob. See src/hooks/hook.ts ~L1148.
260
- test("structured envelope with invalid ratings extracts inner summary string", () => {
261
- const summaryText = "Found a couple of helpful patterns; one was misleading.";
262
- const inner = JSON.stringify({
263
- summary: summaryText,
264
- // Out-of-range score makes SummaryWithRatingsSchema.safeParse fail.
265
- ratings: [{ id: "mem-A", score: 5, reasoning: "bogus" }],
266
- });
267
- const envelope = JSON.stringify({ result: inner });
268
- expect(parseSummaryWithRatings(envelope)).toBeNull();
269
- const out = extractSummaryFromClaudeStdout(envelope);
270
- expect(out).toBe(summaryText);
271
- // Hard guarantee for the indexer: must NOT be raw JSON.
272
- expect(out.startsWith("{")).toBe(false);
273
- expect(out.includes('"ratings"')).toBe(false);
274
- });
275
-
276
- test("structured envelope missing the `ratings` field entirely → extracts summary", () => {
277
- const summaryText = "No retrievals this session.";
278
- const inner = JSON.stringify({ summary: summaryText });
279
- const envelope = JSON.stringify({ result: inner });
280
- const out = extractSummaryFromClaudeStdout(envelope);
281
- expect(out).toBe(summaryText);
282
- });
283
-
284
- test("structured envelope with non-string summary field → falls through to inner string", () => {
285
- // Defensive: if `summary` itself is malformed, we still don't crash; the
286
- // best-effort fallback is to return the inner JSON as a string. The
287
- // length/keyword heuristics in the hook will likely skip indexing.
288
- const inner = JSON.stringify({ summary: 42, ratings: [] });
289
- const envelope = JSON.stringify({ result: inner });
290
- const out = extractSummaryFromClaudeStdout(envelope);
291
- expect(out).toBe(inner);
292
- });
293
-
294
- test("unstructured envelope with plain text result → returns the text unchanged", () => {
295
- const text = "- Discovered that the API requires Bearer prefix.\n- No other learnings.";
296
- const envelope = JSON.stringify({ result: text });
297
- expect(extractSummaryFromClaudeStdout(envelope)).toBe(text);
298
- });
299
-
300
- test("envelope.result is an object with a string summary field → extracts it", () => {
301
- const envelope = JSON.stringify({
302
- result: { summary: "object form", ratings: [] },
303
- });
304
- expect(extractSummaryFromClaudeStdout(envelope)).toBe("object form");
336
+ test("two different scheduled jobs surface in the same set → both representatives kept", () => {
337
+ const rows: RetrievalRow[] = [
338
+ { id: "j1-r2", name: "Task: Job One", content: "", scheduleId: "sched-1" },
339
+ { id: "j1-r1", name: "Task: Job One", content: "", scheduleId: "sched-1" },
340
+ { id: "j2-r2", name: "Task: Job Two", content: "", scheduleId: "sched-2" },
341
+ { id: "j2-r1", name: "Task: Job Two", content: "", scheduleId: "sched-2" },
342
+ ];
343
+
344
+ const out = dedupeRetrievalsForRater(rows);
345
+
346
+ expect(out).toHaveLength(2);
347
+ expect(out.map((r) => r.id)).toEqual(["j1-r2", "j2-r2"]);
305
348
  });
306
349
 
307
- test("envelope is not JSON returns the raw stdout", () => {
308
- const stdout = "totally not json";
309
- expect(extractSummaryFromClaudeStdout(stdout)).toBe(stdout);
350
+ test("rows without scheduleId pass through unchanged (manual / file_index memories)", () => {
351
+ const rows: RetrievalRow[] = [
352
+ { id: "m1", name: "Manual note", content: "", source: "manual" },
353
+ { id: "m2", name: "Manual note", content: "", source: "manual" },
354
+ { id: "m3", name: "Indexed file", content: "", source: "file_index", scheduleId: null },
355
+ ];
356
+ expect(dedupeRetrievalsForRater(rows)).toEqual(rows);
310
357
  });
311
358
 
312
- test("envelope is JSON but lacks `result` field returns the raw stdout", () => {
313
- const stdout = JSON.stringify({ other: "field" });
314
- expect(extractSummaryFromClaudeStdout(stdout)).toBe(stdout);
359
+ test("empty inputempty output", () => {
360
+ expect(dedupeRetrievalsForRater([])).toEqual([]);
315
361
  });
316
362
  });
317
363
 
@@ -644,7 +690,7 @@ describe("HTTP integration: hook-piggyback dry-run", () => {
644
690
  expect(rows).toEqual([]);
645
691
  });
646
692
 
647
- test("postRatings → applies events; alpha/beta posteriors move per mocked score", async () => {
693
+ test("postRatings → applies events; alpha/beta posteriors move per mocked generateObject result", async () => {
648
694
  const useful = makeMemory("piggyback-useful");
649
695
  const misleading = makeMemory("piggyback-misleading");
650
696
  const neutral = makeMemory("piggyback-neutral");
@@ -654,7 +700,10 @@ describe("HTTP integration: hook-piggyback dry-run", () => {
654
700
  insertRetrieval(taskA, misleading.id);
655
701
  insertRetrieval(taskA, neutral.id);
656
702
 
657
- // Simulate hook flow: fetch retrievals, mock the LLM stdout, parse, POST.
703
+ // Simulate hook flow: fetch retrievals, run schema validation against a
704
+ // mocked `generateObject` result (object — not stringified envelope —
705
+ // because the AI SDK returns a parsed/validated object directly), then
706
+ // POST.
658
707
  const retrievals = await fetchRetrievalsForTask({
659
708
  apiUrl: BASE,
660
709
  apiKey: API_KEY,
@@ -663,20 +712,22 @@ describe("HTTP integration: hook-piggyback dry-run", () => {
663
712
  });
664
713
  expect(retrievals).toHaveLength(3);
665
714
 
666
- // Mocked claude -p stdout — the same shape parseSummaryWithRatings expects.
667
- const mockedSummaryJson = JSON.stringify({
715
+ const mockedGenerateObjectResult = {
668
716
  summary: "Found a couple of helpful patterns; one memory was misleading.",
669
717
  ratings: [
670
718
  { id: useful.id, score: 1, reasoning: "directly answered the question" },
671
719
  { id: misleading.id, score: 0, reasoning: "this memory contradicted the docs" },
672
720
  { id: neutral.id, score: 0.5, reasoning: "tangential but interesting" },
673
721
  ],
674
- });
675
- const mockedClaudeStdout = JSON.stringify({ result: mockedSummaryJson });
676
- const parsed = parseSummaryWithRatings(mockedClaudeStdout);
677
- expect(parsed).not.toBeNull();
678
-
679
- const events = buildRatingsFromLlm(parsed!.ratings, retrievals);
722
+ };
723
+ // The AI SDK's `generateObject` validates against the Zod schema before
724
+ // returning; mirror that contract here so the test fails fast if the
725
+ // schema drifts.
726
+ const parsed = SummaryWithRatingsSchema.safeParse(mockedGenerateObjectResult);
727
+ expect(parsed.success).toBe(true);
728
+ if (!parsed.success) return;
729
+
730
+ const events = buildRatingsFromLlm(parsed.data.ratings, retrievals);
680
731
  expect(events).toHaveLength(3);
681
732
  for (const e of events) {
682
733
  expect(e.weight).toBe(0.8);
@@ -748,8 +799,8 @@ describe("HTTP integration: hook-piggyback dry-run", () => {
748
799
  delete process.env.MEMORY_RATERS;
749
800
  try {
750
801
  // Mirror the hook's gate: when isLlmRaterEnabled() is false, the hook
751
- // never calls fetchRetrievalsForTask / parseSummaryWithRatings /
752
- // postRatings — it falls back to the existing summary-only path.
802
+ // never calls fetchRetrievalsForTask / generateObject / postRatings —
803
+ // it falls back to the existing summary-only path.
753
804
  let postCalled = false;
754
805
  const fakeFetch: typeof fetch = async () => {
755
806
  postCalled = true;
@@ -803,4 +854,111 @@ describe("HTTP integration: hook-piggyback dry-run", () => {
803
854
  // Posterior unchanged — 400 means nothing was applied.
804
855
  expect(readPosterior(m.id)).toEqual({ alpha: 1.0, beta: 1.0 });
805
856
  });
857
+
858
+ test("OPENROUTER_API_KEY unset → hook is a no-op (no fetch, no index, no rate POST)", async () => {
859
+ const m = makeMemory("piggyback-openrouter-unset");
860
+ insertRetrieval(taskA, m.id);
861
+
862
+ // Mirror the hook's outer gate exactly: when OPENROUTER_API_KEY is unset,
863
+ // the entire summary + rating block must early-return. No call to
864
+ // /api/memory/index, no call to /api/memory/rate, no LLM invocation.
865
+ const prev = process.env.OPENROUTER_API_KEY;
866
+ delete process.env.OPENROUTER_API_KEY;
867
+ try {
868
+ let anyFetchCalled = false;
869
+ const fakeFetch: typeof fetch = async () => {
870
+ anyFetchCalled = true;
871
+ return new Response("{}", { status: 200 });
872
+ };
873
+
874
+ const skip = !process.env.OPENROUTER_API_KEY;
875
+ expect(skip).toBe(true);
876
+
877
+ // The hook block is entirely guarded — no fetch, no postRatings.
878
+ // We never reach fetchRetrievalsForTask or postRatings, so neither is
879
+ // exercised in this branch.
880
+ if (!skip) {
881
+ // Unreachable in this test — kept only to mirror the hook's guarded call.
882
+ await fetchRetrievalsForTask({
883
+ apiUrl: BASE,
884
+ apiKey: API_KEY,
885
+ agentId: agentA,
886
+ taskId: taskA,
887
+ fetchImpl: fakeFetch,
888
+ });
889
+ }
890
+ expect(anyFetchCalled).toBe(false);
891
+ } finally {
892
+ if (prev !== undefined) process.env.OPENROUTER_API_KEY = prev;
893
+ }
894
+
895
+ // No memory_rating rows for taskA, posterior unchanged.
896
+ expect(getRatings(taskA)).toHaveLength(0);
897
+ expect(readPosterior(m.id)).toEqual({ alpha: 1.0, beta: 1.0 });
898
+ });
899
+
900
+ test("happy path: mocked generateObject result → postRatings called with expected events", async () => {
901
+ const useful = makeMemory("happy-useful");
902
+ const misleading = makeMemory("happy-misleading");
903
+
904
+ insertRetrieval(taskB, useful.id);
905
+ insertRetrieval(taskB, misleading.id);
906
+
907
+ const retrievals = await fetchRetrievalsForTask({
908
+ apiUrl: BASE,
909
+ apiKey: API_KEY,
910
+ agentId: agentA,
911
+ taskId: taskB,
912
+ });
913
+ expect(retrievals).toHaveLength(2);
914
+
915
+ // Stand in for `const { object } = await generateObject(...)` — the AI
916
+ // SDK guarantees `object` is already validated against the Zod schema.
917
+ const generateObjectResult: {
918
+ object: { summary: string; ratings: Array<{ id: string; score: number; reasoning: string }> };
919
+ } = {
920
+ object: {
921
+ summary: "Two patterns surfaced; one was misleading.",
922
+ ratings: [
923
+ { id: useful.id, score: 1, reasoning: "directly answered" },
924
+ { id: misleading.id, score: 0, reasoning: "contradicted the docs" },
925
+ ],
926
+ },
927
+ };
928
+
929
+ // Schema gate is implicit in the SDK, but assert here so a future schema
930
+ // change doesn't silently make this test pass on garbage data.
931
+ const validated = SummaryWithRatingsSchema.parse(generateObjectResult.object);
932
+
933
+ const events = buildRatingsFromLlm(validated.ratings, retrievals);
934
+ expect(events).toHaveLength(2);
935
+ const usefulEvent = events.find((e) => e.memoryId === useful.id)!;
936
+ const misleadingEvent = events.find((e) => e.memoryId === misleading.id)!;
937
+ expect(usefulEvent.signal).toBeCloseTo(1, 6);
938
+ expect(misleadingEvent.signal).toBeCloseTo(-1, 6);
939
+ expect(usefulEvent.source).toBe("llm");
940
+ expect(misleadingEvent.source).toBe("llm");
941
+
942
+ // Track that postRatings actually attempts the POST with our events.
943
+ let postedEvents: RatingEvent[] | null = null;
944
+ const trackingFetch: typeof fetch = async (url, init) => {
945
+ if (typeof url === "string" && url.endsWith("/api/memory/rate")) {
946
+ const body = JSON.parse(String(init?.body ?? "{}"));
947
+ postedEvents = body.events;
948
+ }
949
+ return new Response("{}", { status: 200 });
950
+ };
951
+ const r = await postRatings({
952
+ apiUrl: BASE,
953
+ apiKey: API_KEY,
954
+ agentId: agentA,
955
+ taskId: taskB,
956
+ events,
957
+ fetchImpl: trackingFetch,
958
+ });
959
+ expect(r.ok).toBe(true);
960
+ expect(postedEvents).not.toBeNull();
961
+ expect(postedEvents!).toHaveLength(2);
962
+ expect(postedEvents!.map((e) => e.memoryId).sort()).toEqual([useful.id, misleading.id].sort());
963
+ });
806
964
  });
@@ -71,7 +71,12 @@ describe("migration regressions", () => {
71
71
  expect(columns).toContain("setupScript");
72
72
  });
73
73
 
74
- test("fresh DB preserves source CHECK constraint on agent_tasks", () => {
74
+ test("fresh DB drops source CHECK constraint on agent_tasks (Zod is the gate)", () => {
75
+ // Migration 056 removes the SQL CHECK on agent_tasks.source — the Zod
76
+ // `AgentTaskSourceSchema` in src/types.ts is now the single source of
77
+ // truth for the allowed enum, and is enforced at the HTTP/MCP ingress.
78
+ // Direct SQL inserts no longer fail on unknown sources by design;
79
+ // adding a new source no longer requires a forward-only migration.
75
80
  const database = initDb(FRESH_DB_PATH);
76
81
  const now = new Date().toISOString();
77
82
 
@@ -81,6 +86,16 @@ describe("migration regressions", () => {
81
86
  VALUES (?, ?, ?, ?, ?, ?)`,
82
87
  [crypto.randomUUID(), "invalid source", "pending", "not-valid", now, now],
83
88
  );
84
- }).toThrow();
89
+ }).not.toThrow();
90
+
91
+ // The requestedByUserId FK survives the table-rebuild in migration 056.
92
+ const fkList = database
93
+ .prepare<{ table: string; from: string; to: string }, []>(
94
+ 'SELECT "table" as "table", "from", "to" FROM pragma_foreign_key_list(\'agent_tasks\')',
95
+ )
96
+ .all();
97
+ const requestedByFk = fkList.find((fk) => fk.from === "requestedByUserId");
98
+ expect(requestedByFk?.table).toBe("users");
99
+ expect(requestedByFk?.to).toBe("id");
85
100
  });
86
101
  });
@@ -0,0 +1,141 @@
1
+ import { afterAll, beforeAll, describe, expect, test } from "bun:test";
2
+ import { unlink } from "node:fs/promises";
3
+ import {
4
+ closeDb,
5
+ createAgent,
6
+ createTaskExtended,
7
+ getRootTaskChain,
8
+ initDb,
9
+ listRecentSessions,
10
+ } from "../be/db";
11
+
12
+ const TEST_DB_PATH = "./test-sessions.sqlite";
13
+
14
+ describe("sessions — getRootTaskChain + listRecentSessions", () => {
15
+ beforeAll(async () => {
16
+ for (const suffix of ["", "-wal", "-shm"]) {
17
+ try {
18
+ await unlink(`${TEST_DB_PATH}${suffix}`);
19
+ } catch {}
20
+ }
21
+ initDb(TEST_DB_PATH);
22
+ });
23
+
24
+ afterAll(async () => {
25
+ closeDb();
26
+ for (const suffix of ["", "-wal", "-shm"]) {
27
+ try {
28
+ await unlink(`${TEST_DB_PATH}${suffix}`);
29
+ } catch {}
30
+ }
31
+ });
32
+
33
+ test("empty chain — no rows for non-existent root", () => {
34
+ const chain = getRootTaskChain("nonexistent-root-id");
35
+ expect(chain).toEqual([]);
36
+ });
37
+
38
+ test("single-root chain — chain length 1", () => {
39
+ const agent = createAgent({
40
+ id: "sessions-test-agent-1",
41
+ name: "Sessions Test Agent 1",
42
+ isLead: false,
43
+ status: "idle",
44
+ });
45
+ const root = createTaskExtended("root only", { agentId: agent.id });
46
+
47
+ const chain = getRootTaskChain(root.id);
48
+ expect(chain).toHaveLength(1);
49
+ expect(chain[0].id).toBe(root.id);
50
+ expect(chain[0].parentTaskId).toBeUndefined();
51
+ });
52
+
53
+ test("3-level chain — root → child → grandchild", () => {
54
+ const agent = createAgent({
55
+ id: "sessions-test-agent-2",
56
+ name: "Sessions Test Agent 2",
57
+ isLead: false,
58
+ status: "idle",
59
+ });
60
+ const root = createTaskExtended("root", { agentId: agent.id });
61
+ const child = createTaskExtended("child", {
62
+ agentId: agent.id,
63
+ parentTaskId: root.id,
64
+ });
65
+ const grandchild = createTaskExtended("grandchild", {
66
+ agentId: agent.id,
67
+ parentTaskId: child.id,
68
+ });
69
+
70
+ const chain = getRootTaskChain(root.id);
71
+ expect(chain).toHaveLength(3);
72
+
73
+ // ordered by createdAt — root first, then child, then grandchild
74
+ expect(chain.map((t) => t.id)).toEqual([root.id, child.id, grandchild.id]);
75
+ expect(chain[0].parentTaskId).toBeUndefined();
76
+ expect(chain[1].parentTaskId).toBe(root.id);
77
+ expect(chain[2].parentTaskId).toBe(child.id);
78
+ });
79
+
80
+ test("parallel siblings — root with two children", () => {
81
+ const agent = createAgent({
82
+ id: "sessions-test-agent-3",
83
+ name: "Sessions Test Agent 3",
84
+ isLead: false,
85
+ status: "idle",
86
+ });
87
+ const root = createTaskExtended("parallel root", { agentId: agent.id });
88
+ const sibA = createTaskExtended("sibling A", {
89
+ agentId: agent.id,
90
+ parentTaskId: root.id,
91
+ });
92
+ const sibB = createTaskExtended("sibling B", {
93
+ agentId: agent.id,
94
+ parentTaskId: root.id,
95
+ });
96
+
97
+ const chain = getRootTaskChain(root.id);
98
+ expect(chain).toHaveLength(3);
99
+ expect(chain[0].id).toBe(root.id);
100
+ // siblings appear in createdAt order (sibA before sibB)
101
+ const ids = chain.map((t) => t.id);
102
+ expect(ids.indexOf(sibA.id)).toBeLessThan(ids.indexOf(sibB.id));
103
+ });
104
+
105
+ test("listRecentSessions returns root tasks with chain summary", () => {
106
+ const sessions = listRecentSessions({ limit: 50 });
107
+ // We've created multiple roots above; each non-empty session must surface.
108
+ expect(sessions.length).toBeGreaterThanOrEqual(3);
109
+
110
+ for (const s of sessions) {
111
+ // Root tasks only — never have parentTaskId
112
+ expect(s.root.parentTaskId).toBeUndefined();
113
+ expect(typeof s.chainTaskCount).toBe("number");
114
+ expect(s.chainTaskCount).toBeGreaterThanOrEqual(1);
115
+ expect(typeof s.lastActivityAt).toBe("string");
116
+ expect(typeof s.latestStatus).toBe("string");
117
+ }
118
+
119
+ // The 3-level chain must report chainTaskCount of 3
120
+ const threeLevel = sessions.find((s) => s.root.task === "root");
121
+ expect(threeLevel).toBeDefined();
122
+ expect(threeLevel?.chainTaskCount).toBe(3);
123
+
124
+ // The parallel-root must report chainTaskCount of 3 (root + 2 siblings)
125
+ const parallel = sessions.find((s) => s.root.task === "parallel root");
126
+ expect(parallel).toBeDefined();
127
+ expect(parallel?.chainTaskCount).toBe(3);
128
+
129
+ // The single-root chain must report chainTaskCount of 1
130
+ const single = sessions.find((s) => s.root.task === "root only");
131
+ expect(single).toBeDefined();
132
+ expect(single?.chainTaskCount).toBe(1);
133
+ });
134
+
135
+ test("listRecentSessions ordered by lastActivityAt DESC", () => {
136
+ const sessions = listRecentSessions({ limit: 50 });
137
+ for (let i = 1; i < sessions.length; i++) {
138
+ expect(sessions[i - 1].lastActivityAt >= sessions[i].lastActivityAt).toBe(true);
139
+ }
140
+ });
141
+ });