@desplega.ai/agent-swarm 1.74.4 → 1.76.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. package/README.md +1 -1
  2. package/openapi.json +1264 -46
  3. package/package.json +2 -2
  4. package/src/be/db.ts +563 -9
  5. package/src/be/memory/edges-store.ts +69 -0
  6. package/src/be/memory/providers/sqlite-store.ts +4 -0
  7. package/src/be/memory/raters/explicit-self.ts +22 -0
  8. package/src/be/memory/raters/implicit-citation.ts +44 -0
  9. package/src/be/memory/raters/llm-client.ts +172 -0
  10. package/src/be/memory/raters/llm-summarizer.ts +218 -0
  11. package/src/be/memory/raters/llm.ts +375 -0
  12. package/src/be/memory/raters/noop.ts +14 -0
  13. package/src/be/memory/raters/registry.ts +86 -0
  14. package/src/be/memory/raters/retrieval.ts +88 -0
  15. package/src/be/memory/raters/run-server-raters.ts +97 -0
  16. package/src/be/memory/raters/store.ts +228 -0
  17. package/src/be/memory/raters/types.ts +101 -0
  18. package/src/be/memory/reranker.ts +32 -2
  19. package/src/be/memory/retrieval-store.ts +116 -0
  20. package/src/be/memory/types.ts +3 -0
  21. package/src/be/migrations/051_memory_posteriors_and_retrieval.sql +67 -0
  22. package/src/be/migrations/052_memory_edges.sql +36 -0
  23. package/src/be/migrations/053_agent_waiting_for_credentials_status.sql +61 -0
  24. package/src/be/migrations/054_agent_harness_provider.sql +21 -0
  25. package/src/be/migrations/055_agent_cred_status.sql +15 -0
  26. package/src/be/migrations/056_drop_agent_tasks_source_check.sql +139 -0
  27. package/src/be/migrations/057_inbox_item_state.sql +27 -0
  28. package/src/be/migrations/058_task_templates.sql +31 -0
  29. package/src/be/swarm-config-guard.ts +24 -0
  30. package/src/commands/credential-wait.ts +186 -0
  31. package/src/commands/provider-credentials.ts +434 -0
  32. package/src/commands/runner.ts +253 -21
  33. package/src/hooks/hook.ts +143 -66
  34. package/src/http/agents.ts +191 -1
  35. package/src/http/config.ts +11 -1
  36. package/src/http/core.ts +5 -0
  37. package/src/http/inbox-state.ts +89 -0
  38. package/src/http/index.ts +10 -0
  39. package/src/http/memory.ts +230 -1
  40. package/src/http/sessions.ts +86 -0
  41. package/src/http/status.ts +665 -0
  42. package/src/http/task-templates.ts +51 -0
  43. package/src/http/tasks.ts +85 -5
  44. package/src/http/users.ts +134 -0
  45. package/src/prompts/memories.ts +62 -0
  46. package/src/providers/claude-adapter.ts +22 -0
  47. package/src/providers/claude-managed-adapter.ts +24 -0
  48. package/src/providers/codex-adapter.ts +43 -1
  49. package/src/providers/devin-adapter.ts +18 -0
  50. package/src/providers/index.ts +7 -0
  51. package/src/providers/opencode-adapter.ts +60 -0
  52. package/src/providers/pi-mono-adapter.ts +71 -0
  53. package/src/providers/types.ts +34 -0
  54. package/src/server.ts +2 -0
  55. package/src/slack/handlers.ts +0 -1
  56. package/src/tests/agents-harness-provider.test.ts +333 -0
  57. package/src/tests/credential-check.test.ts +367 -0
  58. package/src/tests/credential-status-api.test.ts +223 -0
  59. package/src/tests/credential-status-routing.test.ts +150 -0
  60. package/src/tests/credential-wait.test.ts +282 -0
  61. package/src/tests/harness-provider-resolution.test.ts +242 -0
  62. package/src/tests/jira-sync.test.ts +1 -1
  63. package/src/tests/memory-edges.test.ts +722 -0
  64. package/src/tests/memory-rate-endpoint.test.ts +330 -0
  65. package/src/tests/memory-rate-tool.test.ts +252 -0
  66. package/src/tests/memory-rater-e2e.test.ts +578 -0
  67. package/src/tests/memory-rater-implicit-citation.test.ts +304 -0
  68. package/src/tests/memory-rater-llm-summarizer.test.ts +317 -0
  69. package/src/tests/memory-rater-llm.test.ts +964 -0
  70. package/src/tests/memory-rater-store.test.ts +249 -0
  71. package/src/tests/memory-reranker.test.ts +161 -2
  72. package/src/tests/migration-runner-regressions.test.ts +17 -2
  73. package/src/tests/mocks/mock-llm-rater-client.ts +35 -0
  74. package/src/tests/run-server-raters.test.ts +291 -0
  75. package/src/tests/sessions.test.ts +141 -0
  76. package/src/tests/status.test.ts +843 -0
  77. package/src/tests/stop-hook-task-resolution.test.ts +98 -0
  78. package/src/tests/template-recommendations.test.ts +148 -0
  79. package/src/tests/tool-annotations.test.ts +2 -2
  80. package/src/tests/use-dismissible-card.test.ts +140 -0
  81. package/src/tools/memory-rate.ts +166 -0
  82. package/src/tools/memory-search.ts +18 -0
  83. package/src/tools/store-progress.ts +37 -0
  84. package/src/tools/swarm-config/set-config.ts +17 -1
  85. package/src/tools/tool-config.ts +1 -0
  86. package/src/types.ts +122 -1
  87. package/src/utils/harness-provider.ts +32 -0
  88. package/tsconfig.json +0 -2
@@ -0,0 +1,98 @@
1
+ /**
2
+ * Stop-hook task-context resolution.
3
+ *
4
+ * Regression test for the silent-drop bug that PR #444's gate trace surfaced: every Stop
5
+ * hook logged `hasTaskId: false` because TASK_FILE on disk had been cleaned up
6
+ * mid-session, so `Bun.file(taskFile).text()` threw and the catch swallowed it.
7
+ * Fix: prefer the AGENT_SWARM_TASK_ID env var (set by `claude-adapter.ts`) and
8
+ * only fall back to the file. See `resolveStopHookTaskContext` in hook.ts.
9
+ */
10
+ import { describe, expect, test } from "bun:test";
11
+ import { unlink } from "node:fs/promises";
12
+ import { resolveStopHookTaskContext } from "../hooks/hook";
13
+
14
+ describe("resolveStopHookTaskContext", () => {
15
+ test("prefers AGENT_SWARM_TASK_ID env var when TASK_FILE is missing on disk", async () => {
16
+ const missingPath = `/tmp/stop-hook-missing-${Date.now()}.json`;
17
+ // Sanity: file must not exist.
18
+ try {
19
+ await unlink(missingPath);
20
+ } catch {}
21
+
22
+ const { taskContext, taskId } = await resolveStopHookTaskContext({
23
+ AGENT_SWARM_TASK_ID: "task-from-env-123",
24
+ TASK_FILE: missingPath,
25
+ });
26
+
27
+ expect(taskId).toBe("task-from-env-123");
28
+ // taskContext stays empty because the file (which carries the human task
29
+ // text) wasn't readable. That's fine — the LLM rater only needs taskId.
30
+ expect(taskContext).toBe("");
31
+ });
32
+
33
+ test("env var alone (no TASK_FILE) still populates taskId", async () => {
34
+ const { taskContext, taskId } = await resolveStopHookTaskContext({
35
+ AGENT_SWARM_TASK_ID: "task-env-only",
36
+ });
37
+ expect(taskId).toBe("task-env-only");
38
+ expect(taskContext).toBe("");
39
+ });
40
+
41
+ test("falls back to TASK_FILE.id when env var unset", async () => {
42
+ const path = `/tmp/stop-hook-file-${Date.now()}.json`;
43
+ await Bun.write(path, JSON.stringify({ id: "task-from-file-456", task: "do the thing" }));
44
+ try {
45
+ const { taskContext, taskId } = await resolveStopHookTaskContext({
46
+ TASK_FILE: path,
47
+ });
48
+ expect(taskId).toBe("task-from-file-456");
49
+ expect(taskContext).toBe("Task: do the thing");
50
+ } finally {
51
+ await unlink(path).catch(() => {});
52
+ }
53
+ });
54
+
55
+ test("env var wins over TASK_FILE.id but file still seeds taskContext", async () => {
56
+ const path = `/tmp/stop-hook-both-${Date.now()}.json`;
57
+ await Bun.write(path, JSON.stringify({ id: "task-from-file", task: "human task text" }));
58
+ try {
59
+ const { taskContext, taskId } = await resolveStopHookTaskContext({
60
+ AGENT_SWARM_TASK_ID: "task-from-env",
61
+ TASK_FILE: path,
62
+ });
63
+ expect(taskId).toBe("task-from-env");
64
+ expect(taskContext).toBe("Task: human task text");
65
+ } finally {
66
+ await unlink(path).catch(() => {});
67
+ }
68
+ });
69
+
70
+ test("missing file with no env var → both undefined/empty (no throw)", async () => {
71
+ const { taskContext, taskId } = await resolveStopHookTaskContext({
72
+ TASK_FILE: `/tmp/stop-hook-nope-${Date.now()}.json`,
73
+ });
74
+ expect(taskId).toBeUndefined();
75
+ expect(taskContext).toBe("");
76
+ });
77
+
78
+ test("no env at all → both undefined/empty", async () => {
79
+ const { taskContext, taskId } = await resolveStopHookTaskContext({});
80
+ expect(taskId).toBeUndefined();
81
+ expect(taskContext).toBe("");
82
+ });
83
+
84
+ test("malformed TASK_FILE JSON does not throw, env var still wins", async () => {
85
+ const path = `/tmp/stop-hook-bad-${Date.now()}.json`;
86
+ await Bun.write(path, "not json {");
87
+ try {
88
+ const { taskContext, taskId } = await resolveStopHookTaskContext({
89
+ AGENT_SWARM_TASK_ID: "task-env-survives",
90
+ TASK_FILE: path,
91
+ });
92
+ expect(taskId).toBe("task-env-survives");
93
+ expect(taskContext).toBe("");
94
+ } finally {
95
+ await unlink(path).catch(() => {});
96
+ }
97
+ });
98
+ });
@@ -0,0 +1,148 @@
1
+ /**
2
+ * Phase 3 — unit tests for `ui/src/lib/template-recommendations.ts`.
3
+ *
4
+ * Lives in `src/tests/` (not under `ui/`) because `ui/` has no test runner
5
+ * configured. The recommendation lib is pure logic with only a `StatusResponse`
6
+ * type import, so the cross-tree relative import works without aliases.
7
+ */
8
+
9
+ import { describe, expect, test } from "bun:test";
10
+ import type { StatusResponse } from "../../ui/src/api/types.ts";
11
+ import {
12
+ type DetectedIntegration,
13
+ detectedFromStatus,
14
+ recommendTemplates,
15
+ topRecommendation,
16
+ } from "../../ui/src/lib/template-recommendations.ts";
17
+
18
+ function makeStatus(overrides: {
19
+ slack?: "unverified" | "configured" | "verified";
20
+ github?: "unverified" | "configured" | "verified";
21
+ linear?: "unverified" | "configured" | "verified";
22
+ jira?: "unverified" | "configured" | "verified";
23
+ }): StatusResponse {
24
+ return {
25
+ identity: {
26
+ name: "Swarm",
27
+ logo_url: null,
28
+ brand_color: null,
29
+ is_cloud: false,
30
+ marketing_url: null,
31
+ hide_cloud_promo: false,
32
+ },
33
+ setup: [
34
+ { id: "harness", label: "Harness", state: "unverified" },
35
+ { id: "slack", label: "Slack", state: overrides.slack ?? "unverified" },
36
+ { id: "github", label: "GitHub", state: overrides.github ?? "unverified" },
37
+ { id: "linear", label: "Linear", state: overrides.linear ?? "unverified" },
38
+ { id: "jira", label: "Jira", state: overrides.jira ?? "unverified" },
39
+ { id: "workers", label: "Workers", state: "unverified" },
40
+ { id: "first_task", label: "First task", state: "unverified" },
41
+ ],
42
+ activity: { agents_online: 0, leads_online: 0, recent_tasks_count: 0 },
43
+ agent_fs: { configured: false, base_url: null },
44
+ health: "broken",
45
+ };
46
+ }
47
+
48
+ describe("recommendTemplates — priority rules", () => {
49
+ test("slack + github → pr-triage", () => {
50
+ const recs = recommendTemplates(new Set<DetectedIntegration>(["slack", "github"]));
51
+ expect(recs[0]?.templateId).toBe("pr-triage");
52
+ });
53
+
54
+ test("linear + github → issue-to-pr", () => {
55
+ const recs = recommendTemplates(new Set<DetectedIntegration>(["linear", "github"]));
56
+ expect(recs[0]?.templateId).toBe("issue-to-pr");
57
+ });
58
+
59
+ test("jira → bug-intake", () => {
60
+ const recs = recommendTemplates(new Set<DetectedIntegration>(["jira"]));
61
+ expect(recs[0]?.templateId).toBe("bug-intake");
62
+ });
63
+
64
+ test("empty set → hello-world fallback", () => {
65
+ const recs = recommendTemplates(new Set<DetectedIntegration>());
66
+ expect(recs).toHaveLength(1);
67
+ expect(recs[0]?.templateId).toBe("hello-world");
68
+ expect(recs[0]?.reason).toMatch(/hello world/i);
69
+ });
70
+
71
+ test("slack alone falls through to hello-world (no PR-triage promo without GitHub)", () => {
72
+ const recs = recommendTemplates(new Set<DetectedIntegration>(["slack"]));
73
+ expect(recs[0]?.templateId).toBe("hello-world");
74
+ });
75
+
76
+ test("github alone falls through to hello-world", () => {
77
+ const recs = recommendTemplates(new Set<DetectedIntegration>(["github"]));
78
+ expect(recs[0]?.templateId).toBe("hello-world");
79
+ });
80
+
81
+ test("linear alone falls through to hello-world", () => {
82
+ const recs = recommendTemplates(new Set<DetectedIntegration>(["linear"]));
83
+ expect(recs[0]?.templateId).toBe("hello-world");
84
+ });
85
+
86
+ test("priority — slack+github+linear matches pr-triage first, also matches issue-to-pr", () => {
87
+ const recs = recommendTemplates(new Set<DetectedIntegration>(["slack", "github", "linear"]));
88
+ // pr-triage comes first because slack+github rule is listed before linear+github.
89
+ expect(recs[0]?.templateId).toBe("pr-triage");
90
+ expect(recs.map((r) => r.templateId)).toContain("issue-to-pr");
91
+ });
92
+
93
+ test("all four detected — all three rule-based recs returned, no fallback", () => {
94
+ const recs = recommendTemplates(
95
+ new Set<DetectedIntegration>(["slack", "github", "linear", "jira"]),
96
+ );
97
+ const ids = recs.map((r) => r.templateId);
98
+ expect(ids).toEqual(["pr-triage", "issue-to-pr", "bug-intake"]);
99
+ expect(ids).not.toContain("hello-world");
100
+ });
101
+ });
102
+
103
+ describe("detectedFromStatus", () => {
104
+ test("verified milestones count as detected", () => {
105
+ const status = makeStatus({ slack: "verified", github: "verified" });
106
+ const detected = detectedFromStatus(status);
107
+ expect(detected.has("slack")).toBe(true);
108
+ expect(detected.has("github")).toBe(true);
109
+ });
110
+
111
+ test("configured milestones count as detected (live-call not required)", () => {
112
+ const status = makeStatus({ slack: "configured", jira: "configured" });
113
+ const detected = detectedFromStatus(status);
114
+ expect(detected.has("slack")).toBe(true);
115
+ expect(detected.has("jira")).toBe(true);
116
+ });
117
+
118
+ test("unverified milestones do NOT count as detected", () => {
119
+ const status = makeStatus({ slack: "unverified", github: "unverified" });
120
+ const detected = detectedFromStatus(status);
121
+ expect(detected.size).toBe(0);
122
+ });
123
+
124
+ test("non-integration milestones (harness, workers, first_task) are excluded", () => {
125
+ const status = makeStatus({});
126
+ // All four integration milestones are unverified by default; harness etc.
127
+ // are also unverified — none should leak into the detected set.
128
+ const detected = detectedFromStatus(status);
129
+ expect(detected.size).toBe(0);
130
+ });
131
+ });
132
+
133
+ describe("topRecommendation — end-to-end from a /status payload", () => {
134
+ test("slack+github verified → pr-triage", () => {
135
+ const status = makeStatus({ slack: "verified", github: "verified" });
136
+ expect(topRecommendation(status).templateId).toBe("pr-triage");
137
+ });
138
+
139
+ test("linear configured + github verified → issue-to-pr", () => {
140
+ const status = makeStatus({ linear: "configured", github: "verified" });
141
+ expect(topRecommendation(status).templateId).toBe("issue-to-pr");
142
+ });
143
+
144
+ test("nothing connected → hello-world", () => {
145
+ const status = makeStatus({});
146
+ expect(topRecommendation(status).templateId).toBe("hello-world");
147
+ });
148
+ });
@@ -179,8 +179,8 @@ describe("Tool Annotations & Classification", () => {
179
179
  expect(overlap).toEqual([]);
180
180
  });
181
181
 
182
- test("CORE_TOOLS contains exactly 14 tools", () => {
183
- expect(CORE_TOOLS.size).toBe(14);
182
+ test("CORE_TOOLS contains exactly 15 tools", () => {
183
+ expect(CORE_TOOLS.size).toBe(15);
184
184
  });
185
185
 
186
186
  test("ALL_TOOLS equals CORE_TOOLS union DEFERRED_TOOLS", () => {
@@ -0,0 +1,140 @@
1
+ /**
2
+ * Phase 4 — pure-logic tests for `ui/src/hooks/use-dismissible-card.ts`.
3
+ *
4
+ * Lives in `src/tests/` (not under `ui/`) because:
5
+ * - `ui/` has no test runner configured (no vitest/jest).
6
+ * - The repo-root `bun test` already wires preload + DB fixtures.
7
+ * - We test the pure `deriveStorageKey()` helper plus localStorage-shape
8
+ * semantics by stubbing `globalThis.localStorage` — no React renderer.
9
+ *
10
+ * Hook semantics covered:
11
+ * - Namespace key derivation (format + uniqueness across apiUrls).
12
+ * - Dismiss / restore round-trip via the underlying localStorage shape.
13
+ * - Namespace isolation between two distinct apiUrls.
14
+ * - Graceful failure when `localStorage` throws.
15
+ *
16
+ * Cross-tab `storage` event handling lives in the React layer and is
17
+ * covered by the qa-use sessions in Success Criteria; pure-logic tests
18
+ * cannot exercise the `addEventListener("storage", …)` wiring meaningfully.
19
+ */
20
+
21
+ import { afterEach, describe, expect, test } from "bun:test";
22
+ // Import the pure helper directly — the parent `use-dismissible-card.ts`
23
+ // pulls in React + the `@/lib/config` alias chain via `useConfig`, which
24
+ // the bun-test runner can't resolve outside Vite.
25
+ import { deriveStorageKey } from "../../ui/src/hooks/use-dismissible-card-key.ts";
26
+
27
+ // Minimal in-memory localStorage shim for the round-trip / failure tests.
28
+ class MemoryStorage {
29
+ private store = new Map<string, string>();
30
+ private throwOnSet = false;
31
+
32
+ setThrowOnSet(value: boolean) {
33
+ this.throwOnSet = value;
34
+ }
35
+ getItem(key: string): string | null {
36
+ return this.store.has(key) ? (this.store.get(key) as string) : null;
37
+ }
38
+ setItem(key: string, value: string): void {
39
+ if (this.throwOnSet) throw new Error("QuotaExceededError (simulated)");
40
+ this.store.set(key, value);
41
+ }
42
+ removeItem(key: string): void {
43
+ this.store.delete(key);
44
+ }
45
+ clear(): void {
46
+ this.store.clear();
47
+ }
48
+ }
49
+
50
+ afterEach(() => {
51
+ // Clean up the global between tests so leakage can't mask bugs.
52
+ // biome-ignore lint/suspicious/noExplicitAny: test-only shim
53
+ delete (globalThis as any).localStorage;
54
+ });
55
+
56
+ describe("deriveStorageKey", () => {
57
+ test("namespaces by apiUrl + cardKey under swarm:v1 prefix", () => {
58
+ expect(deriveStorageKey("http://localhost:3013", "home-welcome")).toBe(
59
+ "swarm:v1:http://localhost:3013:home-welcome",
60
+ );
61
+ });
62
+
63
+ test("two distinct apiUrls produce distinct keys for the same cardKey", () => {
64
+ const a = deriveStorageKey("http://a.local:3013", "home-welcome");
65
+ const b = deriveStorageKey("http://b.local:3013", "home-welcome");
66
+ expect(a).not.toBe(b);
67
+ });
68
+
69
+ test("two distinct cardKeys produce distinct keys for the same apiUrl", () => {
70
+ const a = deriveStorageKey("http://localhost:3013", "home-welcome");
71
+ const b = deriveStorageKey("http://localhost:3013", "setup:row:harness");
72
+ expect(a).not.toBe(b);
73
+ });
74
+
75
+ test("structured cardKey separators (colons) survive the round-trip", () => {
76
+ expect(deriveStorageKey("http://x", "setup:tour-complete")).toBe(
77
+ "swarm:v1:http://x:setup:tour-complete",
78
+ );
79
+ });
80
+ });
81
+
82
+ describe("dismiss / restore round-trip via localStorage shape", () => {
83
+ test("dismiss writes '1' under the namespaced key; restore removes it", () => {
84
+ const storage = new MemoryStorage();
85
+ // biome-ignore lint/suspicious/noExplicitAny: test-only shim
86
+ (globalThis as any).localStorage = storage;
87
+
88
+ const key = deriveStorageKey("http://localhost:3013", "home-welcome");
89
+
90
+ // Initially undismissed.
91
+ expect(storage.getItem(key)).toBeNull();
92
+
93
+ // Simulate dismiss.
94
+ storage.setItem(key, "1");
95
+ expect(storage.getItem(key)).toBe("1");
96
+
97
+ // Simulate restore.
98
+ storage.removeItem(key);
99
+ expect(storage.getItem(key)).toBeNull();
100
+ });
101
+
102
+ test("namespace isolation: dismissing on apiUrl A does not affect apiUrl B", () => {
103
+ const storage = new MemoryStorage();
104
+ // biome-ignore lint/suspicious/noExplicitAny: test-only shim
105
+ (globalThis as any).localStorage = storage;
106
+
107
+ const keyA = deriveStorageKey("http://a.local:3013", "home-welcome");
108
+ const keyB = deriveStorageKey("http://b.local:3013", "home-welcome");
109
+
110
+ storage.setItem(keyA, "1");
111
+
112
+ expect(storage.getItem(keyA)).toBe("1");
113
+ expect(storage.getItem(keyB)).toBeNull();
114
+ });
115
+ });
116
+
117
+ describe("graceful failure when localStorage throws", () => {
118
+ test("setItem throw is swallowed by the hook's try/catch contract", () => {
119
+ const storage = new MemoryStorage();
120
+ storage.setThrowOnSet(true);
121
+ // biome-ignore lint/suspicious/noExplicitAny: test-only shim
122
+ (globalThis as any).localStorage = storage;
123
+
124
+ const key = deriveStorageKey("http://localhost:3013", "home-welcome");
125
+
126
+ // Direct call DOES throw — confirm the test shim is wired up.
127
+ expect(() => storage.setItem(key, "1")).toThrow();
128
+
129
+ // The hook contract is `try { localStorage.setItem(...) } catch {}` —
130
+ // emulate that wrapper and assert no error escapes to the caller.
131
+ const swallow = () => {
132
+ try {
133
+ storage.setItem(key, "1");
134
+ } catch {
135
+ // intentionally swallow
136
+ }
137
+ };
138
+ expect(swallow).not.toThrow();
139
+ });
140
+ });
@@ -0,0 +1,166 @@
1
+ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
+ import * as z from "zod";
3
+ import { REFERENCES_SOURCE_MAX_LENGTH, sanitizeReferencesSource } from "@/be/memory/raters/types";
4
+ import { createToolRegistrar } from "@/tools/utils";
5
+
6
+ /**
7
+ * Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-5.md §1
8
+ * thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-6.md §5
9
+ *
10
+ * Worker-facing MCP tool. Posts a single explicit-self `RatingEvent` to the
11
+ * existing `POST /api/memory/rate` endpoint shipped in step-3 and surfaces
12
+ * server status codes as structured `{ success, message }` output instead of
13
+ * throwing — so an agent that mis-uses the tool gets a clear, recoverable
14
+ * answer rather than a tool-call exception.
15
+ *
16
+ * Step-6 added the optional `referencesSource` field — Q2 free-form contract:
17
+ * ≤512 chars, control-char strip, NUL byte rejection. Convention-only shape
18
+ * `<source>:<identifier>` is documentation, NOT enforcement.
19
+ */
20
+
21
+ const DUPLICATE_MESSAGE =
22
+ "Memory already rated for this task. Use a follow-up memory_rerate tool (coming soon) to override.";
23
+
24
+ const REFERENCES_SOURCE_DESCRIPTION =
25
+ "Optional external source ID this memory references. Free-form string, " +
26
+ 'convention "<source>:<identifier>" (e.g. "github:owner/repo#N", ' +
27
+ '"linear:KEY-N", "customer:<slug>", "slack:<channel>:<ts>", ' +
28
+ '"agentmail:<thread-id>"). Pick any prefix that fits — no closed enum. ' +
29
+ "When present, an edge from this memory to the external source is " +
30
+ "created/updated.";
31
+
32
+ export const registerMemoryRateTool = (server: McpServer) => {
33
+ createToolRegistrar(server)(
34
+ "memory_rate",
35
+ {
36
+ title: "Rate a memory",
37
+ description:
38
+ "Rate a memory you used in the current task. Call this when a " +
39
+ "retrieved memory was clearly useful (or actively misleading) so " +
40
+ "the swarm learns to surface better memories next time.",
41
+ annotations: { destructiveHint: false },
42
+ inputSchema: z.object({
43
+ id: z.string().describe("Memory ID returned by memory_search."),
44
+ useful: z
45
+ .boolean()
46
+ .describe("true = this memory helped solve the task; false = misled or wasted time."),
47
+ note: z
48
+ .string()
49
+ .max(280)
50
+ .optional()
51
+ .describe("Short reason. Captured for telemetry; not surfaced to other agents."),
52
+ referencesSource: z
53
+ .string()
54
+ .min(1)
55
+ .max(REFERENCES_SOURCE_MAX_LENGTH)
56
+ .optional()
57
+ .describe(REFERENCES_SOURCE_DESCRIPTION),
58
+ }),
59
+ outputSchema: z.object({
60
+ success: z.boolean(),
61
+ message: z.string(),
62
+ }),
63
+ },
64
+ async ({ id, useful, note, referencesSource }, requestInfo, _meta) => {
65
+ if (!requestInfo.agentId) {
66
+ const msg = "Agent ID required. Are you registered in the swarm?";
67
+ return {
68
+ content: [{ type: "text", text: msg }],
69
+ structuredContent: { success: false, message: msg },
70
+ };
71
+ }
72
+ if (!requestInfo.sourceTaskId) {
73
+ const msg = "memory_rate must be called from within a task — no source task ID was found.";
74
+ return {
75
+ content: [{ type: "text", text: msg }],
76
+ structuredContent: { success: false, message: msg },
77
+ };
78
+ }
79
+
80
+ let cleanedReferencesSource: string | undefined;
81
+ if (referencesSource !== undefined) {
82
+ const cleaned = sanitizeReferencesSource(referencesSource);
83
+ if (cleaned === null) {
84
+ const msg =
85
+ "referencesSource must not contain NUL bytes or strip to empty after control-char removal.";
86
+ return {
87
+ content: [{ type: "text", text: msg }],
88
+ structuredContent: { success: false, message: msg },
89
+ };
90
+ }
91
+ cleanedReferencesSource = cleaned;
92
+ }
93
+
94
+ const apiUrl = process.env.MCP_BASE_URL || `http://localhost:${process.env.PORT || "3013"}`;
95
+ const apiKey = process.env.API_KEY || "";
96
+
97
+ const event = {
98
+ memoryId: id,
99
+ signal: useful ? 1 : -1,
100
+ weight: 1.0,
101
+ source: "explicit-self" as const,
102
+ reasoning: note ?? "",
103
+ taskId: requestInfo.sourceTaskId,
104
+ ...(cleanedReferencesSource !== undefined
105
+ ? { referencesSource: cleanedReferencesSource }
106
+ : {}),
107
+ };
108
+
109
+ try {
110
+ const response = await fetch(`${apiUrl}/api/memory/rate`, {
111
+ method: "POST",
112
+ headers: {
113
+ "Content-Type": "application/json",
114
+ ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}),
115
+ "X-Agent-ID": requestInfo.agentId,
116
+ },
117
+ body: JSON.stringify({ events: [event] }),
118
+ });
119
+
120
+ if (response.status === 409) {
121
+ return {
122
+ content: [{ type: "text", text: DUPLICATE_MESSAGE }],
123
+ structuredContent: { success: false, message: DUPLICATE_MESSAGE },
124
+ };
125
+ }
126
+
127
+ if (response.status === 400) {
128
+ let serverError = "";
129
+ try {
130
+ const body = (await response.json()) as { error?: string };
131
+ serverError = body?.error ?? "";
132
+ } catch {
133
+ // body wasn't JSON
134
+ }
135
+ const msg = serverError
136
+ ? `Memory rating rejected: ${serverError}. The memory must have been retrieved by this task before it can be rated.`
137
+ : "Memory rating rejected. The memory must have been retrieved by this task before it can be rated.";
138
+ return {
139
+ content: [{ type: "text", text: msg }],
140
+ structuredContent: { success: false, message: msg },
141
+ };
142
+ }
143
+
144
+ if (!response.ok) {
145
+ const msg = `Memory rating failed (HTTP ${response.status}).`;
146
+ return {
147
+ content: [{ type: "text", text: msg }],
148
+ structuredContent: { success: false, message: msg },
149
+ };
150
+ }
151
+
152
+ const successMsg = `Memory ${id} rated as ${useful ? "useful" : "not useful"}.`;
153
+ return {
154
+ content: [{ type: "text", text: successMsg }],
155
+ structuredContent: { success: true, message: successMsg },
156
+ };
157
+ } catch (err) {
158
+ const msg = `Memory rating failed: ${(err as Error).message}`;
159
+ return {
160
+ content: [{ type: "text", text: msg }],
161
+ structuredContent: { success: false, message: msg },
162
+ };
163
+ }
164
+ },
165
+ );
166
+ };
@@ -3,6 +3,7 @@ import * as z from "zod";
3
3
  import { getAgentById } from "@/be/db";
4
4
  import { getEmbeddingProvider, getMemoryStore } from "@/be/memory";
5
5
  import { CANDIDATE_SET_MULTIPLIER } from "@/be/memory/constants";
6
+ import { recordRetrievals } from "@/be/memory/raters/retrieval";
6
7
  import { rerank } from "@/be/memory/reranker";
7
8
  import { createToolRegistrar } from "@/tools/utils";
8
9
  import { AgentMemoryScopeSchema, AgentMemorySourceSchema } from "@/types";
@@ -76,6 +77,23 @@ export const registerMemorySearchTool = (server: McpServer) => {
76
77
  });
77
78
  const ranked = rerank(candidates, { limit });
78
79
 
80
+ // Retrieval bridge — when called inside a task scope, log one
81
+ // `memory_retrieval` row per returned memory so server-side raters
82
+ // (ImplicitCitationRater) can score them at task completion.
83
+ // Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-2.md §3
84
+ if (requestInfo.sourceTaskId) {
85
+ try {
86
+ recordRetrievals(
87
+ requestInfo.sourceTaskId,
88
+ requestInfo.agentId,
89
+ ranked.map((r) => ({ memoryId: r.id, similarity: r.similarity })),
90
+ requestInfo.sessionId,
91
+ );
92
+ } catch (err) {
93
+ console.error("[memory-search] recordRetrievals failed:", (err as Error).message);
94
+ }
95
+ }
96
+
79
97
  const mapped = ranked.map((r) => ({
80
98
  id: r.id,
81
99
  name: r.name,
@@ -9,11 +9,14 @@ import {
9
9
  getAgentById,
10
10
  getDb,
11
11
  getLeadAgent,
12
+ getSessionLogsByTaskId,
12
13
  getTaskById,
13
14
  updateAgentStatusFromCapacity,
14
15
  updateTaskProgress,
15
16
  } from "@/be/db";
16
17
  import { getEmbeddingProvider, getMemoryStore } from "@/be/memory";
18
+ import { getRetrievalsForTask } from "@/be/memory/raters/retrieval";
19
+ import { runServerRaters } from "@/be/memory/raters/run-server-raters";
17
20
  import { resolveTemplate } from "@/prompts/resolver";
18
21
  import { createToolRegistrar } from "@/tools/utils";
19
22
  import { AgentTaskSchema } from "@/types";
@@ -356,6 +359,40 @@ export const registerStoreProgressTool = (server: McpServer) => {
356
359
  // Non-blocking — task completion memory failure should not affect task status
357
360
  }
358
361
  })();
362
+
363
+ // Memory rater v1.5 — fire server-side raters on task completion.
364
+ // Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-2.md §5
365
+ //
366
+ // Read `memory_retrieval` rows for this task + concatenated session_logs
367
+ // and hand both to `runServerRaters`, which iterates the allow-listed
368
+ // server raters (currently just `implicit-citation`), stamps source,
369
+ // applies the configured weight multiplier, and persists via
370
+ // `applyRating`. The orchestration is extracted so it can be unit-tested
371
+ // with stub raters (see `src/tests/run-server-raters.test.ts`).
372
+ //
373
+ // Fire-and-forget: rater failure must NEVER affect task status.
374
+ (async () => {
375
+ try {
376
+ const retrievals = getRetrievalsForTask(taskId);
377
+ if (retrievals.length === 0) return;
378
+
379
+ const retrievedMemoryIds = retrievals.map((r) => r.memoryId);
380
+ const logs = getSessionLogsByTaskId(taskId);
381
+ const evidence = logs.map((l) => l.content).join("\n");
382
+
383
+ await runServerRaters({
384
+ taskId,
385
+ agentId: requestInfo.agentId ?? "",
386
+ retrievedMemoryIds,
387
+ evidence,
388
+ });
389
+ } catch (err) {
390
+ console.error(
391
+ "[store-progress] server-rater fire failed:",
392
+ err instanceof Error ? err.message : String(err),
393
+ );
394
+ }
395
+ })();
359
396
  }
360
397
 
361
398
  // Create follow-up task for the lead when a worker task finishes.
@@ -1,7 +1,11 @@
1
1
  import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
2
  import * as z from "zod";
3
3
  import { maskSecrets, upsertSwarmConfig } from "@/be/db";
4
- import { isReservedConfigKey, reservedKeyError } from "@/be/swarm-config-guard";
4
+ import {
5
+ isReservedConfigKey,
6
+ reservedKeyError,
7
+ validateConfigValue,
8
+ } from "@/be/swarm-config-guard";
5
9
  import { createToolRegistrar } from "@/tools/utils";
6
10
  import { SwarmConfigSchema, SwarmConfigScopeSchema } from "@/types";
7
11
 
@@ -89,6 +93,18 @@ export const registerSetConfigTool = (server: McpServer) => {
89
93
  };
90
94
  }
91
95
 
96
+ const validationError = validateConfigValue(key, value);
97
+ if (validationError) {
98
+ return {
99
+ content: [{ type: "text", text: validationError }],
100
+ structuredContent: {
101
+ yourAgentId: requestInfo.agentId,
102
+ success: false,
103
+ message: validationError,
104
+ },
105
+ };
106
+ }
107
+
92
108
  const config = upsertSwarmConfig({
93
109
  scope,
94
110
  scopeId: scope === "global" ? null : scopeId,