@desplega.ai/agent-swarm 1.74.4 → 1.75.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/openapi.json +282 -1
  2. package/package.json +1 -1
  3. package/src/be/db.ts +36 -0
  4. package/src/be/memory/edges-store.ts +69 -0
  5. package/src/be/memory/providers/sqlite-store.ts +4 -0
  6. package/src/be/memory/raters/explicit-self.ts +22 -0
  7. package/src/be/memory/raters/implicit-citation.ts +44 -0
  8. package/src/be/memory/raters/llm-client.ts +172 -0
  9. package/src/be/memory/raters/llm.ts +394 -0
  10. package/src/be/memory/raters/noop.ts +14 -0
  11. package/src/be/memory/raters/registry.ts +86 -0
  12. package/src/be/memory/raters/retrieval.ts +88 -0
  13. package/src/be/memory/raters/run-server-raters.ts +97 -0
  14. package/src/be/memory/raters/store.ts +228 -0
  15. package/src/be/memory/raters/types.ts +101 -0
  16. package/src/be/memory/reranker.ts +32 -2
  17. package/src/be/memory/retrieval-store.ts +95 -0
  18. package/src/be/memory/types.ts +3 -0
  19. package/src/be/migrations/051_memory_posteriors_and_retrieval.sql +67 -0
  20. package/src/be/migrations/052_memory_edges.sql +36 -0
  21. package/src/be/migrations/053_agent_waiting_for_credentials_status.sql +61 -0
  22. package/src/commands/credential-wait.ts +186 -0
  23. package/src/commands/runner.ts +54 -9
  24. package/src/hooks/hook.ts +67 -10
  25. package/src/http/agents.ts +110 -0
  26. package/src/http/core.ts +5 -0
  27. package/src/http/memory.ts +230 -1
  28. package/src/prompts/memories.ts +62 -0
  29. package/src/providers/claude-adapter.ts +17 -0
  30. package/src/providers/claude-managed-adapter.ts +24 -0
  31. package/src/providers/codex-adapter.ts +42 -0
  32. package/src/providers/credentials.ts +74 -0
  33. package/src/providers/devin-adapter.ts +18 -0
  34. package/src/providers/index.ts +7 -0
  35. package/src/providers/opencode-adapter.ts +60 -0
  36. package/src/providers/pi-mono-adapter.ts +71 -0
  37. package/src/providers/types.ts +34 -0
  38. package/src/server.ts +2 -0
  39. package/src/tests/credential-check.test.ts +336 -0
  40. package/src/tests/credential-status-api.test.ts +181 -0
  41. package/src/tests/credential-status-routing.test.ts +150 -0
  42. package/src/tests/credential-wait.test.ts +282 -0
  43. package/src/tests/memory-edges.test.ts +722 -0
  44. package/src/tests/memory-rate-endpoint.test.ts +330 -0
  45. package/src/tests/memory-rate-tool.test.ts +252 -0
  46. package/src/tests/memory-rater-e2e.test.ts +578 -0
  47. package/src/tests/memory-rater-implicit-citation.test.ts +304 -0
  48. package/src/tests/memory-rater-llm.test.ts +806 -0
  49. package/src/tests/memory-rater-store.test.ts +249 -0
  50. package/src/tests/memory-reranker.test.ts +161 -2
  51. package/src/tests/mocks/mock-llm-rater-client.ts +35 -0
  52. package/src/tests/run-server-raters.test.ts +291 -0
  53. package/src/tests/tool-annotations.test.ts +2 -2
  54. package/src/tools/memory-rate.ts +166 -0
  55. package/src/tools/memory-search.ts +18 -0
  56. package/src/tools/store-progress.ts +37 -0
  57. package/src/tools/tool-config.ts +1 -0
  58. package/src/types.ts +5 -1
@@ -0,0 +1,172 @@
1
+ /**
2
+ * `LlmRaterClient` — pluggable LLM driver used by `LlmRater` to score the
3
+ * usefulness of a single retrieved memory against a (query, response) pair.
4
+ *
5
+ * Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-4.md §1
6
+ *
7
+ * This module is imported from worker-side `src/hooks/hook.ts` (the session-
8
+ * summary piggyback path), so it MUST NOT touch `bun:sqlite` or `src/be/db`.
9
+ * The DB-boundary check in `scripts/check-db-boundary.sh` enforces this.
10
+ *
11
+ * Default implementation shells out to the same `claude -p` CLI the hook
12
+ * already uses for session summarization — zero new SDK dependencies.
13
+ */
14
+
15
+ export type LlmRaterInput = {
16
+ /** What the agent asked the memory system for. */
17
+ query: string;
18
+ /** The memory we're scoring. */
19
+ memory: {
20
+ id: string;
21
+ name: string;
22
+ content: string;
23
+ };
24
+ /** The agent's eventual response (or session summary) — the "did this help?" signal. */
25
+ response: string;
26
+ };
27
+
28
+ export type LlmRaterResult = {
29
+ /** Usefulness score in [0, 1]. 0 = misleading, 1 = highly useful. */
30
+ score: number;
31
+ /** Short human-readable explanation. */
32
+ reasoning: string;
33
+ };
34
+
35
+ export interface LlmRaterClient {
36
+ /**
37
+ * Score one memory. Returns null on parse failure / non-JSON output / timeout
38
+ * — the caller (`LlmRater`) treats `null` as "skip this rating", no posterior
39
+ * change. Implementations MUST NOT throw on transport errors; swallow + log
40
+ * + return null so the worker hook can never crash on rater failure.
41
+ */
42
+ rate(input: LlmRaterInput): Promise<LlmRaterResult | null>;
43
+ }
44
+
45
+ /**
46
+ * Configuration for the Claude-CLI implementation.
47
+ */
48
+ export type ClaudeCliLlmRaterClientOptions = {
49
+ /** Override the model. Defaults to `MEMORY_LLM_RATER_MODEL` env var or "haiku". */
50
+ model?: string;
51
+ /** Soft timeout (ms) for the `claude -p` shell-out. Default 30s. */
52
+ timeoutMs?: number;
53
+ };
54
+
55
+ const DEFAULT_TIMEOUT_MS = 30000;
56
+
57
+ const PROMPT_TEMPLATE = `You are scoring the usefulness of one retrieved memory.
58
+
59
+ Return ONLY a JSON object with these fields (no prose, no markdown):
60
+ {
61
+ "score": number, // 0 = misleading/unhelpful, 1 = highly useful
62
+ "reasoning": string // 1..500 chars, why
63
+ }
64
+
65
+ QUERY:
66
+ \${query}
67
+
68
+ MEMORY:
69
+ id: \${memoryId}
70
+ name: \${memoryName}
71
+ content: \${memoryContent}
72
+
73
+ AGENT RESPONSE / SUMMARY:
74
+ \${response}
75
+
76
+ Score 0..1.`;
77
+
78
+ /**
79
+ * `claude -p --output-format json` returns a JSON envelope of the shape
80
+ * `{ result: string, ... }`. We parse the envelope, then JSON-parse the
81
+ * inner `result` to recover the score+reasoning object.
82
+ */
83
+ type ClaudeCliEnvelope = { result?: unknown };
84
+
85
+ function buildPrompt(input: LlmRaterInput): string {
86
+ return PROMPT_TEMPLATE.replace("${query}", input.query)
87
+ .replace("${memoryId}", input.memory.id)
88
+ .replace("${memoryName}", input.memory.name)
89
+ .replace("${memoryContent}", input.memory.content)
90
+ .replace("${response}", input.response);
91
+ }
92
+
93
+ function parseScoreAndReasoning(raw: unknown): LlmRaterResult | null {
94
+ if (typeof raw !== "string") return null;
95
+ let parsed: unknown;
96
+ try {
97
+ parsed = JSON.parse(raw.trim());
98
+ } catch {
99
+ return null;
100
+ }
101
+ if (!parsed || typeof parsed !== "object") return null;
102
+ const obj = parsed as { score?: unknown; reasoning?: unknown };
103
+ const score = typeof obj.score === "number" ? obj.score : null;
104
+ const reasoning = typeof obj.reasoning === "string" ? obj.reasoning : null;
105
+ if (score == null || reasoning == null) return null;
106
+ if (!Number.isFinite(score) || score < 0 || score > 1) return null;
107
+ if (reasoning.length === 0 || reasoning.length > 500) return null;
108
+ return { score, reasoning };
109
+ }
110
+
111
+ export class ClaudeCliLlmRaterClient implements LlmRaterClient {
112
+ private readonly model: string;
113
+ private readonly timeoutMs: number;
114
+
115
+ constructor(opts: ClaudeCliLlmRaterClientOptions = {}) {
116
+ this.model = opts.model ?? process.env.MEMORY_LLM_RATER_MODEL ?? "haiku";
117
+ this.timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
118
+ }
119
+
120
+ async rate(input: LlmRaterInput): Promise<LlmRaterResult | null> {
121
+ const prompt = buildPrompt(input);
122
+ const tmpFile = `/tmp/llm-rater-${Date.now()}-${Math.random().toString(36).slice(2)}.txt`;
123
+
124
+ let stdout = "";
125
+ try {
126
+ await Bun.write(tmpFile, prompt);
127
+ const proc = Bun.spawn(
128
+ ["bash", "-c", `cat "${tmpFile}" | claude -p --model ${this.model} --output-format json`],
129
+ {
130
+ stdout: "pipe",
131
+ stderr: "pipe",
132
+ env: { ...process.env, SKIP_SESSION_SUMMARY: "1" },
133
+ },
134
+ );
135
+ const timeoutId = setTimeout(() => proc.kill(), this.timeoutMs);
136
+ stdout = await new Response(proc.stdout).text();
137
+ clearTimeout(timeoutId);
138
+ } catch (err) {
139
+ console.error("[memory-rater:llm] claude -p shell-out failed:", (err as Error).message);
140
+ return null;
141
+ } finally {
142
+ try {
143
+ await Bun.$`rm -f ${tmpFile}`.quiet();
144
+ } catch {
145
+ // best-effort
146
+ }
147
+ }
148
+
149
+ let envelope: ClaudeCliEnvelope;
150
+ try {
151
+ envelope = JSON.parse(stdout) as ClaudeCliEnvelope;
152
+ } catch {
153
+ return null;
154
+ }
155
+ return parseScoreAndReasoning(envelope.result);
156
+ }
157
+ }
158
+
159
+ /**
160
+ * Factory honouring `MEMORY_LLM_RATER_PROVIDER` — defaults to `claude-cli`.
161
+ * Unknown providers fall back to the Claude CLI default and log a warning so
162
+ * misconfiguration never crashes the worker.
163
+ */
164
+ export function getDefaultLlmRaterClient(): LlmRaterClient {
165
+ const provider = (process.env.MEMORY_LLM_RATER_PROVIDER ?? "claude-cli").trim();
166
+ if (provider !== "claude-cli") {
167
+ console.warn(
168
+ `[memory-rater:llm] Unknown MEMORY_LLM_RATER_PROVIDER "${provider}" — falling back to claude-cli`,
169
+ );
170
+ }
171
+ return new ClaudeCliLlmRaterClient();
172
+ }
@@ -0,0 +1,394 @@
1
+ /**
2
+ * `LlmRater` — second live rater, source = "llm".
3
+ *
4
+ * Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-4.md §2-3
5
+ *
6
+ * The worker-side flow does NOT call `LlmRater.rate(ctx)` from the in-process
7
+ * server-rater orchestrator. Instead, the rating LLM call is piggybacked on
8
+ * the existing session-summary call in `src/hooks/hook.ts` (cost optimization
9
+ * — same Haiku invocation produces both summary text + per-memory ratings).
10
+ * The hook then POSTs the constructed `RatingEvent[]` to `/api/memory/rate`.
11
+ *
12
+ * `LlmRater.rate(ctx)` is wired up so the class still satisfies `MemoryRater`
13
+ * for registry consistency / future direct integrations / unit tests, but is
14
+ * never invoked by `runServerRaters` (LlmRater is NOT in `SERVER_RATERS`).
15
+ *
16
+ * This module is imported from worker-side `src/hooks/hook.ts` so it MUST NOT
17
+ * touch `bun:sqlite` or `src/be/db`. The boundary check enforces it.
18
+ */
19
+ import { z } from "zod";
20
+ import { ClaudeCliLlmRaterClient, type LlmRaterClient, type LlmRaterResult } from "./llm-client";
21
+ import {
22
+ type MemoryRater,
23
+ type RatingContext,
24
+ type RatingEvent,
25
+ REFERENCES_SOURCE_MAX_LENGTH,
26
+ sanitizeReferencesSource,
27
+ } from "./types";
28
+
29
/**
 * Per-rating weight, fixed at 0.8 per the research-doc convention
 * ("LLM intent_weight"). Encoded here once so neither callers nor tests can
 * silently drift the constant.
 */
export const LLM_RATER_WEIGHT = 0.8;

// Shape of one per-memory rating emitted by the piggybacked summarizer call.
// `score` is clamped to [0, 1] and `reasoning` to 1..500 chars by zod, so
// out-of-contract LLM output fails validation instead of flowing downstream.
const RatingSchema = z.object({
  id: z.string().min(1),
  score: z.number().min(0).max(1),
  reasoning: z.string().min(1).max(500),
  // Step-6 §6 — optional free-form external source ID. Q2 contract: ≤512
  // chars, no closed enum, no prefix parser. Sanitization (control-char
  // strip + NUL rejection) happens in `buildRatingsFromLlm` so a single
  // bad rating drops the field rather than failing the whole batch.
  referencesSource: z.string().min(1).max(REFERENCES_SOURCE_MAX_LENGTH).optional(),
});

/**
 * Zod schema for the structured-output piggyback prompt. The hook asks the
 * summarizer LLM to return summary + per-memory ratings in one JSON object so
 * we don't pay for N additional LLM calls.
 */
export const SummaryWithRatingsSchema = z.object({
  summary: z.string(),
  // `.default([])` keeps a summary-only response valid — ratings are
  // best-effort and may be omitted entirely by the model.
  ratings: z.array(RatingSchema).default([]),
});

export type LlmRating = z.infer<typeof RatingSchema>;
export type SummaryWithRatings = z.infer<typeof SummaryWithRatingsSchema>;

/** Context augmentations LlmRater consumes when called directly (per-memory path). */
export type LlmRatingContext = RatingContext & {
  /** What the agent asked the memory system. */
  query?: string;
  /** Final agent response / summary used as the "did this help?" signal. */
  response?: string;
  /** Snapshots for memories listed in `retrievedMemoryIds` (id-aligned by id). */
  retrievedMemories?: { id: string; name: string; content: string }[];
};
69
+
70
+ export class LlmRater implements MemoryRater {
71
+ readonly name = "llm";
72
+
73
+ constructor(public readonly client: LlmRaterClient = new ClaudeCliLlmRaterClient()) {}
74
+
75
+ /**
76
+ * Per-memory scoring path. The production hook bypasses this method and
77
+ * calls {@link buildRatingsFromLlm} on the piggybacked summarizer JSON
78
+ * (one LLM invocation, not N). Direct callers (tests, future integrations)
79
+ * MUST pass {@link LlmRatingContext} — the base `RatingContext` carries
80
+ * only memory IDs, which is insufficient to drive `LlmRaterClient.rate`.
81
+ *
82
+ * Returns `[]` when the augmented fields are missing so the rater stays a
83
+ * no-op rather than crashing on a `RatingContext`-only invocation.
84
+ */
85
+ async rate(ctx: RatingContext): Promise<RatingEvent[]> {
86
+ const enriched = ctx as LlmRatingContext;
87
+ if (enriched.retrievedMemoryIds.length === 0) return [];
88
+ const memories = enriched.retrievedMemories;
89
+ if (!memories || memories.length === 0) return [];
90
+
91
+ const events: RatingEvent[] = [];
92
+ for (const memoryId of enriched.retrievedMemoryIds) {
93
+ const memory = memories.find((m) => m.id === memoryId);
94
+ if (!memory) continue;
95
+ let result: LlmRaterResult | null;
96
+ try {
97
+ result = await this.client.rate({
98
+ query: enriched.query ?? "",
99
+ memory,
100
+ response: enriched.response ?? enriched.evidence ?? "",
101
+ });
102
+ } catch (err) {
103
+ console.error(
104
+ `[memory-rater:llm] client.rate threw for memoryId=${memoryId}:`,
105
+ (err as Error).message,
106
+ );
107
+ continue;
108
+ }
109
+ if (!result) continue;
110
+ events.push({
111
+ memoryId,
112
+ signal: 2 * result.score - 1,
113
+ weight: LLM_RATER_WEIGHT,
114
+ // Framework stamps `source = rater.name` in `runServerRaters`. Raters
115
+ // that populate `source` themselves are rejected by `applyRating`.
116
+ source: "",
117
+ reasoning: result.reasoning,
118
+ });
119
+ }
120
+ return events;
121
+ }
122
+ }
123
+
124
+ /**
125
+ * Convert the piggybacked summary's `ratings` array into `RatingEvent[]` for
126
+ * `POST /api/memory/rate`. Drops ratings whose `id` was not in the original
127
+ * retrieval set (defence-in-depth — the LLM occasionally hallucinates memory
128
+ * IDs; the server-side R6 check catches it too, but rejecting upstream keeps
129
+ * the audit log cleaner).
130
+ *
131
+ * Mapping: `signal = 2 * score - 1` (0 → -1, 0.5 → 0, 1 → +1).
132
+ * Weight = {@link LLM_RATER_WEIGHT} (0.8).
133
+ * Source = `"llm"` (the HTTP rate endpoint enums `["llm", "explicit-self"]`).
134
+ */
135
+ export function buildRatingsFromLlm(
136
+ ratings: LlmRating[],
137
+ retrievals: { id: string }[],
138
+ ): RatingEvent[] {
139
+ const allowed = new Set(retrievals.map((r) => r.id));
140
+ const events: RatingEvent[] = [];
141
+ for (const r of ratings) {
142
+ if (!allowed.has(r.id)) continue;
143
+ // Step-6 §6 — sanitize before propagation. If the LLM emits a NUL byte
144
+ // or an all-control-chars string, drop the edge but keep the rating
145
+ // (best-effort: the memory's own posterior still gets the signal).
146
+ let cleanedReferencesSource: string | undefined;
147
+ if (r.referencesSource !== undefined) {
148
+ const cleaned = sanitizeReferencesSource(r.referencesSource);
149
+ if (cleaned !== null) {
150
+ cleanedReferencesSource = cleaned;
151
+ }
152
+ }
153
+ events.push({
154
+ memoryId: r.id,
155
+ signal: 2 * r.score - 1,
156
+ weight: LLM_RATER_WEIGHT,
157
+ source: "llm",
158
+ reasoning: r.reasoning,
159
+ ...(cleanedReferencesSource !== undefined
160
+ ? { referencesSource: cleanedReferencesSource }
161
+ : {}),
162
+ });
163
+ }
164
+ return events;
165
+ }
166
+
167
+ /**
168
+ * Append a structured-output instruction to the existing summary prompt so
169
+ * the same `claude -p` invocation produces both summary text AND per-memory
170
+ * ratings against `SummaryWithRatingsSchema`.
171
+ *
172
+ * Memory `content` is truncated to {@link RETRIEVAL_PROMPT_CONTENT_CAP} chars
173
+ * to keep the prompt within Haiku's context budget on long sessions; the
174
+ * server already truncates `agent_memory.content` to 500 chars in the
175
+ * retrievals endpoint, so this is the typical case.
176
+ */
177
+ const RETRIEVAL_PROMPT_CONTENT_CAP = 600;
178
+
179
+ export function buildSummaryWithRatingsPrompt(
180
+ basePrompt: string,
181
+ retrievals: { id: string; name: string; content: string }[],
182
+ ): string {
183
+ if (retrievals.length === 0) return basePrompt;
184
+ const memoryBlock = retrievals
185
+ .map((m, i) => {
186
+ const content =
187
+ m.content.length > RETRIEVAL_PROMPT_CONTENT_CAP
188
+ ? `${m.content.slice(0, RETRIEVAL_PROMPT_CONTENT_CAP)}…`
189
+ : m.content;
190
+ return `Memory #${i + 1}\n id: ${m.id}\n name: ${m.name}\n content: ${content}`;
191
+ })
192
+ .join("\n\n");
193
+
194
+ return `${basePrompt}
195
+
196
+ CRITICAL: Return JSON conforming to this schema (no prose outside the JSON, no markdown fences):
197
+ {
198
+ "summary": string, // your existing summary text
199
+ "ratings": [ // one entry per memory you can score
200
+ {
201
+ "id": string, // memory id, copied from the list below
202
+ "score": number, // 0 = misleading/unhelpful, 1 = highly useful
203
+ "reasoning": string, // 1..500 chars, why
204
+ "referencesSource": string // OPTIONAL — see note below
205
+ }
206
+ ]
207
+ }
208
+
209
+ Score ONLY memories present in the list below. Use the exact ids. Omit any you cannot evaluate.
210
+
211
+ Optionally for each rating, if the memory clearly references a specific external source (a GitHub PR/issue, a Linear issue, a customer, a Slack thread, an AgentMail thread, etc.), include a \`referencesSource\` string using the convention "<source>:<identifier>" (e.g. "github:owner/repo#N", "linear:KEY-N", "customer:<slug>"). Any prefix is fine — pick what matches the source. Omit the field if no clear external source.
212
+
213
+ Memories retrieved during this session:
214
+
215
+ ${memoryBlock}`;
216
+ }
217
+
218
+ /**
219
+ * Best-effort parse of the structured `SummaryWithRatingsSchema` JSON out of
220
+ * the `claude -p --output-format json` envelope (`{ result: "<inner json>" }`).
221
+ *
222
+ * Returns `null` on any parse failure — the caller falls back to the existing
223
+ * summary-only path. NEVER throws.
224
+ */
225
+ export function parseSummaryWithRatings(claudeStdout: string): SummaryWithRatings | null {
226
+ let envelope: { result?: unknown };
227
+ try {
228
+ envelope = JSON.parse(claudeStdout) as { result?: unknown };
229
+ } catch {
230
+ return null;
231
+ }
232
+ const inner = envelope.result;
233
+ let candidate: unknown;
234
+ if (typeof inner === "string") {
235
+ try {
236
+ candidate = JSON.parse(inner.trim());
237
+ } catch {
238
+ return null;
239
+ }
240
+ } else if (inner && typeof inner === "object") {
241
+ candidate = inner;
242
+ } else {
243
+ return null;
244
+ }
245
+ const parsed = SummaryWithRatingsSchema.safeParse(candidate);
246
+ return parsed.success ? parsed.data : null;
247
+ }
248
+
249
+ /**
250
+ * Fallback summary-text extractor for the hook's `claude -p` envelope. Used
251
+ * when {@link parseSummaryWithRatings} returns null — i.e., when the LLM
252
+ * returned a valid envelope but the inner payload either wasn't structured
253
+ * JSON (unstructured prompt path) OR was structured JSON whose ratings failed
254
+ * `SummaryWithRatingsSchema` validation (e.g., out-of-range scores).
255
+ *
256
+ * In the latter case `envelope.result` is the full inner JSON STRING such as
257
+ * `{"summary":"...","ratings":[...]}`; indexing that verbatim into agent
258
+ * memory would violate the step-4 contract that ratings are best-effort and
259
+ * the existing summary-indexing behavior remains unchanged. We extract the
260
+ * inner `summary` field if present, else return the inner string (treating
261
+ * it as plain summary text). NEVER throws.
262
+ */
263
+ export function extractSummaryFromClaudeStdout(claudeStdout: string): string {
264
+ let envelope: { result?: unknown };
265
+ try {
266
+ envelope = JSON.parse(claudeStdout) as { result?: unknown };
267
+ } catch {
268
+ return claudeStdout;
269
+ }
270
+ const inner = envelope.result;
271
+ if (typeof inner === "string") {
272
+ try {
273
+ const innerParsed = JSON.parse(inner.trim()) as { summary?: unknown };
274
+ if (innerParsed && typeof innerParsed.summary === "string") {
275
+ return innerParsed.summary;
276
+ }
277
+ } catch {
278
+ // inner wasn't JSON — treat it as plain summary text
279
+ }
280
+ return inner;
281
+ }
282
+ if (
283
+ inner &&
284
+ typeof inner === "object" &&
285
+ typeof (inner as { summary?: unknown }).summary === "string"
286
+ ) {
287
+ return (inner as { summary: string }).summary;
288
+ }
289
+ return claudeStdout;
290
+ }
291
+
292
+ /**
293
+ * `MEMORY_RATERS=...` includes `llm`? Used by the hook to gate the piggyback
294
+ * path — strict opt-in so existing deployments are byte-identical when unset.
295
+ */
296
+ export function isLlmRaterEnabled(): boolean {
297
+ const raw = process.env.MEMORY_RATERS;
298
+ if (!raw || raw.trim() === "") return false;
299
+ return raw
300
+ .split(",")
301
+ .map((s) => s.trim())
302
+ .includes("llm");
303
+ }
304
+
305
+ /** Memory snapshot returned by `GET /api/memory/retrievals`. */
306
+ export type RetrievalRow = {
307
+ id: string;
308
+ name: string;
309
+ content: string;
310
+ scope?: string;
311
+ similarity?: number | null;
312
+ retrievedAt?: string;
313
+ };
314
+
315
+ /**
316
+ * GET `/api/memory/retrievals?taskId=` — best-effort. Returns `[]` on any
317
+ * failure so a transient API outage never blocks the summary-indexing path.
318
+ */
319
+ export async function fetchRetrievalsForTask(opts: {
320
+ apiUrl: string;
321
+ apiKey: string;
322
+ agentId: string;
323
+ taskId: string;
324
+ fetchImpl?: typeof fetch;
325
+ }): Promise<RetrievalRow[]> {
326
+ const fetchFn = opts.fetchImpl ?? fetch;
327
+ try {
328
+ const url = `${opts.apiUrl}/api/memory/retrievals?taskId=${encodeURIComponent(opts.taskId)}`;
329
+ const res = await fetchFn(url, {
330
+ headers: {
331
+ "X-Agent-ID": opts.agentId,
332
+ ...(opts.apiKey ? { Authorization: `Bearer ${opts.apiKey}` } : {}),
333
+ },
334
+ });
335
+ if (!res.ok) {
336
+ console.error(
337
+ `[memory-rater:llm] GET /api/memory/retrievals failed: ${res.status} ${res.statusText}`,
338
+ );
339
+ return [];
340
+ }
341
+ const body = (await res.json()) as { results?: RetrievalRow[] };
342
+ return body.results ?? [];
343
+ } catch (err) {
344
+ console.error("[memory-rater:llm] fetchRetrievalsForTask threw:", (err as Error).message);
345
+ return [];
346
+ }
347
+ }
348
+
349
+ /**
350
+ * POST `/api/memory/rate` — best-effort. Logs on 4xx/5xx, never throws. The
351
+ * worker hook wraps the whole rating block in its own try/catch as a final
352
+ * line of defence — rater failure must never block summary indexing.
353
+ */
354
+ export async function postRatings(opts: {
355
+ apiUrl: string;
356
+ apiKey: string;
357
+ agentId: string;
358
+ taskId?: string;
359
+ events: RatingEvent[];
360
+ fetchImpl?: typeof fetch;
361
+ }): Promise<{ ok: boolean; status: number }> {
362
+ if (opts.events.length === 0) return { ok: true, status: 0 };
363
+ const fetchFn = opts.fetchImpl ?? fetch;
364
+ const events = opts.events.map((e) => ({
365
+ memoryId: e.memoryId,
366
+ signal: e.signal,
367
+ weight: e.weight,
368
+ source: e.source,
369
+ ...(e.reasoning !== undefined ? { reasoning: e.reasoning } : {}),
370
+ ...(e.referencesSource !== undefined ? { referencesSource: e.referencesSource } : {}),
371
+ ...(opts.taskId ? { taskId: opts.taskId } : {}),
372
+ }));
373
+ try {
374
+ const res = await fetchFn(`${opts.apiUrl}/api/memory/rate`, {
375
+ method: "POST",
376
+ headers: {
377
+ "Content-Type": "application/json",
378
+ "X-Agent-ID": opts.agentId,
379
+ ...(opts.apiKey ? { Authorization: `Bearer ${opts.apiKey}` } : {}),
380
+ },
381
+ body: JSON.stringify({ events }),
382
+ });
383
+ if (!res.ok) {
384
+ const text = await res.text().catch(() => "");
385
+ console.error(
386
+ `[memory-rater:llm] POST /api/memory/rate failed: ${res.status} ${res.statusText} ${text.slice(0, 200)}`,
387
+ );
388
+ }
389
+ return { ok: res.ok, status: res.status };
390
+ } catch (err) {
391
+ console.error("[memory-rater:llm] postRatings threw:", (err as Error).message);
392
+ return { ok: false, status: 0 };
393
+ }
394
+ }
@@ -0,0 +1,14 @@
1
+ import type { MemoryRater, RatingEvent } from "./types";
2
+
3
+ /**
4
+ * Default rater. Emits no events, makes no DB calls. Selected when
5
+ * MEMORY_RATERS is unset or empty so the framework defaults to behaving
6
+ * byte-identically to pre-rater builds.
7
+ */
8
+ export class NoopRater implements MemoryRater {
9
+ readonly name = "noop";
10
+
11
+ async rate(): Promise<RatingEvent[]> {
12
+ return [];
13
+ }
14
+ }
@@ -0,0 +1,86 @@
1
+ import { ExplicitSelfRatingRater } from "./explicit-self";
2
+ import { ImplicitCitationRater } from "./implicit-citation";
3
+ import { LlmRater } from "./llm";
4
+ import { NoopRater } from "./noop";
5
+ import type { MemoryRater } from "./types";
6
+
7
+ /**
8
+ * Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-1.md §4
9
+ *
10
+ * `MEMORY_RATERS` env — comma-separated list of rater names. Defaults to
11
+ * `[NoopRater]` when unset/empty so existing deployments stay byte-identical.
12
+ *
13
+ * `MEMORY_RATER_WEIGHTS` env — optional `name:multiplier,...` overrides.
14
+ * Multiplier is applied to every emitted RatingEvent.weight before
15
+ * `applyRating`. Default = 1.0.
16
+ *
17
+ * Each later step touches *only* its own line in the factory map:
18
+ * - step-1: noop only (this PR).
19
+ * - step-2: implicit-citation.
20
+ * - step-4: llm.
21
+ * - step-5: explicit-self.
22
+ *
23
+ * Unknown names are logged and skipped — startup never fails on this.
24
+ */
25
+
26
+ type RaterFactory = () => MemoryRater;
27
+
28
+ const FACTORIES: Record<string, RaterFactory> = {
29
+ noop: () => new NoopRater(),
30
+ "implicit-citation": () => new ImplicitCitationRater(),
31
+ "explicit-self": () => new ExplicitSelfRatingRater(),
32
+ llm: () => new LlmRater(),
33
+ };
34
+
35
+ /**
36
+ * Raters whose `rate(ctx)` runs server-side (in `store-progress.ts` after task
37
+ * completion). Worker-driven raters (e.g. step-4's `LlmRater`, step-5's
38
+ * `ExplicitSelfRater`) emit events from outside this set and POST them to
39
+ * `/api/memory/rate`. The store-progress hook only fires raters listed here.
40
+ *
41
+ * Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-2.md §6
42
+ */
43
+ export const SERVER_RATERS = new Set<string>(["implicit-citation"]);
44
+
45
+ export function getRegisteredRaters(): MemoryRater[] {
46
+ const raw = process.env.MEMORY_RATERS;
47
+ if (!raw || raw.trim() === "") {
48
+ return [new NoopRater()];
49
+ }
50
+
51
+ const names = raw
52
+ .split(",")
53
+ .map((s) => s.trim())
54
+ .filter((s) => s.length > 0);
55
+
56
+ const raters: MemoryRater[] = [];
57
+ for (const name of names) {
58
+ const factory = FACTORIES[name];
59
+ if (!factory) {
60
+ console.warn(`[memory-rater] Unknown rater "${name}" in MEMORY_RATERS — skipping`);
61
+ continue;
62
+ }
63
+ raters.push(factory());
64
+ }
65
+
66
+ if (raters.length === 0) {
67
+ return [new NoopRater()];
68
+ }
69
+ return raters;
70
+ }
71
+
72
+ export function getRaterWeightMultiplier(name: string): number {
73
+ const raw = process.env.MEMORY_RATER_WEIGHTS;
74
+ if (!raw || raw.trim() === "") return 1.0;
75
+
76
+ for (const pair of raw.split(",")) {
77
+ const trimmed = pair.trim();
78
+ if (trimmed === "") continue;
79
+ const [rawName, rawMult] = trimmed.split(":");
80
+ if (!rawName || !rawMult) continue;
81
+ if (rawName.trim() !== name) continue;
82
+ const mult = Number(rawMult);
83
+ if (Number.isFinite(mult) && mult >= 0) return mult;
84
+ }
85
+ return 1.0;
86
+ }