@desplega.ai/agent-swarm 1.74.4 → 1.76.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. package/README.md +1 -1
  2. package/openapi.json +1264 -46
  3. package/package.json +2 -2
  4. package/src/be/db.ts +563 -9
  5. package/src/be/memory/edges-store.ts +69 -0
  6. package/src/be/memory/providers/sqlite-store.ts +4 -0
  7. package/src/be/memory/raters/explicit-self.ts +22 -0
  8. package/src/be/memory/raters/implicit-citation.ts +44 -0
  9. package/src/be/memory/raters/llm-client.ts +172 -0
  10. package/src/be/memory/raters/llm-summarizer.ts +218 -0
  11. package/src/be/memory/raters/llm.ts +375 -0
  12. package/src/be/memory/raters/noop.ts +14 -0
  13. package/src/be/memory/raters/registry.ts +86 -0
  14. package/src/be/memory/raters/retrieval.ts +88 -0
  15. package/src/be/memory/raters/run-server-raters.ts +97 -0
  16. package/src/be/memory/raters/store.ts +228 -0
  17. package/src/be/memory/raters/types.ts +101 -0
  18. package/src/be/memory/reranker.ts +32 -2
  19. package/src/be/memory/retrieval-store.ts +116 -0
  20. package/src/be/memory/types.ts +3 -0
  21. package/src/be/migrations/051_memory_posteriors_and_retrieval.sql +67 -0
  22. package/src/be/migrations/052_memory_edges.sql +36 -0
  23. package/src/be/migrations/053_agent_waiting_for_credentials_status.sql +61 -0
  24. package/src/be/migrations/054_agent_harness_provider.sql +21 -0
  25. package/src/be/migrations/055_agent_cred_status.sql +15 -0
  26. package/src/be/migrations/056_drop_agent_tasks_source_check.sql +139 -0
  27. package/src/be/migrations/057_inbox_item_state.sql +27 -0
  28. package/src/be/migrations/058_task_templates.sql +31 -0
  29. package/src/be/swarm-config-guard.ts +24 -0
  30. package/src/commands/credential-wait.ts +186 -0
  31. package/src/commands/provider-credentials.ts +434 -0
  32. package/src/commands/runner.ts +253 -21
  33. package/src/hooks/hook.ts +143 -66
  34. package/src/http/agents.ts +191 -1
  35. package/src/http/config.ts +11 -1
  36. package/src/http/core.ts +5 -0
  37. package/src/http/inbox-state.ts +89 -0
  38. package/src/http/index.ts +10 -0
  39. package/src/http/memory.ts +230 -1
  40. package/src/http/sessions.ts +86 -0
  41. package/src/http/status.ts +665 -0
  42. package/src/http/task-templates.ts +51 -0
  43. package/src/http/tasks.ts +85 -5
  44. package/src/http/users.ts +134 -0
  45. package/src/prompts/memories.ts +62 -0
  46. package/src/providers/claude-adapter.ts +22 -0
  47. package/src/providers/claude-managed-adapter.ts +24 -0
  48. package/src/providers/codex-adapter.ts +43 -1
  49. package/src/providers/devin-adapter.ts +18 -0
  50. package/src/providers/index.ts +7 -0
  51. package/src/providers/opencode-adapter.ts +60 -0
  52. package/src/providers/pi-mono-adapter.ts +71 -0
  53. package/src/providers/types.ts +34 -0
  54. package/src/server.ts +2 -0
  55. package/src/slack/handlers.ts +0 -1
  56. package/src/tests/agents-harness-provider.test.ts +333 -0
  57. package/src/tests/credential-check.test.ts +367 -0
  58. package/src/tests/credential-status-api.test.ts +223 -0
  59. package/src/tests/credential-status-routing.test.ts +150 -0
  60. package/src/tests/credential-wait.test.ts +282 -0
  61. package/src/tests/harness-provider-resolution.test.ts +242 -0
  62. package/src/tests/jira-sync.test.ts +1 -1
  63. package/src/tests/memory-edges.test.ts +722 -0
  64. package/src/tests/memory-rate-endpoint.test.ts +330 -0
  65. package/src/tests/memory-rate-tool.test.ts +252 -0
  66. package/src/tests/memory-rater-e2e.test.ts +578 -0
  67. package/src/tests/memory-rater-implicit-citation.test.ts +304 -0
  68. package/src/tests/memory-rater-llm-summarizer.test.ts +317 -0
  69. package/src/tests/memory-rater-llm.test.ts +964 -0
  70. package/src/tests/memory-rater-store.test.ts +249 -0
  71. package/src/tests/memory-reranker.test.ts +161 -2
  72. package/src/tests/migration-runner-regressions.test.ts +17 -2
  73. package/src/tests/mocks/mock-llm-rater-client.ts +35 -0
  74. package/src/tests/run-server-raters.test.ts +291 -0
  75. package/src/tests/sessions.test.ts +141 -0
  76. package/src/tests/status.test.ts +843 -0
  77. package/src/tests/stop-hook-task-resolution.test.ts +98 -0
  78. package/src/tests/template-recommendations.test.ts +148 -0
  79. package/src/tests/tool-annotations.test.ts +2 -2
  80. package/src/tests/use-dismissible-card.test.ts +140 -0
  81. package/src/tools/memory-rate.ts +166 -0
  82. package/src/tools/memory-search.ts +18 -0
  83. package/src/tools/store-progress.ts +37 -0
  84. package/src/tools/swarm-config/set-config.ts +17 -1
  85. package/src/tools/tool-config.ts +1 -0
  86. package/src/types.ts +122 -1
  87. package/src/utils/harness-provider.ts +32 -0
  88. package/tsconfig.json +0 -2
@@ -0,0 +1,228 @@
1
+ import { ensure } from "@desplega.ai/business-use";
2
+ import { getDb } from "@/be/db";
3
+ import { type RatingEvent, REFERENCES_SOURCE_MAX_LENGTH, sanitizeReferencesSource } from "./types";
4
+
5
+ /**
6
+ * Single chokepoint for posterior updates and audit-log writes.
7
+ *
8
+ * Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-1.md §3
9
+ *
10
+ * For every event in `events`:
11
+ * - alphaDelta = max(0, signal) * weight (rewards usefulness)
12
+ * - betaDelta = max(0, -signal) * weight (records evidence against usefulness)
13
+ * - UPDATE agent_memory SET alpha = alpha + ?, beta = beta + ? WHERE id = ?
14
+ * - INSERT INTO memory_rating (...) VALUES (...)
15
+ * - When `referencesSource` is present (step-6 §3): UPSERT into
16
+ * agent_memory_edge with the SAME (alphaDelta, betaDelta) so the edge's
17
+ * own posterior tracks evidence the same way the memory's does.
18
+ *
19
+ * The whole batch runs in a single transaction so partial failure rolls back
20
+ * (commutativity of the Beta update means no idempotency check is needed —
21
+ * duplicate batches just shift the posterior further; the partial unique index
22
+ * on `(taskId, memoryId) WHERE source='explicit-self'` is the spam guard).
23
+ *
24
+ * Rejection semantics — events that fail validation are RETURNED in `rejected`,
25
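+ * not thrown. This lets HTTP/MCP layers surface partial success cleanly. Exception: a duplicate `explicit-self` rating throws ExplicitSelfDuplicateError from inside the transaction, which rolls back the whole batch.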
26
+ */
27
+ export type ApplyRatingResult = {
28
+ applied: number;
29
+ rejected: { event: RatingEvent; reason: string }[];
30
+ };
31
+
32
+ export type ApplyRatingContext = {
33
+ taskId?: string;
34
+ };
35
+
36
+ export class ExplicitSelfDuplicateError extends Error {
37
+ constructor(
38
+ message: string,
39
+ public readonly event: RatingEvent,
40
+ ) {
41
+ super(message);
42
+ this.name = "ExplicitSelfDuplicateError";
43
+ }
44
+ }
45
+
46
+ export function applyRating(
47
+ events: RatingEvent[],
48
+ ctx: ApplyRatingContext = {},
49
+ ): ApplyRatingResult {
50
+ if (events.length === 0) {
51
+ return { applied: 0, rejected: [] };
52
+ }
53
+
54
+ const db = getDb();
55
+ const accepted: { event: RatingEvent; sanitizedReferencesSource: string | null }[] = [];
56
+ const rejected: ApplyRatingResult["rejected"] = [];
57
+
58
+ for (const event of events) {
59
+ const reason = validate(event);
60
+ if (reason) {
61
+ rejected.push({ event, reason });
62
+ continue;
63
+ }
64
+ let sanitizedReferencesSource: string | null = null;
65
+ if (event.referencesSource !== undefined) {
66
+ if (event.referencesSource.length === 0) {
67
+ rejected.push({ event, reason: "referencesSource must be non-empty" });
68
+ continue;
69
+ }
70
+ if (event.referencesSource.length > REFERENCES_SOURCE_MAX_LENGTH) {
71
+ rejected.push({
72
+ event,
73
+ reason: `referencesSource exceeds ${REFERENCES_SOURCE_MAX_LENGTH} chars`,
74
+ });
75
+ continue;
76
+ }
77
+ sanitizedReferencesSource = sanitizeReferencesSource(event.referencesSource);
78
+ if (sanitizedReferencesSource === null) {
79
+ rejected.push({
80
+ event,
81
+ reason: "referencesSource contains a NUL byte or strips to empty",
82
+ });
83
+ continue;
84
+ }
85
+ }
86
+ accepted.push({ event, sanitizedReferencesSource });
87
+ }
88
+
89
+ if (accepted.length === 0) {
90
+ return { applied: 0, rejected };
91
+ }
92
+
93
+ // One transaction for the whole batch. SQLite serializes write transactions
94
+ // (WAL only adds reader concurrency) — Beta updates are commutative, so racing applies converge.
95
+ const updateMemory = db.prepare(
96
+ "UPDATE agent_memory SET alpha = alpha + ?, beta = beta + ? WHERE id = ?",
97
+ );
98
+ const checkExists = db.prepare<{ id: string }, [string]>(
99
+ "SELECT id FROM agent_memory WHERE id = ?",
100
+ );
101
+ const insertRating = db.prepare(
102
+ `INSERT INTO memory_rating
103
+ (id, memoryId, taskId, source, signal, weight, reasoning, createdAt)
104
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
105
+ );
106
+ // Step-6 §3 — UPSERT the edge with the SAME deltas as the memory row.
107
+ // The `- 1.0` corrections in DO UPDATE undo the default-prior offset that
108
+ // the INSERT arm baked into excluded.alpha/excluded.beta. Net effect: on
109
+ // insert, alpha/beta start at `1 + delta`; on update, the existing
110
+ // (alpha, beta) simply gain (delta_alpha, delta_beta).
111
+ const upsertEdge = db.prepare(
112
+ `INSERT INTO agent_memory_edge (from_id, to_id, type, alpha, beta, createdAt)
113
+ VALUES (?, ?, 'references-source', ?, ?, ?)
114
+ ON CONFLICT(from_id, to_id, type) DO UPDATE SET
115
+ alpha = alpha + excluded.alpha - 1.0,
116
+ beta = beta + excluded.beta - 1.0`,
117
+ );
118
+
119
+ type AppliedEntry = { event: RatingEvent; sanitizedReferencesSource: string | null };
120
+
121
+ const applyTx = db.transaction(() => {
122
+ let applied = 0;
123
+ const lateRejects: ApplyRatingResult["rejected"] = [];
124
+ const appliedEvents: AppliedEntry[] = [];
125
+ for (const { event, sanitizedReferencesSource } of accepted) {
126
+ const exists = checkExists.get(event.memoryId);
127
+ if (!exists) {
128
+ lateRejects.push({ event, reason: "memoryId not found in agent_memory" });
129
+ continue;
130
+ }
131
+ const alphaDelta = Math.max(0, event.signal) * event.weight;
132
+ const betaDelta = Math.max(0, -event.signal) * event.weight;
133
+ updateMemory.run(alphaDelta, betaDelta, event.memoryId);
134
+ try {
135
+ insertRating.run(
136
+ crypto.randomUUID(),
137
+ event.memoryId,
138
+ ctx.taskId ?? null,
139
+ event.source,
140
+ event.signal,
141
+ event.weight,
142
+ event.reasoning ?? null,
143
+ new Date().toISOString(),
144
+ );
145
+ } catch (err) {
146
+ // Partial unique index on (taskId, memoryId) WHERE source='explicit-self'
147
+ // is the constraint expected to fire here (memory_rating also has a primary key and a taskId foreign key).
148
+ if (isUniqueConstraintError(err)) {
149
+ throw new ExplicitSelfDuplicateError(
150
+ `duplicate explicit-self rating for memoryId=${event.memoryId} taskId=${ctx.taskId}`,
151
+ event,
152
+ );
153
+ }
154
+ throw err;
155
+ }
156
+ if (sanitizedReferencesSource !== null) {
157
+ upsertEdge.run(
158
+ event.memoryId,
159
+ sanitizedReferencesSource,
160
+ 1.0 + alphaDelta,
161
+ 1.0 + betaDelta,
162
+ new Date().toISOString(),
163
+ );
164
+ }
165
+ appliedEvents.push({ event, sanitizedReferencesSource });
166
+ applied += 1;
167
+ }
168
+ return { applied, lateRejects, appliedEvents };
169
+ });
170
+
171
+ const { applied, lateRejects, appliedEvents } = applyTx();
172
+
173
+ // Business-use instrumentation — emit ONE `memory_rated` event in the `task`
174
+ // flow per applied rating. Placed OUTSIDE the transaction (per CLAUDE.md BU
175
+ // block), validator self-contained (references only `data`). Skipped when
176
+ // `ctx.taskId` is absent because the `task` flow is keyed on taskId.
177
+ if (ctx.taskId && appliedEvents.length > 0) {
178
+ for (const { event, sanitizedReferencesSource } of appliedEvents) {
179
+ ensure({
180
+ id: "memory_rated",
181
+ flow: "task",
182
+ runId: ctx.taskId,
183
+ data: {
184
+ memoryId: event.memoryId,
185
+ source: event.source,
186
+ signal: event.signal,
187
+ weight: event.weight,
188
+ hasReferencesSource: sanitizedReferencesSource !== null,
189
+ },
190
+ validator: (data) =>
191
+ typeof data.memoryId === "string" &&
192
+ data.memoryId.length > 0 &&
193
+ typeof data.source === "string" &&
194
+ data.source.length > 0 &&
195
+ typeof data.signal === "number" &&
196
+ data.signal >= -1 &&
197
+ data.signal <= 1 &&
198
+ typeof data.weight === "number" &&
199
+ data.weight >= 0 &&
200
+ data.weight <= 1,
201
+ });
202
+ }
203
+ }
204
+
205
+ return { applied, rejected: [...rejected, ...lateRejects] };
206
+ }
207
+
208
+ function validate(event: RatingEvent): string | null {
209
+ if (!event.source || event.source.trim() === "") {
210
+ return "source is required";
211
+ }
212
+ if (!Number.isFinite(event.signal) || event.signal < -1 || event.signal > 1) {
213
+ return "signal must be in [-1, +1]";
214
+ }
215
+ if (!Number.isFinite(event.weight) || event.weight < 0 || event.weight > 1) {
216
+ return "weight must be in [0, 1]";
217
+ }
218
+ if (!event.memoryId) {
219
+ return "memoryId is required";
220
+ }
221
+ return null;
222
+ }
223
+
224
/**
 * Report whether `err` is a SQLite UNIQUE-constraint violation.
 *
 * Only UNIQUE violations qualify. Other constraint families (FOREIGN KEY,
 * NOT NULL, CHECK, PRIMARY KEY) must return `false` so the caller rethrows
 * them unchanged instead of mislabelling them as a duplicate rating.
 *
 * @param err - Value caught from a statement `run()`; may be anything.
 * @returns `true` when `err` is an `Error` whose `code` is
 *   `SQLITE_CONSTRAINT_UNIQUE` or whose message names a UNIQUE violation.
 */
function isUniqueConstraintError(err: unknown): boolean {
  if (!(err instanceof Error)) return false;
  // NOTE(review): assumes the driver exposes the extended result code as a
  // string `code` property when available — confirm against bun:sqlite.
  const code: unknown = "code" in err ? (err as { code?: unknown }).code : undefined;
  if (code === "SQLITE_CONSTRAINT_UNIQUE") return true;
  // Fallback on the message text. The bare `SQLITE_CONSTRAINT` token is
  // deliberately NOT matched: it is a prefix of every constraint error code.
  return /UNIQUE constraint failed|SQLITE_CONSTRAINT_UNIQUE/i.test(err.message);
}
@@ -0,0 +1,101 @@
1
+ /**
2
+ * Memory rater interface — pluggable signal source for the Beta-Binomial
3
+ * usefulness posteriors on agent_memory rows.
4
+ *
5
+ * Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-1.md §2
6
+ *
7
+ * Each rater returns RatingEvent[] from `rate(ctx)`. The framework
8
+ * (`applyRating` in ./store.ts) is the single chokepoint that:
9
+ * - validates signal ∈ [-1, +1] and weight ∈ [0, 1],
10
+ * - stamps `source = rater.name` (raters MUST NOT populate this — defence
11
+ * against rater spoofing),
12
+ * - applies the Beta posterior update atomically, and
13
+ * - writes the audit row to `memory_rating`.
14
+ */
15
+
16
+ export interface MemoryRater {
17
+ readonly name: string;
18
+ rate(ctx: RatingContext): Promise<RatingEvent[]>;
19
+ }
20
+
21
+ export type RatingEvent = {
22
+ memoryId: string;
23
+ /** Raw signal in [-1, +1]. Positive = useful, negative = misleading. */
24
+ signal: number;
25
+ /** Confidence in [0, 1]. Clipped delta = max(0, ±signal) * weight. */
26
+ weight: number;
27
+ /**
28
+ * Rater identity — populated by the framework, NOT by the rater itself.
29
+ * Note: `applyRating` rejects events whose `source` is empty ("source is required"), so it must be stamped before that call.
30
+ */
31
+ source: string;
32
+ /** Optional human-readable reason. Surfaced by LlmRater + ExplicitSelfRater. */
33
+ reasoning?: string;
34
+ /**
35
+ * Optional free-form external source identifier (v1.5 wedge — step-6).
36
+ *
37
+ * Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-6.md §1-§2
38
+ *
39
+ * When present, `applyRating` UPSERTs into `agent_memory_edge` with
40
+ * `type='references-source'`, applying the same Beta posterior delta as
41
+ * the memory row's `(alpha, beta)`. Convention `<source>:<identifier>`
42
+ * (e.g. `github:owner/repo#N`, `linear:KEY-N`, `customer:<slug>`) is
43
+ * documentation-only — server does NOT validate prefixes. Validation is
44
+ * write-site only: non-empty, ≤512 chars, control-char strip, no NUL.
45
+ */
46
+ referencesSource?: string;
47
+ };
48
+
49
+ /**
50
+ * Maximum length for `referencesSource` strings, measured with JS `string.length` (UTF-16 code units, not bytes) (Q2 contract). Encoded
51
+ * here once so the HTTP Zod schema, the MCP tool input schema, the LlmRater
52
+ * Zod schema, and `sanitizeReferencesSource` can't drift.
53
+ */
54
+ export const REFERENCES_SOURCE_MAX_LENGTH = 512;
55
+
56
+ const NUL_CHAR_CODE = 0x00;
57
+ const DEL_CHAR_CODE = 0x7f;
58
+ const FIRST_PRINTABLE_ASCII = 0x20;
59
+
60
+ /**
61
+ * Strip control characters from a `referencesSource` string and reject NUL
62
+ * bytes outright (Q2 free-form contract — step-6.md §2).
63
+ *
64
+ * - Returns the cleaned string when valid.
65
+ * - Returns `null` when the input contains a NUL byte (charCode 0x00) or
66
+ * when stripping control chars produces an empty string. Callers treat
67
+ * `null` as a validation failure (Zod transform → `z.NEVER`, applyRating
68
+ * → reject).
69
+ *
70
+ * Length is checked OUTSIDE this helper (Zod `.max(512)` runs first); the
71
+ * helper itself does not enforce a max so callers can apply different
72
+ * policies.
73
+ */
74
+ export function sanitizeReferencesSource(input: string): string | null {
75
+ let stripped = "";
76
+ for (let i = 0; i < input.length; i++) {
77
+ const code = input.charCodeAt(i);
78
+ if (code === NUL_CHAR_CODE) return null;
79
+ if (code < FIRST_PRINTABLE_ASCII || code === DEL_CHAR_CODE) {
80
+ // Non-NUL C0 / DEL — silently stripped.
81
+ continue;
82
+ }
83
+ stripped += input[i];
84
+ }
85
+ if (stripped.length === 0) return null;
86
+ return stripped;
87
+ }
88
+
89
+ export type RatingContext = {
90
+ taskId?: string;
91
+ agentId: string;
92
+ sessionId?: string;
93
+ /** Memories that were retrieved during this task; raters score subsets of these. */
94
+ retrievedMemoryIds: string[];
95
+ /**
96
+ * Server-side raters get session_logs content here; worker-side raters get
97
+ * the LLM summary text or the explicit user input. Null when no evidence is
98
+ * available (e.g. NoopRater).
99
+ */
100
+ evidence: string | null;
101
+ };
@@ -32,13 +32,43 @@ export function accessBoost(accessedAt: string, accessCount: number, now: Date):
32
32
  }
33
33
 
34
34
  /**
35
- * Final score combining similarity, recency decay, and access boost.
35
+ * Beta-Binomial usefulness factor for reranking.
36
+ *
37
+ * Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-1.md §5
38
+ *
39
+ * At Beta(1,1) (default prior) returns 1.0 exactly — strict no-op vs.
40
+ * pre-rater behaviour. Proven memories climb up to 2.0. Floored at the value
41
+ * of MEMORY_DEMOTION_FLOOR (default 1.0 = no demotion) — the default preserves
42
+ * brainstorm intent (memories are demoted toward the floor but never deleted
43
+ * on the reranker path) and is configurable per deployment.
44
+ */
45
export function usefulness(alpha: number, beta: number): number {
  // Rerank multiplier for a Beta(alpha, beta) posterior: twice the posterior
  // mean, capped at 2.0 and floored at the configured demotion floor.
  const denom = alpha + beta;
  // Degenerate or corrupt posteriors (non-positive mass, NaN, ±Infinity) are
  // treated as neutral so one bad row cannot turn the product in
  // computeScore into NaN.
  if (!Number.isFinite(denom) || denom <= 0) return 1.0;
  const mean = alpha / denom;
  return Math.max(readDemotionFloor(), Math.min(2.0, 2 * mean));
}

/**
 * Read the demotion floor from the `MEMORY_DEMOTION_FLOOR` environment
 * variable. The variable is re-read on every call.
 *
 * Falls back to 1.0 (no demotion) when the variable is unset, blank,
 * unparseable, or outside [0, 2] — the range in which the floor still sits
 * at or below the 2.0 cap applied by `usefulness`.
 *
 * @returns A finite floor in [0, 2].
 */
function readDemotionFloor(): number {
  const fallback = 1.0;
  const raw = process.env.MEMORY_DEMOTION_FLOOR?.trim();
  // `Number("")` and `Number("   ")` both evaluate to 0, so blank input must
  // be caught before parsing or it would silently enable full demotion.
  if (raw === undefined || raw === "") return fallback;
  const parsed = Number(raw);
  return Number.isFinite(parsed) && parsed >= 0 && parsed <= 2.0 ? parsed : fallback;
}
57
+
58
+ /**
59
+ * Final score combining similarity, recency decay, access boost, and
60
+ * Beta-Binomial usefulness. With default Beta(1,1) and default
61
+ * MEMORY_DEMOTION_FLOOR=1.0, the usefulness factor is exactly 1.0 and this
62
+ * computation matches the pre-rater behaviour byte-for-byte.
63
+ *
64
+ * v2: optional edge-aware boost — see thoughts/taras/plans/2026-05-05-memory-rater-v1.5/root.md
36
65
  */
37
66
  export function computeScore(candidate: MemoryCandidate, now: Date): number {
38
67
  return (
39
68
  candidate.similarity *
40
69
  recencyDecay(candidate.createdAt, now) *
41
- accessBoost(candidate.accessedAt, candidate.accessCount, now)
70
+ accessBoost(candidate.accessedAt, candidate.accessCount, now) *
71
+ usefulness(candidate.alpha, candidate.beta)
42
72
  );
43
73
  }
44
74
 
@@ -0,0 +1,116 @@
1
+ /**
2
+ * Read-side query helpers for the `memory_retrieval` audit log.
3
+ *
4
+ * Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-3.md
5
+ *
6
+ * Step-2 owns the write path (`searchMemory` populates `memory_retrieval`
7
+ * when `X-Source-Task-ID` is set). Step-3 surfaces these reads to:
8
+ * - GET /api/memory/retrievals — worker raters list memories surfaced for
9
+ * a task/session so they can score them.
10
+ * - POST /api/memory/rate — R6 spam-guard checks that an `explicit-self`
11
+ * rating targets a memory that was actually retrieved for the task.
12
+ *
13
+ * Server-side only. Route handlers should call these functions instead of
14
+ * preparing SQL directly so the query is reusable and typed in one place.
15
+ */
16
+ import { getDb } from "@/be/db";
17
+
18
+ /** Max chars of `agent_memory.content` returned in retrieval listings. */
19
+ const RETRIEVAL_CONTENT_SNIPPET_CHARS = 500;
20
+
21
+ /** Max retrievals returned per request — matches the typical session set. */
22
+ const RETRIEVAL_LIST_LIMIT = 50;
23
+
24
+ export type RetrievalListRow = {
25
+ /** `agent_memory.id` — the memory that was retrieved. */
26
+ id: string;
27
+ name: string;
28
+ /** Up to RETRIEVAL_CONTENT_SNIPPET_CHARS chars of `agent_memory.content`. */
29
+ content: string;
30
+ scope: string;
31
+ /**
32
+ * `agent_memory.source` — `'task_completion' | 'session_summary' | 'manual'
33
+ * | 'file_index'`. Surfaced so the worker rater can scope dedup to the
34
+ * memory class that exhibits scheduled-task self-similarity.
35
+ */
36
+ source: string;
37
+ /**
38
+ * `agent_tasks.scheduleId` for the source task that wrote this memory, or
39
+ * `null` if the memory has no source task or the task wasn't a scheduled
40
+ * run. Worker raters use this as a precise cron-clone discriminator —
41
+ * memories sharing a non-null `scheduleId` are by definition from the same
42
+ * scheduled job and safe to dedupe.
43
+ */
44
+ scheduleId: string | null;
45
+ similarity: number | null;
46
+ retrievedAt: string;
47
+ };
48
+
49
+ export type RetrievalListFilter = {
50
+ taskId?: string;
51
+ sessionId?: string;
52
+ };
53
+
54
+ /**
55
+ * List memories retrieved for a given (taskId | sessionId), filtered by the
56
+ * requesting agent for defence-in-depth (the `mr.agentId = ?` WHERE predicate keeps
57
+ * cross-agent rows out even though the worker is trusted).
58
+ *
59
+ * Returns at most {@link RETRIEVAL_LIST_LIMIT} rows, newest-first by
60
+ * `retrievedAt`. Caller MUST pass at least one of `taskId` / `sessionId`;
61
+ * the route's Zod schema enforces this — this function does not re-validate.
62
+ */
63
+ export function getRetrievalsForAgent(
64
+ agentId: string,
65
+ filter: RetrievalListFilter,
66
+ ): RetrievalListRow[] {
67
+ const conditions: string[] = ["mr.agentId = ?"];
68
+ const params: (string | number)[] = [agentId];
69
+ if (filter.taskId) {
70
+ conditions.push("mr.taskId = ?");
71
+ params.push(filter.taskId);
72
+ }
73
+ if (filter.sessionId) {
74
+ conditions.push("mr.sessionId = ?");
75
+ params.push(filter.sessionId);
76
+ }
77
+
78
+ // LEFT JOIN agent_tasks so we can surface `scheduleId` to worker raters —
79
+ // a non-null `scheduleId` is the precise cron-clone discriminator that
80
+ // `dedupeRetrievalsForRater` keys on. The LEFT keeps memories with no
81
+ // source task (manual / file_index) in the result set.
82
+ const sql = `
83
+ SELECT am.id AS id,
84
+ am.name AS name,
85
+ substr(am.content, 1, ?) AS content,
86
+ am.scope AS scope,
87
+ am.source AS source,
88
+ at.scheduleId AS scheduleId,
89
+ mr.similarity AS similarity,
90
+ mr.retrievedAt AS retrievedAt
91
+ FROM memory_retrieval mr
92
+ INNER JOIN agent_memory am ON am.id = mr.memoryId
93
+ LEFT JOIN agent_tasks at ON at.id = am.sourceTaskId
94
+ WHERE ${conditions.join(" AND ")}
95
+ ORDER BY mr.retrievedAt DESC
96
+ LIMIT ?
97
+ `;
98
+
99
+ return getDb()
100
+ .prepare<RetrievalListRow, (string | number)[]>(sql)
101
+ .all(RETRIEVAL_CONTENT_SNIPPET_CHARS, ...params, RETRIEVAL_LIST_LIMIT);
102
+ }
103
+
104
+ /**
105
+ * R6 spam-guard read: was the given `(taskId, memoryId)` actually surfaced
106
+ * to the agent during the task? Used by the rate endpoint to reject
107
+ * `explicit-self` ratings for memories the agent never saw.
108
+ */
109
+ export function hasRetrievalForTask(taskId: string, memoryId: string): boolean {
110
+ const row = getDb()
111
+ .prepare<{ id: string }, [string, string]>(
112
+ "SELECT id FROM memory_retrieval WHERE taskId = ? AND memoryId = ? LIMIT 1",
113
+ )
114
+ .get(taskId, memoryId);
115
+ return row != null;
116
+ }
@@ -52,6 +52,9 @@ export interface MemoryCandidate extends AgentMemory {
52
52
  accessCount: number;
53
53
  expiresAt: string | null;
54
54
  embeddingModel: string | null;
55
+ /** Beta-Binomial usefulness posterior. Default Beta(1,1) → reranker no-op. */
56
+ alpha: number;
57
+ beta: number;
55
58
  }
56
59
 
57
60
  export interface MemorySearchOptions {
@@ -0,0 +1,67 @@
1
+ -- 051_memory_posteriors_and_retrieval.sql
2
+ -- Memory rater v1.5 — brainstorm spine.
3
+ --
4
+ -- Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-1.md
5
+ -- (The plan referred to this as `049_*`; numbers 049 and 050 were taken by the
6
+ -- wait-node feature on main between plan-write and plan-implement, so this
7
+ -- migration ships at 051 — semantics are unchanged.)
8
+ --
9
+ -- Adds:
10
+ -- * agent_memory.alpha / beta — Beta-Binomial usefulness posteriors
11
+ -- (Beta(1,1) prior → reranker no-op until
12
+ -- raters move them).
13
+ -- * memory_retrieval — audit log of which memories were surfaced
14
+ -- to which task (used by ImplicitCitationRater
15
+ -- + worker rating endpoints in steps 2/3).
16
+ -- * memory_rating — append-only audit of every RatingEvent the
17
+ -- framework applied. Hot-path posteriors live
18
+ -- on agent_memory; this table preserves the
19
+ -- signal/weight/source for offline analysis.
20
+ --
21
+ -- Spam guard (R6): partial unique index on (taskId, memoryId) WHERE source =
22
+ -- 'explicit-self'. Enforces "at most one explicit-self rating per (task, memory)"
23
+ -- at the DB layer; HTTP/MCP can surface SQLITE_CONSTRAINT as 409.
24
+ --
25
+ -- FK target is agent_tasks(id) — the brainstorm referenced `tasks(id)` but the
26
+ -- actual table name is agent_tasks (see Deviation A in step-1.md).
27
+
28
+ -- 1. Beta posteriors on every memory row (default Beta(1,1) → usefulness 1.0).
29
+ ALTER TABLE agent_memory ADD COLUMN alpha REAL NOT NULL DEFAULT 1.0;
30
+ ALTER TABLE agent_memory ADD COLUMN beta REAL NOT NULL DEFAULT 1.0;
31
+
32
+ -- 2. Retrieval audit — populated by /api/memory/search when X-Source-Task-ID
33
+ -- is present (wired in step-2). Created here so step-2 can land in parallel.
34
+ CREATE TABLE IF NOT EXISTS memory_retrieval (
35
+ id TEXT PRIMARY KEY,
36
+ taskId TEXT,
37
+ agentId TEXT NOT NULL,
38
+ sessionId TEXT,
39
+ memoryId TEXT NOT NULL,
40
+ similarity REAL,
41
+ retrievedAt TEXT NOT NULL,
42
+ FOREIGN KEY (taskId) REFERENCES agent_tasks(id) ON DELETE CASCADE
43
+ );
44
+ CREATE INDEX IF NOT EXISTS idx_memret_task ON memory_retrieval(taskId);
45
+ CREATE INDEX IF NOT EXISTS idx_memret_agent ON memory_retrieval(agentId);
46
+ CREATE INDEX IF NOT EXISTS idx_memret_memory ON memory_retrieval(memoryId);
47
+
48
+ -- 3. Rating audit — every applied RatingEvent. `source` is the rater name,
49
+ -- set by applyRating (raters MUST NOT populate it themselves).
50
+ CREATE TABLE IF NOT EXISTS memory_rating (
51
+ id TEXT PRIMARY KEY,
52
+ memoryId TEXT NOT NULL,
53
+ taskId TEXT,
54
+ source TEXT NOT NULL,
55
+ signal REAL NOT NULL,
56
+ weight REAL NOT NULL,
57
+ reasoning TEXT,
58
+ createdAt TEXT NOT NULL,
59
+ FOREIGN KEY (taskId) REFERENCES agent_tasks(id) ON DELETE CASCADE
60
+ );
61
+ CREATE INDEX IF NOT EXISTS idx_memrat_memory ON memory_rating(memoryId);
62
+ CREATE INDEX IF NOT EXISTS idx_memrat_task ON memory_rating(taskId);
63
+
64
+ -- DB-owned spam guard (R6): one explicit-self per (taskId, memoryId).
65
+ CREATE UNIQUE INDEX IF NOT EXISTS idx_memory_rating_explicit_unique
66
+ ON memory_rating(taskId, memoryId)
67
+ WHERE source = 'explicit-self';
@@ -0,0 +1,36 @@
1
+ -- 052_memory_edges.sql
2
+ -- Memory rater v1.5 step-6 — `references-source` edges, lite (v1.5 wedge).
3
+ --
4
+ -- Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-6.md §1
5
+ -- (The plan referred to this as `050_*`; numbers 049–051 were taken on main
6
+ -- between plan-write and step-6 implement, so this migration ships at 052 —
7
+ -- semantics are unchanged.)
8
+ --
9
+ -- Adds:
10
+ -- * agent_memory_edge — directed edges from a memory to an external entity,
11
+ -- with their own Beta-Binomial usefulness posteriors.
12
+ -- v1.5 ships exactly one edge type — references-source.
13
+ --
14
+ -- Q2 LOCKED (per step-6.md §1) — `to_id` is a free-form TEXT column. No closed
15
+ -- enum, no parser, no migration when a new integration shows up. Convention is
16
+ -- documented as `<source>:<identifier>` (e.g. github:owner/repo#N,
17
+ -- linear:KEY-N, customer:<slug>) but enforced only at write-site (≤512 chars,
18
+ -- control-char strip, no NUL).
19
+ --
20
+ -- The `CHECK (type = 'references-source')` constraint is intentionally
21
+ -- restrictive — lifting it = a forward migration that drops + recreates the
22
+ -- constraint with the v2 enum. Edge GC + multi-type edges are reserved for v2.
23
+
24
+ CREATE TABLE IF NOT EXISTS agent_memory_edge (
25
+ from_id TEXT NOT NULL, -- memory id
26
+ to_id TEXT NOT NULL, -- free-form external entity id (Q2 contract)
27
+ type TEXT NOT NULL CHECK (type = 'references-source'), -- v1.5: ONE type only
28
+ alpha REAL NOT NULL DEFAULT 1.0,
29
+ beta REAL NOT NULL DEFAULT 1.0,
30
+ createdAt TEXT NOT NULL,
31
+ PRIMARY KEY (from_id, to_id, type),
32
+ FOREIGN KEY (from_id) REFERENCES agent_memory(id) ON DELETE CASCADE
33
+ );
34
+ CREATE INDEX IF NOT EXISTS idx_memedge_from ON agent_memory_edge(from_id);
35
+ CREATE INDEX IF NOT EXISTS idx_memedge_to ON agent_memory_edge(to_id);
36
+ CREATE INDEX IF NOT EXISTS idx_memedge_type ON agent_memory_edge(type);
@@ -0,0 +1,61 @@
1
+ -- 053_agent_waiting_for_credentials_status.sql
2
+ --
3
+ -- Phase 3 of the worker credential safe-loop plan
4
+ -- (thoughts/taras/plans/2026-05-06-worker-credential-safe-loop.md).
5
+ --
6
+ -- Extend the `agents.status` enum with `waiting_for_credentials` and add a
7
+ -- `credentialMissing` JSON column that carries the list of env-var names
8
+ -- the worker is blocked on. We extend the existing status axis rather than
9
+ -- adding a parallel column because:
10
+ -- - All four states live on the same "is this agent reachable AND
11
+ -- willing to claim work?" axis.
12
+ -- - The dispatcher's capacity predicate already filters by
13
+ -- `status === 'idle'`; the new value is implicitly excluded with no
14
+ -- code change.
15
+ -- - Avoids JOIN-or-AND-condition churn in every read site.
16
+ --
17
+ -- SQLite cannot ALTER a CHECK constraint in place, so we rebuild the table.
18
+
19
+ -- 1. Create the new table with the expanded CHECK and the new column.
20
+ CREATE TABLE agents_new (
21
+ id TEXT PRIMARY KEY,
22
+ name TEXT NOT NULL,
23
+ isLead INTEGER NOT NULL DEFAULT 0,
24
+ status TEXT NOT NULL
25
+ CHECK(status IN ('idle', 'busy', 'offline', 'waiting_for_credentials')),
26
+ description TEXT,
27
+ role TEXT,
28
+ capabilities TEXT DEFAULT '[]',
29
+ maxTasks INTEGER DEFAULT 1,
30
+ emptyPollCount INTEGER DEFAULT 0,
31
+ claudeMd TEXT,
32
+ soulMd TEXT,
33
+ identityMd TEXT,
34
+ setupScript TEXT,
35
+ toolsMd TEXT,
36
+ lastActivityAt TEXT,
37
+ createdAt TEXT NOT NULL,
38
+ lastUpdatedAt TEXT NOT NULL,
39
+ heartbeatMd TEXT DEFAULT NULL,
40
+ provider TEXT,
41
+ credentialMissing TEXT
42
+ );
43
+
44
+ -- 2. Copy existing data. Enumerate columns explicitly so the new
45
+ -- `credentialMissing` slot picks up its column default (NULL) instead of
46
+ -- being filled by a positional shift if the source order ever drifts.
47
+ INSERT INTO agents_new (
48
+ id, name, isLead, status, description, role, capabilities, maxTasks,
49
+ emptyPollCount, claudeMd, soulMd, identityMd, setupScript, toolsMd,
50
+ lastActivityAt, createdAt, lastUpdatedAt, heartbeatMd, provider
51
+ )
52
+ SELECT
53
+ id, name, isLead, status, description, role, capabilities, maxTasks,
54
+ emptyPollCount, claudeMd, soulMd, identityMd, setupScript, toolsMd,
55
+ lastActivityAt, createdAt, lastUpdatedAt, heartbeatMd, provider
56
+ FROM agents;
57
+
58
+ -- 3. Drop old table + rename. Foreign keys referencing `agents.id`
59
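+ -- survive the rename (SQLite resolves them by table name lookup). NOTE(review): with PRAGMA foreign_keys=ON, DROP TABLE performs an implicit DELETE that can fire ON DELETE actions in child tables — confirm the migration runner disables enforcement here. Any indexes or triggers defined on the old `agents` table are dropped with it and are not recreated by this migration.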
60
+ DROP TABLE agents;
61
+ ALTER TABLE agents_new RENAME TO agents;