npm - @desplega.ai/agent-swarm - Versions diffs - 1.74.4 → 1.76.0 - Mend

@desplega.ai/agent-swarm 1.74.4 → 1.76.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88) hide show

package/README.md +1 -1
package/openapi.json +1264 -46
package/package.json +2 -2
package/src/be/db.ts +563 -9
package/src/be/memory/edges-store.ts +69 -0
package/src/be/memory/providers/sqlite-store.ts +4 -0
package/src/be/memory/raters/explicit-self.ts +22 -0
package/src/be/memory/raters/implicit-citation.ts +44 -0
package/src/be/memory/raters/llm-client.ts +172 -0
package/src/be/memory/raters/llm-summarizer.ts +218 -0
package/src/be/memory/raters/llm.ts +375 -0
package/src/be/memory/raters/noop.ts +14 -0
package/src/be/memory/raters/registry.ts +86 -0
package/src/be/memory/raters/retrieval.ts +88 -0
package/src/be/memory/raters/run-server-raters.ts +97 -0
package/src/be/memory/raters/store.ts +228 -0
package/src/be/memory/raters/types.ts +101 -0
package/src/be/memory/reranker.ts +32 -2
package/src/be/memory/retrieval-store.ts +116 -0
package/src/be/memory/types.ts +3 -0
package/src/be/migrations/051_memory_posteriors_and_retrieval.sql +67 -0
package/src/be/migrations/052_memory_edges.sql +36 -0
package/src/be/migrations/053_agent_waiting_for_credentials_status.sql +61 -0
package/src/be/migrations/054_agent_harness_provider.sql +21 -0
package/src/be/migrations/055_agent_cred_status.sql +15 -0
package/src/be/migrations/056_drop_agent_tasks_source_check.sql +139 -0
package/src/be/migrations/057_inbox_item_state.sql +27 -0
package/src/be/migrations/058_task_templates.sql +31 -0
package/src/be/swarm-config-guard.ts +24 -0
package/src/commands/credential-wait.ts +186 -0
package/src/commands/provider-credentials.ts +434 -0
package/src/commands/runner.ts +253 -21
package/src/hooks/hook.ts +143 -66
package/src/http/agents.ts +191 -1
package/src/http/config.ts +11 -1
package/src/http/core.ts +5 -0
package/src/http/inbox-state.ts +89 -0
package/src/http/index.ts +10 -0
package/src/http/memory.ts +230 -1
package/src/http/sessions.ts +86 -0
package/src/http/status.ts +665 -0
package/src/http/task-templates.ts +51 -0
package/src/http/tasks.ts +85 -5
package/src/http/users.ts +134 -0
package/src/prompts/memories.ts +62 -0
package/src/providers/claude-adapter.ts +22 -0
package/src/providers/claude-managed-adapter.ts +24 -0
package/src/providers/codex-adapter.ts +43 -1
package/src/providers/devin-adapter.ts +18 -0
package/src/providers/index.ts +7 -0
package/src/providers/opencode-adapter.ts +60 -0
package/src/providers/pi-mono-adapter.ts +71 -0
package/src/providers/types.ts +34 -0
package/src/server.ts +2 -0
package/src/slack/handlers.ts +0 -1
package/src/tests/agents-harness-provider.test.ts +333 -0
package/src/tests/credential-check.test.ts +367 -0
package/src/tests/credential-status-api.test.ts +223 -0
package/src/tests/credential-status-routing.test.ts +150 -0
package/src/tests/credential-wait.test.ts +282 -0
package/src/tests/harness-provider-resolution.test.ts +242 -0
package/src/tests/jira-sync.test.ts +1 -1
package/src/tests/memory-edges.test.ts +722 -0
package/src/tests/memory-rate-endpoint.test.ts +330 -0
package/src/tests/memory-rate-tool.test.ts +252 -0
package/src/tests/memory-rater-e2e.test.ts +578 -0
package/src/tests/memory-rater-implicit-citation.test.ts +304 -0
package/src/tests/memory-rater-llm-summarizer.test.ts +317 -0
package/src/tests/memory-rater-llm.test.ts +964 -0
package/src/tests/memory-rater-store.test.ts +249 -0
package/src/tests/memory-reranker.test.ts +161 -2
package/src/tests/migration-runner-regressions.test.ts +17 -2
package/src/tests/mocks/mock-llm-rater-client.ts +35 -0
package/src/tests/run-server-raters.test.ts +291 -0
package/src/tests/sessions.test.ts +141 -0
package/src/tests/status.test.ts +843 -0
package/src/tests/stop-hook-task-resolution.test.ts +98 -0
package/src/tests/template-recommendations.test.ts +148 -0
package/src/tests/tool-annotations.test.ts +2 -2
package/src/tests/use-dismissible-card.test.ts +140 -0
package/src/tools/memory-rate.ts +166 -0
package/src/tools/memory-search.ts +18 -0
package/src/tools/store-progress.ts +37 -0
package/src/tools/swarm-config/set-config.ts +17 -1
package/src/tools/tool-config.ts +1 -0
package/src/types.ts +122 -1
package/src/utils/harness-provider.ts +32 -0
package/tsconfig.json +0 -2

package/src/be/memory/raters/store.ts ADDED Viewed

@@ -0,0 +1,228 @@
+import { ensure } from "@desplega.ai/business-use";
+import { getDb } from "@/be/db";
+import { type RatingEvent, REFERENCES_SOURCE_MAX_LENGTH, sanitizeReferencesSource } from "./types";
+/**
+ * Single chokepoint for posterior updates and audit-log writes.
+ *
+ * Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-1.md §3
+ *
+ * For every event in `events`:
+ *   - alphaDelta = max(0,  signal) * weight   (evidence the memory was useful)
+ *   - betaDelta  = max(0, -signal) * weight   (evidence the memory was misleading)
+ *   - UPDATE agent_memory SET alpha = alpha + ?, beta = beta + ? WHERE id = ?
+ *   - INSERT INTO memory_rating (...) VALUES (...)
+ *   - When `referencesSource` is present (step-6 §3): UPSERT into
+ *     agent_memory_edge with the SAME (alphaDelta, betaDelta) so the edge's
+ *     own posterior tracks evidence the same way the memory's does.
+ *
+ * The whole batch runs in a single transaction so partial failure rolls back
+ * (commutativity of the Beta update means no idempotency check is needed —
+ * duplicate batches just shift the posterior further; the partial unique index
+ * on `(taskId, memoryId) WHERE source='explicit-self'` is the spam guard).
+ *
+ * Rejection semantics — events that fail validation are RETURNED in `rejected`,
+ * not thrown. This lets HTTP/MCP layers surface partial success cleanly.
+ */
+/**
+ * Outcome of one `applyRating` call.
+ *
+ * - `applied`: how many events were written inside the transaction.
+ * - `rejected`: every event that was skipped, paired with a human-readable
+ *   reason (validation failures plus memory ids that were not found).
+ */
+export type ApplyRatingResult = {
+  applied: number;
+  rejected: Array<{ event: RatingEvent; reason: string }>;
+};
+/**
+ * Per-call context for `applyRating`. `taskId`, when given, is stored on each
+ * audit row and used as the run id for the business-use event.
+ */
+export type ApplyRatingContext = { taskId?: string };
+/**
+ * Raised by `applyRating` when writing the audit row trips a unique
+ * constraint. Carries the offending event so callers can report which rating
+ * was the duplicate.
+ */
+export class ExplicitSelfDuplicateError extends Error {
+  public readonly event: RatingEvent;
+  constructor(message: string, event: RatingEvent) {
+    super(message);
+    this.event = event;
+    this.name = "ExplicitSelfDuplicateError";
+  }
+}
+export function applyRating(
+  events: RatingEvent[],
+  ctx: ApplyRatingContext = {},
+): ApplyRatingResult {
+  if (events.length === 0) {
+    return { applied: 0, rejected: [] };
+  }
+  const db = getDb();
+  const accepted: { event: RatingEvent; sanitizedReferencesSource: string | null }[] = [];
+  const rejected: ApplyRatingResult["rejected"] = [];
+  for (const event of events) {
+    const reason = validate(event);
+    if (reason) {
+      rejected.push({ event, reason });
+      continue;
+    }
+    let sanitizedReferencesSource: string | null = null;
+    if (event.referencesSource !== undefined) {
+      if (event.referencesSource.length === 0) {
+        rejected.push({ event, reason: "referencesSource must be non-empty" });
+        continue;
+      }
+      if (event.referencesSource.length > REFERENCES_SOURCE_MAX_LENGTH) {
+        rejected.push({
+          event,
+          reason: `referencesSource exceeds ${REFERENCES_SOURCE_MAX_LENGTH} chars`,
+        });
+        continue;
+      }
+      sanitizedReferencesSource = sanitizeReferencesSource(event.referencesSource);
+      if (sanitizedReferencesSource === null) {
+        rejected.push({
+          event,
+          reason: "referencesSource contains a NUL byte or strips to empty",
+        });
+        continue;
+      }
+    }
+    accepted.push({ event, sanitizedReferencesSource });
+  }
+  if (accepted.length === 0) {
+    return { applied: 0, rejected };
+  }
+  // One transaction for the whole batch. SQLite allows a single writer at a
+  // time (WAL included), so racing applies serialize; Beta updates are commutative, so commit order does not matter.
+  const updateMemory = db.prepare(
+    "UPDATE agent_memory SET alpha = alpha + ?, beta = beta + ? WHERE id = ?",
+  );
+  const checkExists = db.prepare<{ id: string }, [string]>(
+    "SELECT id FROM agent_memory WHERE id = ?",
+  );
+  const insertRating = db.prepare(
+    `INSERT INTO memory_rating
+       (id, memoryId, taskId, source, signal, weight, reasoning, createdAt)
+     VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
+  );
+  // Step-6 §3 — UPSERT the edge with the SAME deltas as the memory row.
+  // The `- 1.0` corrections in DO UPDATE undo the default-prior offset that
+  // the INSERT arm baked into excluded.alpha/excluded.beta. Net effect: on
+  // insert, alpha/beta start at `1 + delta`; on update, the existing
+  // (alpha, beta) simply gain (delta_alpha, delta_beta).
+  const upsertEdge = db.prepare(
+    `INSERT INTO agent_memory_edge (from_id, to_id, type, alpha, beta, createdAt)
+     VALUES (?, ?, 'references-source', ?, ?, ?)
+     ON CONFLICT(from_id, to_id, type) DO UPDATE SET
+       alpha = alpha + excluded.alpha - 1.0,
+       beta  = beta  + excluded.beta  - 1.0`,
+  );
+  type AppliedEntry = { event: RatingEvent; sanitizedReferencesSource: string | null };
+  const applyTx = db.transaction(() => {
+    let applied = 0;
+    const lateRejects: ApplyRatingResult["rejected"] = [];
+    const appliedEvents: AppliedEntry[] = [];
+    for (const { event, sanitizedReferencesSource } of accepted) {
+      const exists = checkExists.get(event.memoryId);
+      if (!exists) {
+        lateRejects.push({ event, reason: "memoryId not found in agent_memory" });
+        continue;
+      }
+      const alphaDelta = Math.max(0, event.signal) * event.weight;
+      const betaDelta = Math.max(0, -event.signal) * event.weight;
+      updateMemory.run(alphaDelta, betaDelta, event.memoryId);
+      try {
+        insertRating.run(
+          crypto.randomUUID(),
+          event.memoryId,
+          ctx.taskId ?? null,
+          event.source,
+          event.signal,
+          event.weight,
+          event.reasoning ?? null,
+          new Date().toISOString(),
+        );
+      } catch (err) {
+        // Intended target: the partial unique index on (taskId, memoryId) WHERE source='explicit-self'.
+        // NOTE(review): memory_rating also has a PRIMARY KEY (id) and a taskId FOREIGN KEY; isUniqueConstraintError must not match those.
+        if (isUniqueConstraintError(err)) {
+          throw new ExplicitSelfDuplicateError(
+            `duplicate explicit-self rating for memoryId=${event.memoryId} taskId=${ctx.taskId}`,
+            event,
+          );
+        }
+        throw err;
+      }
+      if (sanitizedReferencesSource !== null) {
+        upsertEdge.run(
+          event.memoryId,
+          sanitizedReferencesSource,
+          1.0 + alphaDelta,
+          1.0 + betaDelta,
+          new Date().toISOString(),
+        );
+      }
+      appliedEvents.push({ event, sanitizedReferencesSource });
+      applied += 1;
+    }
+    return { applied, lateRejects, appliedEvents };
+  });
+  const { applied, lateRejects, appliedEvents } = applyTx();
+  // Business-use instrumentation — emit ONE `memory_rated` event in the `task`
+  // flow per applied rating. Placed OUTSIDE the transaction (per CLAUDE.md BU
+  // block), validator self-contained (references only `data`). Skipped when
+  // `ctx.taskId` is absent because the `task` flow is keyed on taskId.
+  if (ctx.taskId && appliedEvents.length > 0) {
+    for (const { event, sanitizedReferencesSource } of appliedEvents) {
+      ensure({
+        id: "memory_rated",
+        flow: "task",
+        runId: ctx.taskId,
+        data: {
+          memoryId: event.memoryId,
+          source: event.source,
+          signal: event.signal,
+          weight: event.weight,
+          hasReferencesSource: sanitizedReferencesSource !== null,
+        },
+        validator: (data) =>
+          typeof data.memoryId === "string" &&
+          data.memoryId.length > 0 &&
+          typeof data.source === "string" &&
+          data.source.length > 0 &&
+          typeof data.signal === "number" &&
+          data.signal >= -1 &&
+          data.signal <= 1 &&
+          typeof data.weight === "number" &&
+          data.weight >= 0 &&
+          data.weight <= 1,
+      });
+    }
+  }
+  return { applied, rejected: [...rejected, ...lateRejects] };
+}
+/**
+ * Check one rating event and return the first problem found, or `null` when
+ * the event is acceptable. Checks run in a fixed order: source, signal,
+ * weight, memoryId.
+ */
+function validate(event: RatingEvent): string | null {
+  const { source, signal, weight, memoryId } = event;
+  const sourceOk = Boolean(source) && source.trim() !== "";
+  if (!sourceOk) return "source is required";
+  const signalOk = Number.isFinite(signal) && signal >= -1 && signal <= 1;
+  if (!signalOk) return "signal must be in [-1, +1]";
+  const weightOk = Number.isFinite(weight) && weight >= 0 && weight <= 1;
+  if (!weightOk) return "weight must be in [0, 1]";
+  return memoryId ? null : "memoryId is required";
+}
+/**
+ * Report whether `err` is a SQLite UNIQUE-constraint violation.
+ *
+ * Only UNIQUE failures qualify. Other constraint classes (FOREIGN KEY,
+ * NOT NULL, CHECK) must return `false` so callers do not mislabel them as
+ * duplicate ratings.
+ *
+ * @param err - Anything caught from a statement run.
+ * @returns `true` for an `Error` whose `code` is `SQLITE_CONSTRAINT_UNIQUE` or
+ *   whose message names a UNIQUE failure; `false` otherwise (including
+ *   non-`Error` values).
+ */
+function isUniqueConstraintError(err: unknown): boolean {
+  if (!(err instanceof Error)) return false;
+  // Prefer the extended result code when the driver exposes one on the error.
+  const code = (err as { code?: unknown }).code;
+  if (code === "SQLITE_CONSTRAINT_UNIQUE") return true;
+  // Fall back to the message text. The bare `SQLITE_CONSTRAINT` prefix is
+  // deliberately NOT matched: it is shared by every constraint class.
+  return /UNIQUE constraint failed|SQLITE_CONSTRAINT_UNIQUE/i.test(err.message);
+}

package/src/be/memory/raters/types.ts ADDED Viewed

@@ -0,0 +1,101 @@
+/**
+ * Memory rater interface — pluggable signal source for the Beta-Binomial
+ * usefulness posteriors on agent_memory rows.
+ *
+ * Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-1.md §2
+ *
+ * Each rater returns RatingEvent[] from `rate(ctx)`. The framework
+ * (`applyRating` in ./store.ts) is the single chokepoint that:
+ *   - validates signal ∈ [-1, +1] and weight ∈ [0, 1],
+ *   - requires a non-empty `source` on every event,
+ *   - applies the Beta posterior update atomically, and
+ *   - writes the audit row to `memory_rating`.
+ *
+ * NOTE(review): the plan intends `source = rater.name` to be stamped by the
+ * framework rather than by the rater (defence against rater spoofing).
+ * `applyRating` itself does not overwrite `source`, so the stamping presumably
+ * happens in whatever calls it — confirm against the rater runner/registry.
+ */
+export interface MemoryRater {
+  readonly name: string;
+  rate(ctx: RatingContext): Promise<RatingEvent[]>;
+}
+export type RatingEvent = {
+  memoryId: string;
+  /** Raw signal in [-1, +1]. Positive = useful, negative = misleading. */
+  signal: number;
+  /** Confidence in [0, 1]. Clipped delta = max(0, ±signal) * weight. */
+  weight: number;
+  /**
+   * Rater identity. Must be non-empty by the time the event reaches
+   * `applyRating`: its validator rejects an empty or whitespace-only value.
+   * NOTE(review): the plan says the framework, not the rater, populates this
+   * — confirm which caller stamps it before `applyRating` runs.
+   */
+  source: string;
+  /** Optional human-readable reason. Surfaced by LlmRater + ExplicitSelfRater. */
+  reasoning?: string;
+  /**
+   * Optional free-form external source identifier (v1.5 wedge — step-6).
+   *
+   * Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-6.md §1-§2
+   *
+   * When present, `applyRating` UPSERTs into `agent_memory_edge` with
+   * `type='references-source'`, applying the same Beta posterior delta as
+   * the memory row's `(alpha, beta)`. Convention `<source>:<identifier>`
+   * (e.g. `github:owner/repo#N`, `linear:KEY-N`, `customer:<slug>`) is
+   * documentation-only — server does NOT validate prefixes. Validation is
+   * write-site only: non-empty, ≤512 chars, control-char strip, no NUL.
+   */
+  referencesSource?: string;
+};
+/**
+ * Maximum length for `referencesSource` strings (Q2 contract). `applyRating`
+ * compares it against the string's `.length` (UTF-16 code units, not bytes).
+ * Encoded here once so the HTTP Zod schema, the MCP tool input schema, the
+ * LlmRater Zod schema, and `sanitizeReferencesSource` can't drift.
+ */
+export const REFERENCES_SOURCE_MAX_LENGTH = 512;
+const NUL_CHAR_CODE = 0x00;
+const DEL_CHAR_CODE = 0x7f;
+const FIRST_PRINTABLE_ASCII = 0x20;
+/**
+ * Clean a `referencesSource` value for storage (Q2 free-form contract —
+ * step-6.md §2).
+ *
+ * Walks the input one UTF-16 code unit at a time:
+ * - a NUL (0x00) anywhere makes the whole input invalid → `null`;
+ * - other C0 control characters (< 0x20) and DEL (0x7f) are dropped;
+ * - everything else is kept verbatim.
+ *
+ * Returns the cleaned string, or `null` when nothing is left after stripping.
+ * Callers treat `null` as a validation failure. No maximum length is enforced
+ * here; callers apply their own limit.
+ */
+export function sanitizeReferencesSource(input: string): string | null {
+  const kept: string[] = [];
+  for (let idx = 0; idx < input.length; idx += 1) {
+    const unit = input.charCodeAt(idx);
+    if (unit === NUL_CHAR_CODE) return null;
+    const isControl = unit < FIRST_PRINTABLE_ASCII || unit === DEL_CHAR_CODE;
+    if (!isControl) kept.push(input.charAt(idx));
+  }
+  const cleaned = kept.join("");
+  return cleaned.length > 0 ? cleaned : null;
+}
+export type RatingContext = {
+  taskId?: string;
+  agentId: string;
+  sessionId?: string;
+  /** Ids of the memories that were retrieved during this task; raters score subsets of these. */
+  retrievedMemoryIds: string[];
+  /**
+   * Text the rater bases its judgement on. Server-side raters get
+   * session_logs content here; worker-side raters get the LLM summary text or
+   * the explicit user input. Null when no evidence is available (e.g.
+   * NoopRater).
+   */
+  evidence: string | null;
+};

package/src/be/memory/reranker.ts CHANGED Viewed

@@ -32,13 +32,43 @@ export function accessBoost(accessedAt: string, accessCount: number, now: Date):
 }
 /**
- * Final score combining similarity, recency decay, and access boost.
+ * Beta-Binomial usefulness factor for reranking.
+ *
+ * Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-1.md §5
+ *
+ * At Beta(1,1) (default prior) returns 1.0 exactly — strict no-op vs.
+ * pre-rater behaviour. Proven memories climb up to 2.0. Floored at the value
+ * of MEMORY_DEMOTION_FLOOR (default 1.0 = no demotion) — the default preserves
+ * brainstorm intent (memories are demoted toward the floor but never deleted
+ * on the reranker path) and is configurable per deployment.
+ */
+/**
+ * Read the demotion floor from the `MEMORY_DEMOTION_FLOOR` environment
+ * variable. Unset, empty, or non-numeric values fall back to 1.0 (no
+ * demotion).
+ */
+function readDemotionFloor(): number {
+  const raw = process.env.MEMORY_DEMOTION_FLOOR;
+  const n = raw == null || raw === "" ? 1.0 : Number(raw);
+  return Number.isFinite(n) ? n : 1.0;
+}
+/**
+ * Usefulness multiplier derived from a Beta(alpha, beta) posterior.
+ *
+ * Computes `2 * mean` where `mean = alpha / (alpha + beta)`, capped at 2.0
+ * and floored at the configured demotion floor. Beta(1,1) yields exactly 1.0.
+ *
+ * @param alpha - Accumulated positive evidence (prior included).
+ * @param beta - Accumulated negative evidence (prior included).
+ * @returns The multiplier. Returns the neutral 1.0 when the posterior is
+ *   unusable: `alpha + beta` is non-positive or not finite (NaN, or a
+ *   candidate row that carries no posterior columns).
+ */
+export function usefulness(alpha: number, beta: number): number {
+  const denom = alpha + beta;
+  // A NaN/undefined posterior would otherwise propagate NaN into the final
+  // score (NaN passes the `<= 0` test); treat it as "no evidence" instead.
+  if (!Number.isFinite(denom) || denom <= 0) return 1.0;
+  const mean = alpha / denom;
+  return Math.max(readDemotionFloor(), Math.min(2.0, 2 * mean));
+}
+/**
+ * Final score combining similarity, recency decay, access boost, and
+ * Beta-Binomial usefulness. With default Beta(1,1) and default
+ * MEMORY_DEMOTION_FLOOR=1.0, the usefulness factor is exactly 1.0 and this
+ * computation matches the pre-rater behaviour byte-for-byte.
+ *
+ * v2: optional edge-aware boost — see thoughts/taras/plans/2026-05-05-memory-rater-v1.5/root.md
  */
 export function computeScore(candidate: MemoryCandidate, now: Date): number {
   return (
     candidate.similarity *
     recencyDecay(candidate.createdAt, now) *
-    accessBoost(candidate.accessedAt, candidate.accessCount, now)
+    accessBoost(candidate.accessedAt, candidate.accessCount, now) *
+    usefulness(candidate.alpha, candidate.beta)
   );
 }

package/src/be/memory/retrieval-store.ts ADDED Viewed

@@ -0,0 +1,116 @@
+/**
+ * Read-side query helpers for the `memory_retrieval` audit log.
+ *
+ * Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-3.md
+ *
+ * Step-2 owns the write path (`searchMemory` populates `memory_retrieval`
+ * when `X-Source-Task-ID` is set). Step-3 surfaces these reads to:
+ *   - GET /api/memory/retrievals — worker raters list memories surfaced for
+ *     a task/session so they can score them.
+ *   - POST /api/memory/rate — R6 spam-guard checks that an `explicit-self`
+ *     rating targets a memory that was actually retrieved for the task.
+ *
+ * Server-side only. Route handlers should call these functions instead of
+ * preparing SQL directly so the query is reusable and typed in one place.
+ */
+import { getDb } from "@/be/db";
+/** Max chars of `agent_memory.content` returned in retrieval listings (bound into the query's `substr`). */
+const RETRIEVAL_CONTENT_SNIPPET_CHARS = 500;
+/** Max retrievals returned per request (bound into the query's `LIMIT`) — matches the typical session set. */
+const RETRIEVAL_LIST_LIMIT = 50;
+export type RetrievalListRow = {
+  /** `agent_memory.id` — the memory that was retrieved. */
+  id: string;
+  name: string;
+  /** Up to RETRIEVAL_CONTENT_SNIPPET_CHARS chars of `agent_memory.content`. */
+  content: string;
+  scope: string;
+  /**
+   * `agent_memory.source` — `'task_completion' | 'session_summary' | 'manual'
+   * | 'file_index'`. Surfaced so the worker rater can scope dedup to the
+   * memory class that exhibits scheduled-task self-similarity.
+   */
+  source: string;
+  /**
+   * `agent_tasks.scheduleId` for the source task that wrote this memory, or
+   * `null` if the memory has no source task or the task wasn't a scheduled
+   * run. Worker raters use this as a precise cron-clone discriminator —
+   * memories sharing a non-null `scheduleId` are by definition from the same
+   * scheduled job and safe to dedupe.
+   */
+  scheduleId: string | null;
+  similarity: number | null;
+  retrievedAt: string;
+};
+export type RetrievalListFilter = {
+  taskId?: string;
+  sessionId?: string;
+};
+/**
+ * Fetch the memories that were surfaced to `agentId`, optionally narrowed to
+ * one task and/or one session.
+ *
+ * The agent restriction is a `WHERE mr.agentId = ?` predicate, kept as
+ * defence-in-depth so rows belonging to other agents never leak.
+ *
+ * Returns at most {@link RETRIEVAL_LIST_LIMIT} rows, newest-first by
+ * `retrievedAt`, with `content` truncated to
+ * {@link RETRIEVAL_CONTENT_SNIPPET_CHARS} characters. Callers are expected to
+ * supply at least one of `taskId` / `sessionId`; the route's Zod schema
+ * enforces that and this function does not re-check it.
+ */
+export function getRetrievalsForAgent(
+  agentId: string,
+  filter: RetrievalListFilter,
+): RetrievalListRow[] {
+  const whereParts: string[] = ["mr.agentId = ?"];
+  const whereArgs: (string | number)[] = [agentId];
+  const optionalFilters: [string, string | undefined][] = [
+    ["mr.taskId = ?", filter.taskId],
+    ["mr.sessionId = ?", filter.sessionId],
+  ];
+  for (const [clause, value] of optionalFilters) {
+    if (!value) continue;
+    whereParts.push(clause);
+    whereArgs.push(value);
+  }
+  // agent_tasks is LEFT-joined purely to expose `scheduleId`; memories that
+  // have no source task (manual / file_index) therefore stay in the result
+  // with a null `scheduleId`.
+  const sql = `
+    SELECT am.id        AS id,
+           am.name      AS name,
+           substr(am.content, 1, ?) AS content,
+           am.scope     AS scope,
+           am.source    AS source,
+           at.scheduleId AS scheduleId,
+           mr.similarity AS similarity,
+           mr.retrievedAt AS retrievedAt
+      FROM memory_retrieval mr
+      INNER JOIN agent_memory am ON am.id = mr.memoryId
+      LEFT JOIN  agent_tasks at  ON at.id = am.sourceTaskId
+     WHERE ${whereParts.join(" AND ")}
+     ORDER BY mr.retrievedAt DESC
+     LIMIT ?
+  `;
+  // Bind order follows placeholder order: snippet length, filters, row limit.
+  const statement = getDb().prepare<RetrievalListRow, (string | number)[]>(sql);
+  return statement.all(RETRIEVAL_CONTENT_SNIPPET_CHARS, ...whereArgs, RETRIEVAL_LIST_LIMIT);
+}
+/**
+ * R6 spam-guard read. Answers whether `memory_retrieval` holds at least one
+ * row for the given `(taskId, memoryId)` pair. The rate endpoint uses it to
+ * refuse `explicit-self` ratings for memories the agent never saw.
+ */
+export function hasRetrievalForTask(taskId: string, memoryId: string): boolean {
+  const lookup = getDb().prepare<{ id: string }, [string, string]>(
+    "SELECT id FROM memory_retrieval WHERE taskId = ? AND memoryId = ? LIMIT 1",
+  );
+  const hit = lookup.get(taskId, memoryId);
+  return hit !== null && hit !== undefined;
+}

package/src/be/memory/types.ts CHANGED Viewed

@@ -52,6 +52,9 @@ export interface MemoryCandidate extends AgentMemory {
   accessCount: number;
   expiresAt: string | null;
   embeddingModel: string | null;
+  /** Beta-Binomial usefulness posterior. Default Beta(1,1) → reranker no-op. */
+  alpha: number;
+  beta: number;
 }
 export interface MemorySearchOptions {

package/src/be/migrations/051_memory_posteriors_and_retrieval.sql ADDED Viewed

@@ -0,0 +1,67 @@
+-- 051_memory_posteriors_and_retrieval.sql
+-- Memory rater v1.5 — brainstorm spine.
+--
+-- Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-1.md
+-- (The plan referred to this as `049_*`; numbers 049 and 050 were taken by the
+-- wait-node feature on main between plan-write and plan-implement, so this
+-- migration ships at 051 — semantics are unchanged.)
+--
+-- Adds:
+--   * agent_memory.alpha / beta  — Beta-Binomial usefulness posteriors
+--                                  (Beta(1,1) prior → reranker no-op until
+--                                  raters move them).
+--   * memory_retrieval           — audit log of which memories were surfaced
+--                                  to which task (used by ImplicitCitationRater
+--                                  + worker rating endpoints in steps 2/3).
+--   * memory_rating              — append-only audit of every RatingEvent the
+--                                  framework applied. Hot-path posteriors live
+--                                  on agent_memory; this table preserves the
+--                                  signal/weight/source for offline analysis.
+--
+-- Spam guard (R6): partial unique index on (taskId, memoryId) WHERE source =
+-- 'explicit-self'. Enforces "at most one explicit-self rating per (task, memory)"
+-- at the DB layer; HTTP/MCP can surface SQLITE_CONSTRAINT as 409.
+--
+-- FK target is agent_tasks(id) — the brainstorm referenced `tasks(id)` but the
+-- actual table name is agent_tasks (see Deviation A in step-1.md).
+-- 1. Beta posteriors on every memory row (default Beta(1,1) → usefulness 1.0).
+ALTER TABLE agent_memory ADD COLUMN alpha REAL NOT NULL DEFAULT 1.0;
+ALTER TABLE agent_memory ADD COLUMN beta  REAL NOT NULL DEFAULT 1.0;
+-- 2. Retrieval audit — populated by /api/memory/search when X-Source-Task-ID
+--    is present (wired in step-2). Created here so step-2 can land in parallel.
+CREATE TABLE IF NOT EXISTS memory_retrieval (
+  id          TEXT PRIMARY KEY,
+  taskId      TEXT,
+  agentId     TEXT NOT NULL,
+  sessionId   TEXT,
+  memoryId    TEXT NOT NULL,
+  similarity  REAL,
+  retrievedAt TEXT NOT NULL,
+  FOREIGN KEY (taskId) REFERENCES agent_tasks(id) ON DELETE CASCADE
+);
+CREATE INDEX IF NOT EXISTS idx_memret_task   ON memory_retrieval(taskId);
+CREATE INDEX IF NOT EXISTS idx_memret_agent  ON memory_retrieval(agentId);
+CREATE INDEX IF NOT EXISTS idx_memret_memory ON memory_retrieval(memoryId);
+-- 3. Rating audit — every applied RatingEvent. `source` is the rater name;
+--    applyRating inserts the event's `source` value into this column as-is.
+CREATE TABLE IF NOT EXISTS memory_rating (
+  id        TEXT PRIMARY KEY,
+  memoryId  TEXT NOT NULL,
+  taskId    TEXT,
+  source    TEXT NOT NULL,
+  signal    REAL NOT NULL,
+  weight    REAL NOT NULL,
+  reasoning TEXT,
+  createdAt TEXT NOT NULL,
+  FOREIGN KEY (taskId) REFERENCES agent_tasks(id) ON DELETE CASCADE
+);
+CREATE INDEX IF NOT EXISTS idx_memrat_memory ON memory_rating(memoryId);
+CREATE INDEX IF NOT EXISTS idx_memrat_task   ON memory_rating(taskId);
+-- DB-owned spam guard (R6): one explicit-self per (taskId, memoryId). SQLite treats NULLs as distinct in UNIQUE indexes, so rows with a NULL taskId are not deduplicated.
+CREATE UNIQUE INDEX IF NOT EXISTS idx_memory_rating_explicit_unique
+  ON memory_rating(taskId, memoryId)
+  WHERE source = 'explicit-self';

package/src/be/migrations/052_memory_edges.sql ADDED Viewed

@@ -0,0 +1,36 @@
+-- 052_memory_edges.sql
+-- Memory rater v1.5 step-6 — `references-source` edges, lite (v1.5 wedge).
+--
+-- Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-6.md §1
+-- (The plan referred to this as `050_*`; numbers 049–051 were taken on main
+-- between plan-write and step-6 implement, so this migration ships at 052 —
+-- semantics are unchanged.)
+--
+-- Adds:
+--   * agent_memory_edge — directed edges from a memory to an external entity,
+--                         with their own Beta-Binomial usefulness posteriors.
+--                         v1.5 ships exactly one edge type — references-source.
+--
+-- Q2 LOCKED (per step-6.md §1) — `to_id` is a free-form TEXT column. No closed
+-- enum, no parser, no migration when a new integration shows up. Convention is
+-- documented as `<source>:<identifier>` (e.g. github:owner/repo#N,
+-- linear:KEY-N, customer:<slug>) but enforced only at write-site (≤512 chars,
+-- control-char strip, no NUL).
+--
+-- The `CHECK (type = 'references-source')` constraint is intentionally
+-- restrictive — lifting it = a forward migration that drops + recreates the
+-- constraint with the v2 enum. Edge GC + multi-type edges are reserved for v2. NOTE(review): the PRIMARY KEY already leads with from_id, so idx_memedge_from looks redundant, and idx_memedge_type can hold only one value while the CHECK stands — confirm before v2.
+CREATE TABLE IF NOT EXISTS agent_memory_edge (
+  from_id   TEXT NOT NULL,                                                -- memory id
+  to_id     TEXT NOT NULL,                                                -- free-form external entity id (Q2 contract)
+  type      TEXT NOT NULL CHECK (type = 'references-source'),             -- v1.5: ONE type only
+  alpha     REAL NOT NULL DEFAULT 1.0,
+  beta      REAL NOT NULL DEFAULT 1.0,
+  createdAt TEXT NOT NULL,
+  PRIMARY KEY (from_id, to_id, type),
+  FOREIGN KEY (from_id) REFERENCES agent_memory(id) ON DELETE CASCADE
+);
+CREATE INDEX IF NOT EXISTS idx_memedge_from ON agent_memory_edge(from_id);
+CREATE INDEX IF NOT EXISTS idx_memedge_to   ON agent_memory_edge(to_id);
+CREATE INDEX IF NOT EXISTS idx_memedge_type ON agent_memory_edge(type);

package/src/be/migrations/053_agent_waiting_for_credentials_status.sql ADDED Viewed

@@ -0,0 +1,61 @@
+-- 053_agent_waiting_for_credentials_status.sql
+--
+-- Phase 3 of the worker credential safe-loop plan
+-- (thoughts/taras/plans/2026-05-06-worker-credential-safe-loop.md).
+--
+-- Extend the `agents.status` enum with `waiting_for_credentials` and add a
+-- `credentialMissing` JSON column that carries the list of env-var names
+-- the worker is blocked on. We extend the existing status axis rather than
+-- adding a parallel column because:
+--   - All four states live on the same "is this agent reachable AND
+--     willing to claim work?" axis.
+--   - The dispatcher's capacity predicate already filters by
+--     `status === 'idle'`; the new value is implicitly excluded with no
+--     code change.
+--   - Avoids JOIN-or-AND-condition churn in every read site.
+--
+-- SQLite cannot ALTER a CHECK constraint in place, so we rebuild the table.
+-- 1. Create the new table with the expanded CHECK and the new column.
+CREATE TABLE agents_new (
+    id TEXT PRIMARY KEY,
+    name TEXT NOT NULL,
+    isLead INTEGER NOT NULL DEFAULT 0,
+    status TEXT NOT NULL
+      CHECK(status IN ('idle', 'busy', 'offline', 'waiting_for_credentials')),
+    description TEXT,
+    role TEXT,
+    capabilities TEXT DEFAULT '[]',
+    maxTasks INTEGER DEFAULT 1,
+    emptyPollCount INTEGER DEFAULT 0,
+    claudeMd TEXT,
+    soulMd TEXT,
+    identityMd TEXT,
+    setupScript TEXT,
+    toolsMd TEXT,
+    lastActivityAt TEXT,
+    createdAt TEXT NOT NULL,
+    lastUpdatedAt TEXT NOT NULL,
+    heartbeatMd TEXT DEFAULT NULL,
+    provider TEXT,
+    credentialMissing TEXT
+);
+-- 2. Copy existing data. Enumerate columns explicitly so the new
+--    `credentialMissing` slot picks up its column default (NULL) instead of
+--    being filled by a positional shift if the source order ever drifts.
+INSERT INTO agents_new (
+    id, name, isLead, status, description, role, capabilities, maxTasks,
+    emptyPollCount, claudeMd, soulMd, identityMd, setupScript, toolsMd,
+    lastActivityAt, createdAt, lastUpdatedAt, heartbeatMd, provider
+)
+SELECT
+    id, name, isLead, status, description, role, capabilities, maxTasks,
+    emptyPollCount, claudeMd, soulMd, identityMd, setupScript, toolsMd,
+    lastActivityAt, createdAt, lastUpdatedAt, heartbeatMd, provider
+FROM agents;
+-- 3. Drop old table + rename. Child tables' foreign keys name `agents` textually, so they bind to the renamed table afterwards.
+--    NOTE(review): with PRAGMA foreign_keys=ON, DROP TABLE performs an implicit DELETE (ON DELETE actions fire), and indexes/triggers on the old `agents` are dropped with it — confirm the runner disables FKs here and that nothing needs recreating.
+DROP TABLE agents;
+ALTER TABLE agents_new RENAME TO agents;