@desplega.ai/agent-swarm 1.74.4 → 1.76.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. package/README.md +1 -1
  2. package/openapi.json +1264 -46
  3. package/package.json +2 -2
  4. package/src/be/db.ts +563 -9
  5. package/src/be/memory/edges-store.ts +69 -0
  6. package/src/be/memory/providers/sqlite-store.ts +4 -0
  7. package/src/be/memory/raters/explicit-self.ts +22 -0
  8. package/src/be/memory/raters/implicit-citation.ts +44 -0
  9. package/src/be/memory/raters/llm-client.ts +172 -0
  10. package/src/be/memory/raters/llm-summarizer.ts +218 -0
  11. package/src/be/memory/raters/llm.ts +375 -0
  12. package/src/be/memory/raters/noop.ts +14 -0
  13. package/src/be/memory/raters/registry.ts +86 -0
  14. package/src/be/memory/raters/retrieval.ts +88 -0
  15. package/src/be/memory/raters/run-server-raters.ts +97 -0
  16. package/src/be/memory/raters/store.ts +228 -0
  17. package/src/be/memory/raters/types.ts +101 -0
  18. package/src/be/memory/reranker.ts +32 -2
  19. package/src/be/memory/retrieval-store.ts +116 -0
  20. package/src/be/memory/types.ts +3 -0
  21. package/src/be/migrations/051_memory_posteriors_and_retrieval.sql +67 -0
  22. package/src/be/migrations/052_memory_edges.sql +36 -0
  23. package/src/be/migrations/053_agent_waiting_for_credentials_status.sql +61 -0
  24. package/src/be/migrations/054_agent_harness_provider.sql +21 -0
  25. package/src/be/migrations/055_agent_cred_status.sql +15 -0
  26. package/src/be/migrations/056_drop_agent_tasks_source_check.sql +139 -0
  27. package/src/be/migrations/057_inbox_item_state.sql +27 -0
  28. package/src/be/migrations/058_task_templates.sql +31 -0
  29. package/src/be/swarm-config-guard.ts +24 -0
  30. package/src/commands/credential-wait.ts +186 -0
  31. package/src/commands/provider-credentials.ts +434 -0
  32. package/src/commands/runner.ts +253 -21
  33. package/src/hooks/hook.ts +143 -66
  34. package/src/http/agents.ts +191 -1
  35. package/src/http/config.ts +11 -1
  36. package/src/http/core.ts +5 -0
  37. package/src/http/inbox-state.ts +89 -0
  38. package/src/http/index.ts +10 -0
  39. package/src/http/memory.ts +230 -1
  40. package/src/http/sessions.ts +86 -0
  41. package/src/http/status.ts +665 -0
  42. package/src/http/task-templates.ts +51 -0
  43. package/src/http/tasks.ts +85 -5
  44. package/src/http/users.ts +134 -0
  45. package/src/prompts/memories.ts +62 -0
  46. package/src/providers/claude-adapter.ts +22 -0
  47. package/src/providers/claude-managed-adapter.ts +24 -0
  48. package/src/providers/codex-adapter.ts +43 -1
  49. package/src/providers/devin-adapter.ts +18 -0
  50. package/src/providers/index.ts +7 -0
  51. package/src/providers/opencode-adapter.ts +60 -0
  52. package/src/providers/pi-mono-adapter.ts +71 -0
  53. package/src/providers/types.ts +34 -0
  54. package/src/server.ts +2 -0
  55. package/src/slack/handlers.ts +0 -1
  56. package/src/tests/agents-harness-provider.test.ts +333 -0
  57. package/src/tests/credential-check.test.ts +367 -0
  58. package/src/tests/credential-status-api.test.ts +223 -0
  59. package/src/tests/credential-status-routing.test.ts +150 -0
  60. package/src/tests/credential-wait.test.ts +282 -0
  61. package/src/tests/harness-provider-resolution.test.ts +242 -0
  62. package/src/tests/jira-sync.test.ts +1 -1
  63. package/src/tests/memory-edges.test.ts +722 -0
  64. package/src/tests/memory-rate-endpoint.test.ts +330 -0
  65. package/src/tests/memory-rate-tool.test.ts +252 -0
  66. package/src/tests/memory-rater-e2e.test.ts +578 -0
  67. package/src/tests/memory-rater-implicit-citation.test.ts +304 -0
  68. package/src/tests/memory-rater-llm-summarizer.test.ts +317 -0
  69. package/src/tests/memory-rater-llm.test.ts +964 -0
  70. package/src/tests/memory-rater-store.test.ts +249 -0
  71. package/src/tests/memory-reranker.test.ts +161 -2
  72. package/src/tests/migration-runner-regressions.test.ts +17 -2
  73. package/src/tests/mocks/mock-llm-rater-client.ts +35 -0
  74. package/src/tests/run-server-raters.test.ts +291 -0
  75. package/src/tests/sessions.test.ts +141 -0
  76. package/src/tests/status.test.ts +843 -0
  77. package/src/tests/stop-hook-task-resolution.test.ts +98 -0
  78. package/src/tests/template-recommendations.test.ts +148 -0
  79. package/src/tests/tool-annotations.test.ts +2 -2
  80. package/src/tests/use-dismissible-card.test.ts +140 -0
  81. package/src/tools/memory-rate.ts +166 -0
  82. package/src/tools/memory-search.ts +18 -0
  83. package/src/tools/store-progress.ts +37 -0
  84. package/src/tools/swarm-config/set-config.ts +17 -1
  85. package/src/tools/tool-config.ts +1 -0
  86. package/src/types.ts +122 -1
  87. package/src/utils/harness-provider.ts +32 -0
  88. package/tsconfig.json +0 -2
@@ -0,0 +1,69 @@
1
+ /**
2
+ * Read-side query helpers for the `agent_memory_edge` table.
3
+ *
4
+ * Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-6.md §7
5
+ *
6
+ * The write path lives in `src/be/memory/raters/store.ts` (`applyRating`
7
+ * UPSERTs the edge atomically with the memory's posterior update). This
8
+ * module surfaces reads to the GET `/api/memory/edges` endpoint that powers
9
+ * the homepage demo ("this memory references PR #377").
10
+ *
11
+ * Server-side only.
12
+ */
13
+ import { getDb } from "@/be/db";
14
+
15
+ const USEFULNESS_FLOOR = 1.0;
16
+ const USEFULNESS_CEILING = 2.0;
17
+
18
+ export type MemoryEdgeRow = {
19
+ to: string;
20
+ type: "references-source";
21
+ alpha: number;
22
+ beta: number;
23
+ /** clamp(2 * α/(α+β), 1.0, 2.0) — same formula as the memory reranker. */
24
+ usefulness: number;
25
+ createdAt: string;
26
+ };
27
+
28
/**
 * Return the `references-source` edges that originate from `memoryId`,
 * newest first, provided the memory is visible to `agentId`.
 *
 * Visibility is re-checked here as defence-in-depth: the `agent_memory` row
 * must be swarm-scoped or owned by the requesting agent. An unknown memory
 * and a memory the agent may not see both yield `[]`, which is
 * indistinguishable from a memory that simply has no edges.
 *
 * @param agentId  Agent on whose behalf the edges are being read.
 * @param memoryId Id of the memory whose outgoing edges are listed.
 * @returns One entry per edge, with `usefulness` derived from `alpha`/`beta`.
 */
export function listEdgesForAgent(agentId: string, memoryId: string): MemoryEdgeRow[] {
  const db = getDb();

  const owner = db
    .prepare<{ scope: string; agentId: string | null }, [string]>(
      "SELECT scope, agentId FROM agent_memory WHERE id = ?",
    )
    .get(memoryId);
  const visible = !!owner && (owner.scope === "swarm" || owner.agentId === agentId);
  if (!visible) return [];

  const edgeRows = db
    .prepare<{ to_id: string; alpha: number; beta: number; createdAt: string }, [string]>(
      `SELECT to_id, alpha, beta, createdAt
         FROM agent_memory_edge
         WHERE from_id = ? AND type = 'references-source'
         ORDER BY createdAt DESC`,
    )
    .all(memoryId);

  const edges: MemoryEdgeRow[] = [];
  for (const { to_id, alpha, beta, createdAt } of edgeRows) {
    edges.push({
      to: to_id,
      type: "references-source",
      alpha,
      beta,
      usefulness: clampUsefulness(alpha, beta),
      createdAt,
    });
  }
  return edges;
}
63
+
64
/**
 * Map Beta-posterior parameters to a usefulness multiplier.
 *
 * Computes `2 * alpha / (alpha + beta)` and clamps it to
 * `[USEFULNESS_FLOOR, USEFULNESS_CEILING]`. A non-positive `alpha + beta`
 * short-circuits to the floor so the division is never attempted.
 */
function clampUsefulness(alpha: number, beta: number): number {
  const total = alpha + beta;
  if (total <= 0) return USEFULNESS_FLOOR;

  const scaled = 2 * (alpha / total);
  if (scaled < USEFULNESS_FLOOR) return USEFULNESS_FLOOR;
  if (scaled > USEFULNESS_CEILING) return USEFULNESS_CEILING;
  return scaled;
}
@@ -30,6 +30,8 @@ type AgentMemoryRow = {
30
30
  expiresAt: string | null;
31
31
  accessCount: number;
32
32
  embeddingModel: string | null;
33
+ alpha: number;
34
+ beta: number;
33
35
  };
34
36
 
35
37
  function rowToAgentMemory(row: AgentMemoryRow): AgentMemory {
@@ -61,6 +63,8 @@ function rowToCandidate(row: AgentMemoryRow, similarity: number): MemoryCandidat
61
63
  accessCount: row.accessCount ?? 0,
62
64
  expiresAt: row.expiresAt ?? null,
63
65
  embeddingModel: row.embeddingModel ?? null,
66
+ alpha: row.alpha ?? 1.0,
67
+ beta: row.beta ?? 1.0,
64
68
  };
65
69
  }
66
70
 
@@ -0,0 +1,22 @@
1
+ import type { MemoryRater, RatingEvent } from "./types";
2
+
3
/**
 * Explicit-self rater: a registry sentinel that never produces events itself.
 *
 * Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-5.md §3
 *
 * Per the plan, ratings with `source: "explicit-self"` arrive only through
 * the worker-side `memory_rate` MCP tool, which POSTs to `/api/memory/rate`;
 * they are never auto-fired from `applyRating`.
 *
 * The class exists so that `MEMORY_RATERS=explicit-self` has a name to
 * register. Per step-5.md §5, registering it unlocks the conditional
 * system-prompt hint that teaches the agent to call `memory_rate`. It is
 * kept out of `SERVER_RATERS` so the store-progress hook never invokes it.
 */
export class ExplicitSelfRatingRater implements MemoryRater {
  readonly name = "explicit-self";

  /** Always resolves to an empty list — this rater emits nothing on its own. */
  rate(): Promise<RatingEvent[]> {
    return Promise.resolve([]);
  }
}
@@ -0,0 +1,44 @@
1
+ import type { MemoryRater, RatingContext, RatingEvent } from "./types";
2
+
3
/**
 * Implicit-citation rater: scores each retrieved memory by whether its id
 * appears verbatim in the task evidence.
 *
 * Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-2.md §4
 *
 * For every id in `ctx.retrievedMemoryIds`:
 *   - id found in `ctx.evidence`  → signal +1, weight 0.5 (positive citation).
 *   - id not found (or no evidence) → signal -1, weight 0.25 (surfaced but
 *     not cited; the miss deliberately carries less weight than a hit, per
 *     research §3.A and brainstorm Q4).
 *
 * `source` is left as the empty string on purpose: the framework
 * (`applyRating` in ./store.ts) fills it in from the rater's `name` and
 * rejects rater-supplied sources to guard against spoofing.
 *
 * Matching is a literal substring test (`String.prototype.includes`), so an
 * id that is a prefix of another (`mem-A` vs `mem-AB`) is counted as cited
 * whenever the longer id is. Production ids are UUIDs, so this does not
 * arise in practice.
 *
 * Pure and deterministic; performs no DB I/O.
 */
export class ImplicitCitationRater implements MemoryRater {
  readonly name = "implicit-citation";

  /**
   * @param ctx Rating context; only `retrievedMemoryIds` and `evidence` are read.
   * @returns One event per retrieved memory id, in input order.
   */
  async rate(ctx: RatingContext): Promise<RatingEvent[]> {
    const haystack = ctx.evidence ?? "";

    return ctx.retrievedMemoryIds.map((memoryId): RatingEvent => {
      const cited = haystack.length > 0 && haystack.includes(memoryId);
      return cited
        ? { memoryId, signal: 1, weight: 0.5, source: "" }
        : { memoryId, signal: -1, weight: 0.25, source: "" };
    });
  }
}
@@ -0,0 +1,172 @@
1
+ /**
2
+ * `LlmRaterClient` — pluggable LLM driver used by `LlmRater` to score the
3
+ * usefulness of a single retrieved memory against a (query, response) pair.
4
+ *
5
+ * Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-4.md §1
6
+ *
7
+ * This module is imported from worker-side `src/hooks/hook.ts` (the session-
8
+ * summary piggyback path), so it MUST NOT touch `bun:sqlite` or `src/be/db`.
9
+ * The DB-boundary check in `scripts/check-db-boundary.sh` enforces this.
10
+ *
11
+ * Default implementation shells out to the same `claude -p` CLI the hook
12
+ * already uses for session summarization — zero new SDK dependencies.
13
+ */
14
+
15
+ export type LlmRaterInput = {
16
+ /** What the agent asked the memory system for. */
17
+ query: string;
18
+ /** The memory we're scoring. */
19
+ memory: {
20
+ id: string;
21
+ name: string;
22
+ content: string;
23
+ };
24
+ /** The agent's eventual response (or session summary) — the "did this help?" signal. */
25
+ response: string;
26
+ };
27
+
28
+ export type LlmRaterResult = {
29
+ /** Usefulness score in [0, 1]. 0 = misleading, 1 = highly useful. */
30
+ score: number;
31
+ /** Short human-readable explanation. */
32
+ reasoning: string;
33
+ };
34
+
35
+ export interface LlmRaterClient {
36
+ /**
37
+ * Score one memory. Returns null on parse failure / non-JSON output / timeout
38
+ * — the caller (`LlmRater`) treats `null` as "skip this rating", no posterior
39
+ * change. Implementations MUST NOT throw on transport errors; swallow + log
40
+ * + return null so the worker hook can never crash on rater failure.
41
+ */
42
+ rate(input: LlmRaterInput): Promise<LlmRaterResult | null>;
43
+ }
44
+
45
+ /**
46
+ * Configuration for the Claude-CLI implementation.
47
+ */
48
+ export type ClaudeCliLlmRaterClientOptions = {
49
+ /** Override the model. Defaults to `MEMORY_LLM_RATER_MODEL` env var or "haiku". */
50
+ model?: string;
51
+ /** Soft timeout (ms) for the `claude -p` shell-out. Default 30s. */
52
+ timeoutMs?: number;
53
+ };
54
+
55
+ const DEFAULT_TIMEOUT_MS = 30000;
56
+
57
+ const PROMPT_TEMPLATE = `You are scoring the usefulness of one retrieved memory.
58
+
59
+ Return ONLY a JSON object with these fields (no prose, no markdown):
60
+ {
61
+ "score": number, // 0 = misleading/unhelpful, 1 = highly useful
62
+ "reasoning": string // 1..500 chars, why
63
+ }
64
+
65
+ QUERY:
66
+ \${query}
67
+
68
+ MEMORY:
69
+ id: \${memoryId}
70
+ name: \${memoryName}
71
+ content: \${memoryContent}
72
+
73
+ AGENT RESPONSE / SUMMARY:
74
+ \${response}
75
+
76
+ Score 0..1.`;
77
+
78
+ /**
79
+ * `claude -p --output-format json` returns a JSON envelope of the shape
80
+ * `{ result: string, ... }`. We parse the envelope, then JSON-parse the
81
+ * inner `result` to recover the score+reasoning object.
82
+ */
83
+ type ClaudeCliEnvelope = { result?: unknown };
84
+
85
/**
 * Fill the rater prompt template with the query, memory and response text.
 *
 * Substitution is done in a single pass with a replacer function, for two
 * reasons:
 *   - A string passed as the replacement to `String.prototype.replace` has
 *     `$&`, `$'`, `` $` `` and `$$` interpreted as special patterns. Memory
 *     content or agent output containing those sequences would be silently
 *     rewritten. A replacer function's return value is inserted verbatim.
 *   - Substituted text is never re-scanned, so a value that happens to
 *     contain a placeholder literal (e.g. a query mentioning `${memoryId}`)
 *     cannot capture a later substitution.
 *
 * Unknown `${...}` placeholders are left untouched.
 *
 * @param input    Query, memory and response to embed in the prompt.
 * @param template Template containing `${query}`, `${memoryId}`,
 *                 `${memoryName}`, `${memoryContent}` and `${response}`.
 *                 Defaults to the module's `PROMPT_TEMPLATE`.
 * @returns The fully substituted prompt text.
 */
function buildPrompt(input: LlmRaterInput, template: string = PROMPT_TEMPLATE): string {
  const values = new Map<string, string>([
    ["query", input.query],
    ["memoryId", input.memory.id],
    ["memoryName", input.memory.name],
    ["memoryContent", input.memory.content],
    ["response", input.response],
  ]);

  return template.replace(/\$\{(\w+)\}/g, (placeholder: string, key: string) => {
    const value = values.get(key);
    return value === undefined ? placeholder : value;
  });
}
92
+
93
/**
 * Decode the inner `result` string of the CLI envelope into a validated
 * `{ score, reasoning }` pair.
 *
 * Returns `null` — never throws — when:
 *   - `raw` is not a string, or is not valid JSON after trimming;
 *   - the decoded value is not a non-null object;
 *   - `score` is not a finite number within `[0, 1]`;
 *   - `reasoning` is not a string of 1..500 characters.
 *
 * Any additional fields on the decoded object are dropped.
 */
function parseScoreAndReasoning(raw: unknown): LlmRaterResult | null {
  if (typeof raw !== "string") return null;

  let decoded: unknown;
  try {
    decoded = JSON.parse(raw.trim());
  } catch {
    return null;
  }
  if (decoded === null || typeof decoded !== "object") return null;

  const { score, reasoning } = decoded as { score?: unknown; reasoning?: unknown };
  if (typeof score !== "number" || !Number.isFinite(score) || score < 0 || score > 1) {
    return null;
  }
  if (typeof reasoning !== "string" || reasoning.length === 0 || reasoning.length > 500) {
    return null;
  }
  return { score, reasoning };
}
110
+
111
/**
 * `LlmRaterClient` that scores a memory by invoking the `claude -p` CLI with
 * `--output-format json` and parsing the `{ result: string }` envelope.
 *
 * The CLI is spawned directly with an argv array and the prompt is fed on
 * stdin. Nothing is interpolated into a shell command line, so a model name
 * taken from options or `MEMORY_LLM_RATER_MODEL` cannot be word-split or
 * interpreted as shell syntax, and no temporary prompt file is written.
 */
export class ClaudeCliLlmRaterClient implements LlmRaterClient {
  private readonly model: string;
  private readonly timeoutMs: number;

  /**
   * @param opts.model     Model passed to `--model`. Falls back to the
   *                       `MEMORY_LLM_RATER_MODEL` env var, then `"haiku"`.
   * @param opts.timeoutMs Milliseconds after which the CLI process is killed.
   *                       Falls back to `DEFAULT_TIMEOUT_MS`.
   */
  constructor(opts: ClaudeCliLlmRaterClientOptions = {}) {
    this.model = opts.model ?? process.env.MEMORY_LLM_RATER_MODEL ?? "haiku";
    this.timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  }

  /**
   * Score one memory.
   *
   * @returns The parsed score and reasoning, or `null` when the CLI cannot be
   *          spawned, is killed by the timeout, or prints output that does
   *          not parse or validate. Never throws.
   */
  async rate(input: LlmRaterInput): Promise<LlmRaterResult | null> {
    const prompt = buildPrompt(input);

    let stdout = "";
    let timeoutId: ReturnType<typeof setTimeout> | undefined;
    try {
      const proc = Bun.spawn(
        ["claude", "-p", "--model", this.model, "--output-format", "json"],
        {
          stdin: new Blob([prompt]),
          stdout: "pipe",
          // stderr is never consumed; discarding it keeps a chatty child from
          // blocking on a full pipe.
          stderr: "ignore",
          env: { ...process.env, SKIP_SESSION_SUMMARY: "1" },
        },
      );
      // Killing the child closes stdout, which lets the read below finish.
      timeoutId = setTimeout(() => proc.kill(), this.timeoutMs);
      stdout = await new Response(proc.stdout).text();
    } catch (err) {
      console.error(
        "[memory-rater:llm] claude -p shell-out failed:",
        err instanceof Error ? err.message : String(err),
      );
      return null;
    } finally {
      // Clear on every path so a failed read does not leave a live timer.
      if (timeoutId !== undefined) clearTimeout(timeoutId);
    }

    let envelope: ClaudeCliEnvelope;
    try {
      envelope = JSON.parse(stdout) as ClaudeCliEnvelope;
    } catch {
      // Empty or truncated output (e.g. after a timeout kill) lands here.
      return null;
    }
    return parseScoreAndReasoning(envelope.result);
  }
}
158
+
159
/**
 * Factory honouring `MEMORY_LLM_RATER_PROVIDER`; the default provider is
 * `claude-cli`.
 *
 * An unset, empty or whitespace-only value is treated as "use the default"
 * and produces no warning. Any other unrecognised value logs a warning and
 * still falls back to the Claude CLI client, so misconfiguration never
 * crashes the worker.
 *
 * @returns A ready-to-use rater client (currently always the Claude CLI one).
 */
export function getDefaultLlmRaterClient(): LlmRaterClient {
  const configured = process.env.MEMORY_LLM_RATER_PROVIDER?.trim() ?? "";
  const provider = configured.length > 0 ? configured : "claude-cli";
  if (provider !== "claude-cli") {
    console.warn(
      `[memory-rater:llm] Unknown MEMORY_LLM_RATER_PROVIDER "${provider}" — falling back to claude-cli`,
    );
  }
  return new ClaudeCliLlmRaterClient();
}
@@ -0,0 +1,218 @@
1
+ /**
2
+ * `runMemoryRater` — Stop-hook helper that calls OpenRouter for the combined
3
+ * session-summary + LLM-rater piggyback prompt and returns a schema-validated
4
+ * `SummaryWithRatings`.
5
+ *
6
+ * Refactored out of `src/hooks/hook.ts` so the rater logic stays out of the
7
+ * hook (review feedback on PR #450). The hook just calls `runMemoryRater(...)`
8
+ * and inspects the typed result.
9
+ *
10
+ * Worker-safe — uses raw `fetch` + the tolerant JSON parser landed in PR #447.
11
+ * No `bun:sqlite` / `src/be/db` imports. Boundary script enforces this.
12
+ */
13
+ import { z } from "zod";
14
+ import { type SummaryWithRatings, SummaryWithRatingsSchema } from "./llm";
15
+
16
+ /**
17
+ * Default model used when `MEMORY_RATER_MODEL` is unset. Gemini 3 Flash on
18
+ * OpenRouter — the only Gemini 3 Flash variant published as of this PR (no
19
+ * stable non-preview slug exists yet). CLAUDE.md project-wide default.
20
+ */
21
+ export const DEFAULT_MEMORY_RATER_MODEL = "google/gemini-3-flash-preview";
22
+
23
+ /**
24
+ * `response_format.json_schema.name` sent to OpenRouter. Used by some
25
+ * providers as a tag in their structured-output telemetry — keep it stable
26
+ * so model behaviour stays comparable across calls.
27
+ */
28
+ export const MEMORY_RATER_SCHEMA_NAME = "memory_rater_output";
29
+
30
+ const OPENROUTER_CHAT_COMPLETIONS_URL = "https://openrouter.ai/api/v1/chat/completions";
31
+
32
/**
 * JSON Schema sent to OpenRouter, generated once at module load from
 * {@link SummaryWithRatingsSchema} (the source of truth) using Zod v4's
 * built-in `z.toJSONSchema`. Zod v3's `zod-to-json-schema` is incompatible
 * with the pinned v4 runtime.
 *
 * The top-level `$schema` key is omitted because OpenRouter / OpenAI strict
 * `json_schema` mode rejects unrecognized top-level keys.
 *
 * Probed end-to-end against `google/gemini-3-flash-preview` with
 * `response_format.json_schema.strict: true` — accepted, no rewrite needed.
 */
export const MEMORY_RATER_JSON_SCHEMA: Record<string, unknown> = (() => {
  const { $schema: _dropped, ...withoutSchemaKey } = z.toJSONSchema(
    SummaryWithRatingsSchema,
  ) as Record<string, unknown>;
  return withoutSchemaKey;
})();
47
+
48
/**
 * Pick the OpenRouter model slug for the memory rater.
 *
 * Uses the trimmed `MEMORY_RATER_MODEL` value from `env` when it is a
 * non-blank string; otherwise returns {@link DEFAULT_MEMORY_RATER_MODEL}.
 * Self-hosters can therefore pin another slug (e.g.
 * `anthropic/claude-haiku-4.5`) without a code change.
 *
 * @param env Environment to read from; defaults to `process.env`.
 */
export function getMemoryRaterModel(env: NodeJS.ProcessEnv = process.env): string {
  const configured = env.MEMORY_RATER_MODEL;
  const slug = typeof configured === "string" ? configured.trim() : "";
  return slug.length > 0 ? slug : DEFAULT_MEMORY_RATER_MODEL;
}
58
+
59
/**
 * Tolerant JSON parse for model output that may be wrapped in markdown
 * fences (```json … ``` or bare ``` … ```), preceded by prose, or both.
 * Returns the parsed value, or `null` when nothing parses. NEVER throws.
 *
 * Candidates are tried in order, and the first one that parses wins:
 *   1. the trimmed input as-is;
 *   2. the body of a fence that spans the whole trimmed input;
 *   3. the substring from the first `{` to the last `}`.
 *
 * Originally landed in PR #447 to recover ratings from Haiku's occasional
 * fenced/preambled output despite `response_format: {type: "json_object"}`.
 * It is kept on the OpenRouter direct-HTTP path because Gemini Flash
 * occasionally fences its output as well.
 */
export function tryParseLooseJson(raw: string): unknown {
  const text = raw.trim();
  const attempts: string[] = [text];

  const fenceBody = /^```[a-zA-Z0-9_-]*\s*\n?([\s\S]*?)\n?```\s*$/.exec(text)?.[1];
  if (fenceBody) attempts.push(fenceBody.trim());

  const open = text.indexOf("{");
  const close = text.lastIndexOf("}");
  if (open !== -1 && close > open) attempts.push(text.slice(open, close + 1));

  for (const attempt of attempts) {
    try {
      return JSON.parse(attempt);
    } catch {
      // try the next, looser candidate
    }
  }
  return null;
}
99
+
100
+ export type RunMemoryRaterOpts = {
101
+ /** The fully-built prompt (e.g. from `buildSummaryWithRatingsPrompt`). */
102
+ prompt: string;
103
+ /** OpenRouter API key. Caller is responsible for the no-op-when-unset gate. */
104
+ apiKey: string;
105
+ /** Model slug override; falls through to {@link getMemoryRaterModel}. */
106
+ model?: string;
107
+ /** Injectable for tests — defaults to the global `fetch`. */
108
+ fetchImpl?: typeof fetch;
109
+ /**
110
+ * Bytes to keep when logging unexpected response payloads. Capped to avoid
111
+ * leaking very large bodies into stderr.
112
+ */
113
+ responseLogCap?: number;
114
+ };
115
+
116
+ export type RunMemoryRaterResult =
117
+ | { ok: true; data: SummaryWithRatings; model: string }
118
+ | {
119
+ ok: false;
120
+ reason: "transport" | "http_error" | "empty_content" | "parse" | "schema";
121
+ status?: number;
122
+ };
123
+
124
/**
 * POST the prompt to OpenRouter's chat-completions endpoint using strict
 * `json_schema` structured output, then parse and schema-validate the
 * assistant's reply.
 *
 * The result is a tagged union. `ok: true` carries the validated
 * `SummaryWithRatings` plus the model slug that was used. `ok: false`
 * carries a `reason` the caller can branch on for logging:
 *   - `"transport"`     — building or sending the request threw;
 *   - `"http_error"`    — non-2xx response (`status` is included);
 *   - `"parse"`         — the response body, or the assistant content, was
 *                         not parseable as a JSON object;
 *   - `"empty_content"` — no non-empty `choices[0].message.content` string;
 *   - `"schema"`        — the parsed content failed Zod validation.
 *
 * Designed to short-circuit cleanly instead of throwing; the hook still
 * wraps the call in its own try/catch as a second line of defence.
 *
 * @param opts Prompt, API key and optional model / fetch / log-cap overrides.
 */
export async function runMemoryRater(opts: RunMemoryRaterOpts): Promise<RunMemoryRaterResult> {
  const doFetch = opts.fetchImpl ?? fetch;
  const model = opts.model ?? getMemoryRaterModel();
  const logCap = opts.responseLogCap ?? 200;

  let response: Response;
  try {
    const requestBody = {
      model,
      // OpenRouter strict json_schema — forces the provider's structured-
      // output guardrails on instead of the looser `json_object` mode.
      // The schema is derived from the same Zod source of truth used for
      // post-validation below, so request and check cannot drift.
      // https://openrouter.ai/docs/guides/features/structured-outputs
      response_format: {
        type: "json_schema",
        json_schema: {
          name: MEMORY_RATER_SCHEMA_NAME,
          strict: true,
          schema: MEMORY_RATER_JSON_SCHEMA,
        },
      },
      messages: [{ role: "user", content: opts.prompt }],
    };
    response = await doFetch(OPENROUTER_CHAT_COMPLETIONS_URL, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${opts.apiKey}`,
      },
      body: JSON.stringify(requestBody),
    });
  } catch (err) {
    console.error("[memory-rater:llm] runMemoryRater fetch threw:", (err as Error).message);
    return { ok: false, reason: "transport" };
  }

  if (!response.ok) {
    // Reading the error body is best-effort; log only a capped prefix.
    const errorText = await response.text().catch(() => "");
    console.error(
      `[memory-rater:llm] OpenRouter ${response.status} ${response.statusText}: ${errorText.slice(0, logCap)}`,
    );
    return { ok: false, reason: "http_error", status: response.status };
  }

  let payload: unknown;
  try {
    payload = await response.json();
  } catch (err) {
    console.error("[memory-rater:llm] OpenRouter response was not JSON:", (err as Error).message);
    return { ok: false, reason: "parse" };
  }

  const content = extractContent(payload);
  if (typeof content !== "string" || content.length === 0) {
    return { ok: false, reason: "empty_content" };
  }

  // Providers occasionally fence or preamble the JSON despite structured
  // output, hence the tolerant parser.
  const candidate = tryParseLooseJson(content);
  if (candidate === null || typeof candidate !== "object") {
    return { ok: false, reason: "parse" };
  }

  const validation = SummaryWithRatingsSchema.safeParse(candidate);
  return validation.success
    ? { ok: true, data: validation.data, model }
    : { ok: false, reason: "schema" };
}
202
+
203
/**
 * Defensively read `choices[0].message.content` from a chat-completion
 * response body.
 *
 * Every hop is checked: `body` must be a non-null object, `choices` a
 * non-empty array, and its first entry and that entry's `message` non-null
 * objects. Returns the content when it is a string (the empty string
 * included); otherwise `null`, which the caller reports as `empty_content`.
 */
function extractContent(body: unknown): string | null {
  const asObject = (value: unknown): Record<string, unknown> | null =>
    value !== null && typeof value === "object" ? (value as Record<string, unknown>) : null;

  const choices = asObject(body)?.choices;
  if (!Array.isArray(choices) || choices.length === 0) return null;

  const message = asObject(asObject(choices[0])?.message);
  const content = message?.content;
  return typeof content === "string" ? content : null;
}