@desplega.ai/agent-swarm 1.74.4 → 1.76.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/openapi.json +1264 -46
- package/package.json +2 -2
- package/src/be/db.ts +563 -9
- package/src/be/memory/edges-store.ts +69 -0
- package/src/be/memory/providers/sqlite-store.ts +4 -0
- package/src/be/memory/raters/explicit-self.ts +22 -0
- package/src/be/memory/raters/implicit-citation.ts +44 -0
- package/src/be/memory/raters/llm-client.ts +172 -0
- package/src/be/memory/raters/llm-summarizer.ts +218 -0
- package/src/be/memory/raters/llm.ts +375 -0
- package/src/be/memory/raters/noop.ts +14 -0
- package/src/be/memory/raters/registry.ts +86 -0
- package/src/be/memory/raters/retrieval.ts +88 -0
- package/src/be/memory/raters/run-server-raters.ts +97 -0
- package/src/be/memory/raters/store.ts +228 -0
- package/src/be/memory/raters/types.ts +101 -0
- package/src/be/memory/reranker.ts +32 -2
- package/src/be/memory/retrieval-store.ts +116 -0
- package/src/be/memory/types.ts +3 -0
- package/src/be/migrations/051_memory_posteriors_and_retrieval.sql +67 -0
- package/src/be/migrations/052_memory_edges.sql +36 -0
- package/src/be/migrations/053_agent_waiting_for_credentials_status.sql +61 -0
- package/src/be/migrations/054_agent_harness_provider.sql +21 -0
- package/src/be/migrations/055_agent_cred_status.sql +15 -0
- package/src/be/migrations/056_drop_agent_tasks_source_check.sql +139 -0
- package/src/be/migrations/057_inbox_item_state.sql +27 -0
- package/src/be/migrations/058_task_templates.sql +31 -0
- package/src/be/swarm-config-guard.ts +24 -0
- package/src/commands/credential-wait.ts +186 -0
- package/src/commands/provider-credentials.ts +434 -0
- package/src/commands/runner.ts +253 -21
- package/src/hooks/hook.ts +143 -66
- package/src/http/agents.ts +191 -1
- package/src/http/config.ts +11 -1
- package/src/http/core.ts +5 -0
- package/src/http/inbox-state.ts +89 -0
- package/src/http/index.ts +10 -0
- package/src/http/memory.ts +230 -1
- package/src/http/sessions.ts +86 -0
- package/src/http/status.ts +665 -0
- package/src/http/task-templates.ts +51 -0
- package/src/http/tasks.ts +85 -5
- package/src/http/users.ts +134 -0
- package/src/prompts/memories.ts +62 -0
- package/src/providers/claude-adapter.ts +22 -0
- package/src/providers/claude-managed-adapter.ts +24 -0
- package/src/providers/codex-adapter.ts +43 -1
- package/src/providers/devin-adapter.ts +18 -0
- package/src/providers/index.ts +7 -0
- package/src/providers/opencode-adapter.ts +60 -0
- package/src/providers/pi-mono-adapter.ts +71 -0
- package/src/providers/types.ts +34 -0
- package/src/server.ts +2 -0
- package/src/slack/handlers.ts +0 -1
- package/src/tests/agents-harness-provider.test.ts +333 -0
- package/src/tests/credential-check.test.ts +367 -0
- package/src/tests/credential-status-api.test.ts +223 -0
- package/src/tests/credential-status-routing.test.ts +150 -0
- package/src/tests/credential-wait.test.ts +282 -0
- package/src/tests/harness-provider-resolution.test.ts +242 -0
- package/src/tests/jira-sync.test.ts +1 -1
- package/src/tests/memory-edges.test.ts +722 -0
- package/src/tests/memory-rate-endpoint.test.ts +330 -0
- package/src/tests/memory-rate-tool.test.ts +252 -0
- package/src/tests/memory-rater-e2e.test.ts +578 -0
- package/src/tests/memory-rater-implicit-citation.test.ts +304 -0
- package/src/tests/memory-rater-llm-summarizer.test.ts +317 -0
- package/src/tests/memory-rater-llm.test.ts +964 -0
- package/src/tests/memory-rater-store.test.ts +249 -0
- package/src/tests/memory-reranker.test.ts +161 -2
- package/src/tests/migration-runner-regressions.test.ts +17 -2
- package/src/tests/mocks/mock-llm-rater-client.ts +35 -0
- package/src/tests/run-server-raters.test.ts +291 -0
- package/src/tests/sessions.test.ts +141 -0
- package/src/tests/status.test.ts +843 -0
- package/src/tests/stop-hook-task-resolution.test.ts +98 -0
- package/src/tests/template-recommendations.test.ts +148 -0
- package/src/tests/tool-annotations.test.ts +2 -2
- package/src/tests/use-dismissible-card.test.ts +140 -0
- package/src/tools/memory-rate.ts +166 -0
- package/src/tools/memory-search.ts +18 -0
- package/src/tools/store-progress.ts +37 -0
- package/src/tools/swarm-config/set-config.ts +17 -1
- package/src/tools/tool-config.ts +1 -0
- package/src/types.ts +122 -1
- package/src/utils/harness-provider.ts +32 -0
- package/tsconfig.json +0 -2
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Read-side query helpers for the `agent_memory_edge` table.
|
|
3
|
+
*
|
|
4
|
+
* Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-6.md §7
|
|
5
|
+
*
|
|
6
|
+
* The write path lives in `src/be/memory/raters/store.ts` (`applyRating`
|
|
7
|
+
* UPSERTs the edge atomically with the memory's posterior update). This
|
|
8
|
+
* module surfaces reads to the GET `/api/memory/edges` endpoint that powers
|
|
9
|
+
* the homepage demo ("this memory references PR #377").
|
|
10
|
+
*
|
|
11
|
+
* Server-side only.
|
|
12
|
+
*/
|
|
13
|
+
import { getDb } from "@/be/db";
|
|
14
|
+
|
|
15
|
+
const USEFULNESS_FLOOR = 1.0;
|
|
16
|
+
const USEFULNESS_CEILING = 2.0;
|
|
17
|
+
|
|
18
|
+
export type MemoryEdgeRow = {
|
|
19
|
+
to: string;
|
|
20
|
+
type: "references-source";
|
|
21
|
+
alpha: number;
|
|
22
|
+
beta: number;
|
|
23
|
+
/** clamp(2 * α/(α+β), 1.0, 2.0) — same formula as the memory reranker. */
|
|
24
|
+
usefulness: number;
|
|
25
|
+
createdAt: string;
|
|
26
|
+
};
|
|
27
|
+
|
|
28
|
+
/**
 * Return the `references-source` edges that start at `memoryId`, newest first,
 * provided the memory is visible to `agentId`.
 *
 * Visibility is re-checked here as defence-in-depth: the `agent_memory` row
 * must be swarm-scoped or owned by the requesting agent. A missing memory and
 * a memory the agent may not see both produce `[]`, which is the same shape as
 * a memory with no edges — neither case has anything useful to surface.
 *
 * @param agentId - Agent on whose behalf the edges are read.
 * @param memoryId - `agent_memory.id`, matched against the edge's `from_id`.
 * @returns One entry per edge, with `usefulness` derived from `alpha`/`beta`.
 */
export function listEdgesForAgent(agentId: string, memoryId: string): MemoryEdgeRow[] {
  type OwnerRow = { scope: string; agentId: string | null };
  type EdgeRow = { to_id: string; alpha: number; beta: number; createdAt: string };

  const db = getDb();

  // Step 1: resolve the owning memory and decide whether the caller may see it.
  const owner = db
    .prepare<OwnerRow, [string]>("SELECT scope, agentId FROM agent_memory WHERE id = ?")
    .get(memoryId);
  if (!owner) return [];
  const visibleToAgent = owner.scope === "swarm" || owner.agentId === agentId;
  if (!visibleToAgent) return [];

  // Step 2: load the outgoing edges, most recently created first.
  const edgeQuery = db.prepare<EdgeRow, [string]>(
    `SELECT to_id, alpha, beta, createdAt
         FROM agent_memory_edge
         WHERE from_id = ? AND type = 'references-source'
         ORDER BY createdAt DESC`,
  );

  const edges: MemoryEdgeRow[] = [];
  for (const { to_id, alpha, beta, createdAt } of edgeQuery.all(memoryId)) {
    edges.push({
      to: to_id,
      type: "references-source",
      alpha,
      beta,
      usefulness: clampUsefulness(alpha, beta),
      createdAt,
    });
  }
  return edges;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Turn an (alpha, beta) pair into a usefulness value: twice the ratio
 * alpha / (alpha + beta), clamped into [USEFULNESS_FLOOR, USEFULNESS_CEILING].
 * When alpha + beta is not positive the ratio is undefined, so the floor is
 * returned instead.
 */
function clampUsefulness(alpha: number, beta: number): number {
  const total = alpha + beta;
  if (total <= 0) {
    return USEFULNESS_FLOOR;
  }
  const scaled = 2 * (alpha / total);
  const capped = Math.min(USEFULNESS_CEILING, scaled);
  return Math.max(USEFULNESS_FLOOR, capped);
}
|
|
@@ -30,6 +30,8 @@ type AgentMemoryRow = {
|
|
|
30
30
|
expiresAt: string | null;
|
|
31
31
|
accessCount: number;
|
|
32
32
|
embeddingModel: string | null;
|
|
33
|
+
alpha: number;
|
|
34
|
+
beta: number;
|
|
33
35
|
};
|
|
34
36
|
|
|
35
37
|
function rowToAgentMemory(row: AgentMemoryRow): AgentMemory {
|
|
@@ -61,6 +63,8 @@ function rowToCandidate(row: AgentMemoryRow, similarity: number): MemoryCandidat
|
|
|
61
63
|
accessCount: row.accessCount ?? 0,
|
|
62
64
|
expiresAt: row.expiresAt ?? null,
|
|
63
65
|
embeddingModel: row.embeddingModel ?? null,
|
|
66
|
+
alpha: row.alpha ?? 1.0,
|
|
67
|
+
beta: row.beta ?? 1.0,
|
|
64
68
|
};
|
|
65
69
|
}
|
|
66
70
|
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import type { MemoryRater, RatingEvent } from "./types";
|
|
2
|
+
|
|
3
|
+
/**
 * Explicit-self rater — a registry sentinel that never produces ratings.
 *
 * Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-5.md §3
 *
 * It never auto-fires from `applyRating`. `RatingEvent`s attributed to it
 * arrive only through the worker-side `memory_rate` MCP tool, which POSTs to
 * `/api/memory/rate` with `source: "explicit-self"`.
 *
 * The class exists so that `MEMORY_RATERS=explicit-self` has a name to
 * register; per step-5.md §5 that registration unlocks the conditional
 * system-prompt hint teaching the agent to call `memory_rate`. It is kept out
 * of `SERVER_RATERS` so the store-progress hook never invokes it.
 */
export class ExplicitSelfRatingRater implements MemoryRater {
  readonly name = "explicit-self";

  /** Always resolves to an empty list — this rater emits no events itself. */
  rate(): Promise<RatingEvent[]> {
    const noEvents: RatingEvent[] = [];
    return Promise.resolve(noEvents);
  }
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import type { MemoryRater, RatingContext, RatingEvent } from "./types";
|
|
2
|
+
|
|
3
|
+
/**
 * Implicit-citation rater — a pure ID-grep over `ctx.evidence`.
 *
 * Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-2.md §4
 *
 * Every id in `ctx.retrievedMemoryIds` yields exactly one event:
 *   - the id occurs literally in `ctx.evidence` → signal +1, weight 0.5
 *     (positive citation: the agent referenced the memory's id somewhere in
 *     the task's `session_logs`);
 *   - otherwise → signal -1, weight 0.25 (miss: the memory was surfaced but
 *     not cited). The negative signal carries less confidence, following the
 *     IR convention from research §3.A and brainstorm Q4.
 *
 * `source` is always left as the empty string. The framework (`applyRating`
 * in ./store.ts) fills it in from the rater's `name` and rejects rater-set
 * sources to defend against rater spoofing.
 *
 * Matching is a literal substring test (`String.prototype.includes`). When one
 * id is a prefix of another (e.g. `mem-A` and `mem-AB`), citing the longer id
 * also counts as a hit for the shorter one. UUIDs (the production case) never
 * collide this way, so this is benign; the unit tests lock the behaviour in.
 *
 * Pure / deterministic / no DB I/O.
 */
export class ImplicitCitationRater implements MemoryRater {
  readonly name = "implicit-citation";

  async rate(ctx: RatingContext): Promise<RatingEvent[]> {
    if (ctx.retrievedMemoryIds.length === 0) return [];

    const haystack = ctx.evidence ?? "";
    const hasEvidence = haystack.length > 0;

    return ctx.retrievedMemoryIds.map((memoryId): RatingEvent => {
      const cited = hasEvidence && haystack.includes(memoryId);
      return cited
        ? { memoryId, signal: 1, weight: 0.5, source: "" }
        : { memoryId, signal: -1, weight: 0.25, source: "" };
    });
  }
}
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `LlmRaterClient` — pluggable LLM driver used by `LlmRater` to score the
|
|
3
|
+
* usefulness of a single retrieved memory against a (query, response) pair.
|
|
4
|
+
*
|
|
5
|
+
* Plan: thoughts/taras/plans/2026-05-05-memory-rater-v1.5/step-4.md §1
|
|
6
|
+
*
|
|
7
|
+
* This module is imported from worker-side `src/hooks/hook.ts` (the session-
|
|
8
|
+
* summary piggyback path), so it MUST NOT touch `bun:sqlite` or `src/be/db`.
|
|
9
|
+
* The DB-boundary check in `scripts/check-db-boundary.sh` enforces this.
|
|
10
|
+
*
|
|
11
|
+
* Default implementation shells out to the same `claude -p` CLI the hook
|
|
12
|
+
* already uses for session summarization — zero new SDK dependencies.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
export type LlmRaterInput = {
|
|
16
|
+
/** What the agent asked the memory system for. */
|
|
17
|
+
query: string;
|
|
18
|
+
/** The memory we're scoring. */
|
|
19
|
+
memory: {
|
|
20
|
+
id: string;
|
|
21
|
+
name: string;
|
|
22
|
+
content: string;
|
|
23
|
+
};
|
|
24
|
+
/** The agent's eventual response (or session summary) — the "did this help?" signal. */
|
|
25
|
+
response: string;
|
|
26
|
+
};
|
|
27
|
+
|
|
28
|
+
export type LlmRaterResult = {
|
|
29
|
+
/** Usefulness score in [0, 1]. 0 = misleading, 1 = highly useful. */
|
|
30
|
+
score: number;
|
|
31
|
+
/** Short human-readable explanation. */
|
|
32
|
+
reasoning: string;
|
|
33
|
+
};
|
|
34
|
+
|
|
35
|
+
export interface LlmRaterClient {
|
|
36
|
+
/**
|
|
37
|
+
* Score one memory. Returns null on parse failure / non-JSON output / timeout
|
|
38
|
+
* — the caller (`LlmRater`) treats `null` as "skip this rating", no posterior
|
|
39
|
+
* change. Implementations MUST NOT throw on transport errors; swallow + log
|
|
40
|
+
* + return null so the worker hook can never crash on rater failure.
|
|
41
|
+
*/
|
|
42
|
+
rate(input: LlmRaterInput): Promise<LlmRaterResult | null>;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* Configuration for the Claude-CLI implementation.
|
|
47
|
+
*/
|
|
48
|
+
export type ClaudeCliLlmRaterClientOptions = {
|
|
49
|
+
/** Override the model. Defaults to `MEMORY_LLM_RATER_MODEL` env var or "haiku". */
|
|
50
|
+
model?: string;
|
|
51
|
+
/** Soft timeout (ms) for the `claude -p` shell-out. Default 30s. */
|
|
52
|
+
timeoutMs?: number;
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
const DEFAULT_TIMEOUT_MS = 30000;
|
|
56
|
+
|
|
57
|
+
const PROMPT_TEMPLATE = `You are scoring the usefulness of one retrieved memory.
|
|
58
|
+
|
|
59
|
+
Return ONLY a JSON object with these fields (no prose, no markdown):
|
|
60
|
+
{
|
|
61
|
+
"score": number, // 0 = misleading/unhelpful, 1 = highly useful
|
|
62
|
+
"reasoning": string // 1..500 chars, why
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
QUERY:
|
|
66
|
+
\${query}
|
|
67
|
+
|
|
68
|
+
MEMORY:
|
|
69
|
+
id: \${memoryId}
|
|
70
|
+
name: \${memoryName}
|
|
71
|
+
content: \${memoryContent}
|
|
72
|
+
|
|
73
|
+
AGENT RESPONSE / SUMMARY:
|
|
74
|
+
\${response}
|
|
75
|
+
|
|
76
|
+
Score 0..1.`;
|
|
77
|
+
|
|
78
|
+
/**
|
|
79
|
+
* `claude -p --output-format json` returns a JSON envelope of the shape
|
|
80
|
+
* `{ result: string, ... }`. We parse the envelope, then JSON-parse the
|
|
81
|
+
* inner `result` to recover the score+reasoning object.
|
|
82
|
+
*/
|
|
83
|
+
type ClaudeCliEnvelope = { result?: unknown };
|
|
84
|
+
|
|
85
|
+
/**
 * Fill `PROMPT_TEMPLATE` with the fields of `input`.
 *
 * Every placeholder is substituted in a single pass over the template and the
 * value is supplied through a replacer callback. This matters because the
 * values are free-form text (queries, memory bodies, agent output):
 *   - `$`-sequences inside a value (`$&`, `$1`, `$$`, …) are inserted
 *     literally instead of being expanded as replacement patterns, which is
 *     what happens when a plain string is passed as the replacement;
 *   - placeholder-looking text inside a value (e.g. a query that contains
 *     the characters `${response}`) is never re-substituted by a later field,
 *     because substituted text is not scanned again.
 *
 * @param input - Query, memory and response to embed in the prompt.
 * @returns The prompt text with all five placeholders replaced.
 */
function buildPrompt(input: LlmRaterInput): string {
  const values: Record<string, string> = {
    query: input.query,
    memoryId: input.memory.id,
    memoryName: input.memory.name,
    memoryContent: input.memory.content,
    response: input.response,
  };
  return PROMPT_TEMPLATE.replace(
    /\$\{(query|memoryId|memoryName|memoryContent|response)\}/g,
    (placeholder: string, key: string) => values[key] ?? placeholder,
  );
}
|
|
92
|
+
|
|
93
|
+
/**
 * Decode the model's answer into a validated `{ score, reasoning }` pair.
 *
 * Returns `null` — never throws — unless `raw` is a string containing a JSON
 * object whose `score` is a finite number in [0, 1] and whose `reasoning` is a
 * string of 1 to 500 characters. Surrounding whitespace is ignored; any other
 * properties on the object are dropped.
 */
function parseScoreAndReasoning(raw: unknown): LlmRaterResult | null {
  if (typeof raw !== "string") return null;

  let decoded: unknown;
  try {
    decoded = JSON.parse(raw.trim());
  } catch {
    return null;
  }
  if (typeof decoded !== "object" || decoded === null) return null;

  const { score, reasoning } = decoded as { score?: unknown; reasoning?: unknown };
  if (typeof score !== "number" || typeof reasoning !== "string") return null;

  const scoreInRange = Number.isFinite(score) && score >= 0 && score <= 1;
  const reasoningInRange = reasoning.length >= 1 && reasoning.length <= 500;
  return scoreInRange && reasoningInRange ? { score, reasoning } : null;
}
|
|
110
|
+
|
|
111
|
+
/**
 * `LlmRaterClient` that scores a memory by running the `claude -p` CLI.
 *
 * The prompt built by `buildPrompt` is written to the child's stdin and the
 * CLI's JSON envelope is read from stdout; the envelope's `result` field is
 * then validated by `parseScoreAndReasoning`.
 *
 * Every failure mode (spawn error, timeout, non-JSON output, unexpected
 * envelope shape, invalid score) resolves to `null` — `rate` never rejects.
 */
export class ClaudeCliLlmRaterClient implements LlmRaterClient {
  private readonly model: string;
  private readonly timeoutMs: number;

  /**
   * @param opts - Optional overrides. `model` falls back to the
   *   `MEMORY_LLM_RATER_MODEL` env var and then to "haiku"; `timeoutMs` falls
   *   back to `DEFAULT_TIMEOUT_MS`.
   */
  constructor(opts: ClaudeCliLlmRaterClientOptions = {}) {
    this.model = opts.model ?? process.env.MEMORY_LLM_RATER_MODEL ?? "haiku";
    this.timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  }

  /**
   * Score one memory.
   *
   * @param input - Query, memory and response to score.
   * @returns The validated score and reasoning, or `null` on any failure.
   */
  async rate(input: LlmRaterInput): Promise<LlmRaterResult | null> {
    const prompt = buildPrompt(input);

    let stdout = "";
    let timeoutId: ReturnType<typeof setTimeout> | undefined;
    try {
      // Spawn the CLI directly with an argv array instead of going through
      // `bash -c`: the model name is passed as a single argument (no word
      // splitting or shell interpretation), and `proc.kill()` signals the CLI
      // itself rather than a wrapping shell whose children would keep the
      // stdout pipe open past the timeout.
      const proc = Bun.spawn(["claude", "-p", "--model", this.model, "--output-format", "json"], {
        // Feed the prompt straight into stdin — no temp file on disk.
        stdin: new Blob([prompt]),
        stdout: "pipe",
        // stderr is never read here; discarding it means a chatty child cannot
        // stall on a full, undrained pipe.
        stderr: "ignore",
        env: { ...process.env, SKIP_SESSION_SUMMARY: "1" },
      });
      timeoutId = setTimeout(() => proc.kill(), this.timeoutMs);
      stdout = await new Response(proc.stdout).text();
    } catch (err) {
      const detail = err instanceof Error ? err.message : String(err);
      console.error("[memory-rater:llm] claude -p shell-out failed:", detail);
      return null;
    } finally {
      // Clear on every path so a failed read does not leave a timer behind.
      if (timeoutId !== undefined) clearTimeout(timeoutId);
    }

    let envelope: unknown;
    try {
      envelope = JSON.parse(stdout);
    } catch {
      return null;
    }
    // `JSON.parse` can legitimately yield `null` or a primitive; reading
    // `.result` off `null` would throw and break the never-throws contract.
    if (typeof envelope !== "object" || envelope === null) return null;
    return parseScoreAndReasoning((envelope as ClaudeCliEnvelope).result);
  }
}
|
|
158
|
+
|
|
159
|
+
/**
 * Build the default `LlmRaterClient`, consulting `MEMORY_LLM_RATER_PROVIDER`.
 *
 * `claude-cli` is both the default and the only provider handled here. Any
 * other value (compared after trimming) logs a warning and still yields the
 * Claude CLI client, so a misconfigured provider never crashes the worker.
 *
 * @returns A new `ClaudeCliLlmRaterClient` with default options.
 */
export function getDefaultLlmRaterClient(): LlmRaterClient {
  const requested = (process.env.MEMORY_LLM_RATER_PROVIDER ?? "claude-cli").trim();
  const recognised = requested === "claude-cli";
  if (!recognised) {
    const notice = `[memory-rater:llm] Unknown MEMORY_LLM_RATER_PROVIDER "${requested}" — falling back to claude-cli`;
    console.warn(notice);
  }
  return new ClaudeCliLlmRaterClient();
}
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `runMemoryRater` — Stop-hook helper that calls OpenRouter for the combined
|
|
3
|
+
* session-summary + LLM-rater piggyback prompt and returns a schema-validated
|
|
4
|
+
* `SummaryWithRatings`.
|
|
5
|
+
*
|
|
6
|
+
* Refactored out of `src/hooks/hook.ts` so the rater logic stays out of the
|
|
7
|
+
* hook (review feedback on PR #450). The hook just calls `runMemoryRater(...)`
|
|
8
|
+
* and inspects the typed result.
|
|
9
|
+
*
|
|
10
|
+
* Worker-safe — uses raw `fetch` + the tolerant JSON parser landed in PR #447.
|
|
11
|
+
* No `bun:sqlite` / `src/be/db` imports. Boundary script enforces this.
|
|
12
|
+
*/
|
|
13
|
+
import { z } from "zod";
|
|
14
|
+
import { type SummaryWithRatings, SummaryWithRatingsSchema } from "./llm";
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* Default model used when `MEMORY_RATER_MODEL` is unset. Gemini 3 Flash on
|
|
18
|
+
* OpenRouter — the only Gemini 3 Flash variant published as of this PR (no
|
|
19
|
+
* stable non-preview slug exists yet). CLAUDE.md project-wide default.
|
|
20
|
+
*/
|
|
21
|
+
export const DEFAULT_MEMORY_RATER_MODEL = "google/gemini-3-flash-preview";
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* `response_format.json_schema.name` sent to OpenRouter. Used by some
|
|
25
|
+
* providers as a tag in their structured-output telemetry — keep it stable
|
|
26
|
+
* so model behaviour stays comparable across calls.
|
|
27
|
+
*/
|
|
28
|
+
export const MEMORY_RATER_SCHEMA_NAME = "memory_rater_output";
|
|
29
|
+
|
|
30
|
+
const OPENROUTER_CHAT_COMPLETIONS_URL = "https://openrouter.ai/api/v1/chat/completions";
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* JSON Schema derived from {@link SummaryWithRatingsSchema}, the source of
|
|
34
|
+
* truth. Computed once at module load via Zod v4's native `z.toJSONSchema`
|
|
35
|
+
* (Zod v3's `zod-to-json-schema` is incompatible with the v4 runtime we
|
|
36
|
+
* pin). The `$schema` key is stripped because OpenRouter / OpenAI strict
|
|
37
|
+
* json_schema mode rejects unrecognized top-level keys.
|
|
38
|
+
*
|
|
39
|
+
* Probed end-to-end against `google/gemini-3-flash-preview` with
|
|
40
|
+
* `response_format.json_schema.strict: true` — accepted, no rewrite needed.
|
|
41
|
+
*/
|
|
42
|
+
export const MEMORY_RATER_JSON_SCHEMA: Record<string, unknown> = (() => {
|
|
43
|
+
const schema = z.toJSONSchema(SummaryWithRatingsSchema) as Record<string, unknown>;
|
|
44
|
+
delete schema.$schema;
|
|
45
|
+
return schema;
|
|
46
|
+
})();
|
|
47
|
+
|
|
48
|
+
/**
 * Pick the OpenRouter model slug for the memory rater.
 *
 * Uses the trimmed value of `MEMORY_RATER_MODEL` when it is set to something
 * other than whitespace, and {@link DEFAULT_MEMORY_RATER_MODEL} otherwise, so
 * self-hosters can pin a different slug (e.g. `anthropic/claude-haiku-4.5`)
 * without a code change.
 *
 * @param env - Environment to read from; defaults to `process.env`.
 * @returns The model slug to send to OpenRouter.
 */
export function getMemoryRaterModel(env: NodeJS.ProcessEnv = process.env): string {
  const configured =
    typeof env.MEMORY_RATER_MODEL === "string" ? env.MEMORY_RATER_MODEL.trim() : "";
  return configured.length > 0 ? configured : DEFAULT_MEMORY_RATER_MODEL;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Best-effort JSON parse for model output that may be wrapped in a markdown
 * fence (```json … ``` or a bare ``` … ```), preceded by prose, or both.
 * Returns the parsed value, or `null` when nothing parses. NEVER throws.
 *
 * Three readings of the trimmed input are attempted, in order, and the first
 * one that parses wins:
 *   1. the text as-is;
 *   2. the inside of a fence that spans the whole text (optional language tag
 *      after the opening ```), when that inside is non-empty;
 *   3. the slice from the first `{` to the last `}`.
 *
 * Originally landed in PR #447 to recover ratings from Haiku's occasional
 * fenced/preambled output despite `response_format: {type: "json_object"}`.
 * Restored here to harden the OpenRouter direct-HTTP path against the same
 * class of provider quirks (Gemini Flash also occasionally fences output).
 */
export function tryParseLooseJson(raw: string): unknown {
  const text = raw.trim();

  // Each entry yields the next string worth parsing, or `undefined` when that
  // reading does not apply to this input.
  const readings: Array<() => string | undefined> = [
    () => text,
    () => {
      const inner = /^```[a-zA-Z0-9_-]*\s*\n?([\s\S]*?)\n?```\s*$/.exec(text)?.[1];
      return inner ? inner.trim() : undefined;
    },
    () => {
      const open = text.indexOf("{");
      const close = text.lastIndexOf("}");
      return open !== -1 && close > open ? text.slice(open, close + 1) : undefined;
    },
  ];

  for (const nextReading of readings) {
    const candidate = nextReading();
    if (candidate === undefined) continue;
    try {
      return JSON.parse(candidate);
    } catch {
      // not valid JSON under this reading — try the next one
    }
  }
  return null;
}
|
|
99
|
+
|
|
100
|
+
export type RunMemoryRaterOpts = {
|
|
101
|
+
/** The fully-built prompt (e.g. from `buildSummaryWithRatingsPrompt`). */
|
|
102
|
+
prompt: string;
|
|
103
|
+
/** OpenRouter API key. Caller is responsible for the no-op-when-unset gate. */
|
|
104
|
+
apiKey: string;
|
|
105
|
+
/** Model slug override; falls through to {@link getMemoryRaterModel}. */
|
|
106
|
+
model?: string;
|
|
107
|
+
/** Injectable for tests — defaults to the global `fetch`. */
|
|
108
|
+
fetchImpl?: typeof fetch;
|
|
109
|
+
/**
|
|
110
|
+
* Bytes to keep when logging unexpected response payloads. Capped to avoid
|
|
111
|
+
* leaking very large bodies into stderr.
|
|
112
|
+
*/
|
|
113
|
+
responseLogCap?: number;
|
|
114
|
+
};
|
|
115
|
+
|
|
116
|
+
export type RunMemoryRaterResult =
|
|
117
|
+
| { ok: true; data: SummaryWithRatings; model: string }
|
|
118
|
+
| {
|
|
119
|
+
ok: false;
|
|
120
|
+
reason: "transport" | "http_error" | "empty_content" | "parse" | "schema";
|
|
121
|
+
status?: number;
|
|
122
|
+
};
|
|
123
|
+
|
|
124
|
+
/**
 * POST the prompt to OpenRouter's chat-completions endpoint with a strict
 * `json_schema` response format, then parse and schema-validate the
 * assistant's reply.
 *
 * The result is a tagged union. `ok: true` carries the validated
 * `SummaryWithRatings` and the model slug that was used. `ok: false` carries a
 * `reason` the caller can branch on for logging:
 *   - `transport`     — the fetch call itself threw;
 *   - `http_error`    — non-2xx response (`status` is set);
 *   - `parse`         — the HTTP body was not JSON, or the assistant content
 *                       could not be parsed into an object;
 *   - `empty_content` — no non-empty `choices[0].message.content` string;
 *   - `schema`        — parsed, but rejected by `SummaryWithRatingsSchema`.
 *
 * Designed to short-circuit cleanly rather than propagate exceptions; the hook
 * still wraps the call in its own try/catch as a second line of defence.
 *
 * @param opts - Prompt, API key and optional model / fetch / log-cap overrides.
 */
export async function runMemoryRater(opts: RunMemoryRaterOpts): Promise<RunMemoryRaterResult> {
  const doFetch = opts.fetchImpl ?? fetch;
  const model = opts.model ?? getMemoryRaterModel();
  const logCap = opts.responseLogCap ?? 200;

  const payload = {
    model,
    // OpenRouter strict json_schema — turns on the provider's structured-output
    // guardrails rather than the looser `json_object` mode. The schema comes
    // from the same Zod definition used for validation below, so the request
    // and the post-validation check cannot drift apart.
    // https://openrouter.ai/docs/guides/features/structured-outputs
    response_format: {
      type: "json_schema",
      json_schema: {
        name: MEMORY_RATER_SCHEMA_NAME,
        strict: true,
        schema: MEMORY_RATER_JSON_SCHEMA,
      },
    },
    messages: [{ role: "user", content: opts.prompt }],
  };

  let response: Response;
  try {
    response = await doFetch(OPENROUTER_CHAT_COMPLETIONS_URL, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${opts.apiKey}`,
      },
      body: JSON.stringify(payload),
    });
  } catch (err) {
    console.error("[memory-rater:llm] runMemoryRater fetch threw:", (err as Error).message);
    return { ok: false, reason: "transport" };
  }

  if (!response.ok) {
    // Only a capped prefix of the error body is logged.
    const errorText = await response.text().catch(() => "");
    console.error(
      `[memory-rater:llm] OpenRouter ${response.status} ${response.statusText}: ${errorText.slice(0, logCap)}`,
    );
    return { ok: false, reason: "http_error", status: response.status };
  }

  let decodedBody: unknown;
  try {
    decodedBody = await response.json();
  } catch (err) {
    console.error("[memory-rater:llm] OpenRouter response was not JSON:", (err as Error).message);
    return { ok: false, reason: "parse" };
  }

  const content = extractContent(decodedBody);
  if (content === null || content === "") {
    return { ok: false, reason: "empty_content" };
  }

  // Tolerant parse first (fences / preambles), strict schema check second.
  const candidate = tryParseLooseJson(content);
  if (typeof candidate !== "object" || candidate === null) {
    return { ok: false, reason: "parse" };
  }

  const validated = SummaryWithRatingsSchema.safeParse(candidate);
  return validated.success
    ? { ok: true, data: validated.data, model }
    : { ok: false, reason: "schema" };
}
|
|
202
|
+
|
|
203
|
+
/**
 * Defensively read `choices[0].message.content` from a chat-completion
 * response body of unknown shape.
 *
 * @param body - Decoded response body; nothing about its shape is assumed.
 * @returns The content when it is a string (possibly empty); `null` when any
 *   step of the path is missing or has the wrong type.
 */
function extractContent(body: unknown): string | null {
  // Narrow an unknown value to a property bag, or null when it is not an object.
  const asRecord = (value: unknown): Record<string, unknown> | null =>
    typeof value === "object" && value !== null ? (value as Record<string, unknown>) : null;

  const choices = asRecord(body)?.choices;
  if (!Array.isArray(choices) || choices.length === 0) return null;

  const content = asRecord(asRecord(choices[0])?.message)?.content;
  return typeof content === "string" ? content : null;
}
|