@desplega.ai/agent-swarm 1.75.0 → 1.76.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/openapi.json +973 -36
- package/package.json +2 -2
- package/src/be/db.ts +527 -9
- package/src/be/memory/raters/llm-summarizer.ts +218 -0
- package/src/be/memory/raters/llm.ts +56 -75
- package/src/be/memory/retrieval-store.ts +21 -0
- package/src/be/migrations/054_agent_harness_provider.sql +21 -0
- package/src/be/migrations/055_agent_cred_status.sql +15 -0
- package/src/be/migrations/056_drop_agent_tasks_source_check.sql +139 -0
- package/src/be/migrations/057_inbox_item_state.sql +27 -0
- package/src/be/migrations/058_task_templates.sql +31 -0
- package/src/be/swarm-config-guard.ts +24 -0
- package/src/commands/credential-wait.ts +1 -1
- package/src/commands/provider-credentials.ts +434 -0
- package/src/commands/runner.ts +229 -42
- package/src/hooks/hook.ts +115 -95
- package/src/http/agents.ts +82 -2
- package/src/http/config.ts +11 -1
- package/src/http/inbox-state.ts +89 -0
- package/src/http/index.ts +10 -0
- package/src/http/sessions.ts +86 -0
- package/src/http/status.ts +665 -0
- package/src/http/task-templates.ts +51 -0
- package/src/http/tasks.ts +85 -5
- package/src/http/users.ts +134 -0
- package/src/providers/claude-adapter.ts +5 -0
- package/src/providers/codex-adapter.ts +1 -1
- package/src/providers/index.ts +1 -1
- package/src/slack/handlers.ts +0 -1
- package/src/tests/agents-harness-provider.test.ts +333 -0
- package/src/tests/credential-check.test.ts +32 -1
- package/src/tests/credential-status-api.test.ts +42 -0
- package/src/tests/harness-provider-resolution.test.ts +242 -0
- package/src/tests/jira-sync.test.ts +1 -1
- package/src/tests/memory-rater-llm-summarizer.test.ts +317 -0
- package/src/tests/memory-rater-llm.test.ts +265 -107
- package/src/tests/migration-runner-regressions.test.ts +17 -2
- package/src/tests/sessions.test.ts +141 -0
- package/src/tests/status.test.ts +843 -0
- package/src/tests/stop-hook-task-resolution.test.ts +98 -0
- package/src/tests/template-recommendations.test.ts +148 -0
- package/src/tests/use-dismissible-card.test.ts +140 -0
- package/src/tools/swarm-config/set-config.ts +17 -1
- package/src/types.ts +117 -0
- package/src/utils/harness-provider.ts +32 -0
- package/tsconfig.json +0 -2
- package/src/providers/credentials.ts +0 -74
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `runMemoryRater` — Stop-hook helper that calls OpenRouter for the combined
|
|
3
|
+
* session-summary + LLM-rater piggyback prompt and returns a schema-validated
|
|
4
|
+
* `SummaryWithRatings`.
|
|
5
|
+
*
|
|
6
|
+
* Refactored out of `src/hooks/hook.ts` so the rater logic stays out of the
|
|
7
|
+
* hook (review feedback on PR #450). The hook just calls `runMemoryRater(...)`
|
|
8
|
+
* and inspects the typed result.
|
|
9
|
+
*
|
|
10
|
+
* Worker-safe — uses raw `fetch` + the tolerant JSON parser landed in PR #447.
|
|
11
|
+
* No `bun:sqlite` / `src/be/db` imports. Boundary script enforces this.
|
|
12
|
+
*/
|
|
13
|
+
import { z } from "zod";
|
|
14
|
+
import { type SummaryWithRatings, SummaryWithRatingsSchema } from "./llm";
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* Default model used when `MEMORY_RATER_MODEL` is unset. Gemini 3 Flash on
|
|
18
|
+
* OpenRouter — the only Gemini 3 Flash variant published as of this PR (no
|
|
19
|
+
* stable non-preview slug exists yet). CLAUDE.md project-wide default.
|
|
20
|
+
*/
|
|
21
|
+
export const DEFAULT_MEMORY_RATER_MODEL = "google/gemini-3-flash-preview";
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* `response_format.json_schema.name` sent to OpenRouter. Used by some
|
|
25
|
+
* providers as a tag in their structured-output telemetry — keep it stable
|
|
26
|
+
* so model behaviour stays comparable across calls.
|
|
27
|
+
*/
|
|
28
|
+
export const MEMORY_RATER_SCHEMA_NAME = "memory_rater_output";
|
|
29
|
+
|
|
30
|
+
const OPENROUTER_CHAT_COMPLETIONS_URL = "https://openrouter.ai/api/v1/chat/completions";
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* JSON Schema derived from {@link SummaryWithRatingsSchema}, the source of
|
|
34
|
+
* truth. Computed once at module load via Zod v4's native `z.toJSONSchema`
|
|
35
|
+
* (Zod v3's `zod-to-json-schema` is incompatible with the v4 runtime we
|
|
36
|
+
* pin). The `$schema` key is stripped because OpenRouter / OpenAI strict
|
|
37
|
+
* json_schema mode rejects unrecognized top-level keys.
|
|
38
|
+
*
|
|
39
|
+
* Probed end-to-end against `google/gemini-3-flash-preview` with
|
|
40
|
+
* `response_format.json_schema.strict: true` — accepted, no rewrite needed.
|
|
41
|
+
*/
|
|
42
|
+
export const MEMORY_RATER_JSON_SCHEMA: Record<string, unknown> = (() => {
  // Generate the JSON Schema from the Zod source of truth, then rebuild it
  // without the top-level `$schema` key before it is sent as
  // `response_format.json_schema.schema`.
  const generated = z.toJSONSchema(SummaryWithRatingsSchema) as Record<string, unknown>;
  return Object.fromEntries(Object.entries(generated).filter(([key]) => key !== "$schema"));
})();
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Resolve the OpenRouter model slug. Reads `MEMORY_RATER_MODEL` from the env;
|
|
50
|
+
* falls back to {@link DEFAULT_MEMORY_RATER_MODEL}. Self-hosters can pin a
|
|
51
|
+
* different slug (e.g. `anthropic/claude-haiku-4.5`) without a code change.
|
|
52
|
+
*/
|
|
53
|
+
export function getMemoryRaterModel(env: NodeJS.ProcessEnv = process.env): string {
  // Only a non-blank string override wins; unset or whitespace-only values
  // fall back to the project default.
  const override = env.MEMORY_RATER_MODEL;
  if (typeof override !== "string") return DEFAULT_MEMORY_RATER_MODEL;
  const slug = override.trim();
  return slug.length > 0 ? slug : DEFAULT_MEMORY_RATER_MODEL;
}
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* Best-effort parse of a JSON string that may be wrapped in markdown fences
|
|
61
|
+
* (```json … ``` or plain ``` … ```), have a prose preamble, or both. Returns
|
|
62
|
+
* the parsed value or `null`. NEVER throws.
|
|
63
|
+
*
|
|
64
|
+
* Strategy: try strict parse first. On failure, strip a leading ```json /
|
|
65
|
+
* ```<lang> / ``` fence + matching trailing ```; on second failure, slice
|
|
66
|
+
* from the first `{` to the last `}` and retry.
|
|
67
|
+
*
|
|
68
|
+
* Originally landed in PR #447 to recover ratings from Haiku's occasional
|
|
69
|
+
* fenced/preambled output despite `response_format: {type: "json_object"}`.
|
|
70
|
+
* Restored here to harden the OpenRouter direct-HTTP path against the same
|
|
71
|
+
* class of provider quirks (Gemini Flash also occasionally fences output).
|
|
72
|
+
*/
|
|
73
|
+
export function tryParseLooseJson(raw: string): unknown {
  const text = raw.trim();

  // Candidate substrings, ordered from strictest to loosest interpretation.
  const attempts: string[] = [text];

  // 1) Whole payload wrapped in a ```lang … ``` fence.
  const fenceMatch = /^```[a-zA-Z0-9_-]*\s*\n?([\s\S]*?)\n?```\s*$/.exec(text);
  const fencedBody = fenceMatch?.[1];
  if (fencedBody) attempts.push(fencedBody.trim());

  // 2) Anything between the first `{` and the last `}` (prose preamble/trailer).
  const open = text.indexOf("{");
  const close = text.lastIndexOf("}");
  if (open !== -1 && close > open) attempts.push(text.slice(open, close + 1));

  for (const attempt of attempts) {
    try {
      return JSON.parse(attempt);
    } catch {
      // not valid JSON in this form — move on to the next, looser candidate
    }
  }
  return null;
}
|
|
99
|
+
|
|
100
|
+
export type RunMemoryRaterOpts = {
|
|
101
|
+
/** The fully-built prompt (e.g. from `buildSummaryWithRatingsPrompt`). */
|
|
102
|
+
prompt: string;
|
|
103
|
+
/** OpenRouter API key. Caller is responsible for the no-op-when-unset gate. */
|
|
104
|
+
apiKey: string;
|
|
105
|
+
/** Model slug override; falls through to {@link getMemoryRaterModel}. */
|
|
106
|
+
model?: string;
|
|
107
|
+
/** Injectable for tests — defaults to the global `fetch`. */
|
|
108
|
+
fetchImpl?: typeof fetch;
|
|
109
|
+
/**
|
|
110
|
+
* Max characters to keep when logging non-2xx response bodies (default 200). Capped to avoid
|
|
111
|
+
* leaking very large bodies into stderr.
|
|
112
|
+
*/
|
|
113
|
+
responseLogCap?: number;
|
|
114
|
+
};
|
|
115
|
+
|
|
116
|
+
export type RunMemoryRaterResult =
|
|
117
|
+
| { ok: true; data: SummaryWithRatings; model: string }
|
|
118
|
+
| {
|
|
119
|
+
ok: false;
|
|
120
|
+
reason: "transport" | "http_error" | "empty_content" | "parse" | "schema";
|
|
121
|
+
status?: number;
|
|
122
|
+
};
|
|
123
|
+
|
|
124
|
+
/**
 * Call OpenRouter's chat completions endpoint with a strict `json_schema`
 * `response_format`, then parse and schema-validate the assistant's content.
 *
 * Returns a tagged union: `ok: true` with a typed `SummaryWithRatings` and the
 * model slug that was used, or `ok: false` with a `reason` discriminator the
 * caller can branch on for logging:
 *   - `transport`     — the fetch call itself threw / rejected.
 *   - `http_error`    — non-2xx response; `status` carries the HTTP code.
 *   - `parse`         — the response envelope was not JSON, or the assistant
 *                       content could not be parsed into a JSON object.
 *   - `empty_content` — no non-empty string at `choices[0].message.content`.
 *   - `schema`        — parsed JSON failed `SummaryWithRatingsSchema`.
 *
 * NEVER throws — the hook wraps this in its own try/catch as a second line of
 * defence, but this function is designed to short-circuit cleanly rather than
 * propagate exceptions.
 *
 * @param opts - Prompt, API key and optional overrides (model slug, injectable
 *   `fetch`, log cap for non-2xx bodies).
 * @returns The validated payload, or a failure reason.
 */
export async function runMemoryRater(opts: RunMemoryRaterOpts): Promise<RunMemoryRaterResult> {
  const fetchFn = opts.fetchImpl ?? fetch;
  const model = opts.model ?? getMemoryRaterModel();
  const responseLogCap = opts.responseLogCap ?? 200;

  let res: Response;
  try {
    res = await fetchFn(OPENROUTER_CHAT_COMPLETIONS_URL, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${opts.apiKey}`,
      },
      body: JSON.stringify({
        model,
        // OpenRouter strict json_schema — forces the provider's structured-
        // output guardrails on instead of the looser `json_object` mode.
        // Schema is derived from the same Zod source of truth, so the
        // request and the post-validation Zod check can't drift.
        // https://openrouter.ai/docs/guides/features/structured-outputs
        response_format: {
          type: "json_schema",
          json_schema: {
            name: MEMORY_RATER_SCHEMA_NAME,
            strict: true,
            schema: MEMORY_RATER_JSON_SCHEMA,
          },
        },
        messages: [{ role: "user", content: opts.prompt }],
      }),
    });
  } catch (err: unknown) {
    // A rejection reason is not guaranteed to be an Error (it can even be
    // null/undefined); reading `.message` blindly could throw from inside this
    // handler and break the never-throws contract.
    const detail = err instanceof Error ? err.message : String(err);
    console.error("[memory-rater:llm] runMemoryRater fetch threw:", detail);
    return { ok: false, reason: "transport" };
  }

  if (!res.ok) {
    // Body is best-effort context for the log line only; a failed read
    // degrades to an empty string.
    const text = await res.text().catch(() => "");
    console.error(
      `[memory-rater:llm] OpenRouter ${res.status} ${res.statusText}: ${text.slice(0, responseLogCap)}`,
    );
    return { ok: false, reason: "http_error", status: res.status };
  }

  let body: unknown;
  try {
    body = await res.json();
  } catch (err: unknown) {
    const detail = err instanceof Error ? err.message : String(err);
    console.error("[memory-rater:llm] OpenRouter response was not JSON:", detail);
    return { ok: false, reason: "parse" };
  }

  const content = extractContent(body);
  if (typeof content !== "string" || content.length === 0) {
    return { ok: false, reason: "empty_content" };
  }

  // Tolerant parse: the model may still fence or preamble its JSON.
  const candidate = tryParseLooseJson(content);
  if (candidate === null || typeof candidate !== "object") {
    return { ok: false, reason: "parse" };
  }

  const parsed = SummaryWithRatingsSchema.safeParse(candidate);
  if (!parsed.success) {
    return { ok: false, reason: "schema" };
  }
  return { ok: true, data: parsed.data, model };
}
|
|
202
|
+
|
|
203
|
+
/**
|
|
204
|
+
* Pull `choices[0].message.content` out of an OpenRouter chat-completion
|
|
205
|
+
* response defensively. Returns the string, or `null` when the shape doesn't
|
|
206
|
+
* match — caller treats that as `empty_content`.
|
|
207
|
+
*/
|
|
208
|
+
function extractContent(body: unknown): string | null {
  // Narrow an unknown value to an indexable object, or null when it is not one.
  const asRecord = (value: unknown): Record<string, unknown> | null =>
    value !== null && typeof value === "object" ? (value as Record<string, unknown>) : null;

  const choices = asRecord(body)?.choices;
  if (!Array.isArray(choices) || choices.length === 0) return null;

  // Walk choices[0] -> message -> content, bailing out with null at the first
  // level that is not an object.
  const message = asRecord(asRecord(choices[0])?.message);
  const content = message?.content;
  return typeof content === "string" ? content : null;
}
|
|
@@ -193,7 +193,9 @@ export function buildSummaryWithRatingsPrompt(
|
|
|
193
193
|
|
|
194
194
|
return `${basePrompt}
|
|
195
195
|
|
|
196
|
-
CRITICAL:
|
|
196
|
+
CRITICAL: Your entire response MUST be a single JSON object that conforms to the schema below. Do NOT wrap it in triple-backtick fences (no \`\`\`json or \`\`\`), do NOT add a prose preamble, do NOT add trailing commentary. Just the JSON object, nothing else.
|
|
197
|
+
|
|
198
|
+
Schema:
|
|
197
199
|
{
|
|
198
200
|
"summary": string, // your existing summary text
|
|
199
201
|
"ratings": [ // one entry per memory you can score
|
|
@@ -215,80 +217,6 @@ Memories retrieved during this session:
|
|
|
215
217
|
${memoryBlock}`;
|
|
216
218
|
}
|
|
217
219
|
|
|
218
|
-
/**
|
|
219
|
-
* Best-effort parse of the structured `SummaryWithRatingsSchema` JSON out of
|
|
220
|
-
* the `claude -p --output-format json` envelope (`{ result: "<inner json>" }`).
|
|
221
|
-
*
|
|
222
|
-
* Returns `null` on any parse failure — the caller falls back to the existing
|
|
223
|
-
* summary-only path. NEVER throws.
|
|
224
|
-
*/
|
|
225
|
-
export function parseSummaryWithRatings(claudeStdout: string): SummaryWithRatings | null {
|
|
226
|
-
let envelope: { result?: unknown };
|
|
227
|
-
try {
|
|
228
|
-
envelope = JSON.parse(claudeStdout) as { result?: unknown };
|
|
229
|
-
} catch {
|
|
230
|
-
return null;
|
|
231
|
-
}
|
|
232
|
-
const inner = envelope.result;
|
|
233
|
-
let candidate: unknown;
|
|
234
|
-
if (typeof inner === "string") {
|
|
235
|
-
try {
|
|
236
|
-
candidate = JSON.parse(inner.trim());
|
|
237
|
-
} catch {
|
|
238
|
-
return null;
|
|
239
|
-
}
|
|
240
|
-
} else if (inner && typeof inner === "object") {
|
|
241
|
-
candidate = inner;
|
|
242
|
-
} else {
|
|
243
|
-
return null;
|
|
244
|
-
}
|
|
245
|
-
const parsed = SummaryWithRatingsSchema.safeParse(candidate);
|
|
246
|
-
return parsed.success ? parsed.data : null;
|
|
247
|
-
}
|
|
248
|
-
|
|
249
|
-
/**
|
|
250
|
-
* Fallback summary-text extractor for the hook's `claude -p` envelope. Used
|
|
251
|
-
* when {@link parseSummaryWithRatings} returns null — i.e., when the LLM
|
|
252
|
-
* returned a valid envelope but the inner payload either wasn't structured
|
|
253
|
-
* JSON (unstructured prompt path) OR was structured JSON whose ratings failed
|
|
254
|
-
* `SummaryWithRatingsSchema` validation (e.g., out-of-range scores).
|
|
255
|
-
*
|
|
256
|
-
* In the latter case `envelope.result` is the full inner JSON STRING such as
|
|
257
|
-
* `{"summary":"...","ratings":[...]}`; indexing that verbatim into agent
|
|
258
|
-
* memory would violate the step-4 contract that ratings are best-effort and
|
|
259
|
-
* the existing summary-indexing behavior remains unchanged. We extract the
|
|
260
|
-
* inner `summary` field if present, else return the inner string (treating
|
|
261
|
-
* it as plain summary text). NEVER throws.
|
|
262
|
-
*/
|
|
263
|
-
export function extractSummaryFromClaudeStdout(claudeStdout: string): string {
|
|
264
|
-
let envelope: { result?: unknown };
|
|
265
|
-
try {
|
|
266
|
-
envelope = JSON.parse(claudeStdout) as { result?: unknown };
|
|
267
|
-
} catch {
|
|
268
|
-
return claudeStdout;
|
|
269
|
-
}
|
|
270
|
-
const inner = envelope.result;
|
|
271
|
-
if (typeof inner === "string") {
|
|
272
|
-
try {
|
|
273
|
-
const innerParsed = JSON.parse(inner.trim()) as { summary?: unknown };
|
|
274
|
-
if (innerParsed && typeof innerParsed.summary === "string") {
|
|
275
|
-
return innerParsed.summary;
|
|
276
|
-
}
|
|
277
|
-
} catch {
|
|
278
|
-
// inner wasn't JSON — treat it as plain summary text
|
|
279
|
-
}
|
|
280
|
-
return inner;
|
|
281
|
-
}
|
|
282
|
-
if (
|
|
283
|
-
inner &&
|
|
284
|
-
typeof inner === "object" &&
|
|
285
|
-
typeof (inner as { summary?: unknown }).summary === "string"
|
|
286
|
-
) {
|
|
287
|
-
return (inner as { summary: string }).summary;
|
|
288
|
-
}
|
|
289
|
-
return claudeStdout;
|
|
290
|
-
}
|
|
291
|
-
|
|
292
220
|
/**
|
|
293
221
|
* `MEMORY_RATERS=...` includes `llm`? Used by the hook to gate the piggyback
|
|
294
222
|
* path — strict opt-in so existing deployments are byte-identical when unset.
|
|
@@ -308,10 +236,63 @@ export type RetrievalRow = {
|
|
|
308
236
|
name: string;
|
|
309
237
|
content: string;
|
|
310
238
|
scope?: string;
|
|
239
|
+
/** `agent_memory.source` — present once the API surfaces it (post-PR #451 amendment). */
|
|
240
|
+
source?: string;
|
|
241
|
+
/** `agent_tasks.scheduleId` for the writing task, or null when not a scheduled run. */
|
|
242
|
+
scheduleId?: string | null;
|
|
311
243
|
similarity?: number | null;
|
|
312
244
|
retrievedAt?: string;
|
|
313
245
|
};
|
|
314
246
|
|
|
247
|
+
/**
|
|
248
|
+
* Dedupe candidate memories before LLM rating to prevent posterior inflation
|
|
249
|
+
* from scheduled-task self-similarity.
|
|
250
|
+
*
|
|
251
|
+
* **Why this exists.** Scheduled tasks fire identical task text on every
|
|
252
|
+
* run, and the task-completion path names each memory
|
|
253
|
+
* `"Task: ${task.task.slice(0, 80)}"` (`src/tools/store-progress.ts`). When
|
|
254
|
+
* the next run searches memory, its own past runs surface as "highly
|
|
255
|
+
* similar" rows. Without dedup, the LLM rater scored 5+ near-clones at +1.0
|
|
256
|
+
* each — bumping alpha 5x in a single session and distorting the Beta(α,β)
|
|
257
|
+
* ranking vs. a normal one-shot session. Concrete case (Lead's audit of the
|
|
258
|
+
* first 37 `llm` ratings, post-PR #450): the Claude Code Changelog Monitor
|
|
259
|
+
* hourly cron (taskId `f938d74d-05af-44a7-a0aa-3463d22be502`) produced 5
|
|
260
|
+
* saturating +1s in one rater pass — every rated memory was a prior hourly
|
|
261
|
+
* run.
|
|
262
|
+
*
|
|
263
|
+
* **Discriminator.** `agent_tasks.scheduleId`. Memories sharing a non-null
|
|
264
|
+
* `scheduleId` are by definition from the same scheduled job — that is the
|
|
265
|
+
* exact duplicate class the audit identified, and the only one we want to
|
|
266
|
+
* collapse. We do NOT key on `name` alone, because the 80-char truncation in
|
|
267
|
+
* task-completion names ("Task: …") and session-summary names ("Session: …")
|
|
268
|
+
* means two distinct one-shot tasks/summaries that happen to share the first
|
|
269
|
+
* 80 chars of their description would silently collapse — the false-positive
|
|
270
|
+
* path the PR #451 reviewer flagged.
|
|
271
|
+
*
|
|
272
|
+
* **Pass-through cases (NOT deduped).**
|
|
273
|
+
* - `scheduleId` is null/undefined (manual one-shot tasks, manual memories,
|
|
274
|
+
* file-index memories) — no scheduled-clone risk.
|
|
275
|
+
* - Two memories from different scheduled jobs that happen to surface in
|
|
276
|
+
* the same retrieval set — different `scheduleId`s, both kept.
|
|
277
|
+
*
|
|
278
|
+
* **Tie-break.** Input is `ORDER BY mr.retrievedAt DESC` from
|
|
279
|
+
* `getRetrievalsForAgent`, so "first occurrence per scheduleId" = "freshest
|
|
280
|
+
* surfaced run", which is the representative we want.
|
|
281
|
+
*/
|
|
282
|
+
export function dedupeRetrievalsForRater<T extends { scheduleId?: string | null }>(rows: T[]): T[] {
  // Schedule ids that already have a representative row in the output.
  const claimed = new Set<string>();
  return rows.filter(({ scheduleId }) => {
    // Rows without a usable scheduleId (null / undefined / empty) always pass.
    if (typeof scheduleId !== "string" || scheduleId.length === 0) return true;
    // Later rows for an already-seen schedule are dropped; input order decides
    // which one survives.
    if (claimed.has(scheduleId)) return false;
    claimed.add(scheduleId);
    return true;
  });
}
|
|
295
|
+
|
|
315
296
|
/**
|
|
316
297
|
* GET `/api/memory/retrievals?taskId=` — best-effort. Returns `[]` on any
|
|
317
298
|
* failure so a transient API outage never blocks the summary-indexing path.
|
|
@@ -28,6 +28,20 @@ export type RetrievalListRow = {
|
|
|
28
28
|
/** Up to RETRIEVAL_CONTENT_SNIPPET_CHARS chars of `agent_memory.content`. */
|
|
29
29
|
content: string;
|
|
30
30
|
scope: string;
|
|
31
|
+
/**
|
|
32
|
+
* `agent_memory.source` — `'task_completion' | 'session_summary' | 'manual'
|
|
33
|
+
* | 'file_index'`. Surfaced so the worker rater can scope dedup to the
|
|
34
|
+
* memory class that exhibits scheduled-task self-similarity.
|
|
35
|
+
*/
|
|
36
|
+
source: string;
|
|
37
|
+
/**
|
|
38
|
+
* `agent_tasks.scheduleId` for the source task that wrote this memory, or
|
|
39
|
+
* `null` if the memory has no source task or the task wasn't a scheduled
|
|
40
|
+
* run. Worker raters use this as a precise cron-clone discriminator —
|
|
41
|
+
* memories sharing a non-null `scheduleId` are by definition from the same
|
|
42
|
+
* scheduled job and safe to dedupe.
|
|
43
|
+
*/
|
|
44
|
+
scheduleId: string | null;
|
|
31
45
|
similarity: number | null;
|
|
32
46
|
retrievedAt: string;
|
|
33
47
|
};
|
|
@@ -61,15 +75,22 @@ export function getRetrievalsForAgent(
|
|
|
61
75
|
params.push(filter.sessionId);
|
|
62
76
|
}
|
|
63
77
|
|
|
78
|
+
// LEFT JOIN agent_tasks so we can surface `scheduleId` to worker raters —
|
|
79
|
+
// a non-null `scheduleId` is the precise cron-clone discriminator that
|
|
80
|
+
// `dedupeRetrievalsForRater` keys on. The LEFT keeps memories with no
|
|
81
|
+
// source task (manual / file_index) in the result set.
|
|
64
82
|
const sql = `
|
|
65
83
|
SELECT am.id AS id,
|
|
66
84
|
am.name AS name,
|
|
67
85
|
substr(am.content, 1, ?) AS content,
|
|
68
86
|
am.scope AS scope,
|
|
87
|
+
am.source AS source,
|
|
88
|
+
at.scheduleId AS scheduleId,
|
|
69
89
|
mr.similarity AS similarity,
|
|
70
90
|
mr.retrievedAt AS retrievedAt
|
|
71
91
|
FROM memory_retrieval mr
|
|
72
92
|
INNER JOIN agent_memory am ON am.id = mr.memoryId
|
|
93
|
+
LEFT JOIN agent_tasks at ON at.id = am.sourceTaskId
|
|
73
94
|
WHERE ${conditions.join(" AND ")}
|
|
74
95
|
ORDER BY mr.retrievedAt DESC
|
|
75
96
|
LIMIT ?
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
-- 054_agent_harness_provider.sql
|
|
2
|
+
--
|
|
3
|
+
-- Phase 1.5 of the cloud-personalization plan
|
|
4
|
+
-- (thoughts/taras/plans/2026-05-08-cloud-personalization-phases-1-4.md).
|
|
5
|
+
--
|
|
6
|
+
-- Add a first-class `harness_provider` column on `agents` so each agent's
|
|
7
|
+
-- harness (claude / codex / pi / devin / claude-managed / opencode) is
|
|
8
|
+
-- queryable per-row, independent of `process.env.HARNESS_PROVIDER` at
|
|
9
|
+
-- worker boot.
|
|
10
|
+
--
|
|
11
|
+
-- Workers push their `HARNESS_PROVIDER` value on registration; an operator
|
|
12
|
+
-- can later re-assign via `PATCH /api/agents/:id/harness-provider`. The
|
|
13
|
+
-- worker itself does NOT yet react in real time — picked up on next worker
|
|
14
|
+
-- restart. Full per-agent harness with dynamic adapter loading lives in
|
|
15
|
+
-- Linear DES-359.
|
|
16
|
+
--
|
|
17
|
+
-- Forward-only. NULL default = backward-compat for already-registered
|
|
18
|
+
-- agents (their column stays NULL until they re-register or an operator
|
|
19
|
+
-- patches it).
|
|
20
|
+
|
|
21
|
+
ALTER TABLE agents ADD COLUMN harness_provider TEXT NULL;
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
-- 055_agent_cred_status.sql
|
|
2
|
+
--
|
|
3
|
+
-- Worker-self-reported credential snapshot. Pairs with `harness_provider`
|
|
4
|
+
-- (054): the JSON describes the agent's creds for whichever harness that
|
|
5
|
+
-- agent runs. NULL = unreported (worker hasn't booted yet, or
|
|
6
|
+
-- CRED_CHECK_DISABLE=1 was set).
|
|
7
|
+
--
|
|
8
|
+
-- The existing `credentialMissing` column (053) stays. This one is additive
|
|
9
|
+
-- and carries the full snapshot (ready, missing, satisfiedBy, hint,
|
|
10
|
+
-- liveTest, reportedAt, reportKind). Once `cred_status.missing` is proven
|
|
11
|
+
-- across deploys, `credentialMissing` can be retired in a later migration.
|
|
12
|
+
--
|
|
13
|
+
-- Forward-only.
|
|
14
|
+
|
|
15
|
+
ALTER TABLE agents ADD COLUMN cred_status TEXT;
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
-- Drop the SQL CHECK constraint on agent_tasks.source.
|
|
2
|
+
-- The Zod layer (`AgentTaskSourceSchema` in src/types.ts) is now the single
|
|
3
|
+
-- source of truth for the allowed enum, so adding a new source no longer
|
|
4
|
+
-- requires a forward-only migration. This makes future source additions
|
|
5
|
+
-- (Phase 1 of the UI chat/session experience plan) cheap.
|
|
6
|
+
--
|
|
7
|
+
-- SQLite cannot ALTER a CHECK constraint in place; we follow the table-rebuild
|
|
8
|
+
-- pattern from migration 043_jira_source.sql verbatim, minus the CHECK clause
|
|
9
|
+
-- on `source`. All other columns, defaults, indexes, and FKs are preserved
|
|
10
|
+
-- exactly. No data migration — existing rows remain valid.
|
|
11
|
+
--
|
|
12
|
+
-- INSERT uses an explicit column list (no `SELECT *`) to be robust against
|
|
13
|
+
-- column-order drift between SQLite versions and against post-043 ALTERs
|
|
14
|
+
-- (migration 044 added `provider` and `providerMeta`).
|
|
15
|
+
PRAGMA foreign_keys=off;
|
|
16
|
+
|
|
17
|
+
CREATE TABLE agent_tasks_new (
|
|
18
|
+
id TEXT PRIMARY KEY,
|
|
19
|
+
agentId TEXT,
|
|
20
|
+
creatorAgentId TEXT,
|
|
21
|
+
task TEXT NOT NULL,
|
|
22
|
+
status TEXT NOT NULL DEFAULT 'pending',
|
|
23
|
+
source TEXT NOT NULL DEFAULT 'mcp',
|
|
24
|
+
taskType TEXT,
|
|
25
|
+
tags TEXT DEFAULT '[]',
|
|
26
|
+
priority INTEGER DEFAULT 50,
|
|
27
|
+
dependsOn TEXT DEFAULT '[]',
|
|
28
|
+
offeredTo TEXT,
|
|
29
|
+
offeredAt TEXT,
|
|
30
|
+
acceptedAt TEXT,
|
|
31
|
+
rejectionReason TEXT,
|
|
32
|
+
slackChannelId TEXT,
|
|
33
|
+
slackThreadTs TEXT,
|
|
34
|
+
slackUserId TEXT,
|
|
35
|
+
mentionMessageId TEXT,
|
|
36
|
+
mentionChannelId TEXT,
|
|
37
|
+
vcsProvider TEXT,
|
|
38
|
+
vcsRepo TEXT,
|
|
39
|
+
vcsEventType TEXT,
|
|
40
|
+
vcsNumber INTEGER,
|
|
41
|
+
vcsCommentId INTEGER,
|
|
42
|
+
vcsAuthor TEXT,
|
|
43
|
+
vcsUrl TEXT,
|
|
44
|
+
parentTaskId TEXT,
|
|
45
|
+
claudeSessionId TEXT,
|
|
46
|
+
agentmailInboxId TEXT,
|
|
47
|
+
agentmailMessageId TEXT,
|
|
48
|
+
agentmailThreadId TEXT,
|
|
49
|
+
model TEXT,
|
|
50
|
+
scheduleId TEXT,
|
|
51
|
+
workflowRunId TEXT REFERENCES workflow_runs(id),
|
|
52
|
+
workflowRunStepId TEXT REFERENCES workflow_run_steps(id),
|
|
53
|
+
createdAt TEXT NOT NULL,
|
|
54
|
+
lastUpdatedAt TEXT NOT NULL,
|
|
55
|
+
finishedAt TEXT,
|
|
56
|
+
failureReason TEXT,
|
|
57
|
+
output TEXT,
|
|
58
|
+
progress TEXT,
|
|
59
|
+
notifiedAt TEXT,
|
|
60
|
+
dir TEXT,
|
|
61
|
+
outputSchema TEXT,
|
|
62
|
+
compactionCount INTEGER DEFAULT 0,
|
|
63
|
+
peakContextPercent REAL,
|
|
64
|
+
totalContextTokensUsed INTEGER,
|
|
65
|
+
contextWindowSize INTEGER,
|
|
66
|
+
was_paused INTEGER NOT NULL DEFAULT 0,
|
|
67
|
+
credentialKeySuffix TEXT,
|
|
68
|
+
credentialKeyType TEXT,
|
|
69
|
+
requestedByUserId TEXT REFERENCES users(id),
|
|
70
|
+
vcsInstallationId INTEGER,
|
|
71
|
+
vcsNodeId TEXT,
|
|
72
|
+
slackReplySent INTEGER DEFAULT 0,
|
|
73
|
+
swarmVersion TEXT,
|
|
74
|
+
contextKey TEXT,
|
|
75
|
+
provider TEXT,
|
|
76
|
+
providerMeta TEXT
|
|
77
|
+
);
|
|
78
|
+
|
|
79
|
+
INSERT INTO agent_tasks_new (
|
|
80
|
+
id, agentId, creatorAgentId, task, status, source, taskType, tags,
|
|
81
|
+
priority, dependsOn, offeredTo, offeredAt, acceptedAt, rejectionReason,
|
|
82
|
+
slackChannelId, slackThreadTs, slackUserId,
|
|
83
|
+
mentionMessageId, mentionChannelId,
|
|
84
|
+
vcsProvider, vcsRepo, vcsEventType, vcsNumber, vcsCommentId, vcsAuthor, vcsUrl,
|
|
85
|
+
parentTaskId, claudeSessionId,
|
|
86
|
+
agentmailInboxId, agentmailMessageId, agentmailThreadId,
|
|
87
|
+
model, scheduleId, workflowRunId, workflowRunStepId,
|
|
88
|
+
createdAt, lastUpdatedAt, finishedAt, failureReason, output, progress, notifiedAt,
|
|
89
|
+
dir, outputSchema, compactionCount, peakContextPercent,
|
|
90
|
+
totalContextTokensUsed, contextWindowSize, was_paused,
|
|
91
|
+
credentialKeySuffix, credentialKeyType, requestedByUserId,
|
|
92
|
+
vcsInstallationId, vcsNodeId, slackReplySent, swarmVersion, contextKey,
|
|
93
|
+
provider, providerMeta
|
|
94
|
+
)
|
|
95
|
+
SELECT
|
|
96
|
+
id, agentId, creatorAgentId, task, status, source, taskType, tags,
|
|
97
|
+
priority, dependsOn, offeredTo, offeredAt, acceptedAt, rejectionReason,
|
|
98
|
+
slackChannelId, slackThreadTs, slackUserId,
|
|
99
|
+
mentionMessageId, mentionChannelId,
|
|
100
|
+
vcsProvider, vcsRepo, vcsEventType, vcsNumber, vcsCommentId, vcsAuthor, vcsUrl,
|
|
101
|
+
parentTaskId, claudeSessionId,
|
|
102
|
+
agentmailInboxId, agentmailMessageId, agentmailThreadId,
|
|
103
|
+
model, scheduleId, workflowRunId, workflowRunStepId,
|
|
104
|
+
createdAt, lastUpdatedAt, finishedAt, failureReason, output, progress, notifiedAt,
|
|
105
|
+
dir, outputSchema, compactionCount, peakContextPercent,
|
|
106
|
+
totalContextTokensUsed, contextWindowSize, was_paused,
|
|
107
|
+
credentialKeySuffix, credentialKeyType, requestedByUserId,
|
|
108
|
+
vcsInstallationId, vcsNodeId, slackReplySent, swarmVersion, contextKey,
|
|
109
|
+
provider, providerMeta
|
|
110
|
+
FROM agent_tasks;
|
|
111
|
+
|
|
112
|
+
DROP TABLE agent_tasks;
|
|
113
|
+
ALTER TABLE agent_tasks_new RENAME TO agent_tasks;
|
|
114
|
+
|
|
115
|
+
-- Recreate every index that existed on agent_tasks (mirrors 043 + later additions):
|
|
116
|
+
-- 001/004/006/009/026: agentId, status, offeredTo, taskType, agentmailThreadId, scheduleId, workflowRunId
|
|
117
|
+
-- 031: requestedByUserId (partial)
|
|
118
|
+
-- 034: parentTaskId
|
|
119
|
+
-- 037: swarmVersion
|
|
120
|
+
-- 040: composite (slackChannelId, slackThreadTs, status)
|
|
121
|
+
-- 042: contextKey + (contextKey, status) composite
|
|
122
|
+
CREATE INDEX IF NOT EXISTS idx_agent_tasks_agentId ON agent_tasks(agentId);
|
|
123
|
+
CREATE INDEX IF NOT EXISTS idx_agent_tasks_status ON agent_tasks(status);
|
|
124
|
+
CREATE INDEX IF NOT EXISTS idx_agent_tasks_offeredTo ON agent_tasks(offeredTo);
|
|
125
|
+
CREATE INDEX IF NOT EXISTS idx_agent_tasks_taskType ON agent_tasks(taskType);
|
|
126
|
+
CREATE INDEX IF NOT EXISTS idx_agent_tasks_agentmailThreadId ON agent_tasks(agentmailThreadId);
|
|
127
|
+
CREATE INDEX IF NOT EXISTS idx_agent_tasks_schedule_id ON agent_tasks(scheduleId);
|
|
128
|
+
CREATE INDEX IF NOT EXISTS idx_agent_tasks_workflow_run ON agent_tasks(workflowRunId);
|
|
129
|
+
CREATE INDEX IF NOT EXISTS idx_tasks_requested_by ON agent_tasks(requestedByUserId) WHERE requestedByUserId IS NOT NULL;
|
|
130
|
+
CREATE INDEX IF NOT EXISTS idx_agent_tasks_parentTaskId ON agent_tasks(parentTaskId);
|
|
131
|
+
CREATE INDEX IF NOT EXISTS idx_agent_tasks_swarmVersion ON agent_tasks(swarmVersion);
|
|
132
|
+
CREATE INDEX IF NOT EXISTS idx_agent_tasks_slack_thread
|
|
133
|
+
ON agent_tasks(slackChannelId, slackThreadTs, status);
|
|
134
|
+
CREATE INDEX IF NOT EXISTS idx_agent_tasks_context_key
|
|
135
|
+
ON agent_tasks(contextKey);
|
|
136
|
+
CREATE INDEX IF NOT EXISTS idx_agent_tasks_context_key_status
|
|
137
|
+
ON agent_tasks(contextKey, status);
|
|
138
|
+
|
|
139
|
+
PRAGMA foreign_keys=on;
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
-- Inbox item state — per-user dismiss/snooze/done state for action-items inbox
|
|
2
|
+
-- buckets (approval, credential_missing, broken_task, to_read, to_start_template).
|
|
3
|
+
--
|
|
4
|
+
-- itemType is enforced via Zod (`InboxItemTypeSchema` in src/types.ts), not a
|
|
5
|
+
-- SQL CHECK constraint — Phase 1 lesson, lets us extend the enum without a
|
|
6
|
+
-- forward-only migration. Direct SQL inserts can bypass; the HTTP layer
|
|
7
|
+
-- (`PATCH /api/inbox-state`) is the only sanctioned writer.
|
|
8
|
+
--
|
|
9
|
+
-- itemId references the underlying entity (task id, approval-request id,
|
|
10
|
+
-- agent id, template id, …) but is left as a free TEXT column rather than a
|
|
11
|
+
-- typed FK because itemType disambiguates which table it points at.
|
|
12
|
+
CREATE TABLE IF NOT EXISTS inbox_item_state (
|
|
13
|
+
id TEXT PRIMARY KEY DEFAULT (lower(hex(randomblob(16)))),
|
|
14
|
+
userId TEXT NOT NULL REFERENCES users(id) ON DELETE CASCADE,
|
|
15
|
+
itemType TEXT NOT NULL,
|
|
16
|
+
itemId TEXT NOT NULL,
|
|
17
|
+
status TEXT NOT NULL DEFAULT 'open',
|
|
18
|
+
snoozeUntil TEXT,
|
|
19
|
+
dismissedAt TEXT,
|
|
20
|
+
doneAt TEXT,
|
|
21
|
+
createdAt TEXT NOT NULL DEFAULT (datetime('now')),
|
|
22
|
+
lastUpdatedAt TEXT NOT NULL DEFAULT (datetime('now')),
|
|
23
|
+
UNIQUE(userId, itemType, itemId)
|
|
24
|
+
);
|
|
25
|
+
|
|
26
|
+
CREATE INDEX IF NOT EXISTS idx_inbox_item_state_userId_status
|
|
27
|
+
ON inbox_item_state(userId, status);
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
-- Task templates — "To start" bucket starters. Polymorphic from day one
|
|
2
|
+
-- (kind = 'task' | 'workflow' | 'schedule') so v2 can register workflow /
|
|
3
|
+
-- schedule starters without a follow-up migration. v1 only inserts/reads
|
|
4
|
+
-- kind='task' rows; the schema is shaped for v2.
|
|
5
|
+
--
|
|
6
|
+
-- The `prompt` column is NOT NULL only because v1 only ever seeds task rows;
|
|
7
|
+
-- a future migration can relax that when workflow/schedule starters land
|
|
8
|
+
-- (workflows carry workflowId in `payload`, schedules carry cron + prompt).
|
|
9
|
+
--
|
|
10
|
+
-- Table name kept as `task_templates` for v1 to match existing references
|
|
11
|
+
-- across the plan; v2 may rename to `quick_starts` if non-task kinds graduate.
|
|
12
|
+
CREATE TABLE IF NOT EXISTS task_templates (
|
|
13
|
+
id TEXT PRIMARY KEY DEFAULT (lower(hex(randomblob(16)))),
|
|
14
|
+
title TEXT NOT NULL,
|
|
15
|
+
description TEXT NOT NULL,
|
|
16
|
+
prompt TEXT NOT NULL,
|
|
17
|
+
kind TEXT NOT NULL DEFAULT 'task' CHECK(kind IN ('task','workflow','schedule')),
|
|
18
|
+
payload TEXT NOT NULL DEFAULT '{}',
|
|
19
|
+
category TEXT,
|
|
20
|
+
tags TEXT NOT NULL DEFAULT '[]',
|
|
21
|
+
createdAt TEXT NOT NULL DEFAULT (datetime('now'))
|
|
22
|
+
);
|
|
23
|
+
|
|
24
|
+
CREATE INDEX IF NOT EXISTS idx_task_templates_kind ON task_templates(kind);
|
|
25
|
+
|
|
26
|
+
INSERT INTO task_templates (title, description, prompt, category, tags) VALUES
|
|
27
|
+
('Refactor a file', 'Improve a file without changing behavior', 'Refactor the file at <path> for readability while preserving behavior. Run typecheck + tests after.', 'engineering', '["refactor"]'),
|
|
28
|
+
('Investigate a bug', 'Reproduce, root-cause, and propose a fix', 'Investigate the following bug: <symptom>. Reproduce locally, identify the root cause, and propose a fix.', 'engineering', '["debug"]'),
|
|
29
|
+
('Open a PR', 'Create a PR for the current branch', 'Open a PR from the current branch with a clear summary and test plan.', 'git', '["git","pr"]'),
|
|
30
|
+
('Write tests for X', 'Cover an under-tested module', 'Write unit tests for <module>. Aim for ~80% line coverage.', 'engineering', '["test"]'),
|
|
31
|
+
('Daily triage', 'Review failed tasks + pending approvals', 'Triage the action-items inbox: dismiss noise, escalate blockers, summarize unread sessions.', 'ops', '["triage"]');
|