@desplega.ai/agent-swarm 1.75.0 → 1.76.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/README.md +1 -1
  2. package/openapi.json +973 -36
  3. package/package.json +2 -2
  4. package/src/be/db.ts +527 -9
  5. package/src/be/memory/raters/llm-summarizer.ts +218 -0
  6. package/src/be/memory/raters/llm.ts +56 -75
  7. package/src/be/memory/retrieval-store.ts +21 -0
  8. package/src/be/migrations/054_agent_harness_provider.sql +21 -0
  9. package/src/be/migrations/055_agent_cred_status.sql +15 -0
  10. package/src/be/migrations/056_drop_agent_tasks_source_check.sql +139 -0
  11. package/src/be/migrations/057_inbox_item_state.sql +27 -0
  12. package/src/be/migrations/058_task_templates.sql +31 -0
  13. package/src/be/swarm-config-guard.ts +24 -0
  14. package/src/commands/credential-wait.ts +1 -1
  15. package/src/commands/provider-credentials.ts +434 -0
  16. package/src/commands/runner.ts +229 -42
  17. package/src/hooks/hook.ts +115 -95
  18. package/src/http/agents.ts +82 -2
  19. package/src/http/config.ts +11 -1
  20. package/src/http/inbox-state.ts +89 -0
  21. package/src/http/index.ts +10 -0
  22. package/src/http/sessions.ts +86 -0
  23. package/src/http/status.ts +665 -0
  24. package/src/http/task-templates.ts +51 -0
  25. package/src/http/tasks.ts +85 -5
  26. package/src/http/users.ts +134 -0
  27. package/src/providers/claude-adapter.ts +5 -0
  28. package/src/providers/codex-adapter.ts +1 -1
  29. package/src/providers/index.ts +1 -1
  30. package/src/slack/handlers.ts +0 -1
  31. package/src/tests/agents-harness-provider.test.ts +333 -0
  32. package/src/tests/credential-check.test.ts +32 -1
  33. package/src/tests/credential-status-api.test.ts +42 -0
  34. package/src/tests/harness-provider-resolution.test.ts +242 -0
  35. package/src/tests/jira-sync.test.ts +1 -1
  36. package/src/tests/memory-rater-llm-summarizer.test.ts +317 -0
  37. package/src/tests/memory-rater-llm.test.ts +265 -107
  38. package/src/tests/migration-runner-regressions.test.ts +17 -2
  39. package/src/tests/sessions.test.ts +141 -0
  40. package/src/tests/status.test.ts +843 -0
  41. package/src/tests/stop-hook-task-resolution.test.ts +98 -0
  42. package/src/tests/template-recommendations.test.ts +148 -0
  43. package/src/tests/use-dismissible-card.test.ts +140 -0
  44. package/src/tools/swarm-config/set-config.ts +17 -1
  45. package/src/types.ts +117 -0
  46. package/src/utils/harness-provider.ts +32 -0
  47. package/tsconfig.json +0 -2
  48. package/src/providers/credentials.ts +0 -74
@@ -0,0 +1,218 @@
1
+ /**
2
+ * `runMemoryRater` — Stop-hook helper that calls OpenRouter for the combined
3
+ * session-summary + LLM-rater piggyback prompt and returns a schema-validated
4
+ * `SummaryWithRatings`.
5
+ *
6
+ * Refactored out of `src/hooks/hook.ts` so the rater logic stays out of the
7
+ * hook (review feedback on PR #450). The hook just calls `runMemoryRater(...)`
8
+ * and inspects the typed result.
9
+ *
10
+ * Worker-safe — uses raw `fetch` + the tolerant JSON parser landed in PR #447.
11
+ * No `bun:sqlite` / `src/be/db` imports. Boundary script enforces this.
12
+ */
13
+ import { z } from "zod";
14
+ import { type SummaryWithRatings, SummaryWithRatingsSchema } from "./llm";
15
+
16
+ /**
17
+ * Default model used when `MEMORY_RATER_MODEL` is unset. Gemini 3 Flash on
18
+ * OpenRouter — the only Gemini 3 Flash variant published as of this PR (no
19
+ * stable non-preview slug exists yet). CLAUDE.md project-wide default.
20
+ */
21
+ export const DEFAULT_MEMORY_RATER_MODEL = "google/gemini-3-flash-preview";
22
+
23
+ /**
24
+ * `response_format.json_schema.name` sent to OpenRouter. Used by some
25
+ * providers as a tag in their structured-output telemetry — keep it stable
26
+ * so model behaviour stays comparable across calls.
27
+ */
28
+ export const MEMORY_RATER_SCHEMA_NAME = "memory_rater_output";
29
+
30
+ const OPENROUTER_CHAT_COMPLETIONS_URL = "https://openrouter.ai/api/v1/chat/completions";
31
+
32
+ /**
33
+ * JSON Schema derived from {@link SummaryWithRatingsSchema}, the source of
34
+ * truth. Computed once at module load via Zod v4's native `z.toJSONSchema`
35
+ * (Zod v3's `zod-to-json-schema` is incompatible with the v4 runtime we
36
+ * pin). The `$schema` key is stripped because OpenRouter / OpenAI strict
37
+ * json_schema mode rejects unrecognized top-level keys.
38
+ *
39
+ * Probed end-to-end against `google/gemini-3-flash-preview` with
40
+ * `response_format.json_schema.strict: true` — accepted, no rewrite needed.
41
+ */
42
+ export const MEMORY_RATER_JSON_SCHEMA: Record<string, unknown> = (() => {
43
+ const schema = z.toJSONSchema(SummaryWithRatingsSchema) as Record<string, unknown>;
44
+ delete schema.$schema;
45
+ return schema;
46
+ })();
47
+
48
+ /**
49
+ * Resolve the OpenRouter model slug. Reads `MEMORY_RATER_MODEL` from the env;
50
+ * falls back to {@link DEFAULT_MEMORY_RATER_MODEL}. Self-hosters can pin a
51
+ * different slug (e.g. `anthropic/claude-haiku-4.5`) without a code change.
52
+ */
53
+ export function getMemoryRaterModel(env: NodeJS.ProcessEnv = process.env): string {
54
+ const raw = env.MEMORY_RATER_MODEL;
55
+ if (typeof raw === "string" && raw.trim().length > 0) return raw.trim();
56
+ return DEFAULT_MEMORY_RATER_MODEL;
57
+ }
58
+
59
+ /**
60
+ * Best-effort parse of a JSON string that may be wrapped in markdown fences
61
+ * (```json … ``` or plain ``` … ```), have a prose preamble, or both. Returns
62
+ * the parsed value, or `null` when nothing parses (so a literal JSON `null` payload is indistinguishable from failure). NEVER throws.
63
+ *
64
+ * Strategy: try strict parse first. On failure, strip a leading ```json /
65
+ * ```<lang> / ``` fence + matching trailing ```; on second failure, slice
66
+ * from the first `{` to the last `}` and retry.
67
+ *
68
+ * Originally landed in PR #447 to recover ratings from Haiku's occasional
69
+ * fenced/preambled output despite `response_format: {type: "json_object"}`.
70
+ * Restored here to harden the OpenRouter direct-HTTP path against the same
71
+ * class of provider quirks (Gemini Flash also occasionally fences output).
72
+ */
73
+ export function tryParseLooseJson(raw: string): unknown {
74
+ const trimmed = raw.trim();
75
+ try {
76
+ return JSON.parse(trimmed);
77
+ } catch {
78
+ // fall through to fence-stripping
79
+ }
80
+ const fenced = trimmed.match(/^```[a-zA-Z0-9_-]*\s*\n?([\s\S]*?)\n?```\s*$/);
81
+ if (fenced?.[1]) {
82
+ try {
83
+ return JSON.parse(fenced[1].trim());
84
+ } catch {
85
+ // fall through
86
+ }
87
+ }
88
+ const first = trimmed.indexOf("{");
89
+ const last = trimmed.lastIndexOf("}");
90
+ if (first !== -1 && last > first) {
91
+ try {
92
+ return JSON.parse(trimmed.slice(first, last + 1));
93
+ } catch {
94
+ // fall through
95
+ }
96
+ }
97
+ return null;
98
+ }
99
+
100
+ export type RunMemoryRaterOpts = {
101
+ /** The fully-built prompt (e.g. from `buildSummaryWithRatingsPrompt`). */
102
+ prompt: string;
103
+ /** OpenRouter API key. Caller is responsible for the no-op-when-unset gate. */
104
+ apiKey: string;
105
+ /** Model slug override; falls through to {@link getMemoryRaterModel}. */
106
+ model?: string;
107
+ /** Injectable for tests — defaults to the global `fetch`. */
108
+ fetchImpl?: typeof fetch;
109
+ /**
110
+ * Max characters (UTF-16 code units, default 200) of a non-2xx response body
111
+ * to include in the error log. Capped to avoid leaking very large bodies into stderr.
112
+ */
113
+ responseLogCap?: number;
114
+ };
115
+
116
+ export type RunMemoryRaterResult =
117
+ | { ok: true; data: SummaryWithRatings; model: string }
118
+ | {
119
+ ok: false;
120
+ reason: "transport" | "http_error" | "empty_content" | "parse" | "schema";
121
+ status?: number;
122
+ };
123
+
124
+ /**
125
+ * Call OpenRouter's chat completions endpoint with a strict `json_schema`
126
+ * `response_format`, then parse and schema-validate the assistant's content.
127
+ *
128
+ * Returns a tagged union: `ok: true` with a typed `SummaryWithRatings`, or
129
+ * `ok: false` with a `reason` discriminator the caller can branch on for
130
+ * logging. NEVER throws — the hook wraps this in its own try/catch as a
131
+ * second line of defence, but this function is designed to short-circuit
132
+ * cleanly rather than propagate exceptions.
133
+ */
134
+ export async function runMemoryRater(opts: RunMemoryRaterOpts): Promise<RunMemoryRaterResult> {
135
+ const fetchFn = opts.fetchImpl ?? fetch;
136
+ const model = opts.model ?? getMemoryRaterModel();
137
+ const responseLogCap = opts.responseLogCap ?? 200;
138
+
139
+ let res: Response;
140
+ try {
141
+ res = await fetchFn(OPENROUTER_CHAT_COMPLETIONS_URL, {
142
+ method: "POST",
143
+ headers: {
144
+ "Content-Type": "application/json",
145
+ Authorization: `Bearer ${opts.apiKey}`,
146
+ },
147
+ body: JSON.stringify({
148
+ model,
149
+ // OpenRouter strict json_schema — forces the provider's structured-
150
+ // output guardrails on instead of the looser `json_object` mode.
151
+ // Schema is derived from the same Zod source of truth, so the
152
+ // request and the post-validation Zod check can't drift.
153
+ // https://openrouter.ai/docs/guides/features/structured-outputs
154
+ response_format: {
155
+ type: "json_schema",
156
+ json_schema: {
157
+ name: MEMORY_RATER_SCHEMA_NAME,
158
+ strict: true,
159
+ schema: MEMORY_RATER_JSON_SCHEMA,
160
+ },
161
+ },
162
+ messages: [{ role: "user", content: opts.prompt }],
163
+ }),
164
+ });
165
+ } catch (err) {
166
+ console.error("[memory-rater:llm] runMemoryRater fetch threw:", (err as Error).message);
167
+ return { ok: false, reason: "transport" };
168
+ }
169
+
170
+ if (!res.ok) {
171
+ const text = await res.text().catch(() => "");
172
+ console.error(
173
+ `[memory-rater:llm] OpenRouter ${res.status} ${res.statusText}: ${text.slice(0, responseLogCap)}`,
174
+ );
175
+ return { ok: false, reason: "http_error", status: res.status };
176
+ }
177
+
178
+ let body: unknown;
179
+ try {
180
+ body = await res.json();
181
+ } catch (err) {
182
+ console.error("[memory-rater:llm] OpenRouter response was not JSON:", (err as Error).message);
183
+ return { ok: false, reason: "parse" };
184
+ }
185
+
186
+ const content = extractContent(body);
187
+ if (typeof content !== "string" || content.length === 0) {
188
+ return { ok: false, reason: "empty_content" };
189
+ }
190
+
191
+ const candidate = tryParseLooseJson(content);
192
+ if (candidate === null || typeof candidate !== "object") {
193
+ return { ok: false, reason: "parse" };
194
+ }
195
+
196
+ const parsed = SummaryWithRatingsSchema.safeParse(candidate);
197
+ if (!parsed.success) {
198
+ return { ok: false, reason: "schema" };
199
+ }
200
+ return { ok: true, data: parsed.data, model };
201
+ }
202
+
203
+ /**
204
+ * Pull `choices[0].message.content` out of an OpenRouter chat-completion
205
+ * response defensively. Returns the string, or `null` when the shape doesn't
206
+ * match — caller treats that as `empty_content`.
207
+ */
208
+ function extractContent(body: unknown): string | null {
209
+ if (!body || typeof body !== "object") return null;
210
+ const choices = (body as { choices?: unknown }).choices;
211
+ if (!Array.isArray(choices) || choices.length === 0) return null;
212
+ const first = choices[0];
213
+ if (!first || typeof first !== "object") return null;
214
+ const message = (first as { message?: unknown }).message;
215
+ if (!message || typeof message !== "object") return null;
216
+ const content = (message as { content?: unknown }).content;
217
+ return typeof content === "string" ? content : null;
218
+ }
@@ -193,7 +193,9 @@ export function buildSummaryWithRatingsPrompt(
193
193
 
194
194
  return `${basePrompt}
195
195
 
196
- CRITICAL: Return JSON conforming to this schema (no prose outside the JSON, no markdown fences):
196
+ CRITICAL: Your entire response MUST be a single JSON object that conforms to the schema below. Do NOT wrap it in triple-backtick fences (no \`\`\`json or \`\`\`), do NOT add a prose preamble, do NOT add trailing commentary. Just the JSON object, nothing else.
197
+
198
+ Schema:
197
199
  {
198
200
  "summary": string, // your existing summary text
199
201
  "ratings": [ // one entry per memory you can score
@@ -215,80 +217,6 @@ Memories retrieved during this session:
215
217
  ${memoryBlock}`;
216
218
  }
217
219
 
218
- /**
219
- * Best-effort parse of the structured `SummaryWithRatingsSchema` JSON out of
220
- * the `claude -p --output-format json` envelope (`{ result: "<inner json>" }`).
221
- *
222
- * Returns `null` on any parse failure — the caller falls back to the existing
223
- * summary-only path. NEVER throws.
224
- */
225
- export function parseSummaryWithRatings(claudeStdout: string): SummaryWithRatings | null {
226
- let envelope: { result?: unknown };
227
- try {
228
- envelope = JSON.parse(claudeStdout) as { result?: unknown };
229
- } catch {
230
- return null;
231
- }
232
- const inner = envelope.result;
233
- let candidate: unknown;
234
- if (typeof inner === "string") {
235
- try {
236
- candidate = JSON.parse(inner.trim());
237
- } catch {
238
- return null;
239
- }
240
- } else if (inner && typeof inner === "object") {
241
- candidate = inner;
242
- } else {
243
- return null;
244
- }
245
- const parsed = SummaryWithRatingsSchema.safeParse(candidate);
246
- return parsed.success ? parsed.data : null;
247
- }
248
-
249
- /**
250
- * Fallback summary-text extractor for the hook's `claude -p` envelope. Used
251
- * when {@link parseSummaryWithRatings} returns null — i.e., when the LLM
252
- * returned a valid envelope but the inner payload either wasn't structured
253
- * JSON (unstructured prompt path) OR was structured JSON whose ratings failed
254
- * `SummaryWithRatingsSchema` validation (e.g., out-of-range scores).
255
- *
256
- * In the latter case `envelope.result` is the full inner JSON STRING such as
257
- * `{"summary":"...","ratings":[...]}`; indexing that verbatim into agent
258
- * memory would violate the step-4 contract that ratings are best-effort and
259
- * the existing summary-indexing behavior remains unchanged. We extract the
260
- * inner `summary` field if present, else return the inner string (treating
261
- * it as plain summary text). NEVER throws.
262
- */
263
- export function extractSummaryFromClaudeStdout(claudeStdout: string): string {
264
- let envelope: { result?: unknown };
265
- try {
266
- envelope = JSON.parse(claudeStdout) as { result?: unknown };
267
- } catch {
268
- return claudeStdout;
269
- }
270
- const inner = envelope.result;
271
- if (typeof inner === "string") {
272
- try {
273
- const innerParsed = JSON.parse(inner.trim()) as { summary?: unknown };
274
- if (innerParsed && typeof innerParsed.summary === "string") {
275
- return innerParsed.summary;
276
- }
277
- } catch {
278
- // inner wasn't JSON — treat it as plain summary text
279
- }
280
- return inner;
281
- }
282
- if (
283
- inner &&
284
- typeof inner === "object" &&
285
- typeof (inner as { summary?: unknown }).summary === "string"
286
- ) {
287
- return (inner as { summary: string }).summary;
288
- }
289
- return claudeStdout;
290
- }
291
-
292
220
  /**
293
221
  * `MEMORY_RATERS=...` includes `llm`? Used by the hook to gate the piggyback
294
222
  * path — strict opt-in so existing deployments are byte-identical when unset.
@@ -308,10 +236,63 @@ export type RetrievalRow = {
308
236
  name: string;
309
237
  content: string;
310
238
  scope?: string;
239
+ /** `agent_memory.source` — present once the API surfaces it (post-PR #451 amendment). */
240
+ source?: string;
241
+ /** `agent_tasks.scheduleId` for the writing task, or null when not a scheduled run. */
242
+ scheduleId?: string | null;
311
243
  similarity?: number | null;
312
244
  retrievedAt?: string;
313
245
  };
314
246
 
247
+ /**
248
+ * Dedupe candidate memories before LLM rating to prevent posterior inflation
249
+ * from scheduled-task self-similarity.
250
+ *
251
+ * **Why this exists.** Scheduled tasks fire identical task text on every
252
+ * run, and the task-completion path names each memory
253
+ * `"Task: ${task.task.slice(0, 80)}"` (`src/tools/store-progress.ts`). When
254
+ * the next run searches memory, its own past runs surface as "highly
255
+ * similar" rows. Without dedup, the LLM rater scored 5+ near-clones at +1.0
256
+ * each — bumping alpha 5x in a single session and distorting the Beta(α,β)
257
+ * ranking vs. a normal one-shot session. Concrete case (Lead's audit of the
258
+ * first 37 `llm` ratings, post-PR #450): the Claude Code Changelog Monitor
259
+ * hourly cron (taskId `f938d74d-05af-44a7-a0aa-3463d22be502`) produced 5
260
+ * saturating +1s in one rater pass — every rated memory was a prior hourly
261
+ * run.
262
+ *
263
+ * **Discriminator.** `agent_tasks.scheduleId`. Memories sharing a non-null
264
+ * `scheduleId` are by definition from the same scheduled job — that is the
265
+ * exact duplicate class the audit identified, and the only one we want to
266
+ * collapse. We do NOT key on `name` alone, because the 80-char truncation in
267
+ * task-completion names ("Task: …") and session-summary names ("Session: …")
268
+ * means two distinct one-shot tasks/summaries that happen to share the first
269
+ * 80 chars of their description would silently collapse — the false-positive
270
+ * path the PR #451 reviewer flagged.
271
+ *
272
+ * **Pass-through cases (NOT deduped).**
273
+ * - `scheduleId` is null/undefined (manual one-shot tasks, manual memories,
274
+ * file-index memories) — no scheduled-clone risk.
275
+ * - Two memories from different scheduled jobs that happen to surface in
276
+ * the same retrieval set — different `scheduleId`s, both kept.
277
+ *
278
+ * **Tie-break.** Input is `ORDER BY mr.retrievedAt DESC` from
279
+ * `getRetrievalsForAgent`, so "first occurrence per scheduleId" = the most
280
+ * recently retrieved row for that schedule, which is the representative we want.
281
+ */
282
+ export function dedupeRetrievalsForRater<T extends { scheduleId?: string | null }>(rows: T[]): T[] {
283
+ const seenSchedules = new Set<string>();
284
+ const out: T[] = [];
285
+ for (const row of rows) {
286
+ const scheduleId = row.scheduleId;
287
+ if (typeof scheduleId === "string" && scheduleId.length > 0) {
288
+ if (seenSchedules.has(scheduleId)) continue;
289
+ seenSchedules.add(scheduleId);
290
+ }
291
+ out.push(row);
292
+ }
293
+ return out;
294
+ }
295
+
315
296
  /**
316
297
  * GET `/api/memory/retrievals?taskId=` — best-effort. Returns `[]` on any
317
298
  * failure so a transient API outage never blocks the summary-indexing path.
@@ -28,6 +28,20 @@ export type RetrievalListRow = {
28
28
  /** Up to RETRIEVAL_CONTENT_SNIPPET_CHARS chars of `agent_memory.content`. */
29
29
  content: string;
30
30
  scope: string;
31
+ /**
32
+ * `agent_memory.source` — `'task_completion' | 'session_summary' | 'manual'
33
+ * | 'file_index'`. Surfaced so the worker rater can scope dedup to the
34
+ * memory class that exhibits scheduled-task self-similarity.
35
+ */
36
+ source: string;
37
+ /**
38
+ * `agent_tasks.scheduleId` for the source task that wrote this memory, or
39
+ * `null` if the memory has no source task or the task wasn't a scheduled
40
+ * run. Worker raters use this as a precise cron-clone discriminator —
41
+ * memories sharing a non-null `scheduleId` are by definition from the same
42
+ * scheduled job and safe to dedupe.
43
+ */
44
+ scheduleId: string | null;
31
45
  similarity: number | null;
32
46
  retrievedAt: string;
33
47
  };
@@ -61,15 +75,22 @@ export function getRetrievalsForAgent(
61
75
  params.push(filter.sessionId);
62
76
  }
63
77
 
78
+ // LEFT JOIN agent_tasks so we can surface `scheduleId` to worker raters —
79
+ // a non-null `scheduleId` is the precise cron-clone discriminator that
80
+ // `dedupeRetrievalsForRater` keys on. The LEFT keeps memories with no
81
+ // source task (manual / file_index) in the result set.
64
82
  const sql = `
65
83
  SELECT am.id AS id,
66
84
  am.name AS name,
67
85
  substr(am.content, 1, ?) AS content,
68
86
  am.scope AS scope,
87
+ am.source AS source,
88
+ at.scheduleId AS scheduleId,
69
89
  mr.similarity AS similarity,
70
90
  mr.retrievedAt AS retrievedAt
71
91
  FROM memory_retrieval mr
72
92
  INNER JOIN agent_memory am ON am.id = mr.memoryId
93
+ LEFT JOIN agent_tasks at ON at.id = am.sourceTaskId
73
94
  WHERE ${conditions.join(" AND ")}
74
95
  ORDER BY mr.retrievedAt DESC
75
96
  LIMIT ?
@@ -0,0 +1,21 @@
1
+ -- 054_agent_harness_provider.sql
2
+ --
3
+ -- Phase 1.5 of the cloud-personalization plan
4
+ -- (thoughts/taras/plans/2026-05-08-cloud-personalization-phases-1-4.md).
5
+ --
6
+ -- Add a first-class `harness_provider` column on `agents` so each agent's
7
+ -- harness (claude / codex / pi / devin / claude-managed / opencode) is
8
+ -- queryable per-row, independent of `process.env.HARNESS_PROVIDER` at
9
+ -- worker boot.
10
+ --
11
+ -- Workers push their `HARNESS_PROVIDER` value on registration; an operator
12
+ -- can later re-assign via `PATCH /api/agents/:id/harness-provider`. The
13
+ -- worker itself does NOT yet react in real time — picked up on next worker
14
+ -- restart. Full per-agent harness with dynamic adapter loading lives in
15
+ -- Linear DES-359.
16
+ --
17
+ -- Forward-only. NULL default = backward-compat for already-registered
18
+ -- agents (their column stays NULL until they re-register or an operator
19
+ -- patches it).
20
+
21
+ ALTER TABLE agents ADD COLUMN harness_provider TEXT NULL;
@@ -0,0 +1,15 @@
1
+ -- 055_agent_cred_status.sql
2
+ --
3
+ -- Worker-self-reported credential snapshot. Pairs with `harness_provider`
4
+ -- (054): the JSON describes the agent's creds for whichever harness that
5
+ -- agent runs. NULL = unreported (worker hasn't booted yet, or
6
+ -- CRED_CHECK_DISABLE=1 was set).
7
+ --
8
+ -- The existing `credentialMissing` column (053) stays. This one is additive
9
+ -- and carries the full snapshot (ready, missing, satisfiedBy, hint,
10
+ -- liveTest, reportedAt, reportKind). Once `cred_status.missing` is proven
11
+ -- across deploys, `credentialMissing` can be retired in a later migration.
12
+ --
13
+ -- Forward-only.
14
+
15
+ ALTER TABLE agents ADD COLUMN cred_status TEXT;
@@ -0,0 +1,139 @@
1
+ -- Drop the SQL CHECK constraint on agent_tasks.source.
2
+ -- The Zod layer (`AgentTaskSourceSchema` in src/types.ts) is now the single
3
+ -- source of truth for the allowed enum, so adding a new source no longer
4
+ -- requires a forward-only migration. This makes future source additions
5
+ -- (Phase 1 of the UI chat/session experience plan) cheap.
6
+ --
7
+ -- SQLite cannot ALTER a CHECK constraint in place; we follow the table-rebuild
8
+ -- pattern from migration 043_jira_source.sql verbatim, minus the CHECK clause
9
+ -- on `source`. All other columns, defaults, indexes, and FKs are preserved
10
+ -- exactly. No data migration — existing rows remain valid.
11
+ --
12
+ -- INSERT uses an explicit column list (no `SELECT *`) to be robust against
13
+ -- column-order drift between SQLite versions and against post-043 ALTERs
14
+ -- (migration 044 added `provider` and `providerMeta`).
15
+ PRAGMA foreign_keys=off;
16
+
17
+ CREATE TABLE agent_tasks_new (
18
+ id TEXT PRIMARY KEY,
19
+ agentId TEXT,
20
+ creatorAgentId TEXT,
21
+ task TEXT NOT NULL,
22
+ status TEXT NOT NULL DEFAULT 'pending',
23
+ source TEXT NOT NULL DEFAULT 'mcp',
24
+ taskType TEXT,
25
+ tags TEXT DEFAULT '[]',
26
+ priority INTEGER DEFAULT 50,
27
+ dependsOn TEXT DEFAULT '[]',
28
+ offeredTo TEXT,
29
+ offeredAt TEXT,
30
+ acceptedAt TEXT,
31
+ rejectionReason TEXT,
32
+ slackChannelId TEXT,
33
+ slackThreadTs TEXT,
34
+ slackUserId TEXT,
35
+ mentionMessageId TEXT,
36
+ mentionChannelId TEXT,
37
+ vcsProvider TEXT,
38
+ vcsRepo TEXT,
39
+ vcsEventType TEXT,
40
+ vcsNumber INTEGER,
41
+ vcsCommentId INTEGER,
42
+ vcsAuthor TEXT,
43
+ vcsUrl TEXT,
44
+ parentTaskId TEXT,
45
+ claudeSessionId TEXT,
46
+ agentmailInboxId TEXT,
47
+ agentmailMessageId TEXT,
48
+ agentmailThreadId TEXT,
49
+ model TEXT,
50
+ scheduleId TEXT,
51
+ workflowRunId TEXT REFERENCES workflow_runs(id),
52
+ workflowRunStepId TEXT REFERENCES workflow_run_steps(id),
53
+ createdAt TEXT NOT NULL,
54
+ lastUpdatedAt TEXT NOT NULL,
55
+ finishedAt TEXT,
56
+ failureReason TEXT,
57
+ output TEXT,
58
+ progress TEXT,
59
+ notifiedAt TEXT,
60
+ dir TEXT,
61
+ outputSchema TEXT,
62
+ compactionCount INTEGER DEFAULT 0,
63
+ peakContextPercent REAL,
64
+ totalContextTokensUsed INTEGER,
65
+ contextWindowSize INTEGER,
66
+ was_paused INTEGER NOT NULL DEFAULT 0,
67
+ credentialKeySuffix TEXT,
68
+ credentialKeyType TEXT,
69
+ requestedByUserId TEXT REFERENCES users(id),
70
+ vcsInstallationId INTEGER,
71
+ vcsNodeId TEXT,
72
+ slackReplySent INTEGER DEFAULT 0,
73
+ swarmVersion TEXT,
74
+ contextKey TEXT,
75
+ provider TEXT,
76
+ providerMeta TEXT
77
+ );
78
+
79
+ INSERT INTO agent_tasks_new (
80
+ id, agentId, creatorAgentId, task, status, source, taskType, tags,
81
+ priority, dependsOn, offeredTo, offeredAt, acceptedAt, rejectionReason,
82
+ slackChannelId, slackThreadTs, slackUserId,
83
+ mentionMessageId, mentionChannelId,
84
+ vcsProvider, vcsRepo, vcsEventType, vcsNumber, vcsCommentId, vcsAuthor, vcsUrl,
85
+ parentTaskId, claudeSessionId,
86
+ agentmailInboxId, agentmailMessageId, agentmailThreadId,
87
+ model, scheduleId, workflowRunId, workflowRunStepId,
88
+ createdAt, lastUpdatedAt, finishedAt, failureReason, output, progress, notifiedAt,
89
+ dir, outputSchema, compactionCount, peakContextPercent,
90
+ totalContextTokensUsed, contextWindowSize, was_paused,
91
+ credentialKeySuffix, credentialKeyType, requestedByUserId,
92
+ vcsInstallationId, vcsNodeId, slackReplySent, swarmVersion, contextKey,
93
+ provider, providerMeta
94
+ )
95
+ SELECT
96
+ id, agentId, creatorAgentId, task, status, source, taskType, tags,
97
+ priority, dependsOn, offeredTo, offeredAt, acceptedAt, rejectionReason,
98
+ slackChannelId, slackThreadTs, slackUserId,
99
+ mentionMessageId, mentionChannelId,
100
+ vcsProvider, vcsRepo, vcsEventType, vcsNumber, vcsCommentId, vcsAuthor, vcsUrl,
101
+ parentTaskId, claudeSessionId,
102
+ agentmailInboxId, agentmailMessageId, agentmailThreadId,
103
+ model, scheduleId, workflowRunId, workflowRunStepId,
104
+ createdAt, lastUpdatedAt, finishedAt, failureReason, output, progress, notifiedAt,
105
+ dir, outputSchema, compactionCount, peakContextPercent,
106
+ totalContextTokensUsed, contextWindowSize, was_paused,
107
+ credentialKeySuffix, credentialKeyType, requestedByUserId,
108
+ vcsInstallationId, vcsNodeId, slackReplySent, swarmVersion, contextKey,
109
+ provider, providerMeta
110
+ FROM agent_tasks;
111
+
112
+ DROP TABLE agent_tasks;
113
+ ALTER TABLE agent_tasks_new RENAME TO agent_tasks;
114
+
115
+ -- Recreate every index that existed on agent_tasks (mirrors 043 + later additions):
116
+ -- 001/004/006/009/026: agentId, status, offeredTo, taskType, agentmailThreadId, scheduleId, workflowRunId
117
+ -- 031: requestedByUserId (partial)
118
+ -- 034: parentTaskId
119
+ -- 037: swarmVersion
120
+ -- 040: composite (slackChannelId, slackThreadTs, status)
121
+ -- 042: contextKey + (contextKey, status) composite
122
+ CREATE INDEX IF NOT EXISTS idx_agent_tasks_agentId ON agent_tasks(agentId);
123
+ CREATE INDEX IF NOT EXISTS idx_agent_tasks_status ON agent_tasks(status);
124
+ CREATE INDEX IF NOT EXISTS idx_agent_tasks_offeredTo ON agent_tasks(offeredTo);
125
+ CREATE INDEX IF NOT EXISTS idx_agent_tasks_taskType ON agent_tasks(taskType);
126
+ CREATE INDEX IF NOT EXISTS idx_agent_tasks_agentmailThreadId ON agent_tasks(agentmailThreadId);
127
+ CREATE INDEX IF NOT EXISTS idx_agent_tasks_schedule_id ON agent_tasks(scheduleId);
128
+ CREATE INDEX IF NOT EXISTS idx_agent_tasks_workflow_run ON agent_tasks(workflowRunId);
129
+ CREATE INDEX IF NOT EXISTS idx_tasks_requested_by ON agent_tasks(requestedByUserId) WHERE requestedByUserId IS NOT NULL;
130
+ CREATE INDEX IF NOT EXISTS idx_agent_tasks_parentTaskId ON agent_tasks(parentTaskId);
131
+ CREATE INDEX IF NOT EXISTS idx_agent_tasks_swarmVersion ON agent_tasks(swarmVersion);
132
+ CREATE INDEX IF NOT EXISTS idx_agent_tasks_slack_thread
133
+ ON agent_tasks(slackChannelId, slackThreadTs, status);
134
+ CREATE INDEX IF NOT EXISTS idx_agent_tasks_context_key
135
+ ON agent_tasks(contextKey);
136
+ CREATE INDEX IF NOT EXISTS idx_agent_tasks_context_key_status
137
+ ON agent_tasks(contextKey, status);
138
+
139
+ PRAGMA foreign_keys=on;
@@ -0,0 +1,27 @@
1
+ -- Inbox item state — per-user dismiss/snooze/done state for action-items inbox
2
+ -- buckets (approval, credential_missing, broken_task, to_read, to_start_template).
3
+ --
4
+ -- itemType is enforced via Zod (`InboxItemTypeSchema` in src/types.ts), not a
5
+ -- SQL CHECK constraint — Phase 1 lesson, lets us extend the enum without a
6
+ -- forward-only migration. Direct SQL inserts can bypass; the HTTP layer
7
+ -- (`PATCH /api/inbox-state`) is the only sanctioned writer.
8
+ --
9
+ -- itemId references the underlying entity (task id, approval-request id,
10
+ -- agent id, template id, …) but is left as a free TEXT column rather than a
11
+ -- typed FK because itemType disambiguates which table it points at.
12
+ CREATE TABLE IF NOT EXISTS inbox_item_state (
13
+ id TEXT PRIMARY KEY DEFAULT (lower(hex(randomblob(16)))),
14
+ userId TEXT NOT NULL REFERENCES users(id) ON DELETE CASCADE,
15
+ itemType TEXT NOT NULL,
16
+ itemId TEXT NOT NULL,
17
+ status TEXT NOT NULL DEFAULT 'open',
18
+ snoozeUntil TEXT,
19
+ dismissedAt TEXT,
20
+ doneAt TEXT,
21
+ createdAt TEXT NOT NULL DEFAULT (datetime('now')),
22
+ lastUpdatedAt TEXT NOT NULL DEFAULT (datetime('now')),
23
+ UNIQUE(userId, itemType, itemId)
24
+ );
25
+
26
+ CREATE INDEX IF NOT EXISTS idx_inbox_item_state_userId_status
27
+ ON inbox_item_state(userId, status);
@@ -0,0 +1,31 @@
1
+ -- Task templates — "To start" bucket starters. Polymorphic from day one
2
+ -- (kind = 'task' | 'workflow' | 'schedule') so v2 can register workflow /
3
+ -- schedule starters without a follow-up migration. v1 only inserts/reads
4
+ -- kind='task' rows; the schema is shaped for v2.
5
+ --
6
+ -- The `prompt` column is NOT NULL only because v1 only ever seeds task rows;
7
+ -- a future migration can relax that when workflow/schedule starters land
8
+ -- (workflows carry workflowId in `payload`, schedules carry cron + prompt).
9
+ --
10
+ -- Table name kept as `task_templates` for v1 to match existing references
11
+ -- across the plan; v2 may rename to `quick_starts` if non-task kinds graduate.
12
+ CREATE TABLE IF NOT EXISTS task_templates (
13
+ id TEXT PRIMARY KEY DEFAULT (lower(hex(randomblob(16)))),
14
+ title TEXT NOT NULL,
15
+ description TEXT NOT NULL,
16
+ prompt TEXT NOT NULL,
17
+ kind TEXT NOT NULL DEFAULT 'task' CHECK(kind IN ('task','workflow','schedule')),
18
+ payload TEXT NOT NULL DEFAULT '{}',
19
+ category TEXT,
20
+ tags TEXT NOT NULL DEFAULT '[]',
21
+ createdAt TEXT NOT NULL DEFAULT (datetime('now'))
22
+ );
23
+
24
+ CREATE INDEX IF NOT EXISTS idx_task_templates_kind ON task_templates(kind);
25
+
26
+ INSERT INTO task_templates (title, description, prompt, category, tags) VALUES
27
+ ('Refactor a file', 'Improve a file without changing behavior', 'Refactor the file at <path> for readability while preserving behavior. Run typecheck + tests after.', 'engineering', '["refactor"]'),
28
+ ('Investigate a bug', 'Reproduce, root-cause, and propose a fix', 'Investigate the following bug: <symptom>. Reproduce locally, identify the root cause, and propose a fix.', 'engineering', '["debug"]'),
29
+ ('Open a PR', 'Create a PR for the current branch', 'Open a PR from the current branch with a clear summary and test plan.', 'git', '["git","pr"]'),
30
+ ('Write tests for X', 'Cover an under-tested module', 'Write unit tests for <module>. Aim for ~80% line coverage.', 'engineering', '["test"]'),
31
+ ('Daily triage', 'Review failed tasks + pending approvals', 'Triage the action-items inbox: dismiss noise, escalate blockers, summarize unread sessions.', 'ops', '["triage"]');