@gempack/squad-mcp 0.8.2 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +1 -1
- package/.claude-plugin/plugin.json +7 -4
- package/CHANGELOG.md +63 -0
- package/README.md +41 -35
- package/agents/senior-debugger.md +85 -0
- package/commands/debug.md +22 -0
- package/commands/stats.md +22 -0
- package/dist/config/ownership-matrix.d.ts +1 -1
- package/dist/config/ownership-matrix.js +16 -0
- package/dist/config/ownership-matrix.js.map +1 -1
- package/dist/errors.d.ts +1 -1
- package/dist/errors.js.map +1 -1
- package/dist/index.js +1 -1
- package/dist/index.js.map +1 -1
- package/dist/resources/agent-loader.js +1 -0
- package/dist/resources/agent-loader.js.map +1 -1
- package/dist/runs/aggregate.d.ts +166 -0
- package/dist/runs/aggregate.js +381 -0
- package/dist/runs/aggregate.js.map +1 -0
- package/dist/runs/store.d.ts +314 -0
- package/dist/runs/store.js +354 -0
- package/dist/runs/store.js.map +1 -0
- package/dist/tools/list-runs.d.ts +52 -0
- package/dist/tools/list-runs.js +142 -0
- package/dist/tools/list-runs.js.map +1 -0
- package/dist/tools/record-run.d.ts +202 -0
- package/dist/tools/record-run.js +118 -0
- package/dist/tools/record-run.js.map +1 -0
- package/dist/tools/registry.js +4 -0
- package/dist/tools/registry.js.map +1 -1
- package/package.json +1 -1
- package/skills/debug/SKILL.md +345 -0
- package/skills/squad/SKILL.md +83 -0
- package/skills/stats/SKILL.md +189 -0
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
/**
|
|
3
|
+
* SQUAD RUNS STORE — telemetry journal for skill invocations. As of v0.10.0
|
|
4
|
+
* the legitimate writers are the squad skill (`/squad:implement` and
|
|
5
|
+
* `/squad:review`, invocations `implement | review | task`) and the debug
|
|
6
|
+
* skill (`/squad:debug`, invocation `debug`). Each writer follows the same
|
|
7
|
+
* two-phase contract: one row at start (`in_flight`) and one at end
|
|
8
|
+
* (`completed | aborted`), paired by id. Mirrored line-for-line after
|
|
9
|
+
* `src/learning/store.ts` — same lock + quarantine + mtime cache + atomic-
|
|
10
|
+
* append-under-PIPE_BUF discipline.
|
|
11
|
+
*
|
|
12
|
+
* Plan v4 (cycle 2 advisory consensus) explicit decisions:
|
|
13
|
+
*
|
|
14
|
+
* - NO multi-row partial fallback. If `JSON.stringify(record)` exceeds
|
|
15
|
+
* MAX_RECORD_BYTES the store rejects with `RECORD_TOO_LARGE` rather
|
|
16
|
+
* than splitting into continuation rows. Five advisors converged on
|
|
17
|
+
* "splitting erodes the one-row-per-record JSONL invariant and
|
|
18
|
+
* reopens parsing ambiguities"; rejection puts the burden on the
|
|
19
|
+
* caller to cap their `mode_warning.message` (already capped 512B)
|
|
20
|
+
* or shorten their inputs.
|
|
21
|
+
*
|
|
22
|
+
* - File mode 0o600 (user-only), directory mode 0o700. The journal
|
|
23
|
+
* contains commit refs and prompt-length signals that can leak
|
|
24
|
+
* business context (branch names like `feat/acme-acquisition`); on
|
|
25
|
+
* shared workstations world-readable 0o644 would expose them to
|
|
26
|
+
* co-tenants.
|
|
27
|
+
*
|
|
28
|
+
* - Single-writer contract: the squad skill (`skills/squad/SKILL.md`)
|
|
29
|
+
* AND the debug skill (`skills/debug/SKILL.md`) are the only legitimate
|
|
30
|
+
* callers of `appendRun`. `apply_consolidation_rules` and other server-
|
|
31
|
+
* side code MUST NOT emit terminal rows; doing so breaks the two-phase
|
|
32
|
+
* `(in_flight, completed)` pair-by-id invariant.
|
|
33
|
+
*/
|
|
34
|
+
/**
|
|
35
|
+
* Hard cap per JSONL entry so a single line fits in POSIX PIPE_BUF
|
|
36
|
+
(4096 bytes) and the append-mode write remains atomic w.r.t. concurrent
|
|
37
|
+
* appenders. Length includes serialised JSON + trailing newline.
|
|
38
|
+
*
|
|
39
|
+
* Realistic finalization row with 9 agents + capped mode_warning.message
|
|
40
|
+
* lands around 1.5-2 KB — well under the limit. Oversize is a hard error,
|
|
41
|
+
* not a soft truncation (see RECORD_TOO_LARGE in errors.ts).
|
|
42
|
+
*/
|
|
43
|
+
export declare const MAX_RECORD_BYTES = 4000;
|
|
44
|
+
/**
|
|
45
|
+
* Default location for the JSONL file, relative to workspace_root.
|
|
46
|
+
* Defaults are gitignored at the v0.9.0 release — the journal contains
|
|
47
|
+
* local-only operational telemetry; users opting into team-wide sharing
|
|
48
|
+
* remove `.squad/runs.jsonl` from their `.gitignore` deliberately.
|
|
49
|
+
*/
|
|
50
|
+
export declare const DEFAULT_RUNS_PATH = ".squad/runs.jsonl";
|
|
51
|
+
/**
|
|
52
|
+
* Severity tally compacted into a single sortable number. The cycle-1
|
|
53
|
+
* design carried `{ Blocker, Major, Minor, Suggestion }` per agent
|
|
54
|
+
* (~30 bytes / agent of JSON overhead); cycle 2 architects + dev flagged
|
|
55
|
+
* this as PIPE_BUF-budget waste on 9-agent runs. Collapsed to one number
|
|
56
|
+
* with positional digits: B*1000 + M*100 + m*10 + s. Inverse decode in
|
|
57
|
+
* aggregate.ts. Safe up to 9 of each severity per agent (more than that
|
|
58
|
+
* is itself a signal something went sideways).
|
|
59
|
+
*/
|
|
60
|
+
declare function severityScore(counts: {
|
|
61
|
+
Blocker: number;
|
|
62
|
+
Major: number;
|
|
63
|
+
Minor: number;
|
|
64
|
+
Suggestion: number;
|
|
65
|
+
}): number;
|
|
66
|
+
/** Inverse of `severityScore`. Used by aggregate.ts. */
|
|
67
|
+
export declare function decodeSeverityScore(n: number): {
|
|
68
|
+
Blocker: number;
|
|
69
|
+
Major: number;
|
|
70
|
+
Minor: number;
|
|
71
|
+
Suggestion: number;
|
|
72
|
+
};
|
|
73
|
+
/** Public re-export so callers can build records without re-implementing the encoding. */
|
|
74
|
+
export { severityScore };
|
|
75
|
+
declare const InvocationEnum: z.ZodEnum<["implement", "review", "task", "question", "brainstorm", "debug"]>;
|
|
76
|
+
declare const StatusEnum: z.ZodEnum<["in_flight", "completed", "aborted"]>;
|
|
77
|
+
declare const WorkTypeEnum: z.ZodEnum<["Feature", "Bug Fix", "Refactor", "Performance", "Security", "Business Rule"]>;
|
|
78
|
+
declare const VerdictEnum: z.ZodEnum<["APPROVED", "CHANGES_REQUIRED", "REJECTED"]>;
|
|
79
|
+
declare const GitRefSchema: z.ZodNullable<z.ZodObject<{
|
|
80
|
+
kind: z.ZodEnum<["head", "diff_base", "pr_head"]>;
|
|
81
|
+
value: z.ZodEffects<z.ZodString, string, string>;
|
|
82
|
+
}, "strip", z.ZodTypeAny, {
|
|
83
|
+
value: string;
|
|
84
|
+
kind: "head" | "diff_base" | "pr_head";
|
|
85
|
+
}, {
|
|
86
|
+
value: string;
|
|
87
|
+
kind: "head" | "diff_base" | "pr_head";
|
|
88
|
+
}>>;
|
|
89
|
+
/**
|
|
90
|
+
* Per-agent dispatch metrics captured by the squad skill orchestrator.
|
|
91
|
+
*
|
|
92
|
+
* - `batch_duration_ms` (renamed from `duration_ms` in v0.9.0): wall-clock
|
|
93
|
+
* from this agent's Task() dispatch to its result. Note that advisors in
|
|
94
|
+
* a parallel batch overlap; this is "round-trip latency for this dispatch"
|
|
95
|
+
* not "exclusive time spent on this agent's work". Reflected in the
|
|
96
|
+
* /squad:stats output label.
|
|
97
|
+
*
|
|
98
|
+
* - `prompt_chars` / `response_chars` (renamed from input/output_chars in
|
|
99
|
+
* v0.9.0): orchestrator-visible character counts of the dispatch prompt
|
|
100
|
+
* and the agent's returned string. EXCLUDES the agent's own internal
|
|
101
|
+
* tool_use roundtrips (file reads, sub-dispatches like code-explorer).
|
|
102
|
+
* For agents that read heavily, the recorded chars are a substantial
|
|
103
|
+
* underestimate — documented in `est_tokens_method` and rendered in the
|
|
104
|
+
* stats panel disclaimer.
|
|
105
|
+
*
|
|
106
|
+
* - `severity_score`: encoded findings tally (see severityScore()).
|
|
107
|
+
*/
|
|
108
|
+
declare const AgentMetricsSchema: z.ZodObject<{
|
|
109
|
+
name: z.ZodEnum<[import("../config/ownership-matrix.js").AgentName, ...import("../config/ownership-matrix.js").AgentName[]]>;
|
|
110
|
+
model: z.ZodEnum<["haiku", "sonnet", "opus", "inherit"]>;
|
|
111
|
+
score: z.ZodNullable<z.ZodNumber>;
|
|
112
|
+
severity_score: z.ZodNullable<z.ZodNumber>;
|
|
113
|
+
batch_duration_ms: z.ZodNumber;
|
|
114
|
+
prompt_chars: z.ZodNumber;
|
|
115
|
+
response_chars: z.ZodNumber;
|
|
116
|
+
}, "strip", z.ZodTypeAny, {
|
|
117
|
+
name: import("../config/ownership-matrix.js").AgentName;
|
|
118
|
+
score: number | null;
|
|
119
|
+
model: "haiku" | "inherit" | "sonnet" | "opus";
|
|
120
|
+
severity_score: number | null;
|
|
121
|
+
batch_duration_ms: number;
|
|
122
|
+
prompt_chars: number;
|
|
123
|
+
response_chars: number;
|
|
124
|
+
}, {
|
|
125
|
+
name: import("../config/ownership-matrix.js").AgentName;
|
|
126
|
+
score: number | null;
|
|
127
|
+
model: "haiku" | "inherit" | "sonnet" | "opus";
|
|
128
|
+
severity_score: number | null;
|
|
129
|
+
batch_duration_ms: number;
|
|
130
|
+
prompt_chars: number;
|
|
131
|
+
response_chars: number;
|
|
132
|
+
}>;
|
|
133
|
+
/**
|
|
134
|
+
* RunRecord schema_version 1. PUBLIC STABLE CONTRACT from v0.9.0 — readers
|
|
135
|
+
* (the `list_runs` MCP tool, the `/squad:stats` skill) key on
|
|
136
|
+
* `schema_version` and quarantine unknown versions rather than failing.
|
|
137
|
+
*
|
|
138
|
+
* Discriminated by `status`:
|
|
139
|
+
* - `in_flight` rows carry only the Phase-1-known fields (skill knows what
|
|
140
|
+
* it's about to do; verdict/scores are still pending).
|
|
141
|
+
* - `completed | aborted` rows carry full metrics + verdict.
|
|
142
|
+
*
|
|
143
|
+
* For ergonomics under Zod we keep finalisation fields optional on the base
|
|
144
|
+
* schema rather than splitting into two schemas; the writer validates the
|
|
145
|
+
* appropriate subset at the call site (`appendRun` vs `finalizeRun`).
|
|
146
|
+
*/
|
|
147
|
+
declare const runRecordSchema: z.ZodObject<{
|
|
148
|
+
schema_version: z.ZodLiteral<1>;
|
|
149
|
+
id: z.ZodEffects<z.ZodString, string, string>;
|
|
150
|
+
status: z.ZodEnum<["in_flight", "completed", "aborted"]>;
|
|
151
|
+
started_at: z.ZodEffects<z.ZodString, string, string>;
|
|
152
|
+
completed_at: z.ZodOptional<z.ZodEffects<z.ZodString, string, string>>;
|
|
153
|
+
duration_ms: z.ZodOptional<z.ZodNumber>;
|
|
154
|
+
invocation: z.ZodEnum<["implement", "review", "task", "question", "brainstorm", "debug"]>;
|
|
155
|
+
mode: z.ZodEnum<["quick", "normal", "deep"]>;
|
|
156
|
+
mode_source: z.ZodEnum<["user", "auto"]>;
|
|
157
|
+
work_type: z.ZodOptional<z.ZodEnum<["Feature", "Bug Fix", "Refactor", "Performance", "Security", "Business Rule"]>>;
|
|
158
|
+
git_ref: z.ZodNullable<z.ZodObject<{
|
|
159
|
+
kind: z.ZodEnum<["head", "diff_base", "pr_head"]>;
|
|
160
|
+
value: z.ZodEffects<z.ZodString, string, string>;
|
|
161
|
+
}, "strip", z.ZodTypeAny, {
|
|
162
|
+
value: string;
|
|
163
|
+
kind: "head" | "diff_base" | "pr_head";
|
|
164
|
+
}, {
|
|
165
|
+
value: string;
|
|
166
|
+
kind: "head" | "diff_base" | "pr_head";
|
|
167
|
+
}>>;
|
|
168
|
+
files_count: z.ZodNumber;
|
|
169
|
+
agents: z.ZodArray<z.ZodObject<{
|
|
170
|
+
name: z.ZodEnum<[import("../config/ownership-matrix.js").AgentName, ...import("../config/ownership-matrix.js").AgentName[]]>;
|
|
171
|
+
model: z.ZodEnum<["haiku", "sonnet", "opus", "inherit"]>;
|
|
172
|
+
score: z.ZodNullable<z.ZodNumber>;
|
|
173
|
+
severity_score: z.ZodNullable<z.ZodNumber>;
|
|
174
|
+
batch_duration_ms: z.ZodNumber;
|
|
175
|
+
prompt_chars: z.ZodNumber;
|
|
176
|
+
response_chars: z.ZodNumber;
|
|
177
|
+
}, "strip", z.ZodTypeAny, {
|
|
178
|
+
name: import("../config/ownership-matrix.js").AgentName;
|
|
179
|
+
score: number | null;
|
|
180
|
+
model: "haiku" | "inherit" | "sonnet" | "opus";
|
|
181
|
+
severity_score: number | null;
|
|
182
|
+
batch_duration_ms: number;
|
|
183
|
+
prompt_chars: number;
|
|
184
|
+
response_chars: number;
|
|
185
|
+
}, {
|
|
186
|
+
name: import("../config/ownership-matrix.js").AgentName;
|
|
187
|
+
score: number | null;
|
|
188
|
+
model: "haiku" | "inherit" | "sonnet" | "opus";
|
|
189
|
+
severity_score: number | null;
|
|
190
|
+
batch_duration_ms: number;
|
|
191
|
+
prompt_chars: number;
|
|
192
|
+
response_chars: number;
|
|
193
|
+
}>, "many">;
|
|
194
|
+
verdict: z.ZodOptional<z.ZodNullable<z.ZodEnum<["APPROVED", "CHANGES_REQUIRED", "REJECTED"]>>>;
|
|
195
|
+
weighted_score: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
|
|
196
|
+
est_tokens_method: z.ZodLiteral<"chars-div-3.5">;
|
|
197
|
+
mode_warning: z.ZodOptional<z.ZodNullable<z.ZodObject<{
|
|
198
|
+
code: z.ZodEffects<z.ZodString, string, string>;
|
|
199
|
+
message: z.ZodEffects<z.ZodString, string, string>;
|
|
200
|
+
}, "strip", z.ZodTypeAny, {
|
|
201
|
+
code: string;
|
|
202
|
+
message: string;
|
|
203
|
+
}, {
|
|
204
|
+
code: string;
|
|
205
|
+
message: string;
|
|
206
|
+
}>>>;
|
|
207
|
+
}, "strip", z.ZodTypeAny, {
|
|
208
|
+
files_count: number;
|
|
209
|
+
status: "aborted" | "in_flight" | "completed";
|
|
210
|
+
agents: {
|
|
211
|
+
name: import("../config/ownership-matrix.js").AgentName;
|
|
212
|
+
score: number | null;
|
|
213
|
+
model: "haiku" | "inherit" | "sonnet" | "opus";
|
|
214
|
+
severity_score: number | null;
|
|
215
|
+
batch_duration_ms: number;
|
|
216
|
+
prompt_chars: number;
|
|
217
|
+
response_chars: number;
|
|
218
|
+
}[];
|
|
219
|
+
mode: "quick" | "normal" | "deep";
|
|
220
|
+
id: string;
|
|
221
|
+
invocation: "debug" | "review" | "task" | "implement" | "question" | "brainstorm";
|
|
222
|
+
schema_version: 1;
|
|
223
|
+
started_at: string;
|
|
224
|
+
mode_source: "user" | "auto";
|
|
225
|
+
git_ref: {
|
|
226
|
+
value: string;
|
|
227
|
+
kind: "head" | "diff_base" | "pr_head";
|
|
228
|
+
} | null;
|
|
229
|
+
est_tokens_method: "chars-div-3.5";
|
|
230
|
+
work_type?: "Feature" | "Bug Fix" | "Refactor" | "Performance" | "Security" | "Business Rule" | undefined;
|
|
231
|
+
duration_ms?: number | undefined;
|
|
232
|
+
weighted_score?: number | null | undefined;
|
|
233
|
+
completed_at?: string | undefined;
|
|
234
|
+
verdict?: "APPROVED" | "CHANGES_REQUIRED" | "REJECTED" | null | undefined;
|
|
235
|
+
mode_warning?: {
|
|
236
|
+
code: string;
|
|
237
|
+
message: string;
|
|
238
|
+
} | null | undefined;
|
|
239
|
+
}, {
|
|
240
|
+
files_count: number;
|
|
241
|
+
status: "aborted" | "in_flight" | "completed";
|
|
242
|
+
agents: {
|
|
243
|
+
name: import("../config/ownership-matrix.js").AgentName;
|
|
244
|
+
score: number | null;
|
|
245
|
+
model: "haiku" | "inherit" | "sonnet" | "opus";
|
|
246
|
+
severity_score: number | null;
|
|
247
|
+
batch_duration_ms: number;
|
|
248
|
+
prompt_chars: number;
|
|
249
|
+
response_chars: number;
|
|
250
|
+
}[];
|
|
251
|
+
mode: "quick" | "normal" | "deep";
|
|
252
|
+
id: string;
|
|
253
|
+
invocation: "debug" | "review" | "task" | "implement" | "question" | "brainstorm";
|
|
254
|
+
schema_version: 1;
|
|
255
|
+
started_at: string;
|
|
256
|
+
mode_source: "user" | "auto";
|
|
257
|
+
git_ref: {
|
|
258
|
+
value: string;
|
|
259
|
+
kind: "head" | "diff_base" | "pr_head";
|
|
260
|
+
} | null;
|
|
261
|
+
est_tokens_method: "chars-div-3.5";
|
|
262
|
+
work_type?: "Feature" | "Bug Fix" | "Refactor" | "Performance" | "Security" | "Business Rule" | undefined;
|
|
263
|
+
duration_ms?: number | undefined;
|
|
264
|
+
weighted_score?: number | null | undefined;
|
|
265
|
+
completed_at?: string | undefined;
|
|
266
|
+
verdict?: "APPROVED" | "CHANGES_REQUIRED" | "REJECTED" | null | undefined;
|
|
267
|
+
mode_warning?: {
|
|
268
|
+
code: string;
|
|
269
|
+
message: string;
|
|
270
|
+
} | null | undefined;
|
|
271
|
+
}>;
|
|
272
|
+
export type RunRecord = z.infer<typeof runRecordSchema>;
|
|
273
|
+
export type AgentMetrics = z.infer<typeof AgentMetricsSchema>;
|
|
274
|
+
export type GitRef = z.infer<typeof GitRefSchema>;
|
|
275
|
+
export type RunStatus = z.infer<typeof StatusEnum>;
|
|
276
|
+
export type RunInvocation = z.infer<typeof InvocationEnum>;
|
|
277
|
+
export type RunVerdict = z.infer<typeof VerdictEnum>;
|
|
278
|
+
export { runRecordSchema, WorkTypeEnum };
|
|
279
|
+
/** Test-only: clear the per-process cache. Production code MUST NOT call this. */
|
|
280
|
+
export declare function __resetRunsStoreCacheForTests(): void;
|
|
281
|
+
/**
|
|
282
|
+
* Generate a fresh run id. Date.now() base36 prefix + 6 chars from
|
|
283
|
+
* [a-z0-9] (36^6 = 2.18B unique values per millisecond — collision
|
|
284
|
+
* chance is effectively zero across realistic concurrent writers in
|
|
285
|
+
* the same ms).
|
|
286
|
+
*/
|
|
287
|
+
export declare function generateRunId(): string;
|
|
288
|
+
/**
|
|
289
|
+
* Read all run records from the JSONL file. Returns `[]` if the file does
|
|
290
|
+
* not exist (fresh repo, first run). Corrupt lines are quarantined to a
|
|
291
|
+
* timestamped sibling file and logged once; the surviving entries return
|
|
292
|
+
* in append order.
|
|
293
|
+
*
|
|
294
|
+
* Unknown `schema_version` rows are skipped and logged (not quarantined) — readers must NEVER
|
|
295
|
+
* silently include rows they don't understand. The quarantine file is
|
|
296
|
+
* `.squad/runs.jsonl.corrupt-<ts>.jsonl` alongside the source.
|
|
297
|
+
*/
|
|
298
|
+
export declare function readRuns(workspaceRoot: string, options?: {
|
|
299
|
+
configuredPath?: string;
|
|
300
|
+
}): Promise<RunRecord[]>;
|
|
301
|
+
/**
|
|
302
|
+
* Append one RunRecord. Validates against Zod, then enforces
|
|
303
|
+
* MAX_RECORD_BYTES (post-serialisation) before acquiring the file lock.
|
|
304
|
+
* Oversize records throw `RECORD_TOO_LARGE` — no silent split, no soft
|
|
305
|
+
* truncation. The caller (the squad skill) is responsible for keeping
|
|
306
|
+
* `mode_warning.message` capped and the agent list short enough that
|
|
307
|
+
* realistic records stay well under the cap.
|
|
308
|
+
*/
|
|
309
|
+
export declare function appendRun(workspaceRoot: string, record: RunRecord, options?: {
|
|
310
|
+
configuredPath?: string;
|
|
311
|
+
}): Promise<{
|
|
312
|
+
filePath: string;
|
|
313
|
+
record: RunRecord;
|
|
314
|
+
}>;
|
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
import { promises as fs } from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { z } from "zod";
|
|
4
|
+
import { AGENT_NAMES_TUPLE } from "../config/ownership-matrix.js";
|
|
5
|
+
import { SquadError } from "../errors.js";
|
|
6
|
+
import { ensureRelativeInsideRoot } from "../util/path-safety.js";
|
|
7
|
+
import { withFileLock } from "../util/file-lock.js";
|
|
8
|
+
import { logger } from "../observability/logger.js";
|
|
9
|
+
import { SafeString } from "../tools/_shared/schemas.js";
|
|
10
|
+
/**
|
|
11
|
+
* SQUAD RUNS STORE — telemetry journal for skill invocations. As of v0.10.0
|
|
12
|
+
* the legitimate writers are the squad skill (`/squad:implement` and
|
|
13
|
+
* `/squad:review`, invocations `implement | review | task`) and the debug
|
|
14
|
+
* skill (`/squad:debug`, invocation `debug`). Each writer follows the same
|
|
15
|
+
* two-phase contract: one row at start (`in_flight`) and one at end
|
|
16
|
+
* (`completed | aborted`), paired by id. Mirrored line-for-line after
|
|
17
|
+
* `src/learning/store.ts` — same lock + quarantine + mtime cache + atomic-
|
|
18
|
+
* append-under-PIPE_BUF discipline.
|
|
19
|
+
*
|
|
20
|
+
* Plan v4 (cycle 2 advisory consensus) explicit decisions:
|
|
21
|
+
*
|
|
22
|
+
* - NO multi-row partial fallback. If `JSON.stringify(record)` exceeds
|
|
23
|
+
* MAX_RECORD_BYTES the store rejects with `RECORD_TOO_LARGE` rather
|
|
24
|
+
* than splitting into continuation rows. Five advisors converged on
|
|
25
|
+
* "splitting erodes the one-row-per-record JSONL invariant and
|
|
26
|
+
* reopens parsing ambiguities"; rejection puts the burden on the
|
|
27
|
+
* caller to cap their `mode_warning.message` (already capped 512B)
|
|
28
|
+
* or shorten their inputs.
|
|
29
|
+
*
|
|
30
|
+
* - File mode 0o600 (user-only), directory mode 0o700. The journal
|
|
31
|
+
* contains commit refs and prompt-length signals that can leak
|
|
32
|
+
* business context (branch names like `feat/acme-acquisition`); on
|
|
33
|
+
* shared workstations world-readable 0o644 would expose them to
|
|
34
|
+
* co-tenants.
|
|
35
|
+
*
|
|
36
|
+
* - Single-writer contract: the squad skill (`skills/squad/SKILL.md`)
|
|
37
|
+
* AND the debug skill (`skills/debug/SKILL.md`) are the only legitimate
|
|
38
|
+
* callers of `appendRun`. `apply_consolidation_rules` and other server-
|
|
39
|
+
* side code MUST NOT emit terminal rows; doing so breaks the two-phase
|
|
40
|
+
* `(in_flight, completed)` pair-by-id invariant.
|
|
41
|
+
*/
|
|
42
|
+
/**
|
|
43
|
+
* Hard cap per JSONL entry so a single line fits in POSIX PIPE_BUF
|
|
44
|
+
(4096 bytes) and the append-mode write remains atomic w.r.t. concurrent
|
|
45
|
+
* appenders. Length includes serialised JSON + trailing newline.
|
|
46
|
+
*
|
|
47
|
+
* Realistic finalization row with 9 agents + capped mode_warning.message
|
|
48
|
+
* lands around 1.5-2 KB — well under the limit. Oversize is a hard error,
|
|
49
|
+
* not a soft truncation (see RECORD_TOO_LARGE in errors.ts).
|
|
50
|
+
*/
|
|
51
|
+
export const MAX_RECORD_BYTES = 4_000;
|
|
52
|
+
/**
|
|
53
|
+
* Default location for the JSONL file, relative to workspace_root.
|
|
54
|
+
* Defaults are gitignored at the v0.9.0 release — the journal contains
|
|
55
|
+
* local-only operational telemetry; users opting into team-wide sharing
|
|
56
|
+
* remove `.squad/runs.jsonl` from their `.gitignore` deliberately.
|
|
57
|
+
*/
|
|
58
|
+
export const DEFAULT_RUNS_PATH = ".squad/runs.jsonl";
|
|
59
|
+
/**
 * Compact a per-agent severity tally into one sortable number.
 *
 * The cycle-1 design carried `{ Blocker, Major, Minor, Suggestion }` per
 * agent (~30 bytes / agent of JSON overhead); it was collapsed to a single
 * number with one decimal digit per severity:
 *
 *     Blocker*1000 + Major*100 + Minor*10 + Suggestion
 *
 * Each digit can only represent 0..9, so every tally is saturated into that
 * range before encoding. Without the clamp a count above 9 would carry into
 * the neighbouring severity's digit (12 Minor would read back as 1 extra
 * Major + 2 Minor), and a Blocker count above 9 would exceed the
 * `severity_score` schema bound of 9999. A saturated digit (9) still signals
 * "something went sideways" without corrupting the other severities.
 *
 * @param counts Tally with numeric `Blocker`, `Major`, `Minor` and
 *   `Suggestion` fields. Non-numeric / NaN values count as 0; fractional
 *   values are truncated toward zero.
 * @returns An integer in 0..9999, decodable with `decodeSeverityScore`.
 */
function severityScore(counts) {
    // Normalise one tally to a single decimal digit (integer in 0..9).
    const toDigit = (value) => {
        const n = Math.trunc(Number(value));
        if (Number.isNaN(n)) {
            return 0;
        }
        return Math.min(9, Math.max(0, n));
    };
    return (toDigit(counts.Blocker) * 1000 +
        toDigit(counts.Major) * 100 +
        toDigit(counts.Minor) * 10 +
        toDigit(counts.Suggestion));
}
|
|
71
|
+
/**
 * Inverse of `severityScore`: split a positional score back into its four
 * per-severity digits. Used by aggregate.ts.
 *
 * @param n Encoded score (thousands = Blocker, hundreds = Major,
 *   tens = Minor, units = Suggestion).
 * @returns The decoded `{ Blocker, Major, Minor, Suggestion }` tally.
 */
export function decodeSeverityScore(n) {
    const thousands = Math.floor(n / 1000);
    const hundreds = Math.floor((n % 1000) / 100);
    const tens = Math.floor((n % 100) / 10);
    const units = n % 10;
    return { Blocker: thousands, Major: hundreds, Minor: tens, Suggestion: units };
}
|
|
80
|
+
/** Public re-export so callers can build records without re-implementing the encoding. */
|
|
81
|
+
export { severityScore };
|
|
82
|
+
// Closed vocabularies used by the run-record schema. Each is a zod enum, so
// any value outside the listed literals fails validation.

// Which skill entry point produced the row.
const InvocationEnum = z.enum([
    "implement",
    "review",
    "task",
    "question",
    "brainstorm",
    "debug",
]);
// Depth the run executed at, and whether the user or auto-detection chose it.
const ModeEnum = z.enum(["quick", "normal", "deep"]);
const ModeSourceEnum = z.enum(["user", "auto"]);
// Row lifecycle: one `in_flight` row at start, one terminal row at end.
const StatusEnum = z.enum(["in_flight", "completed", "aborted"]);
// Classification of the work under review.
const WorkTypeEnum = z.enum(["Feature", "Bug Fix", "Refactor", "Performance", "Security", "Business Rule"]);
// Final outcome recorded on a run.
const VerdictEnum = z.enum(["APPROVED", "CHANGES_REQUIRED", "REJECTED"]);
// Model tier an agent was dispatched on.
const ModelEnum = z.enum(["haiku", "sonnet", "opus", "inherit"]);
// Git reference the run was anchored to; `null` when no ref is recorded.
const GitRefObject = z.object({
    kind: z.enum(["head", "diff_base", "pr_head"]),
    value: SafeString(200),
});
const GitRefSchema = GitRefObject.nullable();
|
|
102
|
+
// Builds a fresh "finite, non-negative integer" field schema.
const agentCountField = () => z.number().int().nonnegative().finite();
/**
 * Per-agent dispatch metrics captured by the squad skill orchestrator.
 *
 * - `batch_duration_ms` (renamed from `duration_ms` in v0.9.0): wall-clock
 *   time from this agent's Task() dispatch to its result. Advisors in a
 *   parallel batch overlap, so this is round-trip latency for the dispatch,
 *   not exclusive time spent on the agent's work. The /squad:stats output
 *   label reflects that.
 *
 * - `prompt_chars` / `response_chars` (renamed from input/output_chars in
 *   v0.9.0): orchestrator-visible character counts of the dispatch prompt
 *   and of the agent's returned string. They EXCLUDE the agent's own
 *   internal tool_use roundtrips (file reads, sub-dispatches such as
 *   code-explorer), so for read-heavy agents they are a substantial
 *   underestimate — documented in `est_tokens_method` and rendered in the
 *   stats panel disclaimer.
 *
 * - `score`: integer 0..100, or null.
 *
 * - `severity_score`: encoded findings tally, integer 0..9999, or null
 *   (see severityScore()).
 */
const AgentMetricsSchema = z.object({
    name: z.enum(AGENT_NAMES_TUPLE),
    model: ModelEnum,
    score: z.number().int().min(0).max(100).nullable(),
    severity_score: z.number().int().min(0).max(9999).nullable(),
    batch_duration_ms: agentCountField(),
    prompt_chars: agentCountField(),
    response_chars: agentCountField(),
});
|
|
130
|
+
// Optional structured warning attached to a run: short code + capped message.
const ModeWarningSchema = z.object({
    code: SafeString(64),
    message: SafeString(512),
});
/**
 * RunRecord, schema_version 1. PUBLIC STABLE CONTRACT from v0.9.0 — readers
 * (the `list_runs` MCP tool, the `/squad:stats` skill) key on
 * `schema_version`; rows carrying a different version are set aside by the
 * reader instead of failing the whole read.
 *
 * Rows are discriminated by `status`:
 * - `in_flight` rows carry only the fields known at Phase 1 (the skill knows
 *   what it is about to do; verdict and scores are still pending).
 * - `completed | aborted` rows carry full metrics plus the verdict.
 *
 * For ergonomics under Zod the finalisation fields (`completed_at`,
 * `duration_ms`, `verdict`, `weighted_score`, `mode_warning`, `work_type`)
 * are optional on this single base schema rather than being split across
 * two schemas; the writer is expected to validate the appropriate subset at
 * the call site (`appendRun` vs `finalizeRun`).
 *
 * Field order here is the key order of every serialised row.
 */
const runRecordSchema = z.object({
    schema_version: z.literal(1),
    id: SafeString(40),
    status: StatusEnum,
    started_at: SafeString(40),
    completed_at: SafeString(40).optional(),
    duration_ms: z.number().int().nonnegative().finite().optional(),
    invocation: InvocationEnum,
    mode: ModeEnum,
    mode_source: ModeSourceEnum,
    work_type: WorkTypeEnum.optional(),
    git_ref: GitRefSchema,
    files_count: z.number().int().nonnegative().finite(),
    agents: z.array(AgentMetricsSchema).max(20),
    verdict: VerdictEnum.nullable().optional(),
    weighted_score: z.number().min(0).max(100).nullable().optional(),
    est_tokens_method: z.literal("chars-div-3.5"),
    mode_warning: ModeWarningSchema.nullable().optional(),
});
|
|
169
|
+
export { runRecordSchema, WorkTypeEnum };
|
|
170
|
+
// Per-process read cache. Keyed by resolved workspace root; each value holds
// the journal's path, mtime, size and the parsed entries from the last read.
const cache = new Map();
/**
 * Test-only hook: drop every cached journal read for this process.
 * Production code MUST NOT call this.
 */
export function __resetRunsStoreCacheForTests() {
    cache.clear();
}
|
|
175
|
+
/**
 * Resolve the absolute path of the runs journal for a workspace.
 *
 * Falls back to `DEFAULT_RUNS_PATH` when no path is configured. Whenever the
 * caller passes anything other than `undefined`, the relative path is first
 * handed to `ensureRelativeInsideRoot` (labelled "runs.path"), which is
 * expected to reject paths escaping the workspace root.
 *
 * @param workspaceRoot Workspace root directory.
 * @param configuredPath Optional journal path relative to `workspaceRoot`.
 * @returns Absolute path of the journal file.
 */
function resolveRunsFile(workspaceRoot, configuredPath) {
    const relativePath = configuredPath ?? DEFAULT_RUNS_PATH;
    const callerSuppliedPath = configuredPath !== undefined;
    if (callerSuppliedPath) {
        ensureRelativeInsideRoot(workspaceRoot, relativePath, "runs.path");
    }
    return path.resolve(workspaceRoot, relativePath);
}
|
|
182
|
+
/**
 * Generate a fresh run id of the form `<timestamp>-<suffix>`.
 *
 * The prefix is `Date.now()` rendered in base 36; the suffix is 6 characters
 * drawn with `Math.random()` from [a-z0-9]. That gives 36^6 (about 2.18B)
 * possible suffixes per millisecond, so collisions between realistic
 * concurrent writers in the same millisecond are effectively ruled out.
 *
 * @returns The new run id.
 */
export function generateRunId() {
    const alphabet = "abcdefghijklmnopqrstuvwxyz0123456789";
    const timestampPart = Date.now().toString(36);
    const randomPart = Array.from({ length: 6 }, () => alphabet[Math.floor(Math.random() * alphabet.length)]).join("");
    return `${timestampPart}-${randomPart}`;
}
|
|
197
|
+
/**
 * Read every run record from the JSONL journal.
 *
 * Returns `[]` when the journal cannot be stat'ed (typically a fresh repo
 * before the first run) or when the path is not a regular file. Parsed
 * results are cached per resolved workspace root and reused for as long as
 * the journal's path, mtime and size are unchanged. The returned array is
 * the same instance the cache holds — treat it as read-only.
 *
 * Lines are processed in file order:
 * - Blank lines are ignored.
 * - Lines that are not valid JSON, or that fail `runRecordSchema`, are
 *   corrupt. They are copied, best effort, to a sibling file
 *   `<journal>.corrupt-<Date.now()>.jsonl` (mode 0o600) and reported in a
 *   single warning; the warning states whether that copy was written.
 * - Rows whose `schema_version` is present but not `1` are skipped and
 *   counted in a separate warning. They are deliberately NOT quarantined:
 *   a future v2 writer must not brick v1 readers, and such rows are not
 *   corrupt — this reader simply does not understand them and must never
 *   silently include them.
 * - Everything else is returned, in append order.
 *
 * @param workspaceRoot Workspace root directory.
 * @param options Optional settings; `configuredPath` overrides the default
 *   journal location (relative to `workspaceRoot`).
 * @returns The valid schema_version-1 records, in append order.
 * @throws SquadError `CONFIG_READ_FAILED` when the journal exists but cannot
 *   be read. Path resolution may also throw for a `configuredPath` rejected
 *   by the path-safety check.
 */
export async function readRuns(workspaceRoot, options = {}) {
    const filePath = resolveRunsFile(workspaceRoot, options.configuredPath);
    const absRoot = path.resolve(workspaceRoot);
    // Thrown values are not guaranteed to be Error instances.
    const describeError = (err) => (err instanceof Error ? err.message : String(err));
    let stat;
    try {
        stat = await fs.stat(filePath);
    }
    catch {
        // Any stat failure is treated as "no journal yet".
        return [];
    }
    if (!stat.isFile()) {
        return [];
    }
    const cached = cache.get(absRoot);
    if (cached &&
        cached.filePath === filePath &&
        cached.mtimeMs === stat.mtimeMs &&
        cached.size === stat.size) {
        return cached.entries;
    }
    let raw;
    try {
        raw = await fs.readFile(filePath, "utf8");
    }
    catch (err) {
        throw new SquadError("CONFIG_READ_FAILED", `failed to read runs file ${filePath}: ${describeError(err)}`, { source: filePath });
    }
    const entries = [];
    const corruptLines = [];
    let skippedUnknownVersion = 0;
    for (const [index, line] of raw.split(/\r?\n/).entries()) {
        const lineNo = index + 1; // 1-based, counts blank lines too
        const trimmed = line.trim();
        if (trimmed === "") {
            continue;
        }
        let parsed;
        try {
            parsed = JSON.parse(trimmed);
        }
        catch (err) {
            corruptLines.push({
                line: lineNo,
                raw: trimmed,
                reason: `invalid JSON: ${describeError(err)}`,
            });
            continue;
        }
        // Schema_version gate: skip+log instead of throwing. A future v2 writer
        // would otherwise brick v1 readers; this lets a heterogeneous-version
        // journal be partially read by older clients (architect A-6 + dev #11).
        // Rows with no `schema_version` at all fall through to schema
        // validation below and end up in the corrupt set.
        if (typeof parsed === "object" &&
            parsed !== null &&
            "schema_version" in parsed &&
            parsed.schema_version !== 1) {
            skippedUnknownVersion++;
            continue;
        }
        const validated = runRecordSchema.safeParse(parsed);
        if (!validated.success) {
            corruptLines.push({
                line: lineNo,
                raw: trimmed,
                reason: `schema violation: ${validated.error.message}`,
            });
            continue;
        }
        entries.push(validated.data);
    }
    if (skippedUnknownVersion > 0) {
        logger.warn("runs: skipped rows with unknown schema_version", {
            details: { file: filePath, count: skippedUnknownVersion },
        });
    }
    if (corruptLines.length > 0) {
        const quarantinePath = `${filePath}.corrupt-${Date.now()}.jsonl`;
        let quarantineWritten = false;
        try {
            const body = corruptLines.map((c) => `# line ${c.line}: ${c.reason}\n${c.raw}\n`).join("");
            // Write quarantine with same restricted mode as the source.
            await fs.writeFile(quarantinePath, body, { encoding: "utf8", mode: 0o600 });
            quarantineWritten = true;
        }
        catch {
            // Diagnostic, not load-bearing: the read still succeeds. The failure
            // is surfaced through `quarantineWritten` in the warning below.
        }
        logger.warn("runs: corrupt lines quarantined", {
            details: {
                file: filePath,
                quarantine: quarantinePath,
                quarantineWritten,
                count: corruptLines.length,
                lines: corruptLines.map((c) => c.line),
            },
        });
    }
    cache.set(absRoot, { mtimeMs: stat.mtimeMs, size: stat.size, filePath, entries });
    return entries;
}
|
|
303
|
+
/**
 * Append a single RunRecord to the journal.
 *
 * Steps, in order:
 * 1. Validate `record` against `runRecordSchema`.
 * 2. Serialise to one JSON line (with trailing newline) and enforce
 *    MAX_RECORD_BYTES on the UTF-8 byte length — before any file lock is
 *    taken. Oversize records are rejected outright: no silent split, no
 *    soft truncation. The caller (the squad skill) is responsible for
 *    keeping `mode_warning.message` capped and the agent list short enough
 *    that realistic records stay well under the cap.
 * 3. Ensure the journal directory exists, then append under a file lock.
 * 4. Drop this workspace's cache entry and log the write.
 *
 * @param workspaceRoot Workspace root directory.
 * @param record The record to append.
 * @param options Optional settings; `configuredPath` overrides the default
 *   journal location (relative to `workspaceRoot`).
 * @returns The journal path written to and the validated record.
 * @throws SquadError `INVALID_INPUT` on schema violation,
 *   `RECORD_TOO_LARGE` when the serialised line exceeds MAX_RECORD_BYTES.
 */
export async function appendRun(workspaceRoot, record, options = {}) {
    const parseResult = runRecordSchema.safeParse(record);
    if (!parseResult.success) {
        const { error } = parseResult;
        throw new SquadError("INVALID_INPUT", `run record schema violation: ${error.message}`, { issues: error.issues.length });
    }
    const run = parseResult.data;
    const serialised = `${JSON.stringify(run)}\n`;
    const byteLen = Buffer.byteLength(serialised, "utf8");
    if (byteLen > MAX_RECORD_BYTES) {
        throw new SquadError("RECORD_TOO_LARGE", `run record exceeds MAX_RECORD_BYTES (${byteLen} > ${MAX_RECORD_BYTES}); cap mode_warning.message or shorten inputs`, { byteLen, max: MAX_RECORD_BYTES, id: run.id });
    }
    const filePath = resolveRunsFile(workspaceRoot, options.configuredPath);
    // User-only directory (0o700). The mode is applied when the directory is
    // created; an already-existing directory keeps whatever mode it has.
    await fs.mkdir(path.dirname(filePath), { recursive: true, mode: 0o700 });
    // Cross-process lock around the append. The lock file lives in the same
    // directory; file-lock.ts cleans it up in a finally.
    await withFileLock(filePath, async () => {
        // Open in append mode, creating the journal as 0o600 on first write.
        // The mode only matters when the file is being created; later appends
        // ride the existing mode.
        const handle = await fs.open(filePath, "a", 0o600);
        try {
            await handle.writeFile(serialised, "utf8");
        }
        finally {
            await handle.close();
        }
    });
    // Forget the cached read so the next readRuns sees the new row.
    cache.delete(path.resolve(workspaceRoot));
    const { id, status, invocation } = run;
    logger.info("run recorded", {
        details: { file: filePath, id, status, invocation },
    });
    return { filePath, record: run };
}
|
|
354
|
+
//# sourceMappingURL=store.js.map
|